diff --git a/compute/zbuild.c b/compute/zbuild.c
index 22f2676df7c3ec145eb454e4b44fabe07c86e1db..6ec2419ca0e1fb3ed94cb1e5b0ab5b25200e5c1a 100644
--- a/compute/zbuild.c
+++ b/compute/zbuild.c
@@ -66,9 +66,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -175,8 +174,7 @@ int CHAMELEON_zbuild( cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeadd.c b/compute/zgeadd.c
index cc14238e9b9b5868a8efe5d6adf297ca6ce21176..e2674316ab921ebe05ecc4911d9af42b1b5f573a 100644
--- a/compute/zgeadd.c
+++ b/compute/zgeadd.c
@@ -75,8 +75,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -211,8 +210,7 @@ int CHAMELEON_zgeadd( cham_trans_t trans, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgelqf.c b/compute/zgelqf.c
index 8c0cc4bec038b8d787854331a87b3c8f33272a12..9752503348ad2496623502c68ccf73b1983ba9be 100644
--- a/compute/zgelqf.c
+++ b/compute/zgelqf.c
@@ -56,9 +56,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -165,8 +164,7 @@ int CHAMELEON_zgelqf( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c
index fecf0f20f31a8e1bff2314c9da0ba9cb6967a6cb..c507463efd24bc87eca387424338611b14abe8a0 100644
--- a/compute/zgelqf_param.c
+++ b/compute/zgelqf_param.c
@@ -54,9 +54,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -164,8 +163,7 @@ int CHAMELEON_zgelqf_param( const libhqr_tree_t *qrtree, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgelqs.c b/compute/zgelqs.c
index 08ec0dd3dcd37eb606db79e900464cda4686f5ad..cc9b89fef023c319bcffa3d960776f0b804f7e4f 100644
--- a/compute/zgelqs.c
+++ b/compute/zgelqs.c
@@ -62,9 +62,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -186,8 +185,7 @@ int CHAMELEON_zgelqs( int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c
index b594dd602962c779e6450134376570ce8feadc04..720ab5d4d8ffb1708a5cb35a05be2032ff1b6eba 100644
--- a/compute/zgelqs_param.c
+++ b/compute/zgelqs_param.c
@@ -64,9 +64,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -192,8 +191,7 @@ int CHAMELEON_zgelqs_param( const libhqr_tree_t *qrtree, int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgels.c b/compute/zgels.c
index 99d7914da27fd1cdc88463a70e07ba5fee7d4675..276c4e3a6dd67569b5347bd3178f7a5b8a87d439 100644
--- a/compute/zgels.c
+++ b/compute/zgels.c
@@ -89,9 +89,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -244,8 +243,7 @@ int CHAMELEON_zgels( cham_trans_t trans, int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \return CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgels_param.c b/compute/zgels_param.c
index ea23c9a4919594174e511c4a3256b61c9233ba4d..34ab5c6098db2a5c176d70b6ff8b0607a9117b25 100644
--- a/compute/zgels_param.c
+++ b/compute/zgels_param.c
@@ -92,9 +92,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -250,8 +249,7 @@ int CHAMELEON_zgels_param( const libhqr_tree_t *qrtree, cham_trans_t trans, int
  *
  *******************************************************************************
  *
- * @return
- *          \return CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgemm.c b/compute/zgemm.c
index 8d7cfba196d5e40338bc0760d5e1167cbaaa480e..e266039adbb24d049cae77f1d426c3015283ae6b 100644
--- a/compute/zgemm.c
+++ b/compute/zgemm.c
@@ -114,8 +114,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -283,8 +282,7 @@ int CHAMELEON_zgemm( cham_trans_t transA, cham_trans_t transB, int M, int N, int
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c
index f365fc5106713605e8a485b5193412494c95e51b..9ccd619c9fd7e6672f9f412a1c6974f81d4a436c 100644
--- a/compute/zgeqrf.c
+++ b/compute/zgeqrf.c
@@ -55,9 +55,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -164,8 +163,7 @@ int CHAMELEON_zgeqrf( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c
index 36cb65d2ad8e391e8b6cd16f425b18e93ea9bf98..d83e3f447bde001a9c2286087179cfc0555a8f8e 100644
--- a/compute/zgeqrf_param.c
+++ b/compute/zgeqrf_param.c
@@ -59,9 +59,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -173,8 +172,7 @@ int CHAMELEON_zgeqrf_param( const libhqr_tree_t *qrtree, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c
index ee2d2bc6d43636e7da152fd1b39ed0e91ebaa803..7af82e43f8b38bc7ea5ae41db9b15aaf99b1e31f 100644
--- a/compute/zgeqrs.c
+++ b/compute/zgeqrs.c
@@ -62,9 +62,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -185,8 +184,7 @@ int CHAMELEON_zgeqrs( int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c
index 15a5ff0d64df8e8dc001aee59844fef3fcf34e40..7fe000a501eb4af3283bf31ea6d8827fccabda9b 100644
--- a/compute/zgeqrs_param.c
+++ b/compute/zgeqrs_param.c
@@ -58,9 +58,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -182,8 +181,7 @@ int CHAMELEON_zgeqrs_param( const libhqr_tree_t *qrtree, int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgesv_incpiv.c b/compute/zgesv_incpiv.c
index 6dd3073f6016555191f353a3b30492edc10743a7..275b6dc6a0c7a20ebda02dcb1e1efd37d79e0e5c 100644
--- a/compute/zgesv_incpiv.c
+++ b/compute/zgesv_incpiv.c
@@ -67,10 +67,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, so the solution could not be computed.
  *
  *******************************************************************************
@@ -189,9 +188,8 @@ int CHAMELEON_zgesv_incpiv( int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, so the solution could not be computed.
  *
  *******************************************************************************
diff --git a/compute/zgesv_nopiv.c b/compute/zgesv_nopiv.c
index 7dbf73caabdf784a30cdca8628235525ff7a4b5a..f7dfbb88070893e20d28f90e6c43ac0d71b79131 100644
--- a/compute/zgesv_nopiv.c
+++ b/compute/zgesv_nopiv.c
@@ -66,10 +66,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, so the solution could not be computed.
  *
  *******************************************************************************
@@ -179,9 +178,8 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, so the solution could not be computed.
  *
  *******************************************************************************
diff --git a/compute/zgesvd.c b/compute/zgesvd.c
index a9ba03d418296d6db8f7c78c0277f58f93be690d..5b3cf9bd6f58c122ec99132e04d46a9f42d0d808 100644
--- a/compute/zgesvd.c
+++ b/compute/zgesvd.c
@@ -129,9 +129,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -318,8 +317,7 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt,
  *
  *******************************************************************************
  *
- * @return
- *          \return CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgetrf_incpiv.c b/compute/zgetrf_incpiv.c
index f990126ebc1a351e7164c4c3cebb114aa3d4c422..273b247f3fc0131d4193b72a483059dbd283d2ec 100644
--- a/compute/zgetrf_incpiv.c
+++ b/compute/zgetrf_incpiv.c
@@ -56,10 +56,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, and division by zero will occur
  *               if it is used to solve a system of equations.
  *
@@ -166,9 +165,8 @@ int CHAMELEON_zgetrf_incpiv( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, and division by zero will occur
  *               if it is used to solve a system of equations.
  *
diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c
index f99d3d9e14cb1fbc75cd51bc579011ea0516ebeb..0e1004c432ccb7dd8b1692bdad1098438b2e4703 100644
--- a/compute/zgetrf_nopiv.c
+++ b/compute/zgetrf_nopiv.c
@@ -50,10 +50,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been
  *               completed, but the factor U is exactly singular, and division
  *               by zero will occur if it is used to solve a system of
  *               equations.
@@ -151,9 +150,8 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, and division by zero will occur
  *               if it is used to solve a system of equations.
  *
diff --git a/compute/zgetrs_incpiv.c b/compute/zgetrs_incpiv.c
index f0cf32cb5195efd4b9409ff39dc9d7953c6ea7c1..8d9aa36302f170407efaa5fccc21902c27448520 100644
--- a/compute/zgetrs_incpiv.c
+++ b/compute/zgetrs_incpiv.c
@@ -69,9 +69,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \return <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -194,8 +193,7 @@ int CHAMELEON_zgetrs_incpiv( cham_trans_t trans, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgetrs_nopiv.c b/compute/zgetrs_nopiv.c
index fb8ac0722c3e2026689d09cc70abb1cdc4488eeb..33b3cf70b835d2fa212016ef6c662282c3659d1f 100644
--- a/compute/zgetrs_nopiv.c
+++ b/compute/zgetrs_nopiv.c
@@ -64,9 +64,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \return <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -181,8 +180,7 @@ int CHAMELEON_zgetrs_nopiv( cham_trans_t trans, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zheevd.c b/compute/zheevd.c
index 7f1a8b497ae733f1364976b1fdd5ad061b4e7b01..1291e43a80216b90109c21eeb83e346e5c3acdcf 100644
--- a/compute/zheevd.c
+++ b/compute/zheevd.c
@@ -79,10 +79,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if INFO = i, the algorithm failed to converge; i
  *               off-diagonal elements of an intermediate tridiagonal
  *               form did not converge to zero.
  *
@@ -219,10 +218,9 @@ int CHAMELEON_zheevd( cham_job_t jobz, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if INFO = i, the algorithm failed to converge; i
  *               off-diagonal elements of an intermediate tridiagonal
  *               form did not converge to zero.
  *
diff --git a/compute/zhemm.c b/compute/zhemm.c
index fd968b42d8cb8069db027fb448e6ab938fcad41e..43f123975694bd2edc2be179c9e958d23b816f2b 100644
--- a/compute/zhemm.c
+++ b/compute/zhemm.c
@@ -90,8 +90,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -248,8 +247,7 @@ int CHAMELEON_zhemm( cham_side_t side, cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zher2k.c b/compute/zher2k.c
index 216de17a5501bcedbc327c89f45dc46eee803c3c..fc8a746a376d15afaf21cdc045316bcc4f355a05 100644
--- a/compute/zher2k.c
+++ b/compute/zher2k.c
@@ -92,8 +92,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -249,8 +248,7 @@ int CHAMELEON_zher2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zherk.c b/compute/zherk.c
index 13e59ce4ca1d7b8e0d1dccfd2eb2a93eeb0598d8..ff3b21ddc6932f6e277d5d5b738d7c0480278a69 100644
--- a/compute/zherk.c
+++ b/compute/zherk.c
@@ -82,8 +82,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -225,8 +224,7 @@ int CHAMELEON_zherk( cham_uplo_t uplo, cham_trans_t trans, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zhetrd.c b/compute/zhetrd.c
index 0815e1dd689c39e660a729c133efdade2487e92a..f0686a16cdbf90f06c084cd75225b3fdf921c3c5 100644
--- a/compute/zhetrd.c
+++ b/compute/zhetrd.c
@@ -92,10 +92,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if INFO = i, the algorithm failed to converge; i
  *               off-diagonal elements of an intermediate tridiagonal
  *               form did not converge to zero.
  *
@@ -245,10 +244,9 @@ int CHAMELEON_zhetrd( cham_job_t jobz, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if INFO = i, the algorithm failed to converge; i
  *               off-diagonal elements of an intermediate tridiagonal
  *               form did not converge to zero.
  *
diff --git a/compute/zlacpy.c b/compute/zlacpy.c
index 7bd1696375142e99fbc9e0ab282fddcba63b39db..73ad779e636e58cb542c07635388783e609d29c1 100644
--- a/compute/zlacpy.c
+++ b/compute/zlacpy.c
@@ -180,8 +180,7 @@ int CHAMELEON_zlacpy( cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlange.c b/compute/zlange.c
index 522c8c204369a4042b34ae17b2f592177e924883..b1e9269d2a95b1543fb5a4e13876dcaeb6e5e7ef 100644
--- a/compute/zlange.c
+++ b/compute/zlange.c
@@ -66,8 +66,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval the norm described above.
+ * @return the norm described above.
  *
  *******************************************************************************
  *
@@ -176,8 +175,7 @@ double CHAMELEON_zlange(cham_normtype_t norm, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlanhe.c b/compute/zlanhe.c
index e2dad154ce1362a68acc633a3f8d9ae682748855..50f3d1f921accf201cea42a8ee2844fd7e0a6ed5 100644
--- a/compute/zlanhe.c
+++ b/compute/zlanhe.c
@@ -66,8 +66,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval the norm described above.
+ * @return the norm described above.
  *
  *******************************************************************************
  *
@@ -180,8 +179,7 @@ double CHAMELEON_zlanhe(cham_normtype_t norm, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlansy.c b/compute/zlansy.c
index dc9b1236e34d55b0b3ccae8ea3f7cbf19ec2e9c8..c7e39a45357ba69b1119368df1955988887c29bc 100644
--- a/compute/zlansy.c
+++ b/compute/zlansy.c
@@ -66,8 +66,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval the norm described above.
+ * @return the norm described above.
  *
  *******************************************************************************
  *
@@ -180,8 +179,7 @@ double CHAMELEON_zlansy(cham_normtype_t norm, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlantr.c b/compute/zlantr.c
index 6721a9b9f722346c8d137349360d4f76fb3a141c..005fea133c568a2ab996587efccd6a60f281ec38 100644
--- a/compute/zlantr.c
+++ b/compute/zlantr.c
@@ -78,8 +78,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval the norm described above.
+ * @return the norm described above.
  *
  *******************************************************************************
  *
@@ -202,8 +201,7 @@ double CHAMELEON_zlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlascal.c b/compute/zlascal.c
index bae3815fbab3de093be482f78e24623f54b163a4..0d0ff18b6c932510fc00752cc10438fdd852d443 100644
--- a/compute/zlascal.c
+++ b/compute/zlascal.c
@@ -57,8 +57,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -168,8 +167,7 @@ int CHAMELEON_zlascal( cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlaset.c b/compute/zlaset.c
index 7001e66a2a328fbd57839a9c03b0082eaf86ac7f..0ab77a34e5d420f76400e57667f3ac7eb7926416 100644
--- a/compute/zlaset.c
+++ b/compute/zlaset.c
@@ -167,8 +167,7 @@ int CHAMELEON_zlaset( cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlauum.c b/compute/zlauum.c
index 9907d0b08e91b7e53bbc51dcd98b0e7b1ec4f042..254eb2b18c531218a96dff303b4e98ecfe5c5c5d 100644
--- a/compute/zlauum.c
+++ b/compute/zlauum.c
@@ -58,9 +58,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -165,8 +164,7 @@ int CHAMELEON_zlauum( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zplghe.c b/compute/zplghe.c
index 3fd07d51100f262c74bf4f886a5015e0ce41ab0b..ceb0a138bd2ea9895e133a76657a65d2a3ff5750 100644
--- a/compute/zplghe.c
+++ b/compute/zplghe.c
@@ -56,9 +56,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -160,8 +159,7 @@ int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zplgsy.c b/compute/zplgsy.c
index 809e2a224489c7b3c7f613eb769e310f03b6682f..ff033d819c0d41ba443b941afdeef091fb8152be 100644
--- a/compute/zplgsy.c
+++ b/compute/zplgsy.c
@@ -56,9 +56,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -160,8 +159,7 @@ int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zplrnt.c b/compute/zplrnt.c
index 3e15ea36a04665199694ec993ddcb95feab5003c..56a3cedaff0d5af567215579cfb88f2a1d4485c3 100644
--- a/compute/zplrnt.c
+++ b/compute/zplrnt.c
@@ -49,9 +49,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -150,8 +149,7 @@ int CHAMELEON_zplrnt( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zposv.c b/compute/zposv.c
index 055f17e887d210b4aaa2b931c12e7017e2ef8a5d..668fec3c5d9c5dfd0a2ba1211204364b6d9dda77 100644
--- a/compute/zposv.c
+++ b/compute/zposv.c
@@ -75,10 +75,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
@@ -206,9 +205,8 @@ int CHAMELEON_zposv( cham_uplo_t uplo, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
diff --git a/compute/zpotrf.c b/compute/zpotrf.c
index bb84853375351a679a23519f2d6624f9fb434139..d7054e42d3b2d31854a6b09351dda7ff268df65b 100644
--- a/compute/zpotrf.c
+++ b/compute/zpotrf.c
@@ -62,10 +62,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
@@ -175,9 +174,8 @@ int CHAMELEON_zpotrf( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
diff --git a/compute/zpotri.c b/compute/zpotri.c
index d903bda64a2f1188ea6e83f8a68b90676e6db7e8..2de905c8d664c2814c121a58f3385c9182e24399 100644
--- a/compute/zpotri.c
+++ b/compute/zpotri.c
@@ -53,10 +53,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the (i,i) element of the factor U or L is
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the (i,i) element of the factor U or L is
  *                zero, and the inverse could not be computed.
  *
  *******************************************************************************
@@ -162,9 +161,8 @@ int CHAMELEON_zpotri( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, the leading minor of order i of A is not
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not
  *               positive definite, so the factorization could not be
  *               completed, and the solution has not been computed.
  *
diff --git a/compute/zpotrimm.c b/compute/zpotrimm.c
index 0f3d8146ebbd28882bfe7722aee8c5b80bd05bb3..ca57f496200bb6ac750c79d2ec45c87283b18cc0 100644
--- a/compute/zpotrimm.c
+++ b/compute/zpotrimm.c
@@ -53,10 +53,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the (i,i) element of the factor U or L is
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the (i,i) element of the factor U or L is
  *                zero, and the inverse could not be computed.
  *
  *******************************************************************************
@@ -184,9 +183,8 @@ int CHAMELEON_zpotrimm( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, the leading minor of order i of A is not
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not
  *               positive definite, so the factorization could not be
  *               completed, and the solution has not been computed.
  *
diff --git a/compute/zpotrs.c b/compute/zpotrs.c
index 3e242d114c0701a73d321e21da6d4fd20d0fda8e..7cce83910f3f4373b2399bcc0b0aa3c27c261ce1 100644
--- a/compute/zpotrs.c
+++ b/compute/zpotrs.c
@@ -61,9 +61,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -183,8 +182,7 @@ int CHAMELEON_zpotrs( cham_uplo_t uplo, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsymm.c b/compute/zsymm.c
index 4a64f907ed3df914ec54976ad7c5ed418e034fb3..13221e335beb7e5f6a33024501cb139a0fa02d05 100644
--- a/compute/zsymm.c
+++ b/compute/zsymm.c
@@ -90,8 +90,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -248,8 +247,7 @@ int CHAMELEON_zsymm( cham_side_t side, cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsyr2k.c b/compute/zsyr2k.c
index 9ef35214280cefa021a263699a7a851272a7f8be..0fe3e6f9e7e06c833b80c8bff071ab04066e1195 100644
--- a/compute/zsyr2k.c
+++ b/compute/zsyr2k.c
@@ -92,8 +92,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -249,8 +248,7 @@ int CHAMELEON_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsyrk.c b/compute/zsyrk.c
index e1c6db98671b0b7919c34cea3bddb6c8a088bb38..91f4627b8eee038f2770768a258f06d6aa79cf0d 100644
--- a/compute/zsyrk.c
+++ b/compute/zsyrk.c
@@ -82,8 +82,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -225,8 +224,7 @@ int CHAMELEON_zsyrk( cham_uplo_t uplo, cham_trans_t trans, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsysv.c b/compute/zsysv.c
index 5b40d66e5f272b524dd80567afda064408b51fe6..baf78e90ecda60750ffa178d8e1481d810972f54 100644
--- a/compute/zsysv.c
+++ b/compute/zsysv.c
@@ -76,9 +76,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -203,8 +202,7 @@ int CHAMELEON_zsysv( cham_uplo_t uplo, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsytrf.c b/compute/zsytrf.c
index 508f93a9aae8998f848c233aa945020f3aa9dd27..44ea078c64fa69630a5b1d894e81c589c71bfe26 100644
--- a/compute/zsytrf.c
+++ b/compute/zsytrf.c
@@ -57,10 +57,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
@@ -169,8 +168,7 @@ int CHAMELEON_zsytrf( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsytrs.c b/compute/zsytrs.c
index 90256661b285ce9c880934f41d71fd02afbb4968..4eeb3d7d307756cdad813137b96e16947b892c6e 100644
--- a/compute/zsytrs.c
+++ b/compute/zsytrs.c
@@ -63,9 +63,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -182,8 +181,7 @@ int CHAMELEON_zsytrs( cham_uplo_t uplo, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztile.c b/compute/ztile.c
index d95a729b7bb430b4fe99fcee7275fe4ef2055252..3cafc9b7b3eec2522daf4c8f54cbd021fa85f0d0 100644
--- a/compute/ztile.c
+++ b/compute/ztile.c
@@ -45,8 +45,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -122,8 +121,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c
index 4be3c82932a45caa1861f42068ca2d2b53c73616..b9d07b870a69e6c3c1aa19b4cd89d7fbfad77399 100644
--- a/compute/ztpgqrt.c
+++ b/compute/ztpgqrt.c
@@ -115,9 +115,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -258,8 +257,7 @@ int CHAMELEON_ztpgqrt( int M, int N, int K, int L,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c
index 8847d9235b61c7920351f710d6c3ce8c7c48d1f0..04e7ddfa6d575afd06920ecad135650ded12f045 100644
--- a/compute/ztpqrt.c
+++ b/compute/ztpqrt.c
@@ -110,9 +110,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -235,8 +234,7 @@ int CHAMELEON_ztpqrt( int M, int N, int L,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztradd.c b/compute/ztradd.c
index f5f2d82172781483fb423282e6974dfb7611b25a..b5e85ec81d06231a65b0a53e681c172bdedb76c0 100644
--- a/compute/ztradd.c
+++ b/compute/ztradd.c
@@ -81,8 +81,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -227,8 +226,7 @@ int CHAMELEON_ztradd( cham_uplo_t uplo, cham_trans_t trans, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztrmm.c b/compute/ztrmm.c
index 96ef0f7e3bcc393d796e0dc7bbe852b3365df85a..3380900f601a27ba11d54afa31da69a86c1b4ca3 100644
--- a/compute/ztrmm.c
+++ b/compute/ztrmm.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -249,8 +248,7 @@ int CHAMELEON_ztrmm( cham_side_t side, cham_uplo_t uplo,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztrsm.c b/compute/ztrsm.c
index abcdf8e9eb89aeeef8270ed779bdbdb85d311585..cc76ab7bd91ca283c3eebeea5528b1d3e0a4b69e 100644
--- a/compute/ztrsm.c
+++ b/compute/ztrsm.c
@@ -83,9 +83,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -247,8 +246,7 @@ int CHAMELEON_ztrsm( cham_side_t side, cham_uplo_t uplo,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztrsmpl.c b/compute/ztrsmpl.c
index dd7859cf82554b3a9a8cf508f2dda6aecf076ef1..2cac2da6252e50183a4770655e6ebd419dd63910 100644
--- a/compute/ztrsmpl.c
+++ b/compute/ztrsmpl.c
@@ -61,9 +61,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -179,8 +178,7 @@ int CHAMELEON_ztrsmpl( int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztrtri.c b/compute/ztrtri.c
index cb19dffa9c01b4fd3fb2297a4f03dff0aaaa2a39..6a2f8f3210b028f87b1231cd23078e22e70445ac 100644
--- a/compute/ztrtri.c
+++ b/compute/ztrtri.c
@@ -61,10 +61,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, A(i,i) is exactly zero.  The triangular
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, A(i,i) is exactly zero.  The triangular
  *               matrix is singular and its inverse can not be computed.
  *
  *******************************************************************************
@@ -182,9 +181,8 @@ int CHAMELEON_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, A(i,i) is exactly zero.  The triangular
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, A(i,i) is exactly zero.  The triangular
  *               matrix is singular and its inverse can not be computed.
  *
  *******************************************************************************
diff --git a/compute/zunglq.c b/compute/zunglq.c
index 41015464634f910376b21d6d3f9df91dd83d4371..ef284b8619fea77d6fc180752f6601f515fe027f 100644
--- a/compute/zunglq.c
+++ b/compute/zunglq.c
@@ -62,9 +62,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -181,8 +180,7 @@ int CHAMELEON_zunglq( int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c
index e6d36954211c8a931a7d8e9aa2b96a55c1953557..09f40a29fe07d4cd5d6a1b2f5db697b3004b95d9 100644
--- a/compute/zunglq_param.c
+++ b/compute/zunglq_param.c
@@ -60,9 +60,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -180,8 +179,7 @@ int CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zungqr.c b/compute/zungqr.c
index c51539616760f3c45ceecf2b1f631846fc5443fc..6ae056b2d64175bc6b890652f73184d7b135ac98 100644
--- a/compute/zungqr.c
+++ b/compute/zungqr.c
@@ -62,9 +62,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -180,8 +179,7 @@ int CHAMELEON_zungqr( int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c
index 5b46d66ff7626ee058fb7119d0ee344728509a3e..9ed032da7ee145eed609138f24acd5a42c902ae6 100644
--- a/compute/zungqr_param.c
+++ b/compute/zungqr_param.c
@@ -60,9 +60,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -181,8 +180,7 @@ int CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunmlq.c b/compute/zunmlq.c
index f460e12e4b353fa1746f03a9bfd4b9706e8f564b..f3948bf3992c474a41095820a1e388a8d8826c2c 100644
--- a/compute/zunmlq.c
+++ b/compute/zunmlq.c
@@ -86,9 +86,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -234,8 +233,7 @@ int CHAMELEON_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c
index 4c0a72358b357b1c3e83253c9dc0844606e2bea4..46372cef9aeb0fcd0fe41d70558f503a9ce55378 100644
--- a/compute/zunmlq_param.c
+++ b/compute/zunmlq_param.c
@@ -86,9 +86,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -233,8 +232,7 @@ int CHAMELEON_zunmlq_param( const libhqr_tree_t *qrtree, cham_side_t side, cham_
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunmqr.c b/compute/zunmqr.c
index 6271ed98eeedbcb23bb88ff909ad303fa8d70c42..78be51f52c3054b5a2aa6b7703e580f8a425adbe 100644
--- a/compute/zunmqr.c
+++ b/compute/zunmqr.c
@@ -88,9 +88,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -236,8 +235,7 @@ int CHAMELEON_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c
index 5674ba090c9ceb0c581b383993b58a8ec0fb6573..434c16a049a445988a238130ed15dc1e5dd5944b 100644
--- a/compute/zunmqr_param.c
+++ b/compute/zunmqr_param.c
@@ -89,9 +89,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -239,8 +238,7 @@ int CHAMELEON_zunmqr_param( const libhqr_tree_t *qrtree,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/control/async.c b/control/async.c
index 5e65c2b092ef64edb8819099cd261986a09a7fd6..55351f6b61fa207817377075de94b67c0fc1a6d8 100644
--- a/control/async.c
+++ b/control/async.c
@@ -86,8 +86,7 @@ int chameleon_sequence_wait(CHAM_context_t *chamctxt, RUNTIME_sequence_t *sequen
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Sequence_Create(RUNTIME_sequence_t **sequence)
@@ -117,8 +116,7 @@ int CHAMELEON_Sequence_Create(RUNTIME_sequence_t **sequence)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Sequence_Destroy(RUNTIME_sequence_t *sequence)
@@ -152,8 +150,7 @@ int CHAMELEON_Sequence_Destroy(RUNTIME_sequence_t *sequence)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Sequence_Wait(RUNTIME_sequence_t *sequence)
@@ -190,8 +187,7 @@ int CHAMELEON_Sequence_Wait(RUNTIME_sequence_t *sequence)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Sequence_Flush(RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
diff --git a/control/auxiliary.c b/control/auxiliary.c
index 032dc06846ead2ffbbc4139d50470ba3536c27ba..5f90b85afc4a93d915ca8f2a9ae80830226341c2 100644
--- a/control/auxiliary.c
+++ b/control/auxiliary.c
@@ -139,8 +139,7 @@ int chameleon_tune(cham_tasktype_t func, int M, int N, int NRHS)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro)
@@ -180,8 +179,7 @@ int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro)
  *
  ******************************************************************************
  *
- * @return
- *          \retval Element size in bytes
+ * @return Element size in bytes
  *
  */
 int CHAMELEON_Element_Size(int type)
@@ -209,8 +207,7 @@ int CHAMELEON_Element_Size(int type)
  *
  ******************************************************************************
  *
- * @return
- *          \retval MPI rank
+ * @return MPI rank
  *
  */
 int CHAMELEON_My_Mpi_Rank(void)
diff --git a/control/context.c b/control/context.c
index 881abe9746eae4441b2ba387dfc11474970c76e7..fa0dcd2502c795edde63b2a73153ae263db1b15b 100644
--- a/control/context.c
+++ b/control/context.c
@@ -123,8 +123,7 @@ int chameleon_context_destroy(){
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Enable(int option)
@@ -192,8 +191,7 @@ int CHAMELEON_Enable(int option)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Disable(int option)
@@ -256,8 +254,7 @@ int CHAMELEON_Disable(int option)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Set( int param, int value )
@@ -350,8 +347,7 @@ int CHAMELEON_Set( int param, int value )
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Get(int param, int *value)
diff --git a/control/control.c b/control/control.c
index 8a8de87173c63ad1998baa0ee00e31fcd9df0110..08765ca2da91ba886eb73945ec38be5a657b08e4 100644
--- a/control/control.c
+++ b/control/control.c
@@ -154,8 +154,7 @@ int __chameleon_finalize(void)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Pause(void)
@@ -178,8 +177,7 @@ int CHAMELEON_Pause(void)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Resume(void)
@@ -201,8 +199,7 @@ int CHAMELEON_Resume(void)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Distributed_start(void)
@@ -224,8 +221,7 @@ int CHAMELEON_Distributed_start(void)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Distributed_stop(void)
@@ -294,8 +290,7 @@ int CHAMELEON_Comm_rank()
  *
  ******************************************************************************
  *
- * @return
- *          \retval The number of CPU workers started
+ * @return The number of CPU workers started
  *
  */
 int CHAMELEON_GetThreadNbr( )
diff --git a/control/descriptor.c b/control/descriptor.c
index 06e52cdec39845cccc8f57af889fd1ee9f3e5c3a..f32800a2847a943f18c70ea6546c1bc125d3e9bb 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -413,8 +413,7 @@ int chameleon_desc_check(const CHAM_desc_t *desc)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz,
@@ -476,8 +475,7 @@ int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz,
@@ -560,8 +558,7 @@ int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz,
@@ -621,8 +618,7 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz,
diff --git a/control/tile.c b/control/tile.c
index 960c88f5d198248c8458127d8bc8efc07466c301..560f5dd7d88ab0fe034a68a62be1f704e7b1a119 100644
--- a/control/tile.c
+++ b/control/tile.c
@@ -44,8 +44,7 @@
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Lapack_to_Tile(void *Af77, int LDA, CHAM_desc_t *A)
@@ -86,8 +85,7 @@ int CHAMELEON_Lapack_to_Tile(void *Af77, int LDA, CHAM_desc_t *A)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Tile_to_Lapack(CHAM_desc_t *A, void *Af77, int LDA)
diff --git a/control/workspace.c b/control/workspace.c
index 8039447fbd09b0a93610ae1a2344eaf8198ddc1c..4a8b078e37fc12e95a4e3fd100490534c107f5d1 100644
--- a/control/workspace.c
+++ b/control/workspace.c
@@ -138,8 +138,7 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Dealloc_Workspace(CHAM_desc_t **desc)
diff --git a/control/workspace_z.c b/control/workspace_z.c
index 6009bac50438fc4002894ae79acc03be7d038441..732d86fe0ba2054fb08ffcaaabd0ded9cd711cc0 100644
--- a/control/workspace_z.c
+++ b/control/workspace_z.c
@@ -45,8 +45,7 @@
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgeev(int N, CHAM_desc_t **descT, int p, int q) {
@@ -70,8 +69,7 @@ int CHAMELEON_Alloc_Workspace_zgeev(int N, CHAM_desc_t **descT, int p, int q) {
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgehrd(int N, CHAM_desc_t **descT, int p, int q) {
@@ -97,8 +95,7 @@ int CHAMELEON_Alloc_Workspace_zgehrd(int N, CHAM_desc_t **descT, int p, int q) {
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgebrd(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -125,8 +122,7 @@ int CHAMELEON_Alloc_Workspace_zgebrd(int M, int N, CHAM_desc_t **descT, int p, i
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgels(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -159,8 +155,7 @@ int CHAMELEON_Alloc_Workspace_zgels(int M, int N, CHAM_desc_t **descT, int p, in
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgels_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -193,8 +188,7 @@ int CHAMELEON_Alloc_Workspace_zgels_Tile(int M, int N, CHAM_desc_t **descT, int
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgeqrf(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -227,8 +221,7 @@ int CHAMELEON_Alloc_Workspace_zgeqrf(int M, int N, CHAM_desc_t **descT, int p, i
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgeqrf_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -261,8 +254,7 @@ int CHAMELEON_Alloc_Workspace_zgeqrf_Tile(int M, int N, CHAM_desc_t **descT, int
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgelqf(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -294,8 +286,7 @@ int CHAMELEON_Alloc_Workspace_zgelqf(int M, int N, CHAM_desc_t **descT, int p, i
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgelqf_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -324,8 +315,7 @@ int CHAMELEON_Alloc_Workspace_zgelqf_Tile(int M, int N, CHAM_desc_t **descT, int
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) {
@@ -354,8 +344,7 @@ int CHAMELEON_Alloc_Workspace_zgesv_incpiv(int N, CHAM_desc_t **descL, int **IPI
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv_Tile(int N, CHAM_desc_t **descL, int **IPIV, int p, int q)
@@ -388,8 +377,7 @@ int CHAMELEON_Alloc_Workspace_zgesv_incpiv_Tile(int N, CHAM_desc_t **descL, int
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgesvd(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -421,8 +409,7 @@ int CHAMELEON_Alloc_Workspace_zgesvd(int M, int N, CHAM_desc_t **descT, int p, i
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  ******************************************************************************
  *
@@ -456,8 +443,7 @@ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, i
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgetrf_incpiv_Tile(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) {
@@ -488,8 +474,7 @@ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv_Tile(int N, CHAM_desc_t **descL, int
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zheev(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -520,8 +505,7 @@ int CHAMELEON_Alloc_Workspace_zheev(int M, int N, CHAM_desc_t **descT, int p, in
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zheevd(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -552,8 +536,7 @@ int CHAMELEON_Alloc_Workspace_zheevd(int M, int N, CHAM_desc_t **descT, int p, i
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zhegv(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -584,8 +567,7 @@ int CHAMELEON_Alloc_Workspace_zhegv(int M, int N, CHAM_desc_t **descT, int p, in
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zhegvd(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -616,8 +598,7 @@ int CHAMELEON_Alloc_Workspace_zhegvd(int M, int N, CHAM_desc_t **descT, int p, i
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zhetrd(int M, int N, CHAM_desc_t **descT, int p, int q) {
diff --git a/coreblas/compute/core_zaxpy.c b/coreblas/compute/core_zaxpy.c
index d3477032aaabbf4b96e086d2e70c6dc1bb8f92f4..a982aaafe5b017f84f650dda4f138f3559e0d2e4 100644
--- a/coreblas/compute/core_zaxpy.c
+++ b/coreblas/compute/core_zaxpy.c
@@ -47,9 +47,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgeadd.c b/coreblas/compute/core_zgeadd.c
index a85bec68ad82791840cb6bbb4c16f18ec4c64f13..5afb5a770667100d1a96e0fc53c346686d9b39c5 100644
--- a/coreblas/compute/core_zgeadd.c
+++ b/coreblas/compute/core_zgeadd.c
@@ -71,9 +71,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_zgelqt.c b/coreblas/compute/core_zgelqt.c
index 7a2a74ca07a08a234c95db214485f12c46097f42..7793a76dfe48317139724030174871e59c7a64d9 100644
--- a/coreblas/compute/core_zgelqt.c
+++ b/coreblas/compute/core_zgelqt.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgeqrt.c b/coreblas/compute/core_zgeqrt.c
index 76fcfdfc2ea9e9a273cd70a2e0a78bdfc3c6d26f..ab568186669fc4080cda78d59012ac33a97d0871 100644
--- a/coreblas/compute/core_zgeqrt.c
+++ b/coreblas/compute/core_zgeqrt.c
@@ -83,9 +83,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgesplit.c b/coreblas/compute/core_zgesplit.c
index 5255442c522f938f0218b74ab7e7497268723aaf..0f30ae8161a168e94b1741644cf3c37aa0329178 100644
--- a/coreblas/compute/core_zgesplit.c
+++ b/coreblas/compute/core_zgesplit.c
@@ -56,9 +56,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgessm.c b/coreblas/compute/core_zgessm.c
index 9757800cd92f38fe1ce1a4a977f8e77d0d357412..c395a30ff5e614c6c88c61c8a914a06f2e748255 100644
--- a/coreblas/compute/core_zgessm.c
+++ b/coreblas/compute/core_zgessm.c
@@ -68,9 +68,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgessq.c b/coreblas/compute/core_zgessq.c
index 55bbf091f25b2a1f5a2a8e8c4116cd12ac54f3d3..e6462f97966c1394983610070496b454f7d8046b 100644
--- a/coreblas/compute/core_zgessq.c
+++ b/coreblas/compute/core_zgessq.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -k, the k-th argument had an illegal value
  *
  */
 int CORE_zgessq(int M, int N,
diff --git a/coreblas/compute/core_zgetf2_nopiv.c b/coreblas/compute/core_zgetf2_nopiv.c
index 91c313430f30aa8a1edcb395a5cb44de7789d3c3..18836b6e1a0d87d398ef64ddd2cbe80b89b4ccd3 100644
--- a/coreblas/compute/core_zgetf2_nopiv.c
+++ b/coreblas/compute/core_zgetf2_nopiv.c
@@ -58,10 +58,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/coreblas/compute/core_zgetrf_incpiv.c b/coreblas/compute/core_zgetrf_incpiv.c
index b47084b3cbf23a4ccad1ccdef1974a7c9dd2e0c7..b1355e645a0a767645327463db1cc7cf5091123f 100644
--- a/coreblas/compute/core_zgetrf_incpiv.c
+++ b/coreblas/compute/core_zgetrf_incpiv.c
@@ -71,10 +71,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/coreblas/compute/core_zgetrf_nopiv.c b/coreblas/compute/core_zgetrf_nopiv.c
index b7661ba5c260dc0352e6f2fd5024aa31cb86d56c..fbd34a12877458bcb8308b4c0e3a994444574162 100644
--- a/coreblas/compute/core_zgetrf_nopiv.c
+++ b/coreblas/compute/core_zgetrf_nopiv.c
@@ -60,10 +60,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/coreblas/compute/core_zherfb.c b/coreblas/compute/core_zherfb.c
index d3653d55b79a6e3010bbae702ccb7180e7e3a6b3..d1f952bf7b42fef74f00c61935bf0f85f4d9c751 100644
--- a/coreblas/compute/core_zherfb.c
+++ b/coreblas/compute/core_zherfb.c
@@ -85,9 +85,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_zhessq.c b/coreblas/compute/core_zhessq.c
index 250962aba64c50ab0a611a318a6028ec43cdbe90..d5b9685156af9af0237d6768f9b6bc1eb5db092f 100644
--- a/coreblas/compute/core_zhessq.c
+++ b/coreblas/compute/core_zhessq.c
@@ -88,9 +88,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zlascal.c b/coreblas/compute/core_zlascal.c
index 645bc6714621667debb2747082412450139d8c1d..50654a63b36686a27506209695fc1830f6628f65 100644
--- a/coreblas/compute/core_zlascal.c
+++ b/coreblas/compute/core_zlascal.c
@@ -52,9 +52,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int
diff --git a/coreblas/compute/core_zlatro.c b/coreblas/compute/core_zlatro.c
index 2bdcbfc311199b37303e07beccd1bbfc61a8fd4c..c22ac72ab946c069e3e9845b4af485f3216f032f 100644
--- a/coreblas/compute/core_zlatro.c
+++ b/coreblas/compute/core_zlatro.c
@@ -72,9 +72,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_zpamm.c b/coreblas/compute/core_zpamm.c
index 35c8e049015a9aed8e55bf27b397d078717db1fe..2dd190e9c350ef64723628844e10c1b945f9e6f0 100644
--- a/coreblas/compute/core_zpamm.c
+++ b/coreblas/compute/core_zpamm.c
@@ -174,9 +174,8 @@ static inline int CORE_zpamm_w(cham_side_t side, cham_trans_t trans, cham_uplo_t
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int
diff --git a/coreblas/compute/core_zparfb.c b/coreblas/compute/core_zparfb.c
index a359402d6b90c8aa0484e1d1b587c413060561c4..05d07f72e7b0aba13efc8efce861aac50426a963 100644
--- a/coreblas/compute/core_zparfb.c
+++ b/coreblas/compute/core_zparfb.c
@@ -132,9 +132,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 /* This kernel is never traced so return type on previous line for convert2eztrace.pl script */
diff --git a/coreblas/compute/core_zpemv.c b/coreblas/compute/core_zpemv.c
index 62213c723b142898fc571557e49a7c95788521e9..6b8fc9ed644fa092084b7eeeb85aad7601c9e942 100644
--- a/coreblas/compute/core_zpemv.c
+++ b/coreblas/compute/core_zpemv.c
@@ -113,9 +113,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zssssm.c b/coreblas/compute/core_zssssm.c
index 87d18d295cb546114672a6c4682b3810791870fe..ef5bd6a1708848be54c7b4c7efd0e744b3b5dbb3 100644
--- a/coreblas/compute/core_zssssm.c
+++ b/coreblas/compute/core_zssssm.c
@@ -91,9 +91,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zsyssq.c b/coreblas/compute/core_zsyssq.c
index a2c19544b05dc575c9e818fdd1c520c52e0dbdd5..8bce64cec2667154c21e464b0cbc0dd3dcc6407a 100644
--- a/coreblas/compute/core_zsyssq.c
+++ b/coreblas/compute/core_zsyssq.c
@@ -88,9 +88,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztplqt.c b/coreblas/compute/core_ztplqt.c
index b2794dc6d5a5ad66de31889744430873bce2ac90..e80f80a963ec421613fd16367b6e92a0f274c70e 100644
--- a/coreblas/compute/core_ztplqt.c
+++ b/coreblas/compute/core_ztplqt.c
@@ -77,9 +77,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int CORE_ztplqt( int M, int N, int L, int IB,
diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c
index 5909f19eed9ae021590445237dfe9a5dcdc76e0d..6584e2ba54661d60358ef7d69d00fe8d75f1fd16 100644
--- a/coreblas/compute/core_ztpmqrt.c
+++ b/coreblas/compute/core_ztpmqrt.c
@@ -129,9 +129,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztpqrt.c b/coreblas/compute/core_ztpqrt.c
index ddfbb49ab5948f5a933bd513290ac1e64a6aa457..a251bed84768c8ab8830b81d10c84f2cde64b36a 100644
--- a/coreblas/compute/core_ztpqrt.c
+++ b/coreblas/compute/core_ztpqrt.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int CORE_ztpqrt( int M, int N, int L, int IB,
diff --git a/coreblas/compute/core_ztradd.c b/coreblas/compute/core_ztradd.c
index 831ad069eedc3939b66730b8ab3f2e736a746824..3242ae53a19b2eced1e97631a74c702a77e054f1 100644
--- a/coreblas/compute/core_ztradd.c
+++ b/coreblas/compute/core_ztradd.c
@@ -74,9 +74,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_ztrssq.c b/coreblas/compute/core_ztrssq.c
index f01e63663267cb0c12529416ef7597c214078bc5..61cc2994dbb82cdf124d45164c0df56aa784edee 100644
--- a/coreblas/compute/core_ztrssq.c
+++ b/coreblas/compute/core_ztrssq.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztslqt.c b/coreblas/compute/core_ztslqt.c
index da5b27078042e1436a1f628612ae003d13a17be5..156429d2b372243d73d75cbbef92c60cdf7a6d90 100644
--- a/coreblas/compute/core_ztslqt.c
+++ b/coreblas/compute/core_ztslqt.c
@@ -94,9 +94,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c
index a9324fa04fac4b04ae9676a59570b9f1ff4bf922..c2238aed6ba9d5555c05331c69171b88967d7f2f 100644
--- a/coreblas/compute/core_ztsmlq.c
+++ b/coreblas/compute/core_ztsmlq.c
@@ -121,9 +121,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztsmlq_hetra1.c b/coreblas/compute/core_ztsmlq_hetra1.c
index ff712331781ce4679dc6331ff100a5f11b750fe6..fc0a5abda742d86602cc2da3a8c2ca85ea36d80c 100644
--- a/coreblas/compute/core_ztsmlq_hetra1.c
+++ b/coreblas/compute/core_ztsmlq_hetra1.c
@@ -108,9 +108,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_ztsmqr.c b/coreblas/compute/core_ztsmqr.c
index e4f6815814d4022358593c37b987885bdb0ccff4..aeb35c924e887f29309a987d3add5c63270fe4b2 100644
--- a/coreblas/compute/core_ztsmqr.c
+++ b/coreblas/compute/core_ztsmqr.c
@@ -121,9 +121,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztsmqr_hetra1.c b/coreblas/compute/core_ztsmqr_hetra1.c
index cfba422e72ed6aac96fb75d6fc4f65aeffe5df05..40dcf927085d15a332da64e322e3b1992506116b 100644
--- a/coreblas/compute/core_ztsmqr_hetra1.c
+++ b/coreblas/compute/core_ztsmqr_hetra1.c
@@ -110,9 +110,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_ztsqrt.c b/coreblas/compute/core_ztsqrt.c
index 7564c4edffed7e8d96a2687b6a4354f13747efe9..3bbbd8f1b2028ac449a1581d4946d51518e1d93e 100644
--- a/coreblas/compute/core_ztsqrt.c
+++ b/coreblas/compute/core_ztsqrt.c
@@ -83,9 +83,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztstrf.c b/coreblas/compute/core_ztstrf.c
index c0f5c9ecac6766b558262394314dcf1ddc642fe7..6f03a2664bb5c0956668aafddd09ecf23992b03b 100644
--- a/coreblas/compute/core_ztstrf.c
+++ b/coreblas/compute/core_ztstrf.c
@@ -84,10 +84,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/coreblas/compute/core_zttlqt.c b/coreblas/compute/core_zttlqt.c
index db12242e334aadbea103eb05fc209cfce5450ae5..b331b28712a74cfe2c641a4094b6679efbf1233c 100644
--- a/coreblas/compute/core_zttlqt.c
+++ b/coreblas/compute/core_zttlqt.c
@@ -95,9 +95,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c
index 5b6ee0261ec8e920f4883847931526fc864ddf76..b2fd886918fbaa14379544f51c50b1e127c6d5f5 100644
--- a/coreblas/compute/core_zttmlq.c
+++ b/coreblas/compute/core_zttmlq.c
@@ -113,9 +113,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zttmqr.c b/coreblas/compute/core_zttmqr.c
index 9342ecbe385fffbe86faa234e04f83a9f899341d..850f275993cc4f7880c6e0128737bbedef0fa4ee 100644
--- a/coreblas/compute/core_zttmqr.c
+++ b/coreblas/compute/core_zttmqr.c
@@ -112,9 +112,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zttqrt.c b/coreblas/compute/core_zttqrt.c
index c024dc9595a60cd5cf61757e78b4ec3d6f7ef5b7..4f127334a50d46ce5a4ee08e6cddc059cf33fbf6 100644
--- a/coreblas/compute/core_zttqrt.c
+++ b/coreblas/compute/core_zttqrt.c
@@ -95,9 +95,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zunmlq.c b/coreblas/compute/core_zunmlq.c
index c7ac26f5577764a7818fb8c47a16497dc1414316..3f1593883c548b46777c8520e9c4de921c5542a4 100644
--- a/coreblas/compute/core_zunmlq.c
+++ b/coreblas/compute/core_zunmlq.c
@@ -105,9 +105,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zunmqr.c b/coreblas/compute/core_zunmqr.c
index 59fb4c52591e570c78dc4a8b0d1a49175f52b980..347512a018f62e0d718671cfabf063371723ccc9 100644
--- a/coreblas/compute/core_zunmqr.c
+++ b/coreblas/compute/core_zunmqr.c
@@ -106,9 +106,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/eztrace_module/coreblas_eztrace_module b/coreblas/eztrace_module/coreblas_eztrace_module
index dca1e36d9e89400b90540eabed8170c493390fb0..4a868819259eeaac815d188da438d85ed19db47e 100644
--- a/coreblas/eztrace_module/coreblas_eztrace_module
+++ b/coreblas/eztrace_module/coreblas_eztrace_module
@@ -1172,12 +1172,6 @@ int  CORE_zlatro(int uplo, int trans,
                  void *A, int LDA,
                        void *B, int LDB);
 void CORE_zlauum(int uplo, int N, void *A, int LDA);
-int CORE_zpamm(int op, int side, int storev,
-               int M, int N, int K, int L,
-               void *A1, int LDA1,
-                     void *A2, int LDA2,
-               void *V, int LDV,
-                     void *W, int LDW);
 int  CORE_zparfb(int side, int trans, int direct, int storev,
                  int M1, int N1, int M2, int N2, int K, int L,
                        void *A1, int LDA1,
diff --git a/cudablas/compute/cuda_zgeadd.c b/cudablas/compute/cuda_zgeadd.c
index d7f86784ccf595180b3cff5e8af42ca4710d40d9..e520dfb0a58c3be1032ff7bdc997e8d569f5bed8 100644
--- a/cudablas/compute/cuda_zgeadd.c
+++ b/cudablas/compute/cuda_zgeadd.c
@@ -72,9 +72,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int CUDA_zgeadd(cham_trans_t trans,
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index 4fa07c2b4cb4df1146f85baa3ad7340c7ad58b10..954f675703afab35b7a758fb8e4767bf92647612 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -38,23 +38,13 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
                         int M, CHAMELEON_Complex64_t alpha,
                         const CHAM_desc_t *A, int Am, int An, int incA,
                         const CHAM_desc_t *B, int Bm, int Bn, int incB );
+void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         void *user_data, void* user_build_callback );
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                          CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb );
-void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo,
-                          int m, int n, int nb,
-                          CHAMELEON_Complex64_t alpha,
-                          const CHAM_desc_t *A, int Am, int An, int lda );
-void INSERT_TASK_zbrdalg( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo,
-                          int N, int NB,
-                          const CHAM_desc_t *A,
-                          const CHAM_desc_t *C, int Cm, int Cn,
-                          const CHAM_desc_t *S, int Sm, int Sn,
-                          int i, int j, int m, int grsiz, int BAND,
-                          int *PCOL, int *ACOL, int *MCOL );
 void INSERT_TASK_zgelqt( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -65,39 +55,6 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zgemm2( const RUNTIME_option_t *options,
-                         cham_trans_t transA, cham_trans_t transB,
-                         int m, int n, int k, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zgemm_f2( const RUNTIME_option_t *options,
-                           cham_trans_t transA, cham_trans_t transB,
-                           int m, int n, int k, int nb,
-                           CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                           const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                           CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc,
-                           const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
-                           const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
-void INSERT_TASK_zgemm_p2( const RUNTIME_option_t *options,
-                           cham_trans_t transA, cham_trans_t transB,
-                           int m, int n, int k, int nb,
-                           CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                           const CHAMELEON_Complex64_t **B, int ldb,
-                           CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zgemm_p2f1( const RUNTIME_option_t *options,
-                             cham_trans_t transA, cham_trans_t transB,
-                             int m, int n, int k, int nb,
-                             CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                             const CHAMELEON_Complex64_t **B, int ldb,
-                             CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc,
-                             const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1 );
-void INSERT_TASK_zgemm_p3( const RUNTIME_option_t *options,
-                           cham_trans_t transA, cham_trans_t transB,
-                           int m, int n, int k, int nb,
-                           CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                           const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                           CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t **C, int ldc );
 void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -126,28 +83,6 @@ void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options,
 void INSERT_TASK_zgetrf_nopiv( const RUNTIME_option_t *options,
                                int m, int n, int ib, int nb,
                                const CHAM_desc_t *A, int Am, int An, int lda, int iinfo );
-void INSERT_TASK_zgetrf_reclap( const RUNTIME_option_t *options,
-                                int m, int n, int nb,
-                                const CHAM_desc_t *A, int Am, int An, int lda,
-                                int *IPIV,
-
-                                cham_bool_t check_info, int iinfo,
-                                int nbthread );
-void INSERT_TASK_zgetrf_rectil( const RUNTIME_option_t *options,
-                                const CHAM_desc_t A, const CHAM_desc_t *Amn, int Amnm, int Amnn, int size,
-                                int *IPIV,
-
-                                cham_bool_t check_info, int iinfo,
-                                int nbthread );
-void INSERT_TASK_zgetrip( const RUNTIME_option_t *options,
-                          int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA );
-void INSERT_TASK_zgetrip_f1( const RUNTIME_option_t *options,
-                             int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA,
-                             const CHAM_desc_t *fake, int fakem, int faken, int szeF, int paramF );
-void INSERT_TASK_zgetrip_f2( const RUNTIME_option_t *options,
-                             int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA,
-                             const CHAM_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1,
-                             const CHAM_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2 );
 void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
                          cham_uplo_t uplo,
                          int m, int n, int mb,
@@ -159,16 +94,6 @@ void INSERT_TASK_zhemm( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zhegst( const RUNTIME_option_t *options,
-                         int itype, cham_uplo_t uplo, int N,
-                         const CHAM_desc_t *A, int Am, int An, int LDA,
-                         const CHAM_desc_t *B, int Bm, int Bn, int LDB,
-                         int iinfo );
-void INSERT_TASK_zherk( const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, cham_trans_t trans,
-                        int n, int k, int nb,
-                        double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
 void INSERT_TASK_zher2k( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans,
                          int n, int k, int nb,
@@ -181,6 +106,15 @@ void INSERT_TASK_zherfb( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
                          const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+void INSERT_TASK_zherk( const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, cham_trans_t trans,
+                        int n, int k, int nb,
+                        double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                        double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int m, int n, int mb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -196,10 +130,6 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options,
 void INSERT_TASK_zlange_max( const RUNTIME_option_t *options,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *B, int Bm, int Bn );
-void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_zlanhe( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
                          const CHAM_desc_t *A, int Am, int An, int LDA,
@@ -213,31 +143,18 @@ void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
                          int M, int N, int NB,
                          const CHAM_desc_t *A, int Am, int An, int LDA,
                          const CHAM_desc_t *B, int Bm, int Bn );
+void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo,
+                          int m, int n, int nb,
+                          CHAMELEON_Complex64_t alpha,
+                          const CHAM_desc_t *A, int Am, int An, int lda );
 void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
-                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
+                         cham_uplo_t uplo, int n1, int n2,
+                         CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
+                         const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
 void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
                           const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
-void INSERT_TASK_zlaswp( const RUNTIME_option_t *options,
-                         int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                         int i1,  int i2, int *ipiv, int inc );
-void INSERT_TASK_zlaswp_f2( const RUNTIME_option_t *options,
-                            int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                            int i1,  int i2, int *ipiv, int inc,
-                            const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
-                            const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
-void INSERT_TASK_zlaswp_ontile( const RUNTIME_option_t *options,
-                                const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
-                                int i1,  int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel );
-void INSERT_TASK_zlaswp_ontile_f2( const RUNTIME_option_t *options,
-                                   const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
-                                   int i1,  int i2, int *ipiv, int inc,
-                                   const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
-                                   const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
-void INSERT_TASK_zlaswpc_ontile( const RUNTIME_option_t *options,
-                                 const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
-                                 int i1,  int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel );
 void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -254,17 +171,15 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
                          int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
                          int bigM, int m0, int n0, unsigned long long int seed );
+void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
+                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn );
+void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn );
 void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
-
                          int iinfo );
-void INSERT_TASK_zshift( const RUNTIME_option_t *options,
-                         int s, int m, int n, int L,
-                         CHAMELEON_Complex64_t *A );
-void INSERT_TASK_zshiftw( const RUNTIME_option_t *options,
-                          int s, int cl, int m, int n, int L,
-                          const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t *W );
 void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
                          int m1, int n1, int m2, int n2, int k, int ib, int nb,
                          const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
@@ -278,17 +193,17 @@ void INSERT_TASK_zsymm( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, cham_trans_t trans,
-                        int n, int k, int nb,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
 void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans,
                          int n, int k, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *B, int Bm, int Bn, int LDB,
                          CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, cham_trans_t trans,
+                        int n, int k, int nb,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -297,13 +212,6 @@ void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
                                cham_uplo_t uplo, int n, int nb,
                                const CHAM_desc_t *A, int Am, int An, int lda,
                                int iinfo );
-void INSERT_TASK_zswpab( const RUNTIME_option_t *options,
-                         int i, int n1, int n2,
-                         const CHAM_desc_t *A, int Am, int An, int szeA );
-void INSERT_TASK_zswptr_ontile( const RUNTIME_option_t *options,
-                                const CHAM_desc_t descA, const CHAM_desc_t *Aij, int Aijm, int Aijn,
-                                int i1,  int i2, int *ipiv, int inc,
-                                const CHAM_desc_t *Akk, int Akkm, int Akkn, int ldak );
 void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
                          int m, int n, int l, int ib, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -328,14 +236,6 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                          const CHAM_desc_t *T, int Tm, int Tn, int ldt );
-void INSERT_TASK_ztrdalg( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo,
-                          int N, int NB,
-                          const CHAM_desc_t *A,
-                          const CHAM_desc_t *C, int Cm, int Cn,
-                          const CHAM_desc_t *S, int Sm, int Sn,
-                          int i, int j, int m, int grsiz, int BAND,
-                          int *PCOL, int *ACOL, int *MCOL );
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
@@ -349,11 +249,6 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
                         int m, int n, int nb,
                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                         const CHAM_desc_t *B, int Bm, int Bn, int ldb );
-void INSERT_TASK_ztrmm_p2( const RUNTIME_option_t *options,
-                           cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                           int m, int n, int nb,
-                           CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                           CHAMELEON_Complex64_t **B, int ldb );
 void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
                         cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                         int m, int n, int nb,
@@ -390,18 +285,6 @@ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
                          const CHAM_desc_t *L, int Lm, int Ln, int ldl,
                          int *IPIV,
                          cham_bool_t check_info, int iinfo );
-void INSERT_TASK_zpamm( const RUNTIME_option_t *options,
-                        int op, cham_side_t side, cham_store_t storev,
-                        int m, int n, int k, int l,
-                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                        const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                        const CHAM_desc_t *W, int Wm, int Wn, int ldw );
-void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
-                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn );
-void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn );
 void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int m, int n, int ib,  int nb, int k,
@@ -414,10 +297,6 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
                          const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         void *user_data, void* user_build_callback );
-
 
 /**
  * Keep these insert_task for retro-compatibility
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index cabe559c252719de9501e0f95093de147fa9ec18..309dea8965cd4b14209eb23ece2be43f9df3f3b7 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -28,7 +28,6 @@
 # List of codelets required by all runtimes
 # -----------------------------------------
 set(CODELETS_ZSRC
-    codelets/codelet_ztile_zero.c
     codelets/codelet_zasum.c
     ##################
     # BLAS 1
diff --git a/runtime/openmp/codelets/codelet_zgeadd.c b/runtime/openmp/codelets/codelet_zgeadd.c
index 1d18ff18f83a3d4f7f6343e92ec16246d265ef96..2ceeb81594eafa6be29085ef33bc2caa8ed80a4f 100644
--- a/runtime/openmp/codelets/codelet_zgeadd.c
+++ b/runtime/openmp/codelets/codelet_zgeadd.c
@@ -31,7 +31,7 @@
  *
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
+ * @brief Adds two general matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -75,15 +75,14 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
-                       cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
+                         cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c
index 3341a8f01532ea77a638799a7557c7414cb1299f..8dd282d6259b0e684ceca96576d09af9e54eab3f 100644
--- a/runtime/openmp/codelets/codelet_zgelqt.c
+++ b/runtime/openmp/codelets/codelet_zgelqt.c
@@ -84,9 +84,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/runtime/openmp/codelets/codelet_zgemm.c b/runtime/openmp/codelets/codelet_zgemm.c
index 68aec8de49aa853b5ca3aa8bd9d54ddd9ca2eb30..b2737c3884e7180e8a5fe1e7454e204956dc63af 100644
--- a/runtime/openmp/codelets/codelet_zgemm.c
+++ b/runtime/openmp/codelets/codelet_zgemm.c
@@ -43,7 +43,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn])
+#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
     CORE_zgemm(transA, transB,
         m, n, k,
         alpha, ptrA, lda,
diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c
index 6428375b2e9b2ef6c20e6bbc8f803e986e4a1ef2..f8bf811afa1756f8265e7ad73b1694742018d1b1 100644
--- a/runtime/openmp/codelets/codelet_zgeqrt.c
+++ b/runtime/openmp/codelets/codelet_zgeqrt.c
@@ -85,9 +85,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/runtime/openmp/codelets/codelet_zgessm.c b/runtime/openmp/codelets/codelet_zgessm.c
index cd24a4ac0a6e9e1fd04743d12ac110dc4334f4f8..2ed15696ad7e68429bca9fc1da16b0f34b986870 100644
--- a/runtime/openmp/codelets/codelet_zgessm.c
+++ b/runtime/openmp/codelets/codelet_zgessm.c
@@ -68,9 +68,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
@@ -83,6 +82,6 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrD = RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn);
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0:Dm*Dn]) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0]) depend(inout:ptrA[0])
     CORE_zgessm(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda);
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf.c b/runtime/openmp/codelets/codelet_zgetrf.c
index d7cc9fe75179ed236a3a0a80246ff01fc6962b30..ab9869f7ebc4d5ddc54e49cc670c9218ed39bea7 100644
--- a/runtime/openmp/codelets/codelet_zgetrf.c
+++ b/runtime/openmp/codelets/codelet_zgetrf.c
@@ -34,6 +34,6 @@ void INSERT_TASK_zgetrf(const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     int info = 0;
-#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0])
+#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0])
     CORE_zgetrf( m, n, ptrA, lda, IPIV, &info );
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
index 20b5e92d3cd22eb6d323769e5f7d65d020858452..9f26a7064a8597129deaa5fdbdbb3801cfcc355c 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
@@ -73,10 +73,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
@@ -92,6 +91,6 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     int info = 0;
-#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0])
     CORE_zgetrf_incpiv(m, n, ib, ptrA, lda, IPIV, &info);
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
index 5f26b76e9b5ba139af2d25c21d0302811f5e6baf..829b5473e869d928085743c108776db2527ecbac 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
@@ -63,10 +63,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
@@ -80,6 +79,6 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     int info = 0;
-#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0])
     CORE_zgetrf_nopiv(m, n, ib, ptrA, lda, &info);
 }
diff --git a/runtime/openmp/codelets/codelet_zhemm.c b/runtime/openmp/codelets/codelet_zhemm.c
index 331459e479f3d6330b793a5833d7f1e1575dcca0..4d632655a2eb547813ee747c40ab2f341a341bee 100644
--- a/runtime/openmp/codelets/codelet_zhemm.c
+++ b/runtime/openmp/codelets/codelet_zhemm.c
@@ -43,7 +43,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn])
+#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
     CORE_zhemm(side, uplo,
         m, n,
         alpha, ptrA, lda,
diff --git a/runtime/openmp/codelets/codelet_zhessq.c b/runtime/openmp/codelets/codelet_zhessq.c
index 46cd0f5c7037b64610dd3d2aa7cfc501101114ad..409d413ddfd9a78deb7182f1caca79d63f8dddb2 100644
--- a/runtime/openmp/codelets/codelet_zhessq.c
+++ b/runtime/openmp/codelets/codelet_zhessq.c
@@ -31,6 +31,6 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
-#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrScaleSum[0:SCALESUMSQm*SCALESUMSQn]) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0])
     CORE_zhessq( uplo, n, ptrA, lda, &ptrScaleSum[0], &ptrScaleSum[1] );
 }
diff --git a/runtime/openmp/codelets/codelet_zlacpy.c b/runtime/openmp/codelets/codelet_zlacpy.c
index 74e420c31c178c45a96f4fd57ac4d05751ad4abd..44ea300eb4f8675400ae2de3cd4055cbdc5104b3 100644
--- a/runtime/openmp/codelets/codelet_zlacpy.c
+++ b/runtime/openmp/codelets/codelet_zlacpy.c
@@ -33,10 +33,10 @@
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
  */
-void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, int m, int n, int nb,
-                        int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int m, int n, int nb,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A + displA, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B + displB, CHAMELEON_Complex64_t, Bm, Bn);
@@ -44,12 +44,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
     CORE_zlacpy(uplo, m, n, ptrA, lda, ptrB, ldb);
 }
 
-void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                        0, A, Am, An, lda,
-                        0, B, Bm, Bn, ldb );
+                         0, A, Am, An, lda,
+                         0, B, Bm, Bn, ldb );
 }
diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c
index b65a938fec6ae4ac078210d7befc9820fc0c7bb9..26a024cd0a9b0d8ffd3aa35ead905e44d8e820df 100644
--- a/runtime/openmp/codelets/codelet_zlag2c.c
+++ b/runtime/openmp/codelets/codelet_zlag2c.c
@@ -31,13 +31,24 @@
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
  */
-void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
-                       int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlag2c( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn])
+    CHAMELEON_Complex32_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn);
+#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
     CORE_zlag2c( m, n, ptrA, lda, ptrB, ldb);
 }
+
+void INSERT_TASK_clag2z( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    CHAMELEON_Complex32_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
+    CORE_clag2z( m, n, ptrA, lda, ptrB, ldb);
+}
diff --git a/runtime/openmp/codelets/codelet_zlascal.c b/runtime/openmp/codelets/codelet_zlascal.c
index d579bb39ae0f47273c14c301b5f7fd05f9d665ec..2aa990418ec83c10e2e6e2658a96636b8bee3cf7 100644
--- a/runtime/openmp/codelets/codelet_zlascal.c
+++ b/runtime/openmp/codelets/codelet_zlascal.c
@@ -51,9 +51,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
@@ -64,6 +63,6 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
                         const CHAM_desc_t *A, int Am, int An, int lda)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0])
     CORE_zlascal(uplo, m, n, alpha, ptrA, lda);
 }
diff --git a/runtime/openmp/codelets/codelet_zlatro.c b/runtime/openmp/codelets/codelet_zlatro.c
index 6f7ba5fa5bae73976ee6b81a4cdc609c2cf4962d..ec50bb9cfb1ab86d62e8e0bbf5800ef979b6cbb6 100644
--- a/runtime/openmp/codelets/codelet_zlatro.c
+++ b/runtime/openmp/codelets/codelet_zlatro.c
@@ -33,14 +33,14 @@
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
  */
-void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans,
+                         int m, int n, int mb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn])
+#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
     CORE_zlatro(uplo, trans, m, n, ptrA, lda, ptrB, ldb);
 }
diff --git a/runtime/openmp/codelets/codelet_zlauum.c b/runtime/openmp/codelets/codelet_zlauum.c
index 7ab7c8b99de5c9e5646562eb5b3ab3ebc32b0209..70030fc19d84452c4f5b0d67cec643ce08f61fee 100644
--- a/runtime/openmp/codelets/codelet_zlauum.c
+++ b/runtime/openmp/codelets/codelet_zlauum.c
@@ -38,6 +38,6 @@ void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
                        const CHAM_desc_t *A, int Am, int An, int lda)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0])
     CORE_zlauum(uplo, n, ptrA, lda);
 }
diff --git a/runtime/openmp/codelets/codelet_zplghe.c b/runtime/openmp/codelets/codelet_zplghe.c
index 06e890a459444492cf59384f5eaebe65f469a92e..a785b19c2bf0dcaf26e8f667cb2dc4e1340c063a 100644
--- a/runtime/openmp/codelets/codelet_zplghe.c
+++ b/runtime/openmp/codelets/codelet_zplghe.c
@@ -28,13 +28,11 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/*   INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */
-
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
     CORE_zplghe( bump, m, n, ptrA, lda, bigM, m0, n0, seed );
 }
diff --git a/runtime/openmp/codelets/codelet_zplgsy.c b/runtime/openmp/codelets/codelet_zplgsy.c
index 5269d527617a8efac61a2f841401af2b9fcb03aa..4a3cea2f68787ab24e4b0aec63158198954f3f6f 100644
--- a/runtime/openmp/codelets/codelet_zplgsy.c
+++ b/runtime/openmp/codelets/codelet_zplgsy.c
@@ -28,11 +28,9 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/*   INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
-
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
-                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
 #pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
diff --git a/runtime/openmp/codelets/codelet_zplrnt.c b/runtime/openmp/codelets/codelet_zplrnt.c
index 35cb6300c55c4d8fcf3276f952767fd24321981b..139f44c8bce94b472019fa6beb9e5ae639799666 100644
--- a/runtime/openmp/codelets/codelet_zplrnt.c
+++ b/runtime/openmp/codelets/codelet_zplrnt.c
@@ -28,11 +28,9 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/*   INSERT_TASK_zplrnt - Generate a tile for random matrix. */
-
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                        int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
 #pragma omp task firstprivate(m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
diff --git a/runtime/openmp/codelets/codelet_zplssq.c b/runtime/openmp/codelets/codelet_zplssq.c
index 7ee45f66de44b77073ee4714ea1e7ebf5d9504cc..cec083dca98e2fe5d69829bb72feb0d2a54d7173 100644
--- a/runtime/openmp/codelets/codelet_zplssq.c
+++ b/runtime/openmp/codelets/codelet_zplssq.c
@@ -29,7 +29,7 @@
  *
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_zplssq returns: scl * sqrt(ssq)
+ * @brief Compute sqrt( sum( |a_ij|^2 ) ) = scl * sqrt(ssq)
  *
  * with scl and ssq such that
  *
@@ -74,7 +74,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
     CHAMELEON_Complex64_t *res = RTBLKADDR(RESULT, CHAMELEON_Complex64_t, RESULTm, RESULTn);
 
diff --git a/runtime/openmp/codelets/codelet_zssssm.c b/runtime/openmp/codelets/codelet_zssssm.c
index 38d9ad5e3c116a48251199d610ac494efa411f0b..db82b480eb690cad20be54444ed6afbaf0c67a37 100644
--- a/runtime/openmp/codelets/codelet_zssssm.c
+++ b/runtime/openmp/codelets/codelet_zssssm.c
@@ -91,9 +91,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
@@ -109,10 +108,8 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
     CHAMELEON_Complex64_t *ptrL1 = RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n);
     CHAMELEON_Complex64_t *ptrL2 = RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n);
-#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV)\
-    depend(inout:ptrA1[0])\
-    depend(inout:ptrA2[0])\
-    depend(in:ptrL1[0])\
-    depend(in:ptrL2[0])
+
+#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV) \
+    depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrL1[0], ptrL2[0])
     CORE_zssssm(m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrL1, ldl1, ptrL2, ldl2, IPIV);
 }
diff --git a/runtime/openmp/codelets/codelet_zsymm.c b/runtime/openmp/codelets/codelet_zsymm.c
index efe71b42578d7b5f71c1b3a3dc770705da213dbe..76d6ec7b70665be5f78253d7016984c8a2ecb132 100644
--- a/runtime/openmp/codelets/codelet_zsymm.c
+++ b/runtime/openmp/codelets/codelet_zsymm.c
@@ -41,7 +41,7 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn])
+#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
     CORE_zsymm(side, uplo,
         m, n,
         alpha, ptrA, lda,
diff --git a/runtime/openmp/codelets/codelet_zsyssq.c b/runtime/openmp/codelets/codelet_zsyssq.c
index c2d69dc57523b0340e89253aec2e985eb78ee6ee..86b58eb001623830450f47c006f55ea01acee0b6 100644
--- a/runtime/openmp/codelets/codelet_zsyssq.c
+++ b/runtime/openmp/codelets/codelet_zsyssq.c
@@ -29,6 +29,6 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
-#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0:Am*An]) depend(inout:ptrSCALESUMSQ[0])
+#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0])
     CORE_zsyssq( uplo, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1] );
 }
diff --git a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
index 1ebd1aa08878024b0379a6bdef55f06bdd48a0f0..73032cf982d18a8d41b9044ed8786fd5f27b80e5 100644
--- a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
@@ -32,6 +32,6 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
                              int iinfo)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0])
     CORE_zsytf2_nopiv(uplo, n, ptrA, lda);
 }
diff --git a/runtime/openmp/codelets/codelet_ztile_zero.c b/runtime/openmp/codelets/codelet_ztile_zero.c
deleted file mode 100644
index 96ef911bf329829e3f282b448b11f277ce114a27..0000000000000000000000000000000000000000
--- a/runtime/openmp/codelets/codelet_ztile_zero.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- *
- * @file openmp/codelet_ztile_zero.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon ztile_zero StarPU codelet
- *
- * @version 1.0.0
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Jakub Kurzak
- * @date 2010-11-15
- * @precisions normal z -> c d s
- *
- */
-
-#include "chameleon_openmp.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas.h"
-/**
- *
- */
-void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
-                            int X1, int X2, int Y1, int Y2,
-                            const CHAM_desc_t *A, int Am, int An, int lda )
-{
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    int x, y;
-    for (x = X1; x < X2; x++)
-        for (y = Y1; y < Y2; y++)
-            ptrA[lda*x+y] = 0.0;
-}
diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c
index 4bb4f16f030a9f5aa58a4dc9e66f28cefc1b4a39..367e437a75308d151ee3c0dcb6c1e31242491071 100644
--- a/runtime/openmp/codelets/codelet_ztplqt.c
+++ b/runtime/openmp/codelets/codelet_ztplqt.c
@@ -20,12 +20,12 @@
 
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void
-INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
-                   int M, int N, int L, int ib, int nb,
-                   const CHAM_desc_t *A, int Am, int An, int lda,
-                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                   const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+
+void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
@@ -36,8 +36,7 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
     {
       CHAMELEON_Complex64_t work[ws_size];
 
-      CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt);
-
+      CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt );
       CORE_ztplqt( M, N, L, ib,
                    ptrA, lda, ptrB, ldb, ptrT, ldt, work );
     }
diff --git a/runtime/openmp/codelets/codelet_ztpmlqt.c b/runtime/openmp/codelets/codelet_ztpmlqt.c
index 543704822c5ddecca1c432a875ac357ace7c2a66..a5da0f533070ee0dc5416736ec409729f9b4c2dd 100644
--- a/runtime/openmp/codelets/codelet_ztpmlqt.c
+++ b/runtime/openmp/codelets/codelet_ztpmlqt.c
@@ -17,24 +17,25 @@
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void
-INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
-                    cham_side_t side, cham_trans_t trans,
-                    int M, int N, int K, int L, int ib, int nb,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                    const CHAM_desc_t *A, int Am, int An, int lda,
-                    const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+
+void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
+                          cham_side_t side, cham_trans_t trans,
+                          int M, int N, int K, int L, int ib, int nb,
+                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                          const CHAM_desc_t *A, int Am, int An, int lda,
+                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
     CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
     int ws_size = options->ws_wsize;
+
 #pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
     {
-      CHAMELEON_Complex64_t work[ws_size];
-      CORE_ztpmlqt( side, trans, M, N, K, L, ib,
-                    ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work );
+        CHAMELEON_Complex64_t work[ws_size];
+        CORE_ztpmlqt( side, trans, M, N, K, L, ib,
+                      ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztpmqrt.c b/runtime/openmp/codelets/codelet_ztpmqrt.c
index 4f3262221eb3027fb82d1d2f93b9d8cd0ad09aeb..5378a2a5b5af0257991ca8ac6cb8736c8f898a00 100644
--- a/runtime/openmp/codelets/codelet_ztpmqrt.c
+++ b/runtime/openmp/codelets/codelet_ztpmqrt.c
@@ -17,20 +17,21 @@
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void
-INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
-                    cham_side_t side, cham_trans_t trans,
-                    int M, int N, int K, int L, int ib, int nb,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                    const CHAM_desc_t *A, int Am, int An, int lda,
-                    const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+
+void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
+                          cham_side_t side, cham_trans_t trans,
+                          int M, int N, int K, int L, int ib, int nb,
+                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                          const CHAM_desc_t *A, int Am, int An, int lda,
+                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
     CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
     int ws_size = options->ws_wsize;
+
 #pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, nb, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
     {
         CHAMELEON_Complex64_t tmp[ws_size];
diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c
index 7381f6ebdc7682b4f2fc73c976d5e00f4cf21bf6..755de21bdd376553e9e22490be44c605f41c8c20 100644
--- a/runtime/openmp/codelets/codelet_ztpqrt.c
+++ b/runtime/openmp/codelets/codelet_ztpqrt.c
@@ -19,12 +19,12 @@
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void
-INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
-                   int M, int N, int L, int ib, int nb,
-                   const CHAM_desc_t *A, int Am, int An, int lda,
-                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                   const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+
+void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
@@ -35,9 +35,8 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
     {
       CHAMELEON_Complex64_t tmp[ws_size];
 
-      CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt);
-
+      CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt );
       CORE_ztpqrt( M, N, L, ib,
-          ptrA, lda, ptrB, ldb, ptrT, ldt, tmp );
+                   ptrA, lda, ptrB, ldb, ptrT, ldt, tmp );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztradd.c b/runtime/openmp/codelets/codelet_ztradd.c
index 9a39aaf56d2ac9366c1e7c8c2986d678f9379db1..384ba192da8c797e51c62f3a279001842402275e 100644
--- a/runtime/openmp/codelets/codelet_ztradd.c
+++ b/runtime/openmp/codelets/codelet_ztradd.c
@@ -22,12 +22,13 @@
 
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+
 /**
  ******************************************************************************
  *
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd.
+ * @brief Adds two trapezoidal matrices together as in PBLAS pztradd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -77,18 +78,18 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+
 #pragma omp task firstprivate(uplo, trans, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
     CORE_ztradd(uplo, trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb);
 }
diff --git a/runtime/openmp/codelets/codelet_ztstrf.c b/runtime/openmp/codelets/codelet_ztstrf.c
index cb612cb6fd9fa8a0667218b3f2ff70878964c4d1..4072b5d68998d0d497e77267ab899b6a27c55874 100644
--- a/runtime/openmp/codelets/codelet_ztstrf.c
+++ b/runtime/openmp/codelets/codelet_ztstrf.c
@@ -83,10 +83,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c
index 348b290a1973097fcebf2acac241916a7b5de906..0dd8f263dcb597e85a218a8577598e75369c0d49 100644
--- a/runtime/openmp/codelets/codelet_zunmlq.c
+++ b/runtime/openmp/codelets/codelet_zunmlq.c
@@ -105,9 +105,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c
index 42765470397c5f2fa9feb7bb1e27a4b7019f83e4..ed40c42110b843ba304eea3f001df82cc3fd52fe 100644
--- a/runtime/openmp/codelets/codelet_zunmqr.c
+++ b/runtime/openmp/codelets/codelet_zunmqr.c
@@ -105,9 +105,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/runtime/parsec/codelets/codelet_zgeadd.c b/runtime/parsec/codelets/codelet_zgeadd.c
index 7ad41db2f6756522128d93ba79292216fa7dbcaa..7d937857fb1b4b09c6501747397bdd32407029db 100644
--- a/runtime/parsec/codelets/codelet_zgeadd.c
+++ b/runtime/parsec/codelets/codelet_zgeadd.c
@@ -52,7 +52,7 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context,
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
+ * @brief Adds two general matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -96,15 +96,14 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
-                       cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
+                         cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
diff --git a/runtime/parsec/codelets/codelet_zgelqt.c b/runtime/parsec/codelets/codelet_zgelqt.c
index 4ef5b5b7a6dbe1f5d385a775e580d6a13199f082..32a7dfaa96e6c629f43c21fa22d85e63243a3264 100644
--- a/runtime/parsec/codelets/codelet_zgelqt.c
+++ b/runtime/parsec/codelets/codelet_zgelqt.c
@@ -76,9 +76,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 static inline int
diff --git a/runtime/parsec/codelets/codelet_zgeqrt.c b/runtime/parsec/codelets/codelet_zgeqrt.c
index 53ac8ac042386c01281c123e0488819bb299ee90..3aaaf84cf85fe02c488c5a8f546130b0b92e0a02 100644
--- a/runtime/parsec/codelets/codelet_zgeqrt.c
+++ b/runtime/parsec/codelets/codelet_zgeqrt.c
@@ -77,9 +77,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 static inline int
diff --git a/runtime/parsec/codelets/codelet_zgessm.c b/runtime/parsec/codelets/codelet_zgessm.c
index a7f62dc4b8bf1ad8614abcfd4908f4f8960e6fa7..a4762cfa289b975da19f5065ed9988494a50e9dc 100644
--- a/runtime/parsec/codelets/codelet_zgessm.c
+++ b/runtime/parsec/codelets/codelet_zgessm.c
@@ -60,9 +60,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 static inline int
diff --git a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
index 09ef6c4012089c7d4f54c2a46b61ff9ab82a6a55..55a1fe635d96c08e99211c3d31dfe8ff13242d53 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
@@ -65,10 +65,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
index ab7f49bb183fa7d35b7884b0b0996ec6960f347d..0aadb3c900ca6fdee6ece700d9ac6c161fa9a461 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
@@ -58,10 +58,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/parsec/codelets/codelet_zlacpy.c b/runtime/parsec/codelets/codelet_zlacpy.c
index 64c777e38e09d26d0b7efb26fbc7d1bfd4ed96ab..d79617ccb3e482fb35a23b53a0b81fe2894a5115 100644
--- a/runtime/parsec/codelets/codelet_zlacpy.c
+++ b/runtime/parsec/codelets/codelet_zlacpy.c
@@ -49,11 +49,10 @@ CORE_zlacpyx_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int nb,
-                         int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                         int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                          cham_uplo_t uplo, int m, int n, int nb,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
-
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
     parsec_dtd_taskpool_insert_task(
@@ -71,12 +70,12 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
     (void)nb;
 }
 
-void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                        0, A, Am, An, lda,
-                        0, B, Bm, Bn, ldb );
+                         0, A, Am, An, lda,
+                         0, B, Bm, Bn, ldb );
 }
diff --git a/runtime/parsec/codelets/codelet_zpamm.c b/runtime/parsec/codelets/codelet_zpamm.c
deleted file mode 100644
index 3d075b0143f6e7531db8819bd25e97d6f6656b00..0000000000000000000000000000000000000000
--- a/runtime/parsec/codelets/codelet_zpamm.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- *
- * @file parsec/codelet_zpamm.c
- *
- * @copyright 2009-2015 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon zpamm PaRSEC codelet
- *
- * @version 1.0.0
- * @author Reazul Hoque
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_parsec.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  ZPAMM  performs one of the matrix-matrix operations
- *
- *                    LEFT                      RIGHT
- *     OP ChameleonW  :  W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
- *     OP ChameleonA2 :  A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
- *
- *  where  op( V ) is one of
- *
- *     op( V ) = V   or   op( V ) = V**T   or   op( V ) = V**H,
- *
- *  A1, A2 and W are general matrices, and V is:
- *
- *        l = k: rectangle + triangle
- *        l < k: rectangle + trapezoid
- *        l = 0: rectangle
- *
- *  Size of V, both rowwise and columnwise, is:
- *
- *         ----------------------
- *          side   trans    size
- *         ----------------------
- *          left     N     M x K
- *                   T     K x M
- *          right    N     K x N
- *                   T     N x K
- *         ----------------------
- *
- *  LEFT (columnwise and rowwise):
- *
- *              |    K    |                 |         M         |
- *           _  __________   _              _______________        _
- *              |    |    |                 |             | \
- *     V:       |    |    |            V':  |_____________|___\    K
- *              |    |    | M-L             |                  |
- *           M  |    |    |                 |__________________|   _
- *              |____|    |  _
- *              \    |    |                 |    M - L    | L  |
- *                \  |    |  L
- *           _      \|____|  _
- *
- *  RIGHT (columnwise and rowwise):
- *
- *          |         K         |                   |    N    |
- *          _______________        _             _  __________   _
- *          |             | \                       |    |    |
- *     V':  |_____________|___\    N        V:      |    |    |
- *          |                  |                    |    |    | K-L
- *          |__________________|   _             K  |    |    |
- *                                                  |____|    |  _
- *          |    K - L    | L  |                    \    |    |
- *                                                    \  |    |  L
- *                                               _      \|____|  _
- *
- *  Arguments
- *  ==========
- *
- * @param[in] op
- *
- *         OP specifies which operation to perform:
- *
- *         @arg ChameleonW  : W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
- *         @arg ChameleonA2 : A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
- *
- * @param[in] side
- *
- *         SIDE specifies whether  op( V ) multiplies A2
- *         or W from the left or right as follows:
- *
- *         @arg ChamLeft  : multiply op( V ) from the left
- *                            OP ChameleonW  :  W  = A1 + op(V) * A2
- *                            OP ChameleonA2 :  A2 = A2 - op(V) * W
- *
- *         @arg ChamRight : multiply op( V ) from the right
- *                            OP ChameleonW  :  W  = A1 + A2 * op(V)
- *                            OP ChameleonA2 :  A2 = A2 - W * op(V)
- *
- * @param[in] storev
- *
- *         Indicates how the vectors which define the elementary
- *         reflectors are stored in V:
- *
- *         @arg ChamColumnwise
- *         @arg ChamRowwise
- *
- * @param[in] M
- *         The number of rows of the A1, A2 and W
- *         If SIDE is ChamLeft, the number of rows of op( V )
- *
- * @param[in] N
- *         The number of columns of the A1, A2 and W
- *         If SIDE is ChamRight, the number of columns of op( V )
- *
- * @param[in] K
- *         If SIDE is ChamLeft, the number of columns of op( V )
- *         If SIDE is ChamRight, the number of rows of op( V )
- *
- * @param[in] L
- *         The size of the triangular part of V
- *
- * @param[in] A1
- *         On entry, the M-by-N tile A1.
- *
- * @param[in] LDA1
- *         The leading dimension of the array A1. LDA1 >= max(1,M).
- *
- * @param[in,out] A2
- *         On entry, the M-by-N tile A2.
- *         On exit, if OP is ChameleonA2 A2 is overwritten
- *
- * @param[in] LDA2
- *         The leading dimension of the tile A2. LDA2 >= max(1,M).
- *
- * @param[in] V
- *         The matrix V as described above.
- *         If SIDE is ChamLeft : op( V ) is M-by-K
- *         If SIDE is ChamRight: op( V ) is K-by-N
- *
- * @param[in] LDV
- *         The leading dimension of the array V.
- *
- * @param[in,out] W
- *         On entry, the M-by-N matrix W.
- *         On exit, W is overwritten either if OP is ChameleonA2 or ChameleonW.
- *         If OP is ChameleonA2, W is an input and is used as a workspace.
- *
- * @param[in] LDW
- *         The leading dimension of array WORK.
- *
- *******************************************************************************
- *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *
- */
-
-
-/**/
-
-static inline int
-CORE_zpamm_parsec( parsec_execution_stream_t *context,
-                    parsec_task_t             *this_task )
-{
-    int op;
-    cham_side_t side;
-    cham_store_t storev;
-    int M;
-    int N;
-    int K;
-    int L;
-    CHAMELEON_Complex64_t *A1;
-    int LDA1;
-    CHAMELEON_Complex64_t *A2;
-    int LDA2;
-    CHAMELEON_Complex64_t *V;
-    int LDV;
-    CHAMELEON_Complex64_t *W;
-    int LDW;
-
-    parsec_dtd_unpack_args(
-        this_task, &op, &side, &storev, &M, &N, &K, &L, &A1, &LDA1, &A2, &LDA2, &V, &LDV, &W, &LDW );
-
-    CORE_zpamm( op, side, storev, M, N, K, L, A1, LDA1, A2, LDA2, V, LDV, W, LDW );
-
-    (void)context;
-    return PARSEC_HOOK_RETURN_DONE;
-}
-
-void
-INSERT_TASK_zpamm(const RUNTIME_option_t *options,
-                 int op, cham_side_t side, cham_store_t storev,
-                 int m, int n, int k, int l,
-                 const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                 const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                       const CHAM_desc_t *W, int Wm, int Wn, int ldw)
-{
-    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-
-    parsec_dtd_taskpool_insert_task(
-        PARSEC_dtd_taskpool, CORE_zpamm_parsec, options->priority, "pamm",
-        sizeof(int),                        &op,                VALUE,
-        sizeof(int),                 &side,              VALUE,
-        sizeof(int),                 &storev,            VALUE,
-        sizeof(int),                        &m,                 VALUE,
-        sizeof(int),                        &n,                 VALUE,
-        sizeof(int),                        &k,                 VALUE,
-        sizeof(int),                        &l,                 VALUE,
-        PASSED_BY_REF,         RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INPUT,
-        sizeof(int),                        &lda1,              VALUE,
-        PASSED_BY_REF,         RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
-        sizeof(int),                        &lda2,              VALUE,
-        PASSED_BY_REF,         RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT,
-        sizeof(int),                        &ldv,               VALUE,
-        PASSED_BY_REF,         RTBLKADDR( W, CHAMELEON_Complex64_t, Wm, Wn ), chameleon_parsec_get_arena_index( W ) | INOUT,
-        sizeof(int),                        &ldw,               VALUE,
-        PARSEC_DTD_ARG_END );
-}
diff --git a/runtime/parsec/codelets/codelet_zplssq.c b/runtime/parsec/codelets/codelet_zplssq.c
index ed1fde76fcd4d9fa51f1cd6a2e717f52b92f5792..99006c131417af6b554d8c312bf589bb0a3a4ef0 100644
--- a/runtime/parsec/codelets/codelet_zplssq.c
+++ b/runtime/parsec/codelets/codelet_zplssq.c
@@ -21,11 +21,35 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
+static inline int
+CORE_zplssq_parsec( parsec_execution_stream_t *context,
+                    parsec_task_t             *this_task )
+{
+    double *SCLSSQ_IN;
+    double *SCLSSQ_OUT;
+
+    parsec_dtd_unpack_args(
+        this_task, &SCLSSQ_IN, &SCLSSQ_OUT );
+
+    assert( SCLSSQ_OUT[0] >= 0. );
+    if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
+        SCLSSQ_OUT[1] = SCLSSQ_IN[1]  + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
+        SCLSSQ_OUT[0] = SCLSSQ_IN[0];
+    } else {
+        if ( SCLSSQ_OUT[0] > 0 ) {
+            SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1]  * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
+        }
+    }
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zplssq returns: scl * sqrt(ssq)
+ * @brief Compute sqrt( sum( a_ij ^ 2 ) ) = scl * sqrt(ssq)
  *
  * with scl and ssq such that
  *
@@ -52,33 +76,9 @@
  *          On exit, result contains scl * sqrt( ssq )
  *
  */
-static inline int
-CORE_zplssq_parsec( parsec_execution_stream_t *context,
-                    parsec_task_t             *this_task )
-{
-    double *SCLSSQ_IN;
-    double *SCLSSQ_OUT;
-
-    parsec_dtd_unpack_args(
-        this_task, &SCLSSQ_IN, &SCLSSQ_OUT );
-
-    assert( SCLSSQ_OUT[0] >= 0. );
-    if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
-        SCLSSQ_OUT[1] = SCLSSQ_IN[1]  + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
-        SCLSSQ_OUT[0] = SCLSSQ_IN[0];
-    } else {
-        if ( SCLSSQ_OUT[0] > 0 ) {
-            SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1]  * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
-        }
-    }
-
-    (void)context;
-    return PARSEC_HOOK_RETURN_DONE;
-}
-
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
-                        const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
+                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
@@ -91,7 +91,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
 
 static inline int
 CORE_zplssq2_parsec( parsec_execution_stream_t *context,
-                    parsec_task_t             *this_task )
+                     parsec_task_t             *this_task )
 {
     double *RESULT;
 
@@ -105,7 +105,7 @@ CORE_zplssq2_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
diff --git a/runtime/parsec/codelets/codelet_ztile_zero.c b/runtime/parsec/codelets/codelet_ztile_zero.c
deleted file mode 100644
index e07175cb16591730697aad13b721354c36e9982d..0000000000000000000000000000000000000000
--- a/runtime/parsec/codelets/codelet_ztile_zero.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- *
- * @file parsec/codelet_ztile_zero.c
- *
- * @copyright 2009-2015 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon ztile_zero PaRSEC codelet
- *
- * @version 1.0.0
- * @author Reazul Hoque
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_parsec.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-static inline int
-CORE_ztile_zero_parsec( parsec_execution_stream_t *context,
-                        parsec_task_t             *this_task )
-{
-    int X1;
-    int X2;
-    int Y1;
-    int Y2;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int x, y;
-
-    parsec_dtd_unpack_args(
-        this_task, &X1, &X2, &Y1, &Y2, &A, &lda );
-
-    for (x = X1; x < X2; x++)
-        for (y = Y1; y < Y2; y++)
-            A[lda * x + y] = 0.0;
-
-    (void)context;
-    return PARSEC_HOOK_RETURN_DONE;
-}
-
-void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
-                            int X1, int X2, int Y1, int Y2,
-                            const CHAM_desc_t *A, int Am, int An, int lda )
-{
-    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-
-    parsec_dtd_taskpool_insert_task(
-        PARSEC_dtd_taskpool, CORE_ztile_zero_parsec, options->priority, "tile zero",
-        sizeof(int),       &X1,                       VALUE,
-        sizeof(int),       &X2,                       VALUE,
-        sizeof(int),       &Y1,                       VALUE,
-        sizeof(int),       &Y2,                       VALUE,
-        PASSED_BY_REF,     RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY,
-        sizeof(int),       &lda,                      VALUE,
-        PARSEC_DTD_ARG_END );
-}
diff --git a/runtime/parsec/codelets/codelet_ztradd.c b/runtime/parsec/codelets/codelet_ztradd.c
index b431f983a83695fb2c20a945c58246b21f380baa..267ac386a3658ff96a3f0e93e76dde86f06994d0 100644
--- a/runtime/parsec/codelets/codelet_ztradd.c
+++ b/runtime/parsec/codelets/codelet_ztradd.c
@@ -52,7 +52,7 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context,
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pztradd.
+ * @brief Adds two trapezoidal matrices together as in PBLAS pztradd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -102,15 +102,14 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
diff --git a/runtime/quark/codelets/codelet_zgeadd.c b/runtime/quark/codelets/codelet_zgeadd.c
index 16287953ca163d8ce1f341d8f6eb4c1a77f2f3b0..935bc0ba1394e243cb4cb2c922961f56a909149b 100644
--- a/runtime/quark/codelets/codelet_zgeadd.c
+++ b/runtime/quark/codelets/codelet_zgeadd.c
@@ -47,7 +47,7 @@ void CORE_zgeadd_quark(Quark *quark)
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
+ * @brief Adds two general matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -91,15 +91,14 @@ void CORE_zgeadd_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
-                       cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
+                         cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEADD;
diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c
index 240773c983156402febe2242d86d5a0b560bfeee..45db83e556754dd6e47c7b89ac8e72c24bdf6e43 100644
--- a/runtime/quark/codelets/codelet_zgelqt.c
+++ b/runtime/quark/codelets/codelet_zgelqt.c
@@ -100,9 +100,8 @@ void CORE_zgelqt_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c
index 09ed24eef4662df09ea5e8fb59029ed8d8cf46df..33ad21a52ac116c7aec197d68eca08cf92dbd84a 100644
--- a/runtime/quark/codelets/codelet_zgeqrt.c
+++ b/runtime/quark/codelets/codelet_zgeqrt.c
@@ -101,9 +101,8 @@ void CORE_zgeqrt_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_zgessm.c b/runtime/quark/codelets/codelet_zgessm.c
index d31d3dc9b1b70be57786881d8437dd6e94a65a0e..63b59a664ec8f384f65f594a0e3261a6c2c91206 100644
--- a/runtime/quark/codelets/codelet_zgessm.c
+++ b/runtime/quark/codelets/codelet_zgessm.c
@@ -86,9 +86,8 @@ void CORE_zgessm_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_zgetrf_incpiv.c b/runtime/quark/codelets/codelet_zgetrf_incpiv.c
index 7ba0886d1ca1b6a508aeae1181ba76b808e5b1aa..9b9d29a7f021be10bc6e86801c7cc5cb5c2b0568 100644
--- a/runtime/quark/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/quark/codelets/codelet_zgetrf_incpiv.c
@@ -94,10 +94,9 @@ void CORE_zgetrf_incpiv_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/quark/codelets/codelet_zgetrf_nopiv.c b/runtime/quark/codelets/codelet_zgetrf_nopiv.c
index dfee169e63daebeb876f077bf65bd69b886c57b3..c7115e7d5df13822c36c69ea86352aee56cc96fd 100644
--- a/runtime/quark/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/quark/codelets/codelet_zgetrf_nopiv.c
@@ -81,10 +81,9 @@ void CORE_zgetrf_nopiv_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c
index 99a0dc89ab9d2eeee87d91d69a08670af7dfee46..8aa18403bf02087fb52e02508f596f7162433835 100644
--- a/runtime/quark/codelets/codelet_zlacpy.c
+++ b/runtime/quark/codelets/codelet_zlacpy.c
@@ -43,10 +43,10 @@ static inline void CORE_zlacpy_quark(Quark *quark)
     CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB);
 }
 
-void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, int m, int n, int nb,
-                        int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int m, int n, int nb,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LACPY;
@@ -63,12 +63,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
         0);
 }
 
-void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                        0, A, Am, An, lda,
-                        0, B, Bm, Bn, ldb );
+                         0, A, Am, An, lda,
+                         0, B, Bm, Bn, ldb );
 }
diff --git a/runtime/quark/codelets/codelet_zpamm.c b/runtime/quark/codelets/codelet_zpamm.c
deleted file mode 100644
index ba9de2a29acf452282bebbadfdb952a3581db85b..0000000000000000000000000000000000000000
--- a/runtime/quark/codelets/codelet_zpamm.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/**
- *
- * @file quark/codelet_zpamm.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon zpamm Quark codelet
- *
- * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 1.0.0
- * @author Dulceneia Becker
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2011-06-14
- * @precisions normal z -> c d s
- *
- */
-#include "coreblas/cblas.h"
-#include "chameleon_quark.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-void
-CORE_zpamm_quark(Quark *quark)
-{
-    int op;
-    cham_side_t side;
-    cham_store_t storev;
-    int M;
-    int N;
-    int K;
-    int L;
-    CHAMELEON_Complex64_t *A1;
-    int LDA1;
-    CHAMELEON_Complex64_t *A2;
-    int LDA2;
-    CHAMELEON_Complex64_t *V;
-    int LDV;
-    CHAMELEON_Complex64_t *W;
-    int LDW;
-
-    quark_unpack_args_15(quark, op, side, storev, M, N, K, L,
-            A1, LDA1, A2, LDA2, V, LDV, W, LDW);
-
-    CORE_zpamm( op, side, storev, M, N, K, L, A1, LDA1, A2, LDA2, V, LDV, W, LDW);
-}
-
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  ZPAMM  performs one of the matrix-matrix operations
- *
- *                    LEFT                      RIGHT
- *     OP ChameleonW  :  W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
- *     OP ChameleonA2 :  A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
- *
- *  where  op( V ) is one of
- *
- *     op( V ) = V   or   op( V ) = V**T   or   op( V ) = V**H,
- *
- *  A1, A2 and W are general matrices, and V is:
- *
- *        l = k: rectangle + triangle
- *        l < k: rectangle + trapezoid
- *        l = 0: rectangle
- *
- *  Size of V, both rowwise and columnwise, is:
- *
- *         ----------------------
- *          side   trans    size
- *         ----------------------
- *          left     N     M x K
- *                   T     K x M
- *          right    N     K x N
- *                   T     N x K
- *         ----------------------
- *
- *  LEFT (columnwise and rowwise):
- *
- *              |    K    |                 |         M         |
- *           _  __________   _              _______________        _
- *              |    |    |                 |             | \
- *     V:       |    |    |            V':  |_____________|___\    K
- *              |    |    | M-L             |                  |
- *           M  |    |    |                 |__________________|   _
- *              |____|    |  _
- *              \    |    |                 |    M - L    | L  |
- *                \  |    |  L
- *           _      \|____|  _
- *
- *  RIGHT (columnwise and rowwise):
- *
- *          |         K         |                   |    N    |
- *          _______________        _             _  __________   _
- *          |             | \                       |    |    |
- *     V':  |_____________|___\    N        V:      |    |    |
- *          |                  |                    |    |    | K-L
- *          |__________________|   _             K  |    |    |
- *                                                  |____|    |  _
- *          |    K - L    | L  |                    \    |    |
- *                                                    \  |    |  L
- *                                               _      \|____|  _
- *
- *  Arguments
- *  ==========
- *
- * @param[in] op
- *
- *         OP specifies which operation to perform:
- *
- *         @arg ChameleonW  : W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
- *         @arg ChameleonA2 : A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
- *
- * @param[in] side
- *
- *         SIDE specifies whether  op( V ) multiplies A2
- *         or W from the left or right as follows:
- *
- *         @arg ChamLeft  : multiply op( V ) from the left
- *                            OP ChameleonW  :  W  = A1 + op(V) * A2
- *                            OP ChameleonA2 :  A2 = A2 - op(V) * W
- *
- *         @arg ChamRight : multiply op( V ) from the right
- *                            OP ChameleonW  :  W  = A1 + A2 * op(V)
- *                            OP ChameleonA2 :  A2 = A2 - W * op(V)
- *
- * @param[in] storev
- *
- *         Indicates how the vectors which define the elementary
- *         reflectors are stored in V:
- *
- *         @arg ChamColumnwise
- *         @arg ChamRowwise
- *
- * @param[in] M
- *         The number of rows of the A1, A2 and W
- *         If SIDE is ChamLeft, the number of rows of op( V )
- *
- * @param[in] N
- *         The number of columns of the A1, A2 and W
- *         If SIDE is ChamRight, the number of columns of op( V )
- *
- * @param[in] K
- *         If SIDE is ChamLeft, the number of columns of op( V )
- *         If SIDE is ChamRight, the number of rows of op( V )
- *
- * @param[in] L
- *         The size of the triangular part of V
- *
- * @param[in] A1
- *         On entry, the M-by-N tile A1.
- *
- * @param[in] LDA1
- *         The leading dimension of the array A1. LDA1 >= max(1,M).
- *
- * @param[in,out] A2
- *         On entry, the M-by-N tile A2.
- *         On exit, if OP is ChameleonA2 A2 is overwritten
- *
- * @param[in] LDA2
- *         The leading dimension of the tile A2. LDA2 >= max(1,M).
- *
- * @param[in] V
- *         The matrix V as described above.
- *         If SIDE is ChamLeft : op( V ) is M-by-K
- *         If SIDE is ChamRight: op( V ) is K-by-N
- *
- * @param[in] LDV
- *         The leading dimension of the array V.
- *
- * @param[in,out] W
- *         On entry, the M-by-N matrix W.
- *         On exit, W is overwritten either if OP is ChameleonA2 or ChameleonW.
- *         If OP is ChameleonA2, W is an input and is used as a workspace.
- *
- * @param[in] LDW
- *         The leading dimension of array WORK.
- *
- *******************************************************************************
- *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *
- */
-void
-INSERT_TASK_zpamm(const RUNTIME_option_t *options,
-                 int op, cham_side_t side, cham_store_t storev,
-                 int m, int n, int k, int l,
-                 const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                 const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                       const CHAM_desc_t *W, int Wm, int Wn, int ldw)
-{
-    QUARK_Insert_Task(opt->quark, CORE_zpamm_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),                        &op,      VALUE,
-        sizeof(int),                &side,    VALUE,
-        sizeof(int),                &storev,  VALUE,
-        sizeof(int),                        &m,       VALUE,
-        sizeof(int),                        &n,       VALUE,
-        sizeof(int),                        &k,       VALUE,
-        sizeof(int),                        &l,       VALUE,
-        sizeof(CHAMELEON_Complex64_t)*m*k,     RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),           INPUT,
-        sizeof(int),                        &lda1,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*k*n,     RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),           INOUT,
-        sizeof(int),                        &lda2,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*m*n,     RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),            INPUT,
-        sizeof(int),                        &ldv,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*m*n,     RTBLKADDR(W, CHAMELEON_Complex64_t, Wm, Wn),            INOUT,
-        sizeof(int),                        &ldw,     VALUE,
-        0);
-}
diff --git a/runtime/quark/codelets/codelet_zplssq.c b/runtime/quark/codelets/codelet_zplssq.c
index 79067050a5b8094a95e7695f1cde016cd2f61709..14418c58fdf30c1c4fa35e71d892c9cceb000cde 100644
--- a/runtime/quark/codelets/codelet_zplssq.c
+++ b/runtime/quark/codelets/codelet_zplssq.c
@@ -46,7 +46,7 @@ void CORE_zplssq_quark(Quark *quark)
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zplssq returns: scl * sqrt(ssq)
+ * @brief Compute sqrt( sum( a_ij ^ 2 ) ) = scl * sqrt(ssq)
  *
  * with scl and ssq such that
  *
@@ -74,8 +74,8 @@ void CORE_zplssq_quark(Quark *quark)
  *
  */
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
-                        const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
+                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     QUARK_Insert_Task(opt->quark, CORE_zplssq_quark, (Quark_Task_Flags*)opt,
@@ -94,7 +94,7 @@ void CORE_zplssq2_quark(Quark *quark)
 }
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     QUARK_Insert_Task(opt->quark, CORE_zplssq2_quark, (Quark_Task_Flags*)opt,
diff --git a/runtime/quark/codelets/codelet_zssssm.c b/runtime/quark/codelets/codelet_zssssm.c
index ea18309646c645047a0f72efad7c343033474585..878b17a56d17bfa78b0d832d3990ef13867dd1c0 100644
--- a/runtime/quark/codelets/codelet_zssssm.c
+++ b/runtime/quark/codelets/codelet_zssssm.c
@@ -115,9 +115,8 @@ void CORE_zssssm_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_ztile_zero.c b/runtime/quark/codelets/codelet_ztile_zero.c
deleted file mode 100644
index 68f52d47cecd4f1ec8e63bd392f396c51e768549..0000000000000000000000000000000000000000
--- a/runtime/quark/codelets/codelet_ztile_zero.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- *
- * @file quark/codelet_ztile_zero.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon ztile_zero Quark codelet
- *
- * @version 1.0.0
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Jakub Kurzak
- * @date 2010-11-15
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_quark.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-void CORE_ztile_zero_quark(Quark *quark)
-{
-    int X1;
-    int X2;
-    int Y1;
-    int Y2;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-
-    int x, y;
-
-    quark_unpack_args_6(quark, X1, X2, Y1, Y2, A, lda);
-
-    for (x = X1; x < X2; x++)
-        for (y = Y1; y < Y2; y++)
-            A[lda*x+y] = 0.0;
-
-}
-
-void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
-                            int X1, int X2, int Y1, int Y2,
-                            const CHAM_desc_t *A, int Am, int An, int lda )
-{
-    quark_option_t *opt = (quark_option_t*)(options->schedopt);
-    QUARK_Insert_Task(opt->quark, CORE_ztile_zero_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),                       &X1,                                       VALUE,
-        sizeof(int),                       &X2,                                       VALUE,
-        sizeof(int),                       &Y1,                                       VALUE,
-        sizeof(int),                       &Y2,                                       VALUE,
-        sizeof(CHAMELEON_Complex64_t)*A->bsiz,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),  OUTPUT | LOCALITY,
-        sizeof(int),                       &lda,                                      VALUE,
-        0);
-}
diff --git a/runtime/quark/codelets/codelet_ztradd.c b/runtime/quark/codelets/codelet_ztradd.c
index d18aa1db8bb9dde42532b5526fdd5121b4f11488..8c42160bcfc46948e83417150d49eb6bd292a137 100644
--- a/runtime/quark/codelets/codelet_ztradd.c
+++ b/runtime/quark/codelets/codelet_ztradd.c
@@ -46,7 +46,7 @@ void CORE_ztradd_quark(Quark *quark)
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd.
+ * @brief Adds two trapezoidal matrices together as in PBLAS pztradd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -96,15 +96,14 @@ void CORE_ztradd_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEADD;
diff --git a/runtime/quark/codelets/codelet_ztstrf.c b/runtime/quark/codelets/codelet_ztstrf.c
index d44bb81a1e401d11f0ec0fab51a258490f45acdb..1c31704e225257ce1ca589a7b5c64957a78c0462 100644
--- a/runtime/quark/codelets/codelet_ztstrf.c
+++ b/runtime/quark/codelets/codelet_ztstrf.c
@@ -115,10 +115,9 @@ void CORE_ztstrf_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c
index f87e193ad5959e7f0f48b46ec8089bc76c7bade5..af020eaec18244930eda9dd4691b7e54a29d78cb 100644
--- a/runtime/quark/codelets/codelet_zunmlq.c
+++ b/runtime/quark/codelets/codelet_zunmlq.c
@@ -129,9 +129,8 @@ void CORE_zunmlq_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c
index d78a12faadaa6f1b7b0fc746604ce49374f540cf..8407d8967bb53834c3ae219cf2429f7faea41c6a 100644
--- a/runtime/quark/codelets/codelet_zunmqr.c
+++ b/runtime/quark/codelets/codelet_zunmqr.c
@@ -129,9 +129,8 @@ void CORE_zunmqr_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_zasum.c b/runtime/starpu/codelets/codelet_zasum.c
index e8aa0aeca41392e2b01a298e82ef63886b61ad1b..6bfaf2c136f960c7cb7286f3eb81cae9c40fd263 100644
--- a/runtime/starpu/codelets/codelet_zasum.c
+++ b/runtime/starpu/codelets/codelet_zasum.c
@@ -22,10 +22,33 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
-                       cham_store_t storev, cham_uplo_t uplo, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_store_t storev;
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *work;
+
+    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda);
+    CORE_dzasum(storev, uplo, M, N, A, lda, work);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func)
+
+void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
+                         cham_store_t storev, cham_uplo_t uplo, int M, int N,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_zasum;
     void (*callback)(void*) = options->profiling ? cl_zasum_callback : NULL;
@@ -51,27 +74,3 @@ void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_store_t storev;
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *work;
-
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda);
-    CORE_dzasum(storev, uplo, M, N, A, lda, work);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c
index 0b70bb6f9f6ecf46599184cb56d4797c401383dd..5280eae12a4c72871b2db533b344689a0b8a100f 100644
--- a/runtime/starpu/codelets/codelet_zaxpy.c
+++ b/runtime/starpu/codelets/codelet_zaxpy.c
@@ -20,10 +20,32 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
-                      int M, CHAMELEON_Complex64_t alpha,
-                      const CHAM_desc_t *A, int Am, int An, int incA,
-                      const CHAM_desc_t *B, int Bm, int Bn, int incB)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg)
+{
+    int M;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int incA;
+    CHAMELEON_Complex64_t *B;
+    int incB;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB);
+    CORE_zaxpy(M, alpha, A, incA, B, incB);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func)
+
+void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
+                        int M, CHAMELEON_Complex64_t alpha,
+                        const CHAM_desc_t *A, int Am, int An, int incA,
+                        const CHAM_desc_t *B, int Bm, int Bn, int incB )
 {
     struct starpu_codelet *codelet = &cl_zaxpy;
     void (*callback)(void*) = options->profiling ? cl_zaxpy_callback : NULL;
@@ -48,26 +70,3 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
 #endif
             0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg)
-{
-    int M;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int incA;
-    CHAMELEON_Complex64_t *B;
-    int incB;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB);
-    CORE_zaxpy(M, alpha, A, incA, B, incB);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c
index 4aa45bf5bc9900b6f996e0cb275d4339268d83be..a2610a9bc8aa96b73a370c7703a2eefad17b90a9 100644
--- a/runtime/starpu/codelets/codelet_zbuild.c
+++ b/runtime/starpu/codelets/codelet_zbuild.c
@@ -27,9 +27,35 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
+{
+  CHAMELEON_Complex64_t *A;
+  int ld;
+  void *user_data;
+  void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
+  int row_min, row_max, col_min, col_max;
+
+  A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+  starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback );
+
+  /* The callback 'user_build_callback' is expected to build the matrix block [row_min, row_max] x [col_min, col_max]
+   * (both min and max values are included in the intervals; indices start at 0 as in C, NOT at 1 as in Fortran)
+   * and to store it at the address 'buffer' with leading dimension 'ld'.
+   */
+  user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data);
+
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
+
 void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        void *user_data, void* user_build_callback )
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         void *user_data, void* user_build_callback )
 {
 
   struct starpu_codelet *codelet = &cl_zbuild;
@@ -61,30 +87,3 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
-{
-  CHAMELEON_Complex64_t *A;
-  int ld;
-  void *user_data;
-  void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
-  int row_min, row_max, col_min, col_max;
-
-  A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-  starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback );
-
-  /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
-   * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
-   * and store it at the address 'buffer' with leading dimension 'ld'
-   */
-  user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data);
-
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index 7c296d750bc4fa32d425a877f1b8731389e4e41e..eb0adebe51e8fe29593cd50d8d59437a59f37778 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -24,12 +24,76 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_trans_t trans;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    const CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t beta;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+
+    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
+    CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
+    return;
+}
+
+#ifdef CHAMELEON_USE_CUBLAS_V2
+static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
+{
+    cham_trans_t trans;
+    int M;
+    int N;
+    cuDoubleComplex alpha;
+    const cuDoubleComplex *A;
+    int lda;
+    cuDoubleComplex beta;
+    cuDoubleComplex *B;
+    int ldb;
+
+    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb);
+
+    RUNTIME_getStream( stream );
+
+    CUDA_zgeadd(
+        trans,
+        M, N,
+        &alpha, A, lda,
+        &beta,  B, ldb,
+        stream);
+
+#ifndef STARPU_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+
+    return;
+}
+#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+#if defined(CHAMELEON_USE_CUBLAS_V2)
+CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
+#else
+CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
+#endif
+
 /**
  ******************************************************************************
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
+ * @brief Adds two general matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -73,15 +137,14 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
-                       cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
+                         cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_zgeadd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
@@ -111,68 +174,3 @@ void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
 
     (void)nb;
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_trans_t trans;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
-    CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
-    return;
-}
-
-#ifdef CHAMELEON_USE_CUBLAS_V2
-static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
-{
-    cham_trans_t trans;
-    int M;
-    int N;
-    cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int lda;
-    cuDoubleComplex beta;
-    cuDoubleComplex *B;
-    int ldb;
-
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb);
-
-    RUNTIME_getStream( stream );
-
-    CUDA_zgeadd(
-        trans,
-        M, N,
-        &alpha, A, lda,
-        &beta,  B, ldb,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-#if defined(CHAMELEON_USE_CUBLAS_V2)
-CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
-#else
-CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 8ffad6e1ada200916e25ef6e685898e1c8c53c08..abdf1954c9f9a5d587e5baf8360915fc160dbe58 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -112,9 +112,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 205da5e35aa7be306588b42be3471a9cc302cc6a..fed1350a71df6d7c1b0881dd92639b88e4363356 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
-                      cham_trans_t transA, cham_trans_t transB,
-                      int m, int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                                                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zgemm;
-    void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &transA,            sizeof(int),
-        STARPU_VALUE,    &transB,            sizeof(int),
-        STARPU_VALUE,    &m,                 sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_VALUE,    &k,                 sizeof(int),
-        STARPU_VALUE,    &alpha,             sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &lda,               sizeof(int),
-        STARPU_R,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,    &ldb,               sizeof(int),
-        STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,    &ldc,               sizeof(int),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zgemm",
-#endif
-        0);
-}
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -144,3 +99,48 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ *
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
+                      cham_trans_t transA, cham_trans_t transB,
+                      int m, int n, int k, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                                                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb;
+    struct starpu_codelet *codelet = &cl_zgemm;
+    void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &transA,            sizeof(int),
+        STARPU_VALUE,    &transB,            sizeof(int),
+        STARPU_VALUE,    &m,                 sizeof(int),
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_VALUE,    &k,                 sizeof(int),
+        STARPU_VALUE,    &alpha,             sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,    &lda,               sizeof(int),
+        STARPU_R,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,    &ldb,               sizeof(int),
+        STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,    &ldc,               sizeof(int),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zgemm",
+#endif
+        0);
+}
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index bee5168f95baef5c9e0b4ea8da3d4adbe6c43625..1ff57d18546e4bec5670e25dcd00e6a32368ba06 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -114,9 +114,8 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c
index 2dac8366a196e4baab0f3f4a671891bac11df686..72736b064f50833b271001bec25499d6636cc1fd 100644
--- a/runtime/starpu/codelets/codelet_zgessm.c
+++ b/runtime/starpu/codelets/codelet_zgessm.c
@@ -26,6 +26,32 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
+{
+    int m;
+    int n;
+    int k;
+    int ib;
+    int *IPIV;
+    int ldl;
+    CHAMELEON_Complex64_t *D;
+    int ldd;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+
+    D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda);
+    CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -66,18 +92,17 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
-void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
-                       int m, int n, int k, int ib, int nb,
-                       int *IPIV,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
-                       const CHAM_desc_t *D, int Dm, int Dn, int ldd,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
+                         int m, int n, int k, int ib, int nb,
+                         int *IPIV,
+                         const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                         const CHAM_desc_t *D, int Dm, int Dn, int ldd,
+                         const CHAM_desc_t *A, int Am, int An, int lda )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgessm;
@@ -109,30 +134,3 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int k;
-    int ib;
-    int *IPIV;
-    int ldl;
-    CHAMELEON_Complex64_t *D;
-    int ldd;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-
-    D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda);
-    CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c
index a9cdc2ff812c03338f9d45af2cfe8846163315db..e22f803bc469664416e80c8cd3dd0ab7f579652b 100644
--- a/runtime/starpu/codelets/codelet_zgessq.c
+++ b/runtime/starpu/codelets/codelet_zgessq.c
@@ -22,10 +22,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgessq_cpu_func(void *descr[], void *cl_arg)
+{
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *SCALESUMSQ;
+
+    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda);
+    CORE_zgessq( m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func)
+
 void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
-                        int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_zgessq;
     void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL;
@@ -49,25 +70,3 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgessq_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
-
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda);
-    CORE_zgessq( m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c
index b6bf892c4c6d72c84165c67c2802f19a86d5ab14..977e8c2c5bd10b8d87116559ec2717a063321b74 100644
--- a/runtime/starpu/codelets/codelet_zgetrf.c
+++ b/runtime/starpu/codelets/codelet_zgetrf.c
@@ -24,6 +24,36 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
+{
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int *IPIV;
+    cham_bool_t check_info;
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request);
+    CORE_zgetrf( m, n, A, lda, IPIV, &info );
+
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func)
+
 void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
                          int m, int n, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -56,34 +86,3 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int *IPIV;
-    cham_bool_t check_info;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request);
-    CORE_zgetrf( m, n, A, lda, IPIV, &info );
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
index 94113ee7d83960a08b18bcecc2fd880754d8d99e..66a5201ca6bbc77567500ac5f5d987048e92aa13 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
@@ -26,6 +26,38 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
+{
+    CHAMELEON_starpu_ws_t *h_work;
+    int m;
+    int n;
+    int ib;
+    CHAMELEON_Complex64_t *A;
+    int lda, ldl;
+    int *IPIV;
+    cham_bool_t check_info;
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request);
+    CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info);
+
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -71,10 +103,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
@@ -122,36 +153,3 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
-{
-    CHAMELEON_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda, ldl;
-    int *IPIV;
-    cham_bool_t check_info;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request);
-    CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
index 9f3a0a8d2466943ca239759c8a93228a120b256b..3efbe362e5ffeb6cd86998c26ac7528809379c11 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
@@ -23,6 +23,38 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+/*
+ * Codelet CPU
+ */
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
+{
+    int m;
+    int n;
+    int ib;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request);
+    CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
+
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -61,10 +93,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
@@ -101,35 +132,3 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-/*
- * Codelet CPU
- */
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request);
-    CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
index 4c562fbeb6fbc0e32f1976a475de43c8cb98941c..11ce2183419c09dd3d9e89ee53d9801d1206d986 100644
--- a/runtime/starpu/codelets/codelet_zhe2ge.c
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -18,6 +18,29 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    const CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+
+    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB);
+    CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -54,26 +77,3 @@ void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB);
-    CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index 9396feebc0a72b723eef2b0ac1120e4cb80d46f3..4f769835253ae243d2c48eb0f88dccb2f9c26008 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zhemm;
-    void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &side,                sizeof(int),
-        STARPU_VALUE,    &uplo,                sizeof(int),
-        STARPU_VALUE,       &m,                        sizeof(int),
-        STARPU_VALUE,       &n,                        sizeof(int),
-        STARPU_VALUE,   &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,     &lda,                        sizeof(int),
-        STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,     &ldb,                        sizeof(int),
-        STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,     &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zhemm",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -142,3 +97,47 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * Submits one zhemm tile task through starpu_insert_task().
+ * @ingroup INSERT_TASK_Complex64_t
+ * Tiles A(Am,An) and B(Bm,Bn) are passed read-only, tile C(Cm,Cn) read-write.
+ */
+void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
+                      cham_side_t side, cham_uplo_t uplo,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is not used by this function */
+    struct starpu_codelet *codelet = &cl_zhemm;
+    void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL; /* callback only when profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &side,                sizeof(int), /* NOTE(review): cham_side_t packed as sizeof(int) - assumes an int-sized enum */
+        STARPU_VALUE,    &uplo,                sizeof(int), /* NOTE(review): same assumption for cham_uplo_t */
+        STARPU_VALUE,       &m,                        sizeof(int),
+        STARPU_VALUE,       &n,                        sizeof(int),
+        STARPU_VALUE,   &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,     &lda,                        sizeof(int),
+        STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,     &ldb,                        sizeof(int),
+        STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,     &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zhemm",
+#endif
+        0); /* 0 ends the argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 668ee4246fc3c3225f0d800c21df39735159df93..24553aba498fb368fc9cdd238a79af32e87bb670 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zher2k;
-    void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,     &trans,                sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,         &k,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,       &lda,                        sizeof(int),
-        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,       &ldb,                        sizeof(int),
-        STARPU_VALUE,      &beta,                     sizeof(double),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,       &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zher2k",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
 {
@@ -135,3 +90,47 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * Submits one zher2k tile task through starpu_insert_task().
+ * @ingroup INSERT_TASK_Complex64_t
+ * Tiles A(Am,An) and B(Bm,Bn) are passed read-only, tile C(Cm,Cn) read-write.
+ */
+void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo, cham_trans_t trans,
+                       int n, int k, int nb,
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is not used by this function */
+    struct starpu_codelet *codelet = &cl_zher2k;
+    void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL; /* callback only when profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,      &uplo,                sizeof(int), /* NOTE(review): cham_uplo_t packed as sizeof(int) - assumes an int-sized enum */
+        STARPU_VALUE,     &trans,                sizeof(int), /* NOTE(review): same assumption for cham_trans_t */
+        STARPU_VALUE,         &n,                        sizeof(int),
+        STARPU_VALUE,         &k,                        sizeof(int),
+        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,       &lda,                        sizeof(int),
+        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,       &ldb,                        sizeof(int),
+        STARPU_VALUE,      &beta,                     sizeof(double), /* beta is a real (double) here, unlike alpha */
+        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,       &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zher2k",
+#endif
+        0); /* 0 ends the argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c
index 21c97dcf8855b72d64dba0cc3154131fd3d9e313..d833149236288da24c4dd978d4ec3bf199f39b72 100644
--- a/runtime/starpu/codelets/codelet_zherfb.c
+++ b/runtime/starpu/codelets/codelet_zherfb.c
@@ -20,51 +20,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo,
-                       int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    struct starpu_codelet *codelet = &cl_zherfb;
-    void (*callback)(void*) = options->profiling ? cl_zherfb_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(T, Tm, Tn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &uplo,              sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_VALUE,    &k,                 sizeof(int),
-        STARPU_VALUE,    &ib,                sizeof(int),
-        STARPU_VALUE,    &nb,                sizeof(int),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &lda,               sizeof(int),
-        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
-        STARPU_VALUE,    &ldt,               sizeof(int),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,    &ldc,               sizeof(int),
-        STARPU_SCRATCH,   options->ws_worker,
-        STARPU_VALUE,    &nb,                sizeof(int),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zherfb",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zherfb_cpu_func(void *descr[], void *cl_arg)
 {
@@ -131,3 +86,47 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * Submits one zherfb tile task through starpu_insert_task().
+ * @ingroup INSERT_TASK_Complex64_t
+ * Tiles A(Am,An) and T(Tm,Tn) are passed read-only, tile C(Cm,Cn) read-write.
+ */
+void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo,
+                       int n, int k, int ib, int nb,
+                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    struct starpu_codelet *codelet = &cl_zherfb;
+    void (*callback)(void*) = options->profiling ? cl_zherfb_callback : NULL; /* callback only when profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(T, Tm, Tn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &uplo,              sizeof(int), /* NOTE(review): cham_uplo_t packed as sizeof(int) - assumes an int-sized enum */
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_VALUE,    &k,                 sizeof(int),
+        STARPU_VALUE,    &ib,                sizeof(int),
+        STARPU_VALUE,    &nb,                sizeof(int),
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,    &lda,               sizeof(int),
+        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
+        STARPU_VALUE,    &ldt,               sizeof(int),
+        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,    &ldc,               sizeof(int),
+        STARPU_SCRATCH,   options->ws_worker, /* fourth data handle: per-task scratch buffer */
+        STARPU_VALUE,    &nb,                sizeof(int), /* nb is packed a second time; presumably read back as the workspace leading dimension - TODO confirm in cl_zherfb_cpu_func */
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zherfb",
+#endif
+        0); /* 0 ends the argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 101eef19c7be79c5cd63bf76529769f8d0615c2a..d8709bac154d2fc755b8328ab054e273d0db26ca 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -26,46 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zherk(const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, cham_trans_t trans,
-                      int n, int k, int nb,
-                      double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zherk;
-    void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &uplo,              sizeof(int),
-        STARPU_VALUE,    &trans,             sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_VALUE,    &k,                 sizeof(int),
-        STARPU_VALUE,    &alpha,             sizeof(double),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &lda,               sizeof(int),
-        STARPU_VALUE,    &beta,              sizeof(double),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,    &ldc,               sizeof(int),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zherk",
-#endif
-        0);
-}
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
 {
@@ -129,3 +89,43 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * Submits one zherk tile task through starpu_insert_task().
+ * @ingroup INSERT_TASK_Complex64_t
+ * Tile A(Am,An) is passed read-only, tile C(Cm,Cn) read-write.
+ */
+void INSERT_TASK_zherk(const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, cham_trans_t trans,
+                      int n, int k, int nb,
+                      double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is not used by this function */
+    struct starpu_codelet *codelet = &cl_zherk;
+    void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL; /* callback only when profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &uplo,              sizeof(int), /* NOTE(review): cham_uplo_t packed as sizeof(int) - assumes an int-sized enum */
+        STARPU_VALUE,    &trans,             sizeof(int), /* NOTE(review): same assumption for cham_trans_t */
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_VALUE,    &k,                 sizeof(int),
+        STARPU_VALUE,    &alpha,             sizeof(double), /* alpha and beta are both real (double) */
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,    &lda,               sizeof(int),
+        STARPU_VALUE,    &beta,              sizeof(double),
+        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,    &ldc,               sizeof(int),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zherk",
+#endif
+        0); /* 0 ends the argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c
index c47e0871d113394d877f8e5ef4e5e82c2854c531..c2ebde6af7fcbf0a94f1da2fac293318fb25204f 100644
--- a/runtime/starpu/codelets/codelet_zhessq.c
+++ b/runtime/starpu/codelets/codelet_zhessq.c
@@ -22,6 +22,27 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zhessq_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *SCALESUMSQ;
+    /* CPU body of the zhessq codelet: tile pointers come from descr[], scalars from cl_arg. */
+    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); /* order must mirror the values packed by the insert call */
+    CORE_zhessq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); /* the two doubles of SCALESUMSQ are passed separately */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition: the 2 matches the two descr[] entries read above.
+ */
+CODELETS_CPU(zhessq, 2, cl_zhessq_cpu_func)
+
 void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, int n,
                         const CHAM_desc_t *A, int Am, int An, int lda,
@@ -49,25 +70,3 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zhessq_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
-
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda);
-    CORE_zhessq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zhessq, 2, cl_zhessq_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index 0d2426400e3eb0216d694b4563543fb3c642aabb..9c53e6e4efd3a0981a970b6307d6daafcf91b4f8 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -26,15 +26,40 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    int displA;
+    int displB;
+    const CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+    /* CPU body of the zlacpy codelet: tile pointers come from descr[], scalars from cl_arg. */
+    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB); /* order must mirror the values packed by the insert call */
+    CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB); /* displA/displB are element offsets added to the tile base pointers */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition: the 2 matches the two descr[] entries read above.
+ */
+CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int nb,
-                         int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                         int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int m, int n, int nb,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlacpy;
@@ -64,37 +89,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
         0);
 }
 
-void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, int m, int n, int nb,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
                          0, A, Am, An, lda,
                          0, B, Bm, Bn, ldb );
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    int displA;
-    int displB;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB);
-    CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c
index 43b4314c6b58b9a3bea0d39b667e6006883e2e6f..46eea274085fcd6e422dba232af80961419e8cd3 100644
--- a/runtime/starpu/codelets/codelet_zlag2c.c
+++ b/runtime/starpu/codelets/codelet_zlag2c.c
@@ -24,6 +24,28 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
+{
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    CHAMELEON_Complex32_t *B;
+    int ldb;
+    /* CPU body of the zlag2c codelet: A is the Complex64 tile, B the Complex32 tile. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); /* order must mirror the values packed by the insert call */
+    CORE_zlag2c( m, n, A, lda, B, ldb);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition: two buffers, because the kernel reads descr[0] (A) and descr[1] (B).
+ */
+CODELETS_CPU(zlag2c, 2, cl_zlag2c_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -60,22 +82,27 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
 }
 
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
+static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
 {
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
+    CHAMELEON_Complex32_t *A;
     int lda;
-    CHAMELEON_Complex32_t *B;
+    CHAMELEON_Complex64_t *B;
     int ldb;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb);
-    CORE_zlag2c( m, n, A, lda, B, ldb);
+    CORE_clag2z( m, n, A, lda, B, ldb);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func)
+
 void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
                        int m, int n, int nb,
                        const CHAM_desc_t *A, int Am, int An, int lda,
@@ -105,30 +132,3 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    CHAMELEON_Complex32_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
-
-    A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb);
-    CORE_clag2z( m, n, A, lda, B, ldb);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func)
-/*
- * Codelet definition
- */
-CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c
index 9ab611908607b4440277005c057598e35d530825..4b389bbb2821528c6ea6379b389c90db6bc1e05b 100644
--- a/runtime/starpu/codelets/codelet_zlange.c
+++ b/runtime/starpu/codelets/codelet_zlange.c
@@ -24,6 +24,30 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlange_cpu_func(void *descr[], void *cl_arg)
+{
+    double *normA;
+    cham_normtype_t norm;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    double *work;
+    /* CPU body of the zlange codelet: descr[0] is the tile, descr[1] the workspace, descr[2] the result. */
+    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+    starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA); /* order must mirror the values packed by the insert call */
+    CORE_zlange( norm, M, N, A, LDA, work, normA );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition: the 3 matches the three descr[] entries read above.
+ */
+CODELETS_CPU(zlange, 3, cl_zlange_cpu_func)
+
 void INSERT_TASK_zlange( const RUNTIME_option_t *options,
                          cham_normtype_t norm, int M, int N, int NB,
                          const CHAM_desc_t *A, int Am, int An, int LDA,
@@ -56,28 +80,25 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options,
 }
 
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zlange_cpu_func(void *descr[], void *cl_arg)
+static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg)
 {
-    double *normA;
-    cham_normtype_t norm;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    double *work;
+    double *A;
+    double *B;
 
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA);
-    CORE_zlange( norm, M, N, A, LDA, work, normA );
+    A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+
+    if ( *A > *B ) {
+        *B = *A;
+    }
+    (void)cl_arg;
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlange, 3, cl_zlange_cpu_func)
+CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func)
 
 void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
                            const CHAM_desc_t *A, int Am, int An,
@@ -102,24 +123,3 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg)
-{
-    double *A;
-    double *B;
-
-    A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-    if ( *A > *B ) {
-        *B = *A;
-    }
-    (void)cl_arg;
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c
index d29b5b3d53be49d3d24b758c5139b5ea08aafa36..4fc51d3d0c0de634e66a0a190d4b64d5cd7963e0 100644
--- a/runtime/starpu/codelets/codelet_zlanhe.c
+++ b/runtime/starpu/codelets/codelet_zlanhe.c
@@ -24,6 +24,30 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg)
+{
+    double *normA;
+    cham_normtype_t norm;
+    cham_uplo_t uplo;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    double *work;
+    /* CPU body of the zlanhe codelet: descr[0] is the tile, descr[1] the workspace, descr[2] the result. */
+    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); /* order must mirror the values packed by the insert call */
+    CORE_zlanhe( norm, uplo, N, A, LDA, work, normA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition: the 3 matches the three descr[] entries read above.
+ */
+CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func)
+
 void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
                        const CHAM_desc_t *A, int Am, int An, int LDA,
@@ -55,27 +79,3 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
 
     (void)NB;
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg)
-{
-    double *normA;
-    cham_normtype_t norm;
-    cham_uplo_t uplo;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    double *work;
-
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA);
-    CORE_zlanhe( norm, uplo, N, A, LDA, work, normA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c
index 6fd7cae043869316a5e8527c637f273815b4c261..fdea83309d17af28ab3dd368a87d3cd48bbdf3a0 100644
--- a/runtime/starpu/codelets/codelet_zlansy.c
+++ b/runtime/starpu/codelets/codelet_zlansy.c
@@ -24,10 +24,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlansy_cpu_func(void *descr[], void *cl_arg)
+{
+    double *normA;
+    cham_normtype_t norm;
+    cham_uplo_t uplo;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    double *work;
+    /* CPU body of the zlansy codelet: descr[0] is the tile, descr[1] the workspace, descr[2] the result. */
+    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); /* order must mirror the values packed by the insert call */
+    CORE_zlansy( norm, uplo, N, A, LDA, work, normA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition: the 3 matches the three descr[] entries read above.
+ */
+CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func)
+
+void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)NB;
     struct starpu_codelet *codelet = &cl_zlansy;
@@ -54,27 +78,3 @@ void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlansy_cpu_func(void *descr[], void *cl_arg)
-{
-    double *normA;
-    cham_normtype_t norm;
-    cham_uplo_t uplo;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    double *work;
-
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA);
-    CORE_zlansy( norm, uplo, N, A, LDA, work, normA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c
index b4a5da8051780a99d1c60fa53f0aa8d20ad74313..078b81d6bcc73de624e4922bef9853ddb4bed5a7 100644
--- a/runtime/starpu/codelets/codelet_zlantr.c
+++ b/runtime/starpu/codelets/codelet_zlantr.c
@@ -22,11 +22,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
-                       int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlantr_cpu_func(void *descr[], void *cl_arg)
+{
+    double *normA, *work;
+    cham_normtype_t norm;
+    cham_uplo_t uplo; /* was declared as cham_normtype_t; now matches the INSERT_TASK_zlantr parameter type */
+    cham_diag_t diag; /* was declared as cham_normtype_t; now matches the INSERT_TASK_zlantr parameter type */
+    int M, N, LDA;
+    CHAMELEON_Complex64_t *A;
+    /* CPU body of the zlantr codelet: descr[0] is the tile, descr[1] the workspace, descr[2] the result. */
+    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA); /* order must mirror the values packed by the insert call */
+    CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition: the 3 matches the three descr[] entries read above.
+ */
+CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func)
+
+void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
+                         int M, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_zlantr;
     void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL;
@@ -56,26 +79,3 @@ void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
 
     (void)NB;
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlantr_cpu_func(void *descr[], void *cl_arg)
-{
-    double *normA;
-    cham_normtype_t norm, uplo, diag;
-    int M, N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    double *work;
-
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA);
-    CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index 61d63bcfcc49a6ebac95fd16fa9beb1659883bf6..f625d883067e3aeb99fda6989bfbd3390dc064cb 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -22,6 +22,28 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    /* StarPU CPU kernel: tile pointer from descr[0], scalars from cl_arg, then CORE_zlascal. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA);
+    CORE_zlascal(uplo, M, N, alpha, A, LDA);
+    return;
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -49,12 +71,10 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-
 void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int nb,
@@ -84,26 +104,3 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA);
-    CORE_zlascal(uplo, M, N, alpha, A, LDA);
-    return;
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c
index 80ab8c2c158860a4525f57e04bf76fbeac0c58cd..9108167de991387c9ac6d0fc1c6d409ede9b4ba5 100644
--- a/runtime/starpu/codelets/codelet_zlaset.c
+++ b/runtime/starpu/codelets/codelet_zlaset.c
@@ -25,6 +25,27 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlaset_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t beta;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    /* StarPU CPU kernel: tile pointer from descr[0], scalars from cl_arg, then CORE_zlaset. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA);
+    CORE_zlaset(uplo, M, N, alpha, beta, A, LDA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func)
 
 /**
  *
@@ -90,26 +111,3 @@ void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlaset_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA);
-    CORE_zlaset(uplo, M, N, alpha, beta, A, LDA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c
index c3514735dac3ac4fad2c510ed5a0a73a947ca12f..0e67ee39d2fdbc81cbd43fbadf65d11444d5401f 100644
--- a/runtime/starpu/codelets/codelet_zlaset2.c
+++ b/runtime/starpu/codelets/codelet_zlaset2.c
@@ -25,6 +25,26 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    /* StarPU CPU kernel: tile pointer from descr[0], scalars from cl_arg, then CORE_zlaset2. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA);
+    CORE_zlaset2(uplo, M, N, alpha, A, LDA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func)
 
 /**
  *
@@ -86,25 +106,3 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA);
-    CORE_zlaset2(uplo, M, N, alpha, A, LDA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c
index d92ddaf478722a11e9b958958a8d65c2ed620cf6..063b7aea8cd4d32783e69f81a079eea7f2381ff3 100644
--- a/runtime/starpu/codelets/codelet_zlatro.c
+++ b/runtime/starpu/codelets/codelet_zlatro.c
@@ -26,16 +26,40 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlatro_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    cham_trans_t trans;
+    int M;
+    int N;
+    const CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+    /* StarPU CPU kernel: A (read through a const pointer) is descr[0], B is descr[1]; calls CORE_zlatro. */
+    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB);
+    CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans,
+                         int m, int n, int mb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_zlatro;
     void (*callback)(void*) = NULL;
@@ -63,27 +87,3 @@ void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
         0);
     (void)mb;
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlatro_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    cham_trans_t trans;
-    int M;
-    int N;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB);
-    CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index 166b138812159cabd2faafc2359b4be7fb471636..2344c5171b897ddf8da452b9f7c047fa1b6fc44b 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -26,14 +26,33 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    /* StarPU CPU kernel: tile pointer from descr[0], scalars from cl_arg, then CORE_zlauum. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA);
+    CORE_zlauum(uplo, N, A, LDA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlauum;
@@ -56,23 +75,3 @@ void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA);
-    CORE_zlauum(uplo, N, A, LDA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c
index ae014c297e8a9bec69359c93aee8684cd488234d..345d18a24e39f606431c9e2d71783982e2117905 100644
--- a/runtime/starpu/codelets/codelet_zplghe.c
+++ b/runtime/starpu/codelets/codelet_zplghe.c
@@ -26,13 +26,36 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/*   INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */
+/*   cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */
 
-void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
 {
+    double bump;
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A; /* tile handed to CORE_zplghe, taken from descr[0] */
+    int lda;
+    int bigM;
+    int m0;
+    int n0;
+    unsigned long long int seed;
 
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); /* scalars only; A comes from descr[] */
+    CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func)
+
+void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
+                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
+{
     struct starpu_codelet *codelet = &cl_zplghe;
     void (*callback)(void*) = options->profiling ? cl_zplghe_callback : NULL;
 
@@ -58,29 +81,3 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-/*   cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
-{
-    double bump;
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int bigM;
-    int m0;
-    int n0;
-    unsigned long long int seed;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed );
-    CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c
index de00e603352526149032aa4982686d0af59dffb3..9141ecd0fce9abd5b18c0d841eb64bec4da10afa 100644
--- a/runtime/starpu/codelets/codelet_zplgsy.c
+++ b/runtime/starpu/codelets/codelet_zplgsy.c
@@ -26,7 +26,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/*   INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
+/*   cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
+{
+    CHAMELEON_Complex64_t bump;
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int bigM;
+    int m0;
+    int n0;
+    unsigned long long int seed;
+    /* StarPU CPU kernel: tile pointer from descr[0], scalars from cl_arg, then CORE_zplgsy. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed );
+    CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func)
 
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
@@ -58,29 +82,3 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-/*   cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
-{
-    CHAMELEON_Complex64_t bump;
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int bigM;
-    int m0;
-    int n0;
-    unsigned long long int seed;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed );
-    CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c
index 24f7c9159e92127f9cfa76377a3073faa12b0cf0..d824485dac38087d5bf3189bdf24a49bbd6360b0 100644
--- a/runtime/starpu/codelets/codelet_zplrnt.c
+++ b/runtime/starpu/codelets/codelet_zplrnt.c
@@ -26,11 +26,32 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/*   INSERT_TASK_zplrnt - Generate a tile for random matrix. */
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
+{
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int bigM;
+    int m0;
+    int n0;
+    unsigned long long int seed;
+    /* cl_zplrnt_cpu_func - Generate a tile for random matrix (tile from descr[0], via CORE_zplrnt). */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed );
+    CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func)
 
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                        int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
 
     struct starpu_codelet *codelet = &cl_zplrnt;
@@ -57,28 +78,3 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-/*   cl_zplrnt_cpu_func - Generate a tile for random matrix. */
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int bigM;
-    int m0;
-    int n0;
-    unsigned long long int seed;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed );
-    CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c
index 4fdbaf6c3d402da601267f52788ce6eeedd82653..2fe5d2a7fdd0ea9d671a5a785d6a25141714d510 100644
--- a/runtime/starpu/codelets/codelet_zplssq.c
+++ b/runtime/starpu/codelets/codelet_zplssq.c
@@ -23,11 +23,39 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
+{
+    double *SCLSSQ_IN;
+    double *SCLSSQ_OUT;
+    /* StarPU CPU kernel: folds the pair in descr[0] into the pair in descr[1]; [0] = scale, [1] = scaled sum. */
+    SCLSSQ_IN  = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
+    SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    /* Keep the larger scale and rescale the other sum by (small/large)^2, so scale^2 * sum stays additive. */
+    assert( SCLSSQ_OUT[0] >= 0. );
+    if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
+        SCLSSQ_OUT[1] = SCLSSQ_IN[1]  + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
+        SCLSSQ_OUT[0] = SCLSSQ_IN[0];
+    } else {
+        if ( SCLSSQ_OUT[0] > 0 ) { /* a zero output scale is left untouched, which also avoids dividing by zero */
+            SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1]  * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
+        }
+    }
+
+    (void)cl_arg; /* no scalar arguments are unpacked by this kernel */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zplssq returns: scl * sqrt(ssq)
+ * @brief Compute sqrt( sum( a_ij ^ 2 ) ) = scl * sqrt(ssq)
  *
  * with scl and ssq such that
  *
@@ -78,25 +106,14 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
         0);
 }
 
-
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
+static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg)
 {
-    double *SCLSSQ_IN;
-    double *SCLSSQ_OUT;
+    double *RESULT;
 
-    SCLSSQ_IN  = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
 
-    assert( SCLSSQ_OUT[0] >= 0. );
-    if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
-        SCLSSQ_OUT[1] = SCLSSQ_IN[1]  + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
-        SCLSSQ_OUT[0] = SCLSSQ_IN[0];
-    } else {
-        if ( SCLSSQ_OUT[0] > 0 ) {
-            SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1]  * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
-        }
-    }
+    RESULT[0] = RESULT[0] * sqrt( RESULT[1] );
 
     (void)cl_arg;
 }
@@ -105,10 +122,10 @@ static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func)
+CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func)
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
     struct starpu_codelet *codelet = &cl_zplssq2;
     void (*callback)(void*) = options->profiling ? cl_zplssq2_callback : NULL;
@@ -127,22 +144,3 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg)
-{
-    double *RESULT;
-
-    RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    RESULT[0] = RESULT[0] * sqrt( RESULT[1] );
-
-    (void)cl_arg;
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index a43f31723f90b93265deadc10fa5e291f1c5097d..bbfe817742d59742d2945aeb77ca71b0d399ad4f 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -26,6 +26,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0;
+    /* StarPU CPU kernel: runs CORE_zpotrf on the tile in descr[0] and reports a non-zero info. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request);
+    CORE_zpotrf(uplo, n, A, lda, &info);
+    /* Only while the sequence still reports success: flush it with iinfo+info as the status. */
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -61,33 +89,3 @@ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request);
-    CORE_zpotrf(uplo, n, A, lda, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func)
-
diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c
index 9efbd985f0451cac32b07623016d2ccb6b489d77..ecae613ee3f5b83fb78c25cf5ac9282609c3429b 100644
--- a/runtime/starpu/codelets/codelet_zssssm.c
+++ b/runtime/starpu/codelets/codelet_zssssm.c
@@ -26,6 +26,39 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
+{
+    int m1;
+    int n1;
+    int m2;
+    int n2;
+    int k;
+    int ib;
+    CHAMELEON_Complex64_t *A1;
+    int lda1;
+    CHAMELEON_Complex64_t *A2;
+    int lda2;
+    CHAMELEON_Complex64_t *L1;
+    int ldl1;
+    CHAMELEON_Complex64_t *L2;
+    int ldl2;
+    int *IPIV; /* received by value through cl_arg, not through a descr[] entry */
+    /* StarPU CPU kernel: A1, A2, L1, L2 are descr[0..3]; scalars and IPIV come from cl_arg; calls CORE_zssssm. */
+    A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV);
+    CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -91,19 +124,17 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
-
-void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
-                       int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                       const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                       const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
-                       const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
-                       const int *IPIV)
+void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
+                         int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
+                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
+                         const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
+                         const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
+                         const int *IPIV )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zssssm;
@@ -140,38 +171,3 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
-{
-    int m1;
-    int n1;
-    int m2;
-    int n2;
-    int k;
-    int ib;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *L1;
-    int ldl1;
-    CHAMELEON_Complex64_t *L2;
-    int ldl2;
-    int *IPIV;
-
-    A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV);
-    CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func)
-
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 455d118f19c5426a9c017b93a24fb5d99e684ef4..49d3af5d9700e90f9b01056514195bf276cd3450 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zsymm;
-    void (*callback)(void*) = options->profiling ? cl_zsymm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &side,                sizeof(int),
-        STARPU_VALUE,    &uplo,                sizeof(int),
-        STARPU_VALUE,       &m,                        sizeof(int),
-        STARPU_VALUE,       &n,                        sizeof(int),
-        STARPU_VALUE,   &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,     &lda,                        sizeof(int),
-        STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,     &ldb,                        sizeof(int),
-        STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,     &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zsymm",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zsymm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -142,3 +97,47 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zsymm, 3, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @brief Submit a zsymm task through starpu_insert_task (A and B are read, C is read-write).
+ * @ingroup INSERT_TASK_Complex64_t
+ * @note nb is accepted but not used by this implementation.
+ */
+void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
+                      cham_side_t side, cham_uplo_t uplo,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb;
+    struct starpu_codelet *codelet = &cl_zsymm;
+    void (*callback)(void*) = options->profiling ? cl_zsymm_callback : NULL; /* callback only when profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &side,                sizeof(int), /* NOTE(review): assumes the enum is int-sized -- confirm */
+        STARPU_VALUE,    &uplo,                sizeof(int),
+        STARPU_VALUE,       &m,                        sizeof(int),
+        STARPU_VALUE,       &n,                        sizeof(int),
+        STARPU_VALUE,   &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,     &lda,                        sizeof(int),
+        STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,     &ldb,                        sizeof(int),
+        STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,     &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zsymm",
+#endif
+        0);
+}
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index bafefa0f7e73c76f8e11f120026d8f5a087dae1a..27b63010a7daea7d8632cc835a7135ff6876e542 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zsyr2k;
-    void (*callback)(void*) = options->profiling ? cl_zsyr2k_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,     &trans,                sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,         &k,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,       &lda,                        sizeof(int),
-        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,       &ldb,                        sizeof(int),
-        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,       &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zsyr2k",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg)
 {
@@ -135,3 +90,47 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zsyr2k, 3, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @brief Inserts one zsyr2k task (codelet cl_zsyr2k) through starpu_insert_task().
+ * @ingroup INSERT_TASK_Complex64_t
+ * @note Tiles A(Am,An) and B(Bm,Bn) are passed as STARPU_R, tile C(Cm,Cn) as STARPU_RW; nb is unused.
+ */
+void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo, cham_trans_t trans,
+                       int n, int k, int nb,
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is not used by this implementation */
+    struct starpu_codelet *codelet = &cl_zsyr2k;
+    void (*callback)(void*) = options->profiling ? cl_zsyr2k_callback : NULL; /* callback only when profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION; /* access declarations: A and B read, C read-write */
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task( /* scalars are packed by value, interleaved with the tile handles, in the order listed */
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,      &uplo,                sizeof(int), /* NOTE(review): cham_uplo_t packed with sizeof(int) - confirm the sizes match */
+        STARPU_VALUE,     &trans,                sizeof(int), /* NOTE(review): same assumption for cham_trans_t */
+        STARPU_VALUE,         &n,                        sizeof(int),
+        STARPU_VALUE,         &k,                        sizeof(int),
+        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,       &lda,                        sizeof(int),
+        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,       &ldb,                        sizeof(int),
+        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,       &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME) /* task name is only passed when codelet names are enabled */
+        STARPU_NAME, "zsyr2k",
+#endif
+        0); /* 0 terminates the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 6f72802e5e0f3432d2f20ae97035617843a2e3e0..e089904530812ab76b4828b0123021b1e8f09131 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -26,47 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, cham_trans_t trans,
-                      int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zsyrk;
-    void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,     &trans,                sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,         &k,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,       &lda,                        sizeof(int),
-        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,       &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zsyrk",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
 {
@@ -130,3 +89,43 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zsyrk, 2, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @brief Inserts one zsyrk task (codelet cl_zsyrk) through starpu_insert_task().
+ * @ingroup INSERT_TASK_Complex64_t
+ * @note Tile A(Am,An) is passed as STARPU_R, tile C(Cm,Cn) as STARPU_RW; nb is unused.
+ */
+void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, cham_trans_t trans,
+                      int n, int k, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is not used by this implementation */
+    struct starpu_codelet *codelet = &cl_zsyrk;
+    void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL; /* callback only when profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION; /* access declarations: A read, C read-write */
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task( /* scalars are packed by value, interleaved with the tile handles, in the order listed */
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,      &uplo,                sizeof(int), /* NOTE(review): cham_uplo_t packed with sizeof(int) - confirm the sizes match */
+        STARPU_VALUE,     &trans,                sizeof(int), /* NOTE(review): same assumption for cham_trans_t */
+        STARPU_VALUE,         &n,                        sizeof(int),
+        STARPU_VALUE,         &k,                        sizeof(int),
+        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,       &lda,                        sizeof(int),
+        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,       &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME) /* task name is only passed when codelet names are enabled */
+        STARPU_NAME, "zsyrk",
+#endif
+        0); /* 0 terminates the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c
index 2e2ae7676cc456ab5ff078ad36e378dc6e9f2c49..3f07e618d18c5ca86ddaa65cce78bc10ca0f1227 100644
--- a/runtime/starpu/codelets/codelet_zsyssq.c
+++ b/runtime/starpu/codelets/codelet_zsyssq.c
@@ -22,10 +22,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out of simulation builds */
+static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zsyssq codelet: unpacks the task and calls CORE_zsyssq */
+{
+    cham_uplo_t uplo;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *SCALESUMSQ;
+
+    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* first data buffer: the A tile */
+    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); /* second data buffer, viewed as doubles */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); /* scalars are read back in this order: uplo, n, lda */
+    CORE_zsyssq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); /* entries 0 and 1 are passed separately - presumably scale and sum of squares, confirm against CORE_zsyssq */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU function only; the 2 is presumably the number of data buffers - confirm against CODELETS_CPU)
+ */
+CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func)
+
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+                         cham_uplo_t uplo, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_zsyssq;
     void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL;
@@ -49,25 +70,3 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
-
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda);
-    CORE_zsyssq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
index b1f741affa82fa9388a5b41d8d243d22215aa7cd..06c4775e74f53dc4f288fa31b85e3993be9839da 100644
--- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
@@ -26,10 +26,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
-                             cham_uplo_t uplo, int n, int nb,
-                             const CHAM_desc_t *A, int Am, int An, int lda,
-                             int iinfo)
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out of simulation builds */
+static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zsytrf_nopiv codelet: unpacks the task and calls CORE_zsytf2_nopiv */
+{
+    cham_uplo_t uplo;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int iinfo;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* only data buffer: the A tile */
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo); /* scalars are read back in this order: uplo, n, lda, iinfo */
+    CORE_zsytf2_nopiv(uplo, n, A, lda); /* iinfo is unpacked above but not used here; the call's result is not checked */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU function only; the 1 is presumably the number of data buffers - confirm against CODELETS_CPU)
+ */
+CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func)
+
+void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
+                              cham_uplo_t uplo, int n, int nb,
+                               const CHAM_desc_t *A, int Am, int An, int lda,
+                               int iinfo )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsytrf_nopiv;
@@ -54,25 +75,3 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int iinfo;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo);
-    CORE_zsytf2_nopiv(uplo, n, A, lda);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztile_zero.c b/runtime/starpu/codelets/codelet_ztile_zero.c
deleted file mode 100644
index c59115b410222dd329cdd0b794ab72223b8f2ef8..0000000000000000000000000000000000000000
--- a/runtime/starpu/codelets/codelet_ztile_zero.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- *
- * @file starpu/codelet_ztile_zero.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon ztile_zero StarPU codelet
- *
- * @version 1.0.0
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Jakub Kurzak
- * @date 2010-11-15
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_starpu.h"
-#include "runtime_codelet_z.h"
-
-/**
- *
- */
-void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
-                            int X1, int X2, int Y1, int Y2,
-                            const CHAM_desc_t *A, int Am, int An, int lda )
-{
-    struct starpu_codelet *codelet;
-    codelet = &cl_ztile_zero;
-    void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_W(A, Am, An);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE, &X1,  sizeof(int),
-        STARPU_VALUE, &X2,  sizeof(int),
-        STARPU_VALUE, &Y1,  sizeof(int),
-        STARPU_VALUE, &Y2,  sizeof(int),
-        STARPU_W,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE, &lda, sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback, NULL,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztile_zero",
-#endif
-        0);
-}
-
-/**
- *
- */
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztile_zero_cpu_func(void *descr[], void *cl_arg)
-{
-    int X1;
-    int X2;
-    int Y1;
-    int Y2;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-
-    int x, y;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &X1, &X2, &Y1, &Y2, &lda);
-
-    for (x = X1; x < X2; x++)
-        for (y = Y1; y < Y2; y++)
-            A[lda*x+y] = 0.0;
-
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztile_zero, 1, cl_ztile_zero_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c
index 44615d5c3bef1f0b958de2c89cfbf41181ae9a49..8132a27dde9d032918c933b45da63493ae72568e 100644
--- a/runtime/starpu/codelets/codelet_ztplqt.c
+++ b/runtime/starpu/codelets/codelet_ztplqt.c
@@ -54,12 +54,11 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func)
 
-void
-INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
-                   int M, int N, int L, int ib, int nb,
-                   const CHAM_desc_t *A, int Am, int An, int lda,
-                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                   const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
     struct starpu_codelet *codelet = &cl_ztplqt;
     void (*callback)(void*) = options->profiling ? cl_ztplqt_callback : NULL;
diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c
index 8dffa4ff2292c448c710713a7556bec3920f3d73..54a24a070c666a25b366ad5af07bd101bc297390 100644
--- a/runtime/starpu/codelets/codelet_ztpmlqt.c
+++ b/runtime/starpu/codelets/codelet_ztpmlqt.c
@@ -103,14 +103,13 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
  */
 CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
 
-void
-INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
-                    cham_side_t side, cham_trans_t trans,
-                    int M, int N, int K, int L, int ib, int nb,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                    const CHAM_desc_t *A, int Am, int An, int lda,
-                    const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
+                          cham_side_t side, cham_trans_t trans,
+                          int M, int N, int K, int L, int ib, int nb,
+                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                          const CHAM_desc_t *A, int Am, int An, int lda,
+                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_ztpmlqt;
     void (*callback)(void*) = options->profiling ? cl_ztpmlqt_callback : NULL;
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
index 6684e59f82d579cd129a1ff40c25dd377bc6166e..c94a33b43b0b4ed4d983b0d36384f962f1cbb413 100644
--- a/runtime/starpu/codelets/codelet_ztpmqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -104,14 +104,13 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
  */
 CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
 
-void
-INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
-                     cham_side_t side, cham_trans_t trans,
-                     int M, int N, int K, int L, int ib, int nb,
-                     const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                     const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                     const CHAM_desc_t *A, int Am, int An, int lda,
-                     const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
+                          cham_side_t side, cham_trans_t trans,
+                          int M, int N, int K, int L, int ib, int nb,
+                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                          const CHAM_desc_t *A, int Am, int An, int lda,
+                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_ztpmqrt;
     void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;
diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c
index 6fbd0afe65501a497ceda71c9c6f40444a50369a..143d613eb360ff65cb1ec52489c6791a2ed23cf9 100644
--- a/runtime/starpu/codelets/codelet_ztpqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpqrt.c
@@ -54,12 +54,11 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
 
-void
-INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
-                   int M, int N, int L, int ib, int nb,
-                   const CHAM_desc_t *A, int Am, int An, int lda,
-                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                   const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
     struct starpu_codelet *codelet = &cl_ztpqrt;
     void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL;
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index f6265c28c28b50a2750abfd87c6344bd8ff54ba5..57fa58e178e0771e45c2788c270a442dff96bb53 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -22,12 +22,39 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out of simulation builds */
+static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the ztradd codelet: unpacks the task and calls CORE_ztradd */
+{
+    cham_uplo_t uplo;
+    cham_trans_t trans;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t beta;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* first data buffer: the A tile */
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); /* second data buffer: the B tile */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); /* scalars are read back in this order */
+    CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); /* the kernel's result is not checked */
+    return;
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU function only; the 2 is presumably the number of data buffers - confirm against CODELETS_CPU)
+ */
+CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func)
+
 /**
  ******************************************************************************
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd.
+ * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -77,15 +104,14 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_ztradd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
@@ -116,31 +142,3 @@ void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
 
     (void)nb;
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztradd_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    cham_trans_t trans;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
-    CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB);
-    return;
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c
index c1d154aadc361f391f388514ec155fd798fc1749..1ca5a1a6a0ebf413c4bf8d5a77cc7d028592e6a6 100644
--- a/runtime/starpu/codelets/codelet_ztrasm.c
+++ b/runtime/starpu/codelets/codelet_ztrasm.c
@@ -22,10 +22,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_ztrasm(const RUNTIME_option_t *options,
-                       cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out of simulation builds */
+static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the ztrasm codelet: unpacks the task and calls CORE_ztrasm */
+{
+    cham_store_t storev;
+    cham_uplo_t uplo;
+    cham_diag_t diag;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *work;
+
+    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* first data buffer: the A tile */
+    work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); /* second data buffer, viewed as doubles and handed to the kernel as work */
+    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda); /* scalars are read back in this order */
+    CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU function only; the 2 is presumably the number of data buffers - confirm against CODELETS_CPU)
+ */
+CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func)
+
+void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
+                         cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_ztrasm;
     void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL;
@@ -48,32 +72,7 @@ void INSERT_TASK_ztrasm(const RUNTIME_option_t *options,
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-            STARPU_NAME, "ztrasm",
+        STARPU_NAME, "ztrasm",
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_store_t storev;
-    cham_uplo_t uplo;
-    cham_diag_t diag;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *work;
-
-    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda);
-    CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index b125de67fedf22555b58f0abb5bb6227fddf1507..b9f553b645f2c250f717ec150d52f61cdea5b59d 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -26,48 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_ztrmm;
-    void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_RW(B, Bm, Bn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,      &side,                sizeof(int),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,    &transA,                sizeof(int),
-        STARPU_VALUE,      &diag,                sizeof(int),
-        STARPU_VALUE,         &m,                        sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,       &lda,                        sizeof(int),
-        STARPU_RW,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,       &ldb,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztrmm",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -136,3 +94,44 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(ztrmm, 2, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @brief Inserts one ztrmm task (codelet cl_ztrmm) through starpu_insert_task().
+ * @ingroup INSERT_TASK_Complex64_t
+ * @note Tile A(Am,An) is passed as STARPU_R, tile B(Bm,Bn) as STARPU_RW; nb is unused.
+ */
+void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
+                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    (void)nb; /* nb is not used by this implementation */
+    struct starpu_codelet *codelet = &cl_ztrmm;
+    void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL; /* callback only when profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION; /* access declarations: A read, B read-write */
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(B, Bm, Bn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task( /* scalars are packed by value, interleaved with the tile handles, in the order listed */
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,      &side,                sizeof(int), /* NOTE(review): the four enum arguments are packed with sizeof(int) - confirm the sizes match */
+        STARPU_VALUE,      &uplo,                sizeof(int),
+        STARPU_VALUE,    &transA,                sizeof(int),
+        STARPU_VALUE,      &diag,                sizeof(int),
+        STARPU_VALUE,         &m,                        sizeof(int),
+        STARPU_VALUE,         &n,                        sizeof(int),
+        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,       &lda,                        sizeof(int),
+        STARPU_RW,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,       &ldb,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME) /* task name is only passed when codelet names are enabled */
+        STARPU_NAME, "ztrmm",
+#endif
+        0); /* 0 terminates the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index e48a4eb1609cb0697de2af913197f06a12c5c795..83310ab1bbffd079e5fa8932d859fa48a1be9fe2 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -26,48 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_ztrsm;
-    void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_RW(B, Bm, Bn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &side,               sizeof(int),
-        STARPU_VALUE,    &uplo,               sizeof(int),
-        STARPU_VALUE,    &transA,             sizeof(int),
-        STARPU_VALUE,    &diag,               sizeof(int),
-        STARPU_VALUE,    &m,                  sizeof(int),
-        STARPU_VALUE,    &n,                  sizeof(int),
-        STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &lda,                sizeof(int),
-        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,    &ldb,                sizeof(int),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztrsm",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -134,3 +92,44 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(ztrsm, 2, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ * @brief Submit a ztrsm task: tile A(Am,An) is read, tile B(Bm,Bn) is read and written.
+ */
+void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
+                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    (void)nb; /* nb is deliberately unused in this implementation */
+    struct starpu_codelet *codelet = &cl_ztrsm;
+    void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL;
+    /* Declare the tile accesses of the task before it is submitted. */
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(B, Bm, Bn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+    /* Every STARPU_VALUE takes its size from the variable being packed, not from an assumed type. */
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &side,               sizeof(side),
+        STARPU_VALUE,    &uplo,               sizeof(uplo),
+        STARPU_VALUE,    &transA,             sizeof(transA),
+        STARPU_VALUE,    &diag,               sizeof(diag),
+        STARPU_VALUE,    &m,                  sizeof(m),
+        STARPU_VALUE,    &n,                  sizeof(n),
+        STARPU_VALUE,    &alpha,              sizeof(alpha),
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,    &lda,                sizeof(lda),
+        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,    &ldb,                sizeof(ldb),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "ztrsm",
+#endif
+        0);
+}
diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c
index aac4b9fbd963ba9a3bf67ff248d2b3729129c845..e7d0de9985f22b19bfb12561b28ce00935067296 100644
--- a/runtime/starpu/codelets/codelet_ztrssq.c
+++ b/runtime/starpu/codelets/codelet_ztrssq.c
@@ -22,11 +22,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/*
+ * CPU implementation of the ztrssq codelet: fetches the two buffer pointers,
+ * unpacks the scalar arguments from cl_arg and forwards them to CORE_ztrssq.
+ */
+static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    cham_diag_t diag;
+    int m, n, lda;
+    CHAMELEON_Complex64_t *A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    double *SCALESUMSQ       = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+
+    starpu_codelet_unpack_args( cl_arg, &uplo, &diag, &m, &n, &lda );
+    CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func)
+
 void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, cham_diag_t diag,
-                        int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+                         cham_uplo_t uplo, cham_diag_t diag,
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_ztrssq;
     void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL;
@@ -52,27 +75,3 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    cham_diag_t diag;
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
-
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n, &lda);
-    CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index 81ee2923e2e2130c67fa250e625abfd48d678d6d..804d21b0204d0b0f2700ec59c7372a4e7a355593 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -26,16 +26,45 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/*
+ * CPU implementation of the ztrtri codelet: runs CORE_ztrtri on the tile in descr[0]; a
+ * non-zero info flushes the sequence with iinfo+info if its status is still CHAMELEON_SUCCESS.
+ */
+static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    cham_diag_t diag;
+    int N, LDA, iinfo;
+    int info = 0;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    CHAMELEON_Complex64_t *A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args( cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request );
+    CORE_ztrtri( uplo, diag, N, A, LDA, &info );
+
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_ztrtri(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_diag_t diag,
-                       int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       int iinfo)
+void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_diag_t diag,
+                         int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         int iinfo )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztrtri;
@@ -62,33 +91,3 @@ void INSERT_TASK_ztrtri(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    cham_diag_t diag;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request);
-    CORE_ztrtri(uplo, diag, N, A, LDA, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
index d68e2bebf803ac1c449c30435dec5ea1883ba86e..4e82f101c56bdf38e67e5eaf86ac242873f5bae7 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
@@ -22,18 +22,60 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/*
+ * CPU implementation of the ztsmlq_hetra1 codelet.
+ *
+ * Collects the pointers of the five buffers in descr[] (A1, A2, V, T and the
+ * workspace), unpacks the scalar arguments from cl_arg and forwards them to
+ * CORE_ztsmlq_hetra1.
+ */
+static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m1, n1;
+    int m2, n2;
+    int k, ib;
+    int nb; /* unpacked from cl_arg but not passed on to the kernel */
+    int lda1, lda2, ldv, ldt;
+    int ldwork;
+
+    CHAMELEON_Complex64_t *A1   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    CHAMELEON_Complex64_t *A2   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    CHAMELEON_Complex64_t *V    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    CHAMELEON_Complex64_t *T    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    /* Workspace buffer (ib * nb) */
+    CHAMELEON_Complex64_t *WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]);
+
+    starpu_codelet_unpack_args( cl_arg,
+                                &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &nb,
+                                &lda1, &lda2, &ldv, &ldt, &ldwork );
+
+    CORE_ztsmlq_hetra1( side, trans, m1, n1, m2, n2, k, ib,
+                        A1, lda1, A2, lda2,
+                        V, ldv, T, ldt,
+                        WORK, ldwork );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
-                              cham_side_t side, cham_trans_t trans,
-                              int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
-                              const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt)
+void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options,
+                                cham_side_t side, cham_trans_t trans,
+                                int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                                const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
+                                const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
+                                const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
+                                const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt )
 {
     struct starpu_codelet *codelet = &cl_ztsmlq_hetra1;
     void (*callback)(void*) = options->profiling ? cl_ztsmlq_hetra1_callback : NULL;
@@ -75,45 +117,3 @@ void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m1;
-    int n1;
-    int m2;
-    int n2;
-    int k;
-    int ib;
-    int nb;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *V;
-    int ldv;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-
-    A1    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    V     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
-
-    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
-                                &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork);
-    CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
-                       ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
index af9f2adcc3197cf3b6f6e3648d6f25b645a2de23..66fa69daba82900221e05c6161ff5cc78ebb1c0f 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
@@ -22,18 +22,60 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/*
+ * CPU implementation of the ztsmqr_hetra1 codelet.
+ *
+ * Collects the pointers of the five buffers in descr[] (A1, A2, V, T and the
+ * workspace), unpacks the scalar arguments from cl_arg and forwards them to
+ * CORE_ztsmqr_hetra1.
+ */
+static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m1, n1;
+    int m2, n2;
+    int k, ib;
+    int lda1, lda2;
+    int ldv, ldt;
+    int ldwork;
+
+    CHAMELEON_Complex64_t *A1   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    CHAMELEON_Complex64_t *A2   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    CHAMELEON_Complex64_t *V    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    CHAMELEON_Complex64_t *T    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    /* TODO: manage workspace */
+    CHAMELEON_Complex64_t *WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]);
+
+    starpu_codelet_unpack_args( cl_arg,
+                                &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
+                                &lda1, &lda2, &ldv, &ldt, &ldwork );
+
+    CORE_ztsmqr_hetra1( side, trans, m1, n1, m2, n2, k, ib,
+                        A1, lda1, A2, lda2,
+                        V, ldv, T, ldt,
+                        WORK, ldwork );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
-                              cham_side_t side, cham_trans_t trans,
-                              int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
-                              const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt)
+void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options,
+                                cham_side_t side, cham_trans_t trans,
+                                int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                                const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
+                                const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
+                                const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
+                                const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt )
 {
     struct starpu_codelet *codelet = &cl_ztsmqr_hetra1;
     void (*callback)(void*) = options->profiling ? cl_ztsmqr_hetra1_callback : NULL;
@@ -74,45 +116,3 @@ void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m1;
-    int n1;
-    int m2;
-    int n2;
-    int k;
-    int ib;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *V;
-    int ldv;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-
-    /* TODO: manage workspace */
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-
-    A1    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    V     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]);
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
-                               &ib, &lda1, &lda2, &ldv, &ldt, &ldwork);
-    CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k,
-                       ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c
index e139931a6c90ab49e0a19396004213fda3dd9c4f..7e1dfd92a31d58f5c03f57759f484b8b14aabd12 100644
--- a/runtime/starpu/codelets/codelet_ztstrf.c
+++ b/runtime/starpu/codelets/codelet_ztstrf.c
@@ -26,6 +26,51 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/*
+ * CPU implementation of the ztstrf codelet.
+ *
+ * Runs CORE_ztstrf on the U, A and L tiles with the workspace taken from
+ * descr[3]. If the kernel returns a non-zero info while the sequence status
+ * is still CHAMELEON_SUCCESS, the sequence is flushed with iinfo+info.
+ */
+static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg)
+{
+    int m, n, ib, nb;
+    int ldu, lda, ldl;
+    int ldwork;
+    int iinfo;
+    int info = 0;
+    int *IPIV;
+    CHAMELEON_starpu_ws_t *d_work; /* only an unpack target; WORK is read from descr[3] */
+    cham_bool_t check_info;        /* only an unpack target; not tested in this function */
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+
+    CHAMELEON_Complex64_t *U    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    CHAMELEON_Complex64_t *A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    CHAMELEON_Complex64_t *L    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    CHAMELEON_Complex64_t *WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+
+    starpu_codelet_unpack_args( cl_arg,
+                                &m, &n, &ib, &nb, &ldu, &lda, &ldl,
+                                &IPIV, &d_work, &ldwork, &check_info, &iinfo,
+                                &sequence, &request );
+
+    CORE_ztstrf( m, n, ib, nb, U, ldu, A, lda, L, ldl,
+                 IPIV, WORK, ldwork, &info );
+
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -83,23 +128,21 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
  *
  */
-
-void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
-                       int m, int n, int ib, int nb,
-                       const CHAM_desc_t *U, int Um, int Un, int ldu,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
-                       int *IPIV,
-                       cham_bool_t check_info, int iinfo)
+void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
+                         int m, int n, int ib, int nb,
+                         const CHAM_desc_t *U, int Um, int Un, int ldu,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                         int *IPIV,
+                         cham_bool_t check_info, int iinfo )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztstrf;
@@ -139,50 +182,3 @@ void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg)
-{
-    CHAMELEON_starpu_ws_t *d_work;
-    int m;
-    int n;
-    int ib;
-    int nb;
-    CHAMELEON_Complex64_t *U;
-    int ldu;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *L;
-    int ldl;
-    int *IPIV;
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-    cham_bool_t check_info;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    U = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl,
-                               &IPIV, &d_work, &ldwork, &check_info, &iinfo,
-                               &sequence, &request);
-
-    CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func)
-
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index 89ef1c851bde0804aaa9b1901a0e6664aa0ec28f..046b4e568756e9cc36e2249bc6d485a8d9843e28 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -27,6 +27,75 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/*
+ * CPU implementation of the zunmlq codelet.
+ *
+ * A and T are accessed through const pointers; C and the workspace taken
+ * from descr[3] are writable. The scalar arguments are unpacked from cl_arg
+ * and everything is handed to CORE_zunmlq.
+ */
+static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m, n, k, ib;
+    int lda, ldt, ldc;
+    int ldwork;
+
+    const CHAMELEON_Complex64_t *A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    const CHAMELEON_Complex64_t *T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    CHAMELEON_Complex64_t       *C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    CHAMELEON_Complex64_t       *WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args( cl_arg,
+                                &side, &trans, &m, &n, &k, &ib,
+                                &lda, &ldt, &ldc, &ldwork );
+
+    CORE_zunmlq( side, trans, m, n, k, ib,
+                 A, lda, T, ldt,
+                 C, ldc, WORK, ldwork );
+}
+
+#if defined(CHAMELEON_USE_CUDA)
+/*
+ * CUDA implementation of the zunmlq codelet: unpacks the scalar arguments,
+ * fetches the four buffer pointers from descr[] and calls CUDA_zunmlqt on
+ * the stream named by RUNTIME_getStream. The stream is synchronized here
+ * only when STARPU_CUDA_ASYNC is not defined.
+ */
+static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m, n, k, ib;
+    int lda, ldt, ldc, ldwork;
+
+    const cuDoubleComplex *A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    const cuDoubleComplex *T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    cuDoubleComplex       *C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
+    cuDoubleComplex       *WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m, &n, &k, &ib,
+                                &lda, &ldt, &ldc, &ldwork );
+
+    RUNTIME_getStream(stream);
+
+    CUDA_zunmlqt( side, trans, m, n, k, ib,
+                  A, lda, T, ldt, C, ldc, WORK, ldwork, stream );
+
+#ifndef STARPU_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -105,18 +174,16 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-
-void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
-                       cham_side_t side, cham_trans_t trans,
-                       int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
+                         cham_side_t side, cham_trans_t trans,
+                         int m, int n, int k, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                         const CHAM_desc_t *C, int Cm, int Cn, int ldc )
 {
     struct starpu_codelet *codelet = &cl_zunmlq;
     void (*callback)(void*) = options->profiling ? cl_zunmlq_callback : NULL;
@@ -151,73 +218,3 @@ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m;
-    int n;
-    int k;
-    int ib;
-    const CHAMELEON_Complex64_t *A;
-    int lda;
-    const CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-
-    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
-                               &lda, &ldt, &ldc, &ldwork);
-
-    CORE_zunmlq(side, trans, m, n, k, ib,
-                A, lda, T, ldt, C, ldc, WORK, ldwork);
-}
-
-#if defined(CHAMELEON_USE_CUDA)
-static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m;
-    int n;
-    int k;
-    int ib;
-    const cuDoubleComplex *A, *T;
-    cuDoubleComplex *C, *WORK;
-    int lda, ldt, ldc, ldwork;
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
-                               &lda, &ldt, &ldc, &ldwork);
-
-    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    RUNTIME_getStream(stream);
-
-    CUDA_zunmlqt(
-            side, trans, m, n, k, ib,
-            A, lda, T, ldt, C, ldc, WORK, ldwork, stream );
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-}
-#endif /* defined(CHAMELEON_USE_CUDA) */
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index e6f97c032a4bb5910aa3a3e767a41318f6983851..afa04149b405938eb777ef409fb801a907ebf9f0 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -26,6 +26,75 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/*
+ * CPU implementation of the zunmqr codelet.
+ *
+ * A and T are accessed through const pointers; C and the workspace taken
+ * from descr[3] are writable. The scalar arguments are unpacked from cl_arg
+ * and everything is handed to CORE_zunmqr.
+ */
+static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m, n, k, ib;
+    int lda, ldt, ldc;
+    int ldwork;
+
+    const CHAMELEON_Complex64_t *A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    const CHAMELEON_Complex64_t *T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    CHAMELEON_Complex64_t       *C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    CHAMELEON_Complex64_t       *WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args( cl_arg,
+                                &side, &trans, &m, &n, &k, &ib,
+                                &lda, &ldt, &ldc, &ldwork );
+
+    CORE_zunmqr( side, trans, m, n, k, ib,
+                 A, lda, T, ldt,
+                 C, ldc, WORK, ldwork );
+}
+
+#if defined(CHAMELEON_USE_CUDA)
+/*
+ * CUDA implementation of the zunmqr codelet: unpacks the scalar arguments,
+ * fetches the four buffer pointers from descr[] and calls CUDA_zunmqrt on
+ * the stream named by RUNTIME_getStream. The stream is synchronized here
+ * only when STARPU_CUDA_ASYNC is not defined.
+ */
+static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m, n, k, ib;
+    int lda, ldt, ldc, ldwork;
+
+    const cuDoubleComplex *A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    const cuDoubleComplex *T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    cuDoubleComplex       *C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
+    cuDoubleComplex       *WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m, &n, &k, &ib,
+                                &lda, &ldt, &ldc, &ldwork );
+
+    RUNTIME_getStream(stream);
+
+    CUDA_zunmqrt( side, trans, m, n, k, ib,
+                  A, lda, T, ldt, C, ldc, WORK, ldwork, stream );
+
+#ifndef STARPU_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -105,18 +174,16 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-
-void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
-                       cham_side_t side, cham_trans_t trans,
-                       int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
+                         cham_side_t side, cham_trans_t trans,
+                         int m, int n, int k, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                         const CHAM_desc_t *C, int Cm, int Cn, int ldc )
 {
     struct starpu_codelet *codelet = &cl_zunmqr;
     void (*callback)(void*) = options->profiling ? cl_zunmqr_callback : NULL;
@@ -151,73 +218,3 @@ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m;
-    int n;
-    int k;
-    int ib;
-    const CHAMELEON_Complex64_t *A;
-    int lda;
-    const CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-
-    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
-                               &lda, &ldt, &ldc, &ldwork);
-
-    CORE_zunmqr(side, trans, m, n, k, ib,
-                A, lda, T, ldt, C, ldc, WORK, ldwork);
-}
-
-#if defined(CHAMELEON_USE_CUDA)
-static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m;
-    int n;
-    int k;
-    int ib;
-    const cuDoubleComplex *A, *T;
-    cuDoubleComplex *C, *WORK;
-    int lda, ldt, ldc, ldwork;
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
-                               &lda, &ldt, &ldc, &ldwork);
-
-    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    RUNTIME_getStream(stream);
-
-    CUDA_zunmqrt(
-            side, trans, m, n, k, ib,
-            A, lda, T, ldt, C, ldc, WORK, ldwork, stream );
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-}
-#endif /* defined(CHAMELEON_USE_CUDA) */
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 509abacfc9a3dd9c2fd09729f8a7e7a351778476..b97e06ba890fc9806131e4446a6e2dfd8268204d 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -33,11 +33,6 @@
 #endif
 #endif
 
-/*
- * Management functions
- */
-ZCODELETS_HEADER(tile_zero)
-
 /*
  * BLAS 1 functions
  */