diff --git a/compute/zbuild.c b/compute/zbuild.c index 22f2676df7c3ec145eb454e4b44fabe07c86e1db..6ec2419ca0e1fb3ed94cb1e5b0ab5b25200e5c1a 100644 --- a/compute/zbuild.c +++ b/compute/zbuild.c @@ -66,9 +66,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -175,8 +174,7 @@ int CHAMELEON_zbuild( cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeadd.c b/compute/zgeadd.c index cc14238e9b9b5868a8efe5d6adf297ca6ce21176..e2674316ab921ebe05ecc4911d9af42b1b5f573a 100644 --- a/compute/zgeadd.c +++ b/compute/zgeadd.c @@ -75,8 +75,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -211,8 +210,7 @@ int CHAMELEON_zgeadd( cham_trans_t trans, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgelqf.c b/compute/zgelqf.c index 8c0cc4bec038b8d787854331a87b3c8f33272a12..9752503348ad2496623502c68ccf73b1983ba9be 100644 --- a/compute/zgelqf.c +++ b/compute/zgelqf.c @@ -56,9 +56,8 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -165,8 +164,7 @@ int CHAMELEON_zgelqf( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c index fecf0f20f31a8e1bff2314c9da0ba9cb6967a6cb..c507463efd24bc87eca387424338611b14abe8a0 100644 --- a/compute/zgelqf_param.c +++ b/compute/zgelqf_param.c @@ -54,9 +54,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -164,8 +163,7 @@ int CHAMELEON_zgelqf_param( const libhqr_tree_t *qrtree, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgelqs.c b/compute/zgelqs.c index 08ec0dd3dcd37eb606db79e900464cda4686f5ad..cc9b89fef023c319bcffa3d960776f0b804f7e4f 100644 --- a/compute/zgelqs.c +++ b/compute/zgelqs.c @@ -62,9 +62,8 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -186,8 +185,7 @@ int CHAMELEON_zgelqs( int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c index b594dd602962c779e6450134376570ce8feadc04..720ab5d4d8ffb1708a5cb35a05be2032ff1b6eba 100644 --- a/compute/zgelqs_param.c +++ b/compute/zgelqs_param.c @@ -64,9 +64,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -192,8 +191,7 @@ int CHAMELEON_zgelqs_param( const libhqr_tree_t *qrtree, int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgels.c b/compute/zgels.c index 99d7914da27fd1cdc88463a70e07ba5fee7d4675..276c4e3a6dd67569b5347bd3178f7a5b8a87d439 100644 --- a/compute/zgels.c +++ b/compute/zgels.c @@ -89,9 +89,8 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -244,8 +243,7 @@ int CHAMELEON_zgels( cham_trans_t trans, int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \return CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgels_param.c b/compute/zgels_param.c index ea23c9a4919594174e511c4a3256b61c9233ba4d..34ab5c6098db2a5c176d70b6ff8b0607a9117b25 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -92,9 +92,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -250,8 +249,7 @@ int CHAMELEON_zgels_param( const libhqr_tree_t *qrtree, cham_trans_t trans, int * ******************************************************************************* * - * @return - * \return CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgemm.c b/compute/zgemm.c index 8d7cfba196d5e40338bc0760d5e1167cbaaa480e..e266039adbb24d049cae77f1d426c3015283ae6b 100644 --- a/compute/zgemm.c +++ b/compute/zgemm.c @@ -114,8 +114,7 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -283,8 +282,7 @@ int CHAMELEON_zgemm( cham_trans_t transA, cham_trans_t transB, int M, int N, int * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c index f365fc5106713605e8a485b5193412494c95e51b..9ccd619c9fd7e6672f9f412a1c6974f81d4a436c 100644 --- a/compute/zgeqrf.c +++ b/compute/zgeqrf.c @@ -55,9 +55,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -164,8 +163,7 @@ int CHAMELEON_zgeqrf( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c index 36cb65d2ad8e391e8b6cd16f425b18e93ea9bf98..d83e3f447bde001a9c2286087179cfc0555a8f8e 100644 --- a/compute/zgeqrf_param.c +++ b/compute/zgeqrf_param.c @@ -59,9 +59,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an 
illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -173,8 +172,7 @@ int CHAMELEON_zgeqrf_param( const libhqr_tree_t *qrtree, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c index ee2d2bc6d43636e7da152fd1b39ed0e91ebaa803..7af82e43f8b38bc7ea5ae41db9b15aaf99b1e31f 100644 --- a/compute/zgeqrs.c +++ b/compute/zgeqrs.c @@ -62,9 +62,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -185,8 +184,7 @@ int CHAMELEON_zgeqrs( int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c index 15a5ff0d64df8e8dc001aee59844fef3fcf34e40..7fe000a501eb4af3283bf31ea6d8827fccabda9b 100644 --- a/compute/zgeqrs_param.c +++ b/compute/zgeqrs_param.c @@ -58,9 +58,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * 
@retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -182,8 +181,7 @@ int CHAMELEON_zgeqrs_param( const libhqr_tree_t *qrtree, int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgesv_incpiv.c b/compute/zgesv_incpiv.c index 6dd3073f6016555191f353a3b30492edc10743a7..275b6dc6a0c7a20ebda02dcb1e1efd37d79e0e5c 100644 --- a/compute/zgesv_incpiv.c +++ b/compute/zgesv_incpiv.c @@ -67,10 +67,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. * ******************************************************************************* @@ -189,9 +188,8 @@ int CHAMELEON_zgesv_incpiv( int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. 
* ******************************************************************************* diff --git a/compute/zgesv_nopiv.c b/compute/zgesv_nopiv.c index 7dbf73caabdf784a30cdca8628235525ff7a4b5a..f7dfbb88070893e20d28f90e6c43ac0d71b79131 100644 --- a/compute/zgesv_nopiv.c +++ b/compute/zgesv_nopiv.c @@ -66,10 +66,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. * ******************************************************************************* @@ -179,9 +178,8 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. 
* ******************************************************************************* diff --git a/compute/zgesvd.c b/compute/zgesvd.c index a9ba03d418296d6db8f7c78c0277f58f93be690d..5b3cf9bd6f58c122ec99132e04d46a9f42d0d808 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -129,9 +129,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -318,8 +317,7 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, * ******************************************************************************* * - * @return - * \return CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgetrf_incpiv.c b/compute/zgetrf_incpiv.c index f990126ebc1a351e7164c4c3cebb114aa3d4c422..273b247f3fc0131d4193b72a483059dbd283d2ec 100644 --- a/compute/zgetrf_incpiv.c +++ b/compute/zgetrf_incpiv.c @@ -56,10 +56,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, and division by zero will occur * if it is used to solve a system of equations. 
* @@ -166,9 +165,8 @@ int CHAMELEON_zgetrf_incpiv( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, and division by zero will occur * if it is used to solve a system of equations. * diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c index f99d3d9e14cb1fbc75cd51bc579011ea0516ebeb..0e1004c432ccb7dd8b1692bdad1098438b2e4703 100644 --- a/compute/zgetrf_nopiv.c +++ b/compute/zgetrf_nopiv.c @@ -50,10 +50,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been * completed, but the factor U is exactly singular, and division * by zero will occur if it is used to solve a system of * equations. @@ -151,9 +150,8 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, and division by zero will occur * if it is used to solve a system of equations. 
* diff --git a/compute/zgetrs_incpiv.c b/compute/zgetrs_incpiv.c index f0cf32cb5195efd4b9409ff39dc9d7953c6ea7c1..8d9aa36302f170407efaa5fccc21902c27448520 100644 --- a/compute/zgetrs_incpiv.c +++ b/compute/zgetrs_incpiv.c @@ -69,9 +69,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \return <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -194,8 +193,7 @@ int CHAMELEON_zgetrs_incpiv( cham_trans_t trans, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgetrs_nopiv.c b/compute/zgetrs_nopiv.c index fb8ac0722c3e2026689d09cc70abb1cdc4488eeb..33b3cf70b835d2fa212016ef6c662282c3659d1f 100644 --- a/compute/zgetrs_nopiv.c +++ b/compute/zgetrs_nopiv.c @@ -64,9 +64,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \return <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -181,8 +180,7 @@ int CHAMELEON_zgetrs_nopiv( cham_trans_t trans, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git 
a/compute/zheevd.c b/compute/zheevd.c index 7f1a8b497ae733f1364976b1fdd5ad061b4e7b01..1291e43a80216b90109c21eeb83e346e5c3acdcf 100644 --- a/compute/zheevd.c +++ b/compute/zheevd.c @@ -79,10 +79,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if INFO = i, the algorithm failed to converge; i + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if INFO = i, the algorithm failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. * @@ -219,10 +218,9 @@ int CHAMELEON_zheevd( cham_job_t jobz, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if INFO = i, the algorithm failed to converge; i + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if INFO = i, the algorithm failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. 
* diff --git a/compute/zhemm.c b/compute/zhemm.c index fd968b42d8cb8069db027fb448e6ab938fcad41e..43f123975694bd2edc2be179c9e958d23b816f2b 100644 --- a/compute/zhemm.c +++ b/compute/zhemm.c @@ -90,8 +90,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -248,8 +247,7 @@ int CHAMELEON_zhemm( cham_side_t side, cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zher2k.c b/compute/zher2k.c index 216de17a5501bcedbc327c89f45dc46eee803c3c..fc8a746a376d15afaf21cdc045316bcc4f355a05 100644 --- a/compute/zher2k.c +++ b/compute/zher2k.c @@ -92,8 +92,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -249,8 +248,7 @@ int CHAMELEON_zher2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zherk.c b/compute/zherk.c index 13e59ce4ca1d7b8e0d1dccfd2eb2a93eeb0598d8..ff3b21ddc6932f6e277d5d5b738d7c0480278a69 100644 --- a/compute/zherk.c +++ b/compute/zherk.c @@ -82,8 +82,7 @@ * ******************************************************************************* * - * @return - * 
\retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -225,8 +224,7 @@ int CHAMELEON_zherk( cham_uplo_t uplo, cham_trans_t trans, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zhetrd.c b/compute/zhetrd.c index 0815e1dd689c39e660a729c133efdade2487e92a..f0686a16cdbf90f06c084cd75225b3fdf921c3c5 100644 --- a/compute/zhetrd.c +++ b/compute/zhetrd.c @@ -92,10 +92,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if INFO = i, the algorithm failed to converge; i + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if INFO = i, the algorithm failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. * @@ -245,10 +244,9 @@ int CHAMELEON_zhetrd( cham_job_t jobz, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if INFO = i, the algorithm failed to converge; i + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if INFO = i, the algorithm failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. 
* diff --git a/compute/zlacpy.c b/compute/zlacpy.c index 7bd1696375142e99fbc9e0ab282fddcba63b39db..73ad779e636e58cb542c07635388783e609d29c1 100644 --- a/compute/zlacpy.c +++ b/compute/zlacpy.c @@ -180,8 +180,7 @@ int CHAMELEON_zlacpy( cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlange.c b/compute/zlange.c index 522c8c204369a4042b34ae17b2f592177e924883..b1e9269d2a95b1543fb5a4e13876dcaeb6e5e7ef 100644 --- a/compute/zlange.c +++ b/compute/zlange.c @@ -66,8 +66,7 @@ * ******************************************************************************* * - * @return - * \retval the norm described above. + * @return the norm described above. * ******************************************************************************* * @@ -176,8 +175,7 @@ double CHAMELEON_zlange(cham_normtype_t norm, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlanhe.c b/compute/zlanhe.c index e2dad154ce1362a68acc633a3f8d9ae682748855..50f3d1f921accf201cea42a8ee2844fd7e0a6ed5 100644 --- a/compute/zlanhe.c +++ b/compute/zlanhe.c @@ -66,8 +66,7 @@ * ******************************************************************************* * - * @return - * \retval the norm described above. + * @return the norm described above. 
* ******************************************************************************* * @@ -180,8 +179,7 @@ double CHAMELEON_zlanhe(cham_normtype_t norm, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlansy.c b/compute/zlansy.c index dc9b1236e34d55b0b3ccae8ea3f7cbf19ec2e9c8..c7e39a45357ba69b1119368df1955988887c29bc 100644 --- a/compute/zlansy.c +++ b/compute/zlansy.c @@ -66,8 +66,7 @@ * ******************************************************************************* * - * @return - * \retval the norm described above. + * @return the norm described above. * ******************************************************************************* * @@ -180,8 +179,7 @@ double CHAMELEON_zlansy(cham_normtype_t norm, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlantr.c b/compute/zlantr.c index 6721a9b9f722346c8d137349360d4f76fb3a141c..005fea133c568a2ab996587efccd6a60f281ec38 100644 --- a/compute/zlantr.c +++ b/compute/zlantr.c @@ -78,8 +78,7 @@ * ******************************************************************************* * - * @return - * \retval the norm described above. + * @return the norm described above. 
* ******************************************************************************* * @@ -202,8 +201,7 @@ double CHAMELEON_zlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlascal.c b/compute/zlascal.c index bae3815fbab3de093be482f78e24623f54b163a4..0d0ff18b6c932510fc00752cc10438fdd852d443 100644 --- a/compute/zlascal.c +++ b/compute/zlascal.c @@ -57,8 +57,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -168,8 +167,7 @@ int CHAMELEON_zlascal( cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlaset.c b/compute/zlaset.c index 7001e66a2a328fbd57839a9c03b0082eaf86ac7f..0ab77a34e5d420f76400e57667f3ac7eb7926416 100644 --- a/compute/zlaset.c +++ b/compute/zlaset.c @@ -167,8 +167,7 @@ int CHAMELEON_zlaset( cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlauum.c b/compute/zlauum.c index 9907d0b08e91b7e53bbc51dcd98b0e7b1ec4f042..254eb2b18c531218a96dff303b4e98ecfe5c5c5d 100644 --- a/compute/zlauum.c +++ 
b/compute/zlauum.c @@ -58,9 +58,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -165,8 +164,7 @@ int CHAMELEON_zlauum( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zplghe.c b/compute/zplghe.c index 3fd07d51100f262c74bf4f886a5015e0ce41ab0b..ceb0a138bd2ea9895e133a76657a65d2a3ff5750 100644 --- a/compute/zplghe.c +++ b/compute/zplghe.c @@ -56,9 +56,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -160,8 +159,7 @@ int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zplgsy.c b/compute/zplgsy.c index 809e2a224489c7b3c7f613eb769e310f03b6682f..ff033d819c0d41ba443b941afdeef091fb8152be 100644 --- a/compute/zplgsy.c +++ b/compute/zplgsy.c @@ -56,9 +56,8 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -160,8 +159,7 @@ int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zplrnt.c b/compute/zplrnt.c index 3e15ea36a04665199694ec993ddcb95feab5003c..56a3cedaff0d5af567215579cfb88f2a1d4485c3 100644 --- a/compute/zplrnt.c +++ b/compute/zplrnt.c @@ -49,9 +49,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -150,8 +149,7 @@ int CHAMELEON_zplrnt( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zposv.c b/compute/zposv.c index 055f17e887d210b4aaa2b931c12e7017e2ef8a5d..668fec3c5d9c5dfd0a2ba1211204364b6d9dda77 100644 --- a/compute/zposv.c +++ b/compute/zposv.c @@ -75,10 +75,9 @@ * ******************************************************************************* * - * @return - * 
\retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. * ******************************************************************************* @@ -206,9 +205,8 @@ int CHAMELEON_zposv( cham_uplo_t uplo, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. * ******************************************************************************* diff --git a/compute/zpotrf.c b/compute/zpotrf.c index bb84853375351a679a23519f2d6624f9fb434139..d7054e42d3b2d31854a6b09351dda7ff268df65b 100644 --- a/compute/zpotrf.c +++ b/compute/zpotrf.c @@ -62,10 +62,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. 
* ******************************************************************************* @@ -175,9 +174,8 @@ int CHAMELEON_zpotrf( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. * ******************************************************************************* diff --git a/compute/zpotri.c b/compute/zpotri.c index d903bda64a2f1188ea6e83f8a68b90676e6db7e8..2de905c8d664c2814c121a58f3385c9182e24399 100644 --- a/compute/zpotri.c +++ b/compute/zpotri.c @@ -53,10 +53,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the (i,i) element of the factor U or L is + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the (i,i) element of the factor U or L is * zero, and the inverse could not be computed. * ******************************************************************************* @@ -162,9 +161,8 @@ int CHAMELEON_zpotri( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, the leading minor of order i of A is not + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not * positive definite, so the factorization could not be * completed, and the solution has not been computed. 
* diff --git a/compute/zpotrimm.c b/compute/zpotrimm.c index 0f3d8146ebbd28882bfe7722aee8c5b80bd05bb3..ca57f496200bb6ac750c79d2ec45c87283b18cc0 100644 --- a/compute/zpotrimm.c +++ b/compute/zpotrimm.c @@ -53,10 +53,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the (i,i) element of the factor U or L is + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the (i,i) element of the factor U or L is * zero, and the inverse could not be computed. * ******************************************************************************* @@ -184,9 +183,8 @@ int CHAMELEON_zpotrimm( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, the leading minor of order i of A is not + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not * positive definite, so the factorization could not be * completed, and the solution has not been computed. 
* diff --git a/compute/zpotrs.c b/compute/zpotrs.c index 3e242d114c0701a73d321e21da6d4fd20d0fda8e..7cce83910f3f4373b2399bcc0b0aa3c27c261ce1 100644 --- a/compute/zpotrs.c +++ b/compute/zpotrs.c @@ -61,9 +61,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -183,8 +182,7 @@ int CHAMELEON_zpotrs( cham_uplo_t uplo, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsymm.c b/compute/zsymm.c index 4a64f907ed3df914ec54976ad7c5ed418e034fb3..13221e335beb7e5f6a33024501cb139a0fa02d05 100644 --- a/compute/zsymm.c +++ b/compute/zsymm.c @@ -90,8 +90,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -248,8 +247,7 @@ int CHAMELEON_zsymm( cham_side_t side, cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsyr2k.c b/compute/zsyr2k.c index 9ef35214280cefa021a263699a7a851272a7f8be..0fe3e6f9e7e06c833b80c8bff071ab04066e1195 100644 --- a/compute/zsyr2k.c +++ b/compute/zsyr2k.c @@ -92,8 +92,7 
@@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -249,8 +248,7 @@ int CHAMELEON_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsyrk.c b/compute/zsyrk.c index e1c6db98671b0b7919c34cea3bddb6c8a088bb38..91f4627b8eee038f2770768a258f06d6aa79cf0d 100644 --- a/compute/zsyrk.c +++ b/compute/zsyrk.c @@ -82,8 +82,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -225,8 +224,7 @@ int CHAMELEON_zsyrk( cham_uplo_t uplo, cham_trans_t trans, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsysv.c b/compute/zsysv.c index 5b40d66e5f272b524dd80567afda064408b51fe6..baf78e90ecda60750ffa178d8e1481d810972f54 100644 --- a/compute/zsysv.c +++ b/compute/zsysv.c @@ -76,9 +76,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * 
******************************************************************************* * @@ -203,8 +202,7 @@ int CHAMELEON_zsysv( cham_uplo_t uplo, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsytrf.c b/compute/zsytrf.c index 508f93a9aae8998f848c233aa945020f3aa9dd27..44ea078c64fa69630a5b1d894e81c589c71bfe26 100644 --- a/compute/zsytrf.c +++ b/compute/zsytrf.c @@ -57,10 +57,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. 
* ******************************************************************************* @@ -169,8 +168,7 @@ int CHAMELEON_zsytrf( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsytrs.c b/compute/zsytrs.c index 90256661b285ce9c880934f41d71fd02afbb4968..4eeb3d7d307756cdad813137b96e16947b892c6e 100644 --- a/compute/zsytrs.c +++ b/compute/zsytrs.c @@ -63,9 +63,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -182,8 +181,7 @@ int CHAMELEON_zsytrs( cham_uplo_t uplo, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztile.c b/compute/ztile.c index d95a729b7bb430b4fe99fcee7275fe4ef2055252..3cafc9b7b3eec2522daf4c8f54cbd021fa85f0d0 100644 --- a/compute/ztile.c +++ b/compute/ztile.c @@ -45,8 +45,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -122,8 +121,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c index 4be3c82932a45caa1861f42068ca2d2b53c73616..b9d07b870a69e6c3c1aa19b4cd89d7fbfad77399 100644 --- a/compute/ztpgqrt.c +++ b/compute/ztpgqrt.c @@ -115,9 +115,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -258,8 +257,7 @@ int CHAMELEON_ztpgqrt( int M, int N, int K, int L, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c index 8847d9235b61c7920351f710d6c3ce8c7c48d1f0..04e7ddfa6d575afd06920ecad135650ded12f045 100644 --- a/compute/ztpqrt.c +++ b/compute/ztpqrt.c @@ -110,9 +110,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -235,8 +234,7 @@ int CHAMELEON_ztpqrt( int M, int N, int L, * ******************************************************************************* * - * @return - * \retval 
CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztradd.c b/compute/ztradd.c index f5f2d82172781483fb423282e6974dfb7611b25a..b5e85ec81d06231a65b0a53e681c172bdedb76c0 100644 --- a/compute/ztradd.c +++ b/compute/ztradd.c @@ -81,8 +81,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -227,8 +226,7 @@ int CHAMELEON_ztradd( cham_uplo_t uplo, cham_trans_t trans, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztrmm.c b/compute/ztrmm.c index 96ef0f7e3bcc393d796e0dc7bbe852b3365df85a..3380900f601a27ba11d54afa31da69a86c1b4ca3 100644 --- a/compute/ztrmm.c +++ b/compute/ztrmm.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -249,8 +248,7 @@ int CHAMELEON_ztrmm( cham_side_t side, cham_uplo_t uplo, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztrsm.c 
b/compute/ztrsm.c index abcdf8e9eb89aeeef8270ed779bdbdb85d311585..cc76ab7bd91ca283c3eebeea5528b1d3e0a4b69e 100644 --- a/compute/ztrsm.c +++ b/compute/ztrsm.c @@ -83,9 +83,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -247,8 +246,7 @@ int CHAMELEON_ztrsm( cham_side_t side, cham_uplo_t uplo, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztrsmpl.c b/compute/ztrsmpl.c index dd7859cf82554b3a9a8cf508f2dda6aecf076ef1..2cac2da6252e50183a4770655e6ebd419dd63910 100644 --- a/compute/ztrsmpl.c +++ b/compute/ztrsmpl.c @@ -61,9 +61,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -179,8 +178,7 @@ int CHAMELEON_ztrsmpl( int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztrtri.c b/compute/ztrtri.c index cb19dffa9c01b4fd3fb2297a4f03dff0aaaa2a39..6a2f8f3210b028f87b1231cd23078e22e70445ac 
100644 --- a/compute/ztrtri.c +++ b/compute/ztrtri.c @@ -61,10 +61,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, A(i,i) is exactly zero. The triangular + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, A(i,i) is exactly zero. The triangular * matrix is singular and its inverse can not be computed. * ******************************************************************************* @@ -182,9 +181,8 @@ int CHAMELEON_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, A(i,i) is exactly zero. The triangular + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, A(i,i) is exactly zero. The triangular * matrix is singular and its inverse can not be computed. 
* ******************************************************************************* diff --git a/compute/zunglq.c b/compute/zunglq.c index 41015464634f910376b21d6d3f9df91dd83d4371..ef284b8619fea77d6fc180752f6601f515fe027f 100644 --- a/compute/zunglq.c +++ b/compute/zunglq.c @@ -62,9 +62,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -181,8 +180,7 @@ int CHAMELEON_zunglq( int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c index e6d36954211c8a931a7d8e9aa2b96a55c1953557..09f40a29fe07d4cd5d6a1b2f5db697b3004b95d9 100644 --- a/compute/zunglq_param.c +++ b/compute/zunglq_param.c @@ -60,9 +60,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -180,8 +179,7 @@ int CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful
exit * ******************************************************************************* * diff --git a/compute/zungqr.c b/compute/zungqr.c index c51539616760f3c45ceecf2b1f631846fc5443fc..6ae056b2d64175bc6b890652f73184d7b135ac98 100644 --- a/compute/zungqr.c +++ b/compute/zungqr.c @@ -62,9 +62,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -180,8 +179,7 @@ int CHAMELEON_zungqr( int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c index 5b46d66ff7626ee058fb7119d0ee344728509a3e..9ed032da7ee145eed609138f24acd5a42c902ae6 100644 --- a/compute/zungqr_param.c +++ b/compute/zungqr_param.c @@ -60,9 +60,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -181,8 +180,7 @@ int CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * 
******************************************************************************* * diff --git a/compute/zunmlq.c b/compute/zunmlq.c index f460e12e4b353fa1746f03a9bfd4b9706e8f564b..f3948bf3992c474a41095820a1e388a8d8826c2c 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -86,9 +86,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -234,8 +233,7 @@ int CHAMELEON_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c index 4c0a72358b357b1c3e83253c9dc0844606e2bea4..46372cef9aeb0fcd0fe41d70558f503a9ce55378 100644 --- a/compute/zunmlq_param.c +++ b/compute/zunmlq_param.c @@ -86,9 +86,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -233,8 +232,7 @@ int CHAMELEON_zunmlq_param( const libhqr_tree_t *qrtree, cham_side_t side, cham_ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * 
******************************************************************************* * diff --git a/compute/zunmqr.c b/compute/zunmqr.c index 6271ed98eeedbcb23bb88ff909ad303fa8d70c42..78be51f52c3054b5a2aa6b7703e580f8a425adbe 100644 --- a/compute/zunmqr.c +++ b/compute/zunmqr.c @@ -88,9 +88,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -236,8 +235,7 @@ int CHAMELEON_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c index 5674ba090c9ceb0c581b383993b58a8ec0fb6573..434c16a049a445988a238130ed15dc1e5dd5944b 100644 --- a/compute/zunmqr_param.c +++ b/compute/zunmqr_param.c @@ -89,9 +89,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -239,8 +238,7 @@ int CHAMELEON_zunmqr_param( const libhqr_tree_t *qrtree, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * 
******************************************************************************* * diff --git a/control/async.c b/control/async.c index 5e65c2b092ef64edb8819099cd261986a09a7fd6..55351f6b61fa207817377075de94b67c0fc1a6d8 100644 --- a/control/async.c +++ b/control/async.c @@ -86,8 +86,7 @@ int chameleon_sequence_wait(CHAM_context_t *chamctxt, RUNTIME_sequence_t *sequen * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Sequence_Create(RUNTIME_sequence_t **sequence) @@ -117,8 +116,7 @@ int CHAMELEON_Sequence_Create(RUNTIME_sequence_t **sequence) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Sequence_Destroy(RUNTIME_sequence_t *sequence) @@ -152,8 +150,7 @@ int CHAMELEON_Sequence_Destroy(RUNTIME_sequence_t *sequence) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Sequence_Wait(RUNTIME_sequence_t *sequence) @@ -190,8 +187,7 @@ int CHAMELEON_Sequence_Wait(RUNTIME_sequence_t *sequence) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Sequence_Flush(RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) diff --git a/control/auxiliary.c b/control/auxiliary.c index 032dc06846ead2ffbbc4139d50470ba3536c27ba..5f90b85afc4a93d915ca8f2a9ae80830226341c2 100644 --- a/control/auxiliary.c +++ b/control/auxiliary.c @@ -139,8 +139,7 @@ int chameleon_tune(cham_tasktype_t func, int M, int N, int NRHS) * 
****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro) @@ -180,8 +179,7 @@ int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro) * ****************************************************************************** * - * @return - * \retval Element size in bytes + * @return Element size in bytes * */ int CHAMELEON_Element_Size(int type) @@ -209,8 +207,7 @@ int CHAMELEON_Element_Size(int type) * ****************************************************************************** * - * @return - * \retval MPI rank + * @return MPI rank * */ int CHAMELEON_My_Mpi_Rank(void) diff --git a/control/context.c b/control/context.c index 881abe9746eae4441b2ba387dfc11474970c76e7..fa0dcd2502c795edde63b2a73153ae263db1b15b 100644 --- a/control/context.c +++ b/control/context.c @@ -123,8 +123,7 @@ int chameleon_context_destroy(){ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Enable(int option) @@ -192,8 +191,7 @@ int CHAMELEON_Enable(int option) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Disable(int option) @@ -256,8 +254,7 @@ int CHAMELEON_Disable(int option) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Set( int param, int value ) @@ -350,8 +347,7 @@ int CHAMELEON_Set( int param, int value ) * ******************************************************************************* * - * @return - * \retval
CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Get(int param, int *value) diff --git a/control/control.c b/control/control.c index 8a8de87173c63ad1998baa0ee00e31fcd9df0110..08765ca2da91ba886eb73945ec38be5a657b08e4 100644 --- a/control/control.c +++ b/control/control.c @@ -154,8 +154,7 @@ int __chameleon_finalize(void) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Pause(void) @@ -178,8 +177,7 @@ int CHAMELEON_Pause(void) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Resume(void) @@ -201,8 +199,7 @@ int CHAMELEON_Resume(void) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Distributed_start(void) @@ -224,8 +221,7 @@ int CHAMELEON_Distributed_start(void) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Distributed_stop(void) @@ -294,8 +290,7 @@ int CHAMELEON_Comm_rank() * ****************************************************************************** * - * @return - * \retval The number of CPU workers started + * @return The number of CPU workers started * */ int CHAMELEON_GetThreadNbr( ) diff --git a/control/descriptor.c b/control/descriptor.c index 06e52cdec39845cccc8f57af889fd1ee9f3e5c3a..f32800a2847a943f18c70ea6546c1bc125d3e9bb 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -413,8 +413,7 @@ int chameleon_desc_check(const CHAM_desc_t *desc) *
****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, @@ -476,8 +475,7 @@ int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, @@ -560,8 +558,7 @@ int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz, @@ -621,8 +618,7 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz, diff --git a/control/tile.c b/control/tile.c index 960c88f5d198248c8458127d8bc8efc07466c301..560f5dd7d88ab0fe034a68a62be1f704e7b1a119 100644 --- a/control/tile.c +++ b/control/tile.c @@ -44,8 +44,7 @@ * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Lapack_to_Tile(void *Af77, int LDA, CHAM_desc_t *A) @@ -86,8 +85,7 @@ int 
CHAMELEON_Lapack_to_Tile(void *Af77, int LDA, CHAM_desc_t *A) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Tile_to_Lapack(CHAM_desc_t *A, void *Af77, int LDA) diff --git a/control/workspace.c b/control/workspace.c index 8039447fbd09b0a93610ae1a2344eaf8198ddc1c..4a8b078e37fc12e95a4e3fd100490534c107f5d1 100644 --- a/control/workspace.c +++ b/control/workspace.c @@ -138,8 +138,7 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Dealloc_Workspace(CHAM_desc_t **desc) diff --git a/control/workspace_z.c b/control/workspace_z.c index 6009bac50438fc4002894ae79acc03be7d038441..732d86fe0ba2054fb08ffcaaabd0ded9cd711cc0 100644 --- a/control/workspace_z.c +++ b/control/workspace_z.c @@ -45,8 +45,7 @@ * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgeev(int N, CHAM_desc_t **descT, int p, int q) { @@ -70,8 +69,7 @@ int CHAMELEON_Alloc_Workspace_zgeev(int N, CHAM_desc_t **descT, int p, int q) { * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgehrd(int N, CHAM_desc_t **descT, int p, int q) { @@ -97,8 +95,7 @@ int CHAMELEON_Alloc_Workspace_zgehrd(int N, CHAM_desc_t **descT, int p, int q) { * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval 
CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgebrd(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -125,8 +122,7 @@ int CHAMELEON_Alloc_Workspace_zgebrd(int M, int N, CHAM_desc_t **descT, int p, i * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgels(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -159,8 +155,7 @@ int CHAMELEON_Alloc_Workspace_zgels(int M, int N, CHAM_desc_t **descT, int p, in * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgels_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -193,8 +188,7 @@ int CHAMELEON_Alloc_Workspace_zgels_Tile(int M, int N, CHAM_desc_t **descT, int * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgeqrf(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -227,8 +221,7 @@ int CHAMELEON_Alloc_Workspace_zgeqrf(int M, int N, CHAM_desc_t **descT, int p, i * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgeqrf_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -261,8 +254,7 @@ int CHAMELEON_Alloc_Workspace_zgeqrf_Tile(int M, int N, CHAM_desc_t **descT, int * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgelqf(int M, int 
N, CHAM_desc_t **descT, int p, int q) { @@ -294,8 +286,7 @@ int CHAMELEON_Alloc_Workspace_zgelqf(int M, int N, CHAM_desc_t **descT, int p, i * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgelqf_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -324,8 +315,7 @@ int CHAMELEON_Alloc_Workspace_zgelqf_Tile(int M, int N, CHAM_desc_t **descT, int * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgesv_incpiv(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) { @@ -354,8 +344,7 @@ int CHAMELEON_Alloc_Workspace_zgesv_incpiv(int N, CHAM_desc_t **descL, int **IPI * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgesv_incpiv_Tile(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) @@ -388,8 +377,7 @@ int CHAMELEON_Alloc_Workspace_zgesv_incpiv_Tile(int N, CHAM_desc_t **descL, int * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgesvd(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -421,8 +409,7 @@ int CHAMELEON_Alloc_Workspace_zgesvd(int M, int N, CHAM_desc_t **descT, int p, i * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ****************************************************************************** * @@ -456,8 +443,7 @@ int 
CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, i * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv_Tile(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) { @@ -488,8 +474,7 @@ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv_Tile(int N, CHAM_desc_t **descL, int * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zheev(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -520,8 +505,7 @@ int CHAMELEON_Alloc_Workspace_zheev(int M, int N, CHAM_desc_t **descT, int p, in * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zheevd(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -552,8 +536,7 @@ int CHAMELEON_Alloc_Workspace_zheevd(int M, int N, CHAM_desc_t **descT, int p, i * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zhegv(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -584,8 +567,7 @@ int CHAMELEON_Alloc_Workspace_zhegv(int M, int N, CHAM_desc_t **descT, int p, in * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zhegvd(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -616,8 +598,7 @@ int CHAMELEON_Alloc_Workspace_zhegvd(int M, int N, CHAM_desc_t **descT, int p, 
i * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zhetrd(int M, int N, CHAM_desc_t **descT, int p, int q) { diff --git a/coreblas/compute/core_zaxpy.c b/coreblas/compute/core_zaxpy.c index d3477032aaabbf4b96e086d2e70c6dc1bb8f92f4..a982aaafe5b017f84f650dda4f138f3559e0d2e4 100644 --- a/coreblas/compute/core_zaxpy.c +++ b/coreblas/compute/core_zaxpy.c @@ -47,9 +47,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgeadd.c b/coreblas/compute/core_zgeadd.c index a85bec68ad82791840cb6bbb4c16f18ec4c64f13..5afb5a770667100d1a96e0fc53c346686d9b39c5 100644 --- a/coreblas/compute/core_zgeadd.c +++ b/coreblas/compute/core_zgeadd.c @@ -71,9 +71,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_zgelqt.c b/coreblas/compute/core_zgelqt.c index 7a2a74ca07a08a234c95db214485f12c46097f42..7793a76dfe48317139724030174871e59c7a64d9 100644 --- a/coreblas/compute/core_zgelqt.c +++ b/coreblas/compute/core_zgelqt.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval 
<0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgeqrt.c b/coreblas/compute/core_zgeqrt.c index 76fcfdfc2ea9e9a273cd70a2e0a78bdfc3c6d26f..ab568186669fc4080cda78d59012ac33a97d0871 100644 --- a/coreblas/compute/core_zgeqrt.c +++ b/coreblas/compute/core_zgeqrt.c @@ -83,9 +83,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgesplit.c b/coreblas/compute/core_zgesplit.c index 5255442c522f938f0218b74ab7e7497268723aaf..0f30ae8161a168e94b1741644cf3c37aa0329178 100644 --- a/coreblas/compute/core_zgesplit.c +++ b/coreblas/compute/core_zgesplit.c @@ -56,9 +56,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgessm.c b/coreblas/compute/core_zgessm.c index 9757800cd92f38fe1ce1a4a977f8e77d0d357412..c395a30ff5e614c6c88c61c8a914a06f2e748255 100644 --- a/coreblas/compute/core_zgessm.c +++ b/coreblas/compute/core_zgessm.c @@ -68,9 +68,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgessq.c b/coreblas/compute/core_zgessq.c index 55bbf091f25b2a1f5a2a8e8c4116cd12ac54f3d3..e6462f97966c1394983610070496b454f7d8046b 100644 --- 
a/coreblas/compute/core_zgessq.c +++ b/coreblas/compute/core_zgessq.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval -k, the k-th argument had an illegal value * */ int CORE_zgessq(int M, int N, diff --git a/coreblas/compute/core_zgetf2_nopiv.c b/coreblas/compute/core_zgetf2_nopiv.c index 91c313430f30aa8a1edcb395a5cb44de7789d3c3..18836b6e1a0d87d398ef64ddd2cbe80b89b4ccd3 100644 --- a/coreblas/compute/core_zgetf2_nopiv.c +++ b/coreblas/compute/core_zgetf2_nopiv.c @@ -58,10 +58,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/coreblas/compute/core_zgetrf_incpiv.c b/coreblas/compute/core_zgetrf_incpiv.c index b47084b3cbf23a4ccad1ccdef1974a7c9dd2e0c7..b1355e645a0a767645327463db1cc7cf5091123f 100644 --- a/coreblas/compute/core_zgetrf_incpiv.c +++ b/coreblas/compute/core_zgetrf_incpiv.c @@ -71,10 +71,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. 
The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/coreblas/compute/core_zgetrf_nopiv.c b/coreblas/compute/core_zgetrf_nopiv.c index b7661ba5c260dc0352e6f2fd5024aa31cb86d56c..fbd34a12877458bcb8308b4c0e3a994444574162 100644 --- a/coreblas/compute/core_zgetrf_nopiv.c +++ b/coreblas/compute/core_zgetrf_nopiv.c @@ -60,10 +60,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/coreblas/compute/core_zherfb.c b/coreblas/compute/core_zherfb.c index d3653d55b79a6e3010bbae702ccb7180e7e3a6b3..d1f952bf7b42fef74f00c61935bf0f85f4d9c751 100644 --- a/coreblas/compute/core_zherfb.c +++ b/coreblas/compute/core_zherfb.c @@ -85,9 +85,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_zhessq.c b/coreblas/compute/core_zhessq.c index 250962aba64c50ab0a611a318a6028ec43cdbe90..d5b9685156af9af0237d6768f9b6bc1eb5db092f 100644 --- a/coreblas/compute/core_zhessq.c +++ b/coreblas/compute/core_zhessq.c @@ -88,9 +88,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zlascal.c b/coreblas/compute/core_zlascal.c index 645bc6714621667debb2747082412450139d8c1d..50654a63b36686a27506209695fc1830f6628f65 100644 --- a/coreblas/compute/core_zlascal.c +++ b/coreblas/compute/core_zlascal.c @@ -52,9 +52,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int diff --git a/coreblas/compute/core_zlatro.c b/coreblas/compute/core_zlatro.c index 2bdcbfc311199b37303e07beccd1bbfc61a8fd4c..c22ac72ab946c069e3e9845b4af485f3216f032f 100644 --- a/coreblas/compute/core_zlatro.c +++ 
b/coreblas/compute/core_zlatro.c @@ -72,9 +72,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_zpamm.c b/coreblas/compute/core_zpamm.c index 35c8e049015a9aed8e55bf27b397d078717db1fe..2dd190e9c350ef64723628844e10c1b945f9e6f0 100644 --- a/coreblas/compute/core_zpamm.c +++ b/coreblas/compute/core_zpamm.c @@ -174,9 +174,8 @@ static inline int CORE_zpamm_w(cham_side_t side, cham_trans_t trans, cham_uplo_t * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int diff --git a/coreblas/compute/core_zparfb.c b/coreblas/compute/core_zparfb.c index a359402d6b90c8aa0484e1d1b587c413060561c4..05d07f72e7b0aba13efc8efce861aac50426a963 100644 --- a/coreblas/compute/core_zparfb.c +++ b/coreblas/compute/core_zparfb.c @@ -132,9 +132,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ /* This kernel is never traced so return type on previous line for convert2eztrace.pl script */ diff --git a/coreblas/compute/core_zpemv.c b/coreblas/compute/core_zpemv.c index 62213c723b142898fc571557e49a7c95788521e9..6b8fc9ed644fa092084b7eeeb85aad7601c9e942 100644 --- a/coreblas/compute/core_zpemv.c +++ b/coreblas/compute/core_zpemv.c @@ 
-113,9 +113,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zssssm.c b/coreblas/compute/core_zssssm.c index 87d18d295cb546114672a6c4682b3810791870fe..ef5bd6a1708848be54c7b4c7efd0e744b3b5dbb3 100644 --- a/coreblas/compute/core_zssssm.c +++ b/coreblas/compute/core_zssssm.c @@ -91,9 +91,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zsyssq.c b/coreblas/compute/core_zsyssq.c index a2c19544b05dc575c9e818fdd1c520c52e0dbdd5..8bce64cec2667154c21e464b0cbc0dd3dcc6407a 100644 --- a/coreblas/compute/core_zsyssq.c +++ b/coreblas/compute/core_zsyssq.c @@ -88,9 +88,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztplqt.c b/coreblas/compute/core_ztplqt.c index b2794dc6d5a5ad66de31889744430873bce2ac90..e80f80a963ec421613fd16367b6e92a0f274c70e 100644 --- a/coreblas/compute/core_ztplqt.c +++ b/coreblas/compute/core_ztplqt.c @@ -77,9 +77,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS 
successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int CORE_ztplqt( int M, int N, int L, int IB, diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c index 5909f19eed9ae021590445237dfe9a5dcdc76e0d..6584e2ba54661d60358ef7d69d00fe8d75f1fd16 100644 --- a/coreblas/compute/core_ztpmqrt.c +++ b/coreblas/compute/core_ztpmqrt.c @@ -129,9 +129,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztpqrt.c b/coreblas/compute/core_ztpqrt.c index ddfbb49ab5948f5a933bd513290ac1e64a6aa457..a251bed84768c8ab8830b81d10c84f2cde64b36a 100644 --- a/coreblas/compute/core_ztpqrt.c +++ b/coreblas/compute/core_ztpqrt.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int CORE_ztpqrt( int M, int N, int L, int IB, diff --git a/coreblas/compute/core_ztradd.c b/coreblas/compute/core_ztradd.c index 831ad069eedc3939b66730b8ab3f2e736a746824..3242ae53a19b2eced1e97631a74c702a77e054f1 100644 --- a/coreblas/compute/core_ztradd.c +++ b/coreblas/compute/core_ztradd.c @@ -74,9 +74,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_ztrssq.c 
b/coreblas/compute/core_ztrssq.c index f01e63663267cb0c12529416ef7597c214078bc5..61cc2994dbb82cdf124d45164c0df56aa784edee 100644 --- a/coreblas/compute/core_ztrssq.c +++ b/coreblas/compute/core_ztrssq.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztslqt.c b/coreblas/compute/core_ztslqt.c index da5b27078042e1436a1f628612ae003d13a17be5..156429d2b372243d73d75cbbef92c60cdf7a6d90 100644 --- a/coreblas/compute/core_ztslqt.c +++ b/coreblas/compute/core_ztslqt.c @@ -94,9 +94,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c index a9324fa04fac4b04ae9676a59570b9f1ff4bf922..c2238aed6ba9d5555c05331c69171b88967d7f2f 100644 --- a/coreblas/compute/core_ztsmlq.c +++ b/coreblas/compute/core_ztsmlq.c @@ -121,9 +121,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztsmlq_hetra1.c b/coreblas/compute/core_ztsmlq_hetra1.c index ff712331781ce4679dc6331ff100a5f11b750fe6..fc0a5abda742d86602cc2da3a8c2ca85ea36d80c 100644 --- a/coreblas/compute/core_ztsmlq_hetra1.c +++ b/coreblas/compute/core_ztsmlq_hetra1.c @@ -108,9 +108,8 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_ztsmqr.c b/coreblas/compute/core_ztsmqr.c index e4f6815814d4022358593c37b987885bdb0ccff4..aeb35c924e887f29309a987d3add5c63270fe4b2 100644 --- a/coreblas/compute/core_ztsmqr.c +++ b/coreblas/compute/core_ztsmqr.c @@ -121,9 +121,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztsmqr_hetra1.c b/coreblas/compute/core_ztsmqr_hetra1.c index cfba422e72ed6aac96fb75d6fc4f65aeffe5df05..40dcf927085d15a332da64e322e3b1992506116b 100644 --- a/coreblas/compute/core_ztsmqr_hetra1.c +++ b/coreblas/compute/core_ztsmqr_hetra1.c @@ -110,9 +110,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_ztsqrt.c b/coreblas/compute/core_ztsqrt.c index 7564c4edffed7e8d96a2687b6a4354f13747efe9..3bbbd8f1b2028ac449a1581d4946d51518e1d93e 100644 --- a/coreblas/compute/core_ztsqrt.c +++ b/coreblas/compute/core_ztsqrt.c @@ -83,9 +83,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, 
the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztstrf.c b/coreblas/compute/core_ztstrf.c index c0f5c9ecac6766b558262394314dcf1ddc642fe7..6f03a2664bb5c0956668aafddd09ecf23992b03b 100644 --- a/coreblas/compute/core_ztstrf.c +++ b/coreblas/compute/core_ztstrf.c @@ -84,10 +84,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/coreblas/compute/core_zttlqt.c b/coreblas/compute/core_zttlqt.c index db12242e334aadbea103eb05fc209cfce5450ae5..b331b28712a74cfe2c641a4094b6679efbf1233c 100644 --- a/coreblas/compute/core_zttlqt.c +++ b/coreblas/compute/core_zttlqt.c @@ -95,9 +95,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c index 5b6ee0261ec8e920f4883847931526fc864ddf76..b2fd886918fbaa14379544f51c50b1e127c6d5f5 100644 --- a/coreblas/compute/core_zttmlq.c +++ b/coreblas/compute/core_zttmlq.c @@ -113,9 +113,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zttmqr.c b/coreblas/compute/core_zttmqr.c index 9342ecbe385fffbe86faa234e04f83a9f899341d..850f275993cc4f7880c6e0128737bbedef0fa4ee 100644 --- a/coreblas/compute/core_zttmqr.c +++ b/coreblas/compute/core_zttmqr.c @@ -112,9 +112,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zttqrt.c b/coreblas/compute/core_zttqrt.c index c024dc9595a60cd5cf61757e78b4ec3d6f7ef5b7..4f127334a50d46ce5a4ee08e6cddc059cf33fbf6 100644 --- a/coreblas/compute/core_zttqrt.c +++ b/coreblas/compute/core_zttqrt.c @@ -95,9 
+95,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zunmlq.c b/coreblas/compute/core_zunmlq.c index c7ac26f5577764a7818fb8c47a16497dc1414316..3f1593883c548b46777c8520e9c4de921c5542a4 100644 --- a/coreblas/compute/core_zunmlq.c +++ b/coreblas/compute/core_zunmlq.c @@ -105,9 +105,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zunmqr.c b/coreblas/compute/core_zunmqr.c index 59fb4c52591e570c78dc4a8b0d1a49175f52b980..347512a018f62e0d718671cfabf063371723ccc9 100644 --- a/coreblas/compute/core_zunmqr.c +++ b/coreblas/compute/core_zunmqr.c @@ -106,9 +106,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/eztrace_module/coreblas_eztrace_module b/coreblas/eztrace_module/coreblas_eztrace_module index dca1e36d9e89400b90540eabed8170c493390fb0..4a868819259eeaac815d188da438d85ed19db47e 100644 --- a/coreblas/eztrace_module/coreblas_eztrace_module +++ b/coreblas/eztrace_module/coreblas_eztrace_module @@ -1172,12 +1172,6 @@ int CORE_zlatro(int uplo, int trans, void *A, int LDA, void *B, int LDB); void CORE_zlauum(int uplo, int N, void *A, int LDA); -int CORE_zpamm(int op, int side, int storev, 
- int M, int N, int K, int L, - void *A1, int LDA1, - void *A2, int LDA2, - void *V, int LDV, - void *W, int LDW); int CORE_zparfb(int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, int L, void *A1, int LDA1, diff --git a/cudablas/compute/cuda_zgeadd.c b/cudablas/compute/cuda_zgeadd.c index d7f86784ccf595180b3cff5e8af42ca4710d40d9..e520dfb0a58c3be1032ff7bdc997e8d569f5bed8 100644 --- a/cudablas/compute/cuda_zgeadd.c +++ b/cudablas/compute/cuda_zgeadd.c @@ -72,9 +72,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int CUDA_zgeadd(cham_trans_t trans, diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index 4fa07c2b4cb4df1146f85baa3ad7340c7ad58b10..954f675703afab35b7a758fb8e4767bf92647612 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h @@ -38,23 +38,13 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options, int M, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int incA, const CHAM_desc_t *B, int Bm, int Bn, int incB ); +void INSERT_TASK_zbuild( const RUNTIME_option_t *options, + const CHAM_desc_t *A, int Am, int An, int lda, + void *user_data, void* user_build_callback ); void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); -void INSERT_TASK_zlascal( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An, int lda ); -void INSERT_TASK_zbrdalg( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int N, int NB, - 
const CHAM_desc_t *A, - const CHAM_desc_t *C, int Cm, int Cn, - const CHAM_desc_t *S, int Sm, int Sn, - int i, int j, int m, int grsiz, int BAND, - int *PCOL, int *ACOL, int *MCOL ); void INSERT_TASK_zgelqt( const RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -65,39 +55,6 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zgemm2( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zgemm_f2( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); -void INSERT_TASK_zgemm_p2( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAMELEON_Complex64_t **B, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zgemm_p2f1( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const 
CHAMELEON_Complex64_t **B, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1 ); -void INSERT_TASK_zgemm_p3( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t **C, int ldc ); void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -126,28 +83,6 @@ void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_nopiv( const RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, int iinfo ); -void INSERT_TASK_zgetrf_reclap( const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int *IPIV, - - cham_bool_t check_info, int iinfo, - int nbthread ); -void INSERT_TASK_zgetrf_rectil( const RUNTIME_option_t *options, - const CHAM_desc_t A, const CHAM_desc_t *Amn, int Amnm, int Amnn, int size, - int *IPIV, - - cham_bool_t check_info, int iinfo, - int nbthread ); -void INSERT_TASK_zgetrip( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA ); -void INSERT_TASK_zgetrip_f1( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA, - const CHAM_desc_t *fake, int fakem, int faken, int szeF, int paramF ); -void INSERT_TASK_zgetrip_f2( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2 ); void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options, cham_uplo_t 
uplo, int m, int n, int mb, @@ -159,16 +94,6 @@ void INSERT_TASK_zhemm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zhegst( const RUNTIME_option_t *options, - int itype, cham_uplo_t uplo, int N, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn, int LDB, - int iinfo ); -void INSERT_TASK_zherk( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - double alpha, const CHAM_desc_t *A, int Am, int An, int lda, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zher2k( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, @@ -181,6 +106,15 @@ void INSERT_TASK_zherfb( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *T, int Tm, int Tn, int ldt, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zherk( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + double alpha, const CHAM_desc_t *A, int Am, int An, int lda, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zhessq( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int mb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -196,10 +130,6 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options, void INSERT_TASK_zlange_max( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *B, int Bm, int Bn ); -void INSERT_TASK_zhessq( const RUNTIME_option_t *options, - cham_uplo_t uplo, 
int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); void INSERT_TASK_zlanhe( const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, const CHAM_desc_t *A, int Am, int An, int LDA, @@ -213,31 +143,18 @@ void INSERT_TASK_zlantr( const RUNTIME_option_t *options, int M, int N, int NB, const CHAM_desc_t *A, int Am, int An, int LDA, const CHAM_desc_t *B, int Bm, int Bn ); +void INSERT_TASK_zlascal( const RUNTIME_option_t *options, + cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int lda ); void INSERT_TASK_zlaset( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); + cham_uplo_t uplo, int n1, int n2, + CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, + const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); void INSERT_TASK_zlaset2( const RUNTIME_option_t *options, cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); -void INSERT_TASK_zlaswp( const RUNTIME_option_t *options, - int n, const CHAM_desc_t *A, int Am, int An, int lda, - int i1, int i2, int *ipiv, int inc ); -void INSERT_TASK_zlaswp_f2( const RUNTIME_option_t *options, - int n, const CHAM_desc_t *A, int Am, int An, int lda, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); -void INSERT_TASK_zlaswp_ontile( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, - int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel ); -void INSERT_TASK_zlaswp_ontile_f2( const RUNTIME_option_t *options, - const CHAM_desc_t descA, 
const CHAM_desc_t *A, int Am, int An, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); -void INSERT_TASK_zlaswpc_ontile( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, - int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel ); void INSERT_TASK_zlatro( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -254,17 +171,15 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, int bigM, int m0, int n0, unsigned long long int seed ); +void INSERT_TASK_zplssq( const RUNTIME_option_t *options, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, + const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ); +void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ); void INSERT_TASK_zpotrf( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo ); -void INSERT_TASK_zshift( const RUNTIME_option_t *options, - int s, int m, int n, int L, - CHAMELEON_Complex64_t *A ); -void INSERT_TASK_zshiftw( const RUNTIME_option_t *options, - int s, int cl, int m, int n, int L, - const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t *W ); void INSERT_TASK_zssssm( const RUNTIME_option_t *options, int m1, int n1, int m2, int n2, int k, int ib, int nb, const CHAM_desc_t *A1, int A1m, int A1n, int lda1, @@ -278,17 +193,17 @@ void INSERT_TASK_zsymm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, CHAMELEON_Complex64_t beta, const 
CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int LDB, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, const CHAM_desc_t *A, int Am, int An, int lda, @@ -297,13 +212,6 @@ void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, const CHAM_desc_t *A, int Am, int An, int lda, int iinfo ); -void INSERT_TASK_zswpab( const RUNTIME_option_t *options, - int i, int n1, int n2, - const CHAM_desc_t *A, int Am, int An, int szeA ); -void INSERT_TASK_zswptr_ontile( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *Aij, int Aijm, int Aijn, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *Akk, int Akkm, int Akkn, int ldak ); void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, int m, int n, int l, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -328,14 +236,6 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_ztrdalg( const 
RUNTIME_option_t *options, - cham_uplo_t uplo, - int N, int NB, - const CHAM_desc_t *A, - const CHAM_desc_t *C, int Cm, int Cn, - const CHAM_desc_t *S, int Sm, int Sn, - int i, int j, int m, int grsiz, int BAND, - int *PCOL, int *ACOL, int *MCOL ); void INSERT_TASK_ztradd( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, @@ -349,11 +249,6 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); -void INSERT_TASK_ztrmm_p2( const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t **B, int ldb ); void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, int m, int n, int nb, @@ -390,18 +285,6 @@ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, const CHAM_desc_t *L, int Lm, int Ln, int ldl, int *IPIV, cham_bool_t check_info, int iinfo ); -void INSERT_TASK_zpamm( const RUNTIME_option_t *options, - int op, cham_side_t side, cham_store_t storev, - int m, int n, int k, int l, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *W, int Wm, int Wn, int ldw ); -void INSERT_TASK_zplssq( const RUNTIME_option_t *options, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, - const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ); -void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ); void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, cham_side_t side, 
cham_trans_t trans, int m, int n, int ib, int nb, int k, @@ -414,10 +297,6 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *T, int Tm, int Tn, int ldt, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zbuild( const RUNTIME_option_t *options, - const CHAM_desc_t *A, int Am, int An, int lda, - void *user_data, void* user_build_callback ); - /** * Keep these insert_task for retro-compatibility diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index cabe559c252719de9501e0f95093de147fa9ec18..309dea8965cd4b14209eb23ece2be43f9df3f3b7 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -28,7 +28,6 @@ # List of codelets required by all runtimes # ----------------------------------------- set(CODELETS_ZSRC - codelets/codelet_ztile_zero.c codelets/codelet_zasum.c ################## # BLAS 1 diff --git a/runtime/openmp/codelets/codelet_zgeadd.c b/runtime/openmp/codelets/codelet_zgeadd.c index 1d18ff18f83a3d4f7f6343e92ec16246d265ef96..2ceeb81594eafa6be29085ef33bc2caa8ed80a4f 100644 --- a/runtime/openmp/codelets/codelet_zgeadd.c +++ b/runtime/openmp/codelets/codelet_zgeadd.c @@ -31,7 +31,7 @@ * * @ingroup CORE_CHAMELEON_Complex64_t * - * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * @brief Adds two general matrices together as in PBLAS pzgeadd. 
* * B <- alpha * op(A) + beta * B, * @@ -75,15 +75,14 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c index 3341a8f01532ea77a638799a7557c7414cb1299f..8dd282d6259b0e684ceca96576d09af9e54eab3f 100644 --- a/runtime/openmp/codelets/codelet_zgelqt.c +++ b/runtime/openmp/codelets/codelet_zgelqt.c @@ -84,9 +84,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/runtime/openmp/codelets/codelet_zgemm.c b/runtime/openmp/codelets/codelet_zgemm.c index 68aec8de49aa853b5ca3aa8bd9d54ddd9ca2eb30..b2737c3884e7180e8a5fe1e7454e204956dc63af 100644 --- a/runtime/openmp/codelets/codelet_zgemm.c +++ b/runtime/openmp/codelets/codelet_zgemm.c @@ -43,7 +43,7 @@ void INSERT_TASK_zgemm(const 
RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); -#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn]) +#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) CORE_zgemm(transA, transB, m, n, k, alpha, ptrA, lda, diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c index 6428375b2e9b2ef6c20e6bbc8f803e986e4a1ef2..f8bf811afa1756f8265e7ad73b1694742018d1b1 100644 --- a/runtime/openmp/codelets/codelet_zgeqrt.c +++ b/runtime/openmp/codelets/codelet_zgeqrt.c @@ -85,9 +85,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/runtime/openmp/codelets/codelet_zgessm.c b/runtime/openmp/codelets/codelet_zgessm.c index cd24a4ac0a6e9e1fd04743d12ac110dc4334f4f8..2ed15696ad7e68429bca9fc1da16b0f34b986870 100644 --- a/runtime/openmp/codelets/codelet_zgessm.c +++ b/runtime/openmp/codelets/codelet_zgessm.c @@ -68,9 +68,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ @@ -83,6 +82,6 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrD = 
RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn); CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0:Dm*Dn]) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0]) depend(inout:ptrA[0]) CORE_zgessm(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda); } diff --git a/runtime/openmp/codelets/codelet_zgetrf.c b/runtime/openmp/codelets/codelet_zgetrf.c index d7cc9fe75179ed236a3a0a80246ff01fc6962b30..ab9869f7ebc4d5ddc54e49cc670c9218ed39bea7 100644 --- a/runtime/openmp/codelets/codelet_zgetrf.c +++ b/runtime/openmp/codelets/codelet_zgetrf.c @@ -34,6 +34,6 @@ void INSERT_TASK_zgetrf(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); int info = 0; -#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0]) +#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0]) CORE_zgetrf( m, n, ptrA, lda, IPIV, &info ); } diff --git a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c index 20b5e92d3cd22eb6d323769e5f7d65d020858452..9f26a7064a8597129deaa5fdbdbb3801cfcc355c 100644 --- a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c @@ -73,10 +73,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. 
The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. @@ -92,6 +91,6 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); int info = 0; -#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0]) CORE_zgetrf_incpiv(m, n, ib, ptrA, lda, IPIV, &info); } diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c index 5f26b76e9b5ba139af2d25c21d0302811f5e6baf..829b5473e869d928085743c108776db2527ecbac 100644 --- a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c @@ -63,10 +63,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
@@ -80,6 +79,6 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); int info = 0; -#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0]) CORE_zgetrf_nopiv(m, n, ib, ptrA, lda, &info); } diff --git a/runtime/openmp/codelets/codelet_zhemm.c b/runtime/openmp/codelets/codelet_zhemm.c index 331459e479f3d6330b793a5833d7f1e1575dcca0..4d632655a2eb547813ee747c40ab2f341a341bee 100644 --- a/runtime/openmp/codelets/codelet_zhemm.c +++ b/runtime/openmp/codelets/codelet_zhemm.c @@ -43,7 +43,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); -#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn]) +#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) CORE_zhemm(side, uplo, m, n, alpha, ptrA, lda, diff --git a/runtime/openmp/codelets/codelet_zhessq.c b/runtime/openmp/codelets/codelet_zhessq.c index 46cd0f5c7037b64610dd3d2aa7cfc501101114ad..409d413ddfd9a78deb7182f1caca79d63f8dddb2 100644 --- a/runtime/openmp/codelets/codelet_zhessq.c +++ b/runtime/openmp/codelets/codelet_zhessq.c @@ -31,6 +31,6 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); -#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrScaleSum[0:SCALESUMSQm*SCALESUMSQn]) depend(inout:ptrA[0:Am*An]) +#pragma omp 
task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0]) CORE_zhessq( uplo, n, ptrA, lda, &ptrScaleSum[0], &ptrScaleSum[1] ); } diff --git a/runtime/openmp/codelets/codelet_zlacpy.c b/runtime/openmp/codelets/codelet_zlacpy.c index 74e420c31c178c45a96f4fd57ac4d05751ad4abd..44ea300eb4f8675400ae2de3cd4055cbdc5104b3 100644 --- a/runtime/openmp/codelets/codelet_zlacpy.c +++ b/runtime/openmp/codelets/codelet_zlacpy.c @@ -33,10 +33,10 @@ * @ingroup CORE_CHAMELEON_Complex64_t * */ -void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A + displA, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B + displB, CHAMELEON_Complex64_t, Bm, Bn); @@ -44,12 +44,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, CORE_zlacpy(uplo, m, n, ptrA, lda, ptrB, ldb); } -void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, - 0, A, Am, An, lda, - 0, B, Bm, Bn, ldb ); + 0, A, Am, An, lda, + 0, B, Bm, Bn, ldb ); } diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c index b65a938fec6ae4ac078210d7befc9820fc0c7bb9..26a024cd0a9b0d8ffd3aa35ead905e44d8e820df 100644 ---
a/runtime/openmp/codelets/codelet_zlag2c.c +++ b/runtime/openmp/codelets/codelet_zlag2c.c @@ -31,13 +31,24 @@ * @ingroup CORE_CHAMELEON_Complex64_t * */ -void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); - CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); -#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn]) + CHAMELEON_Complex32_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn); +#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) CORE_zlag2c( m, n, ptrA, lda, ptrB, ldb); } + +void INSERT_TASK_clag2z( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex32_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_clag2z( m, n, ptrA, lda, ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_zlascal.c b/runtime/openmp/codelets/codelet_zlascal.c index d579bb39ae0f47273c14c301b5f7fd05f9d665ec..2aa990418ec83c10e2e6e2658a96636b8bee3cf7 100644 --- a/runtime/openmp/codelets/codelet_zlascal.c +++ b/runtime/openmp/codelets/codelet_zlascal.c @@ -51,9 +51,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal 
value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ @@ -64,6 +63,6 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0]) CORE_zlascal(uplo, m, n, alpha, ptrA, lda); } diff --git a/runtime/openmp/codelets/codelet_zlatro.c b/runtime/openmp/codelets/codelet_zlatro.c index 6f7ba5fa5bae73976ee6b81a4cdc609c2cf4962d..ec50bb9cfb1ab86d62e8e0bbf5800ef979b6cbb6 100644 --- a/runtime/openmp/codelets/codelet_zlatro.c +++ b/runtime/openmp/codelets/codelet_zlatro.c @@ -33,14 +33,14 @@ * @ingroup CORE_CHAMELEON_Complex64_t * */ -void INSERT_TASK_zlatro(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlatro( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); -#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn]) +#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) CORE_zlatro(uplo, trans, m, n, ptrA, lda, ptrB, ldb); } diff --git a/runtime/openmp/codelets/codelet_zlauum.c b/runtime/openmp/codelets/codelet_zlauum.c index 7ab7c8b99de5c9e5646562eb5b3ab3ebc32b0209..70030fc19d84452c4f5b0d67cec643ce08f61fee 100644 --- 
a/runtime/openmp/codelets/codelet_zlauum.c +++ b/runtime/openmp/codelets/codelet_zlauum.c @@ -38,6 +38,6 @@ void INSERT_TASK_zlauum(const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0]) CORE_zlauum(uplo, n, ptrA, lda); } diff --git a/runtime/openmp/codelets/codelet_zplghe.c b/runtime/openmp/codelets/codelet_zplghe.c index 06e890a459444492cf59384f5eaebe65f469a92e..a785b19c2bf0dcaf26e8f667cb2dc4e1340c063a 100644 --- a/runtime/openmp/codelets/codelet_zplghe.c +++ b/runtime/openmp/codelets/codelet_zplghe.c @@ -28,13 +28,11 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_z.h" -/* INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ - void INSERT_TASK_zplghe( const RUNTIME_option_t *options, - double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) + double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) CORE_zplghe( bump, m, n, ptrA, lda, bigM, m0, n0, seed ); } diff --git a/runtime/openmp/codelets/codelet_zplgsy.c b/runtime/openmp/codelets/codelet_zplgsy.c index 5269d527617a8efac61a2f841401af2b9fcb03aa..4a3cea2f68787ab24e4b0aec63158198954f3f6f 100644 --- a/runtime/openmp/codelets/codelet_zplgsy.c +++ b/runtime/openmp/codelets/codelet_zplgsy.c @@ -28,11 +28,9 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_z.h" 
-/* INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */ - void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, - CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) + CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); #pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) diff --git a/runtime/openmp/codelets/codelet_zplrnt.c b/runtime/openmp/codelets/codelet_zplrnt.c index 35cb6300c55c4d8fcf3276f952767fd24321981b..139f44c8bce94b472019fa6beb9e5ae639799666 100644 --- a/runtime/openmp/codelets/codelet_zplrnt.c +++ b/runtime/openmp/codelets/codelet_zplrnt.c @@ -28,11 +28,9 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_z.h" -/* INSERT_TASK_zplrnt - Generate a tile for random matrix. 
*/ - void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) + int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); #pragma omp task firstprivate(m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) diff --git a/runtime/openmp/codelets/codelet_zplssq.c b/runtime/openmp/codelets/codelet_zplssq.c index 7ee45f66de44b77073ee4714ea1e7ebf5d9504cc..cec083dca98e2fe5d69829bb72feb0d2a54d7173 100644 --- a/runtime/openmp/codelets/codelet_zplssq.c +++ b/runtime/openmp/codelets/codelet_zplssq.c @@ -29,7 +29,7 @@ * * @ingroup CORE_CHAMELEON_Complex64_t * - * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * @brief Compute sqrt( sum( a_ij ^ 2 ) ) = scl * sqrt(ssq) * * with scl and ssq such that * @@ -74,7 +74,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options, } void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) { CHAMELEON_Complex64_t *res = RTBLKADDR(RESULT, CHAMELEON_Complex64_t, RESULTm, RESULTn); diff --git a/runtime/openmp/codelets/codelet_zssssm.c b/runtime/openmp/codelets/codelet_zssssm.c index 38d9ad5e3c116a48251199d610ac494efa411f0b..db82b480eb690cad20be54444ed6afbaf0c67a37 100644 --- a/runtime/openmp/codelets/codelet_zssssm.c +++ b/runtime/openmp/codelets/codelet_zssssm.c @@ -91,9 +91,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ @@ -109,10 +108,8 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t
*options, CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrL1 = RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n); CHAMELEON_Complex64_t *ptrL2 = RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n); -#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV)\ - depend(inout:ptrA1[0])\ - depend(inout:ptrA2[0])\ - depend(in:ptrL1[0])\ - depend(in:ptrL2[0]) + +#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV) \ + depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrL1[0], ptrL2[0]) CORE_zssssm(m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrL1, ldl1, ptrL2, ldl2, IPIV); } diff --git a/runtime/openmp/codelets/codelet_zsymm.c b/runtime/openmp/codelets/codelet_zsymm.c index efe71b42578d7b5f71c1b3a3dc770705da213dbe..76d6ec7b70665be5f78253d7016984c8a2ecb132 100644 --- a/runtime/openmp/codelets/codelet_zsymm.c +++ b/runtime/openmp/codelets/codelet_zsymm.c @@ -41,7 +41,7 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); -#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn]) +#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) CORE_zsymm(side, uplo, m, n, alpha, ptrA, lda, diff --git a/runtime/openmp/codelets/codelet_zsyssq.c b/runtime/openmp/codelets/codelet_zsyssq.c index c2d69dc57523b0340e89253aec2e985eb78ee6ee..86b58eb001623830450f47c006f55ea01acee0b6 100644 --- a/runtime/openmp/codelets/codelet_zsyssq.c +++ b/runtime/openmp/codelets/codelet_zsyssq.c @@ -29,6 +29,6 @@ void 
INSERT_TASK_zsyssq( const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); -#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0:Am*An]) depend(inout:ptrSCALESUMSQ[0]) +#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0]) CORE_zsyssq( uplo, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1] ); } diff --git a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c index 1ebd1aa08878024b0379a6bdef55f06bdd48a0f0..73032cf982d18a8d41b9044ed8786fd5f27b80e5 100644 --- a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c +++ b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c @@ -32,6 +32,6 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options, int iinfo) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0]) CORE_zsytf2_nopiv(uplo, n, ptrA, lda); } diff --git a/runtime/openmp/codelets/codelet_ztile_zero.c b/runtime/openmp/codelets/codelet_ztile_zero.c deleted file mode 100644 index 96ef911bf329829e3f282b448b11f277ce114a27..0000000000000000000000000000000000000000 --- a/runtime/openmp/codelets/codelet_ztile_zero.c +++ /dev/null @@ -1,38 +0,0 @@ -/** - * - * @file openmp/codelet_ztile_zero.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. 
- * - *** - * - * @brief Chameleon ztile_zero StarPU codelet - * - * @version 1.0.0 - * @author Hatem Ltaief - * @author Mathieu Faverge - * @author Jakub Kurzak - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ - -#include "chameleon_openmp.h" -#include "chameleon/tasks_z.h" -#include "coreblas.h" -/** - * - */ -void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, - int X1, int X2, int Y1, int Y2, - const CHAM_desc_t *A, int Am, int An, int lda ) -{ - CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); - int x, y; - for (x = X1; x < X2; x++) - for (y = Y1; y < Y2; y++) - ptrA[lda*x+y] = 0.0; -} diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c index 4bb4f16f030a9f5aa58a4dc9e66f28cefc1b4a39..367e437a75308d151ee3c0dcb6c1e31242491071 100644 --- a/runtime/openmp/codelets/codelet_ztplqt.c +++ b/runtime/openmp/codelets/codelet_ztplqt.c @@ -20,12 +20,12 @@ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" -void -INSERT_TASK_ztplqt( const RUNTIME_option_t *options, - int M, int N, int L, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) + +void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); @@ -36,8 +36,7 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options, { CHAMELEON_Complex64_t work[ws_size]; - CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt); - + CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt ); CORE_ztplqt( M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt, work ); } diff --git 
a/runtime/openmp/codelets/codelet_ztpmlqt.c b/runtime/openmp/codelets/codelet_ztpmlqt.c index 543704822c5ddecca1c432a875ac357ace7c2a66..a5da0f533070ee0dc5416736ec409729f9b4c2dd 100644 --- a/runtime/openmp/codelets/codelet_ztpmlqt.c +++ b/runtime/openmp/codelets/codelet_ztpmlqt.c @@ -17,24 +17,25 @@ */ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" -void -INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + +void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); int ws_size = options->ws_wsize; + #pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) { - CHAMELEON_Complex64_t work[ws_size]; - CORE_ztpmlqt( side, trans, M, N, K, L, ib, - ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); + CHAMELEON_Complex64_t work[ws_size]; + CORE_ztpmlqt( side, trans, M, N, K, L, ib, + ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); } } diff --git a/runtime/openmp/codelets/codelet_ztpmqrt.c b/runtime/openmp/codelets/codelet_ztpmqrt.c index 
4f3262221eb3027fb82d1d2f93b9d8cd0ad09aeb..5378a2a5b5af0257991ca8ac6cb8736c8f898a00 100644 --- a/runtime/openmp/codelets/codelet_ztpmqrt.c +++ b/runtime/openmp/codelets/codelet_ztpmqrt.c @@ -17,20 +17,21 @@ */ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" -void -INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + +void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); int ws_size = options->ws_wsize; + #pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, nb, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) { CHAMELEON_Complex64_t tmp[ws_size]; diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c index 7381f6ebdc7682b4f2fc73c976d5e00f4cf21bf6..755de21bdd376553e9e22490be44c605f41c8c20 100644 --- a/runtime/openmp/codelets/codelet_ztpqrt.c +++ b/runtime/openmp/codelets/codelet_ztpqrt.c @@ -19,12 +19,12 @@ */ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" -void -INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, - int M, int N, int L, int ib, int nb, - const CHAM_desc_t 
*A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) + +void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); @@ -35,9 +35,8 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, { CHAMELEON_Complex64_t tmp[ws_size]; - CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt); - + CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt ); CORE_ztpqrt( M, N, L, ib, - ptrA, lda, ptrB, ldb, ptrT, ldt, tmp ); + ptrA, lda, ptrB, ldb, ptrT, ldt, tmp ); } } diff --git a/runtime/openmp/codelets/codelet_ztradd.c b/runtime/openmp/codelets/codelet_ztradd.c index 9a39aaf56d2ac9366c1e7c8c2986d678f9379db1..384ba192da8c797e51c62f3a279001842402275e 100644 --- a/runtime/openmp/codelets/codelet_ztradd.c +++ b/runtime/openmp/codelets/codelet_ztradd.c @@ -22,12 +22,13 @@ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" + /** ****************************************************************************** * * @ingroup CORE_CHAMELEON_Complex64_t * - * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd. 
* * B <- alpha * op(A) + beta * B, * @@ -77,18 +78,18 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_ztradd(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_ztradd( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + #pragma omp task firstprivate(uplo, trans, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) CORE_ztradd(uplo, trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb); } diff --git a/runtime/openmp/codelets/codelet_ztstrf.c b/runtime/openmp/codelets/codelet_ztstrf.c index cb612cb6fd9fa8a0667218b3f2ff70878964c4d1..4072b5d68998d0d497e77267ab899b6a27c55874 100644 --- a/runtime/openmp/codelets/codelet_ztstrf.c +++ b/runtime/openmp/codelets/codelet_ztstrf.c @@ -83,10 +83,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. 
The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c index 348b290a1973097fcebf2acac241916a7b5de906..0dd8f263dcb597e85a218a8577598e75369c0d49 100644 --- a/runtime/openmp/codelets/codelet_zunmlq.c +++ b/runtime/openmp/codelets/codelet_zunmlq.c @@ -105,9 +105,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c index 42765470397c5f2fa9feb7bb1e27a4b7019f83e4..ed40c42110b843ba304eea3f001df82cc3fd52fe 100644 --- a/runtime/openmp/codelets/codelet_zunmqr.c +++ b/runtime/openmp/codelets/codelet_zunmqr.c @@ -105,9 +105,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/runtime/parsec/codelets/codelet_zgeadd.c b/runtime/parsec/codelets/codelet_zgeadd.c index 7ad41db2f6756522128d93ba79292216fa7dbcaa..7d937857fb1b4b09c6501747397bdd32407029db 100644 --- a/runtime/parsec/codelets/codelet_zgeadd.c +++ b/runtime/parsec/codelets/codelet_zgeadd.c @@ -52,7 +52,7 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context, * * @ingroup INSERT_TASK_Complex64_t * - * 
INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * @brief Adds two general matrices together as in PBLAS pzgeadd. * * B <- alpha * op(A) + beta * B, * @@ -96,15 +96,14 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); diff --git a/runtime/parsec/codelets/codelet_zgelqt.c b/runtime/parsec/codelets/codelet_zgelqt.c index 4ef5b5b7a6dbe1f5d385a775e580d6a13199f082..32a7dfaa96e6c629f43c21fa22d85e63243a3264 100644 --- a/runtime/parsec/codelets/codelet_zgelqt.c +++ b/runtime/parsec/codelets/codelet_zgelqt.c @@ -76,9 +76,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ static inline int diff --git a/runtime/parsec/codelets/codelet_zgeqrt.c b/runtime/parsec/codelets/codelet_zgeqrt.c index 
53ac8ac042386c01281c123e0488819bb299ee90..3aaaf84cf85fe02c488c5a8f546130b0b92e0a02 100644 --- a/runtime/parsec/codelets/codelet_zgeqrt.c +++ b/runtime/parsec/codelets/codelet_zgeqrt.c @@ -77,9 +77,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ static inline int diff --git a/runtime/parsec/codelets/codelet_zgessm.c b/runtime/parsec/codelets/codelet_zgessm.c index a7f62dc4b8bf1ad8614abcfd4908f4f8960e6fa7..a4762cfa289b975da19f5065ed9988494a50e9dc 100644 --- a/runtime/parsec/codelets/codelet_zgessm.c +++ b/runtime/parsec/codelets/codelet_zgessm.c @@ -60,9 +60,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ static inline int diff --git a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c index 09ef6c4012089c7d4f54c2a46b61ff9ab82a6a55..55a1fe635d96c08e99211c3d31dfe8ff13242d53 100644 --- a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c @@ -65,10 +65,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. 
The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c index ab7f49bb183fa7d35b7884b0b0996ec6960f347d..0aadb3c900ca6fdee6ece700d9ac6c161fa9a461 100644 --- a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c @@ -58,10 +58,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/runtime/parsec/codelets/codelet_zlacpy.c b/runtime/parsec/codelets/codelet_zlacpy.c index 64c777e38e09d26d0b7efb26fbc7d1bfd4ed96ab..d79617ccb3e482fb35a23b53a0b81fe2894a5115 100644 --- a/runtime/parsec/codelets/codelet_zlacpy.c +++ b/runtime/parsec/codelets/codelet_zlacpy.c @@ -49,11 +49,10 @@ CORE_zlacpyx_parsec( parsec_execution_stream_t *context, } void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); parsec_dtd_taskpool_insert_task( @@ -71,12 +70,12 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, (void)nb; } -void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, - 0, A, Am, An, lda, - 0, B, Bm, Bn, ldb ); + 0, A, Am, An, lda, + 0, B, Bm, Bn, ldb ); } diff --git a/runtime/parsec/codelets/codelet_zpamm.c b/runtime/parsec/codelets/codelet_zpamm.c deleted file mode 100644 index 3d075b0143f6e7531db8819bd25e97d6f6656b00..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_zpamm.c +++ /dev/null @@ -1,224 +0,0 @@ -/** - * - * @file parsec/codelet_zpamm.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. 
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zpamm PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * ZPAMM performs one of the matrix-matrix operations - * - * LEFT RIGHT - * OP ChameleonW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) - * OP ChameleonA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V) - * - * where op( V ) is one of - * - * op( V ) = V or op( V ) = V**T or op( V ) = V**H, - * - * A1, A2 and W are general matrices, and V is: - * - * l = k: rectangle + triangle - * l < k: rectangle + trapezoid - * l = 0: rectangle - * - * Size of V, both rowwise and columnwise, is: - * - * ---------------------- - * side trans size - * ---------------------- - * left N M x K - * T K x M - * right N K x N - * T N x K - * ---------------------- - * - * LEFT (columnwise and rowwise): - * - * | K | | M | - * _ __________ _ _______________ _ - * | | | | | \ - * V: | | | V': |_____________|___\ K - * | | | M-L | | - * M | | | |__________________| _ - * |____| | _ - * \ | | | M - L | L | - * \ | | L - * _ \|____| _ - * - * RIGHT (columnwise and rowwise): - * - * | K | | N | - * _______________ _ _ __________ _ - * | | \ | | | - * V': |_____________|___\ N V: | | | - * | | | | | K-L - * |__________________| _ K | | | - * |____| | _ - * | K - L | L | \ | | - * \ | | L - * _ \|____| _ - * - * Arguments - * ========== - * - * @param[in] op - * - * OP specifies which operation to perform: - * - * @arg ChameleonW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) - * @arg ChameleonA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V) - * - * @param[in] side - * - * SIDE specifies whether op( V ) multiplies A2 - * or W from the left or right as follows: - * - * @arg ChamLeft : multiply op( V ) 
from the left - * OP ChameleonW : W = A1 + op(V) * A2 - * OP ChameleonA2 : A2 = A2 - op(V) * W - * - * @arg ChamRight : multiply op( V ) from the right - * OP ChameleonW : W = A1 + A2 * op(V) - * OP ChameleonA2 : A2 = A2 - W * op(V) - * - * @param[in] storev - * - * Indicates how the vectors which define the elementary - * reflectors are stored in V: - * - * @arg ChamColumnwise - * @arg ChamRowwise - * - * @param[in] M - * The number of rows of the A1, A2 and W - * If SIDE is ChamLeft, the number of rows of op( V ) - * - * @param[in] N - * The number of columns of the A1, A2 and W - * If SIDE is ChamRight, the number of columns of op( V ) - * - * @param[in] K - * If SIDE is ChamLeft, the number of columns of op( V ) - * If SIDE is ChamRight, the number of rows of op( V ) - * - * @param[in] L - * The size of the triangular part of V - * - * @param[in] A1 - * On entry, the M-by-N tile A1. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M). - * - * @param[in,out] A2 - * On entry, the M-by-N tile A2. - * On exit, if OP is ChameleonA2 A2 is overwritten - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M). - * - * @param[in] V - * The matrix V as described above. - * If SIDE is ChamLeft : op( V ) is M-by-K - * If SIDE is ChamRight: op( V ) is K-by-N - * - * @param[in] LDV - * The leading dimension of the array V. - * - * @param[in,out] W - * On entry, the M-by-N matrix W. - * On exit, W is overwritten either if OP is ChameleonA2 or ChameleonW. - * If OP is ChameleonA2, W is an input and is used as a workspace. - * - * @param[in] LDW - * The leading dimension of array WORK. 
- * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - - -/**/ - -static inline int -CORE_zpamm_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - int op; - cham_side_t side; - cham_store_t storev; - int M; - int N; - int K; - int L; - CHAMELEON_Complex64_t *A1; - int LDA1; - CHAMELEON_Complex64_t *A2; - int LDA2; - CHAMELEON_Complex64_t *V; - int LDV; - CHAMELEON_Complex64_t *W; - int LDW; - - parsec_dtd_unpack_args( - this_task, &op, &side, &storev, &M, &N, &K, &L, &A1, &LDA1, &A2, &LDA2, &V, &LDV, &W, &LDW ); - - CORE_zpamm( op, side, storev, M, N, K, L, A1, LDA1, A2, LDA2, V, LDV, W, LDW ); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void -INSERT_TASK_zpamm(const RUNTIME_option_t *options, - int op, cham_side_t side, cham_store_t storev, - int m, int n, int k, int l, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *W, int Wm, int Wn, int ldw) -{ - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_zpamm_parsec, options->priority, "pamm", - sizeof(int), &op, VALUE, - sizeof(int), &side, VALUE, - sizeof(int), &storev, VALUE, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &l, VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INPUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT, - 
sizeof(int), &ldv, VALUE, - PASSED_BY_REF, RTBLKADDR( W, CHAMELEON_Complex64_t, Wm, Wn ), chameleon_parsec_get_arena_index( W ) | INOUT, - sizeof(int), &ldw, VALUE, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_zplssq.c b/runtime/parsec/codelets/codelet_zplssq.c index ed1fde76fcd4d9fa51f1cd6a2e717f52b92f5792..99006c131417af6b554d8c312bf589bb0a3a4ef0 100644 --- a/runtime/parsec/codelets/codelet_zplssq.c +++ b/runtime/parsec/codelets/codelet_zplssq.c @@ -21,11 +21,35 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_z.h" +static inline int +CORE_zplssq_parsec( parsec_execution_stream_t *context, + parsec_task_t *this_task ) +{ + double *SCLSSQ_IN; + double *SCLSSQ_OUT; + + parsec_dtd_unpack_args( + this_task, &SCLSSQ_IN, &SCLSSQ_OUT ); + + assert( SCLSSQ_OUT[0] >= 0. ); + if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { + SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); + SCLSSQ_OUT[0] = SCLSSQ_IN[0]; + } else { + if ( SCLSSQ_OUT[0] > 0 ) { + SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); + } + } + + (void)context; + return PARSEC_HOOK_RETURN_DONE; +} + /** * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * @brief Compute sum( a_ij ^ 2 ) = scl * sqrt(ssq) * * with scl and ssq such that * @@ -52,33 +76,9 @@ * On exit, result contains scl * sqrt( ssq ) * */ -static inline int -CORE_zplssq_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - double *SCLSSQ_IN; - double *SCLSSQ_OUT; - - parsec_dtd_unpack_args( - this_task, &SCLSSQ_IN, &SCLSSQ_OUT ); - - assert( SCLSSQ_OUT[0] >= 0. 
); - if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { - SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); - SCLSSQ_OUT[0] = SCLSSQ_IN[0]; - } else { - if ( SCLSSQ_OUT[0] > 0 ) { - SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); - } - } - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - void INSERT_TASK_zplssq( const RUNTIME_option_t *options, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, - const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, + const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); @@ -91,7 +91,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options, static inline int CORE_zplssq2_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) + parsec_task_t *this_task ) { double *RESULT; @@ -105,7 +105,7 @@ CORE_zplssq2_parsec( parsec_execution_stream_t *context, } void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); diff --git a/runtime/parsec/codelets/codelet_ztile_zero.c b/runtime/parsec/codelets/codelet_ztile_zero.c deleted file mode 100644 index e07175cb16591730697aad13b721354c36e9982d..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_ztile_zero.c +++ /dev/null @@ -1,61 +0,0 @@ -/** - * - * @file parsec/codelet_ztile_zero.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. 
- * - *** - * - * @brief Chameleon ztile_zero PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_ztile_zero_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - int X1; - int X2; - int Y1; - int Y2; - CHAMELEON_Complex64_t *A; - int lda; - int x, y; - - parsec_dtd_unpack_args( - this_task, &X1, &X2, &Y1, &Y2, &A, &lda ); - - for (x = X1; x < X2; x++) - for (y = Y1; y < Y2; y++) - A[lda * x + y] = 0.0; - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, - int X1, int X2, int Y1, int Y2, - const CHAM_desc_t *A, int Am, int An, int lda ) -{ - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_ztile_zero_parsec, options->priority, "tile zero", - sizeof(int), &X1, VALUE, - sizeof(int), &X2, VALUE, - sizeof(int), &Y1, VALUE, - sizeof(int), &Y2, VALUE, - PASSED_BY_REF, RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY, - sizeof(int), &lda, VALUE, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_ztradd.c b/runtime/parsec/codelets/codelet_ztradd.c index b431f983a83695fb2c20a945c58246b21f380baa..267ac386a3658ff96a3f0e93e76dde86f06994d0 100644 --- a/runtime/parsec/codelets/codelet_ztradd.c +++ b/runtime/parsec/codelets/codelet_ztradd.c @@ -52,7 +52,7 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context, * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pztradd. + * @brief Adds two trapezoidal matrices together as in PBLAS pztradd. 
* * B <- alpha * op(A) + beta * B, * @@ -102,15 +102,14 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_ztradd(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_ztradd( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); diff --git a/runtime/quark/codelets/codelet_zgeadd.c b/runtime/quark/codelets/codelet_zgeadd.c index 16287953ca163d8ce1f341d8f6eb4c1a77f2f3b0..935bc0ba1394e243cb4cb2c922961f56a909149b 100644 --- a/runtime/quark/codelets/codelet_zgeadd.c +++ b/runtime/quark/codelets/codelet_zgeadd.c @@ -47,7 +47,7 @@ void CORE_zgeadd_quark(Quark *quark) * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * @brief Adds two general matrices together as in PBLAS pzgeadd. 
* * B <- alpha * op(A) + beta * B, * @@ -91,15 +91,14 @@ void CORE_zgeadd_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_GEADD; diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c index 240773c983156402febe2242d86d5a0b560bfeee..45db83e556754dd6e47c7b89ac8e72c24bdf6e43 100644 --- a/runtime/quark/codelets/codelet_zgelqt.c +++ b/runtime/quark/codelets/codelet_zgelqt.c @@ -100,9 +100,8 @@ void CORE_zgelqt_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c index 09ed24eef4662df09ea5e8fb59029ed8d8cf46df..33ad21a52ac116c7aec197d68eca08cf92dbd84a 100644 --- a/runtime/quark/codelets/codelet_zgeqrt.c +++ b/runtime/quark/codelets/codelet_zgeqrt.c 
@@ -101,9 +101,8 @@ void CORE_zgeqrt_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_zgessm.c b/runtime/quark/codelets/codelet_zgessm.c index d31d3dc9b1b70be57786881d8437dd6e94a65a0e..63b59a664ec8f384f65f594a0e3261a6c2c91206 100644 --- a/runtime/quark/codelets/codelet_zgessm.c +++ b/runtime/quark/codelets/codelet_zgessm.c @@ -86,9 +86,8 @@ void CORE_zgessm_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ void INSERT_TASK_zgessm(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_zgetrf_incpiv.c b/runtime/quark/codelets/codelet_zgetrf_incpiv.c index 7ba0886d1ca1b6a508aeae1181ba76b808e5b1aa..9b9d29a7f021be10bc6e86801c7cc5cb5c2b0568 100644 --- a/runtime/quark/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/quark/codelets/codelet_zgetrf_incpiv.c @@ -94,10 +94,9 @@ void CORE_zgetrf_incpiv_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. 
The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/runtime/quark/codelets/codelet_zgetrf_nopiv.c b/runtime/quark/codelets/codelet_zgetrf_nopiv.c index dfee169e63daebeb876f077bf65bd69b886c57b3..c7115e7d5df13822c36c69ea86352aee56cc96fd 100644 --- a/runtime/quark/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/quark/codelets/codelet_zgetrf_nopiv.c @@ -81,10 +81,9 @@ void CORE_zgetrf_nopiv_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c index 99a0dc89ab9d2eeee87d91d69a08670af7dfee46..8aa18403bf02087fb52e02508f596f7162433835 100644 --- a/runtime/quark/codelets/codelet_zlacpy.c +++ b/runtime/quark/codelets/codelet_zlacpy.c @@ -43,10 +43,10 @@ static inline void CORE_zlacpy_quark(Quark *quark) CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB); } -void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LACPY; @@ -63,12 +63,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, 0); } -void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, - 0, A, Am, An, lda, - 0, B, Bm, Bn, ldb ); + 0, A, Am, An, lda, + 0, B, Bm, Bn, ldb ); } diff --git a/runtime/quark/codelets/codelet_zpamm.c b/runtime/quark/codelets/codelet_zpamm.c deleted file mode 100644 index ba9de2a29acf452282bebbadfdb952a3581db85b..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_zpamm.c +++ /dev/null @@ -1,220 +0,0 @@ -/** - * - * @file quark/codelet_zpamm.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. 
All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zpamm Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2011-06-14 - * @precisions normal z -> c d s - * - */ -#include "coreblas/cblas.h" -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void -CORE_zpamm_quark(Quark *quark) -{ - int op; - cham_side_t side; - cham_store_t storev; - int M; - int N; - int K; - int L; - CHAMELEON_Complex64_t *A1; - int LDA1; - CHAMELEON_Complex64_t *A2; - int LDA2; - CHAMELEON_Complex64_t *V; - int LDV; - CHAMELEON_Complex64_t *W; - int LDW; - - quark_unpack_args_15(quark, op, side, storev, M, N, K, L, - A1, LDA1, A2, LDA2, V, LDV, W, LDW); - - CORE_zpamm( op, side, storev, M, N, K, L, A1, LDA1, A2, LDA2, V, LDV, W, LDW); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * ZPAMM performs one of the matrix-matrix operations - * - * LEFT RIGHT - * OP ChameleonW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) - * OP ChameleonA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V) - * - * where op( V ) is one of - * - * op( V ) = V or op( V ) = V**T or op( V ) = V**H, - * - * A1, A2 and W are general matrices, and V is: - * - * l = k: rectangle + triangle - * l < k: rectangle + trapezoid - * l = 0: rectangle - * - * Size of V, both rowwise and columnwise, is: - * - * ---------------------- - * side trans size - * ---------------------- - * left N M x K - * T K x M - * right N K x N - * T N x K - * ---------------------- - * - * LEFT (columnwise and rowwise): - * - * | K | | M | - * _ __________ _ _______________ _ - * | | | | | \ - * V: | | | V': |_____________|___\ K - * | | | M-L | | - * M | | | |__________________| _ - * 
|____| | _ - * \ | | | M - L | L | - * \ | | L - * _ \|____| _ - * - * RIGHT (columnwise and rowwise): - * - * | K | | N | - * _______________ _ _ __________ _ - * | | \ | | | - * V': |_____________|___\ N V: | | | - * | | | | | K-L - * |__________________| _ K | | | - * |____| | _ - * | K - L | L | \ | | - * \ | | L - * _ \|____| _ - * - * Arguments - * ========== - * - * @param[in] op - * - * OP specifies which operation to perform: - * - * @arg ChameleonW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) - * @arg ChameleonA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V) - * - * @param[in] side - * - * SIDE specifies whether op( V ) multiplies A2 - * or W from the left or right as follows: - * - * @arg ChamLeft : multiply op( V ) from the left - * OP ChameleonW : W = A1 + op(V) * A2 - * OP ChameleonA2 : A2 = A2 - op(V) * W - * - * @arg ChamRight : multiply op( V ) from the right - * OP ChameleonW : W = A1 + A2 * op(V) - * OP ChameleonA2 : A2 = A2 - W * op(V) - * - * @param[in] storev - * - * Indicates how the vectors which define the elementary - * reflectors are stored in V: - * - * @arg ChamColumnwise - * @arg ChamRowwise - * - * @param[in] M - * The number of rows of the A1, A2 and W - * If SIDE is ChamLeft, the number of rows of op( V ) - * - * @param[in] N - * The number of columns of the A1, A2 and W - * If SIDE is ChamRight, the number of columns of op( V ) - * - * @param[in] K - * If SIDE is ChamLeft, the number of columns of op( V ) - * If SIDE is ChamRight, the number of rows of op( V ) - * - * @param[in] L - * The size of the triangular part of V - * - * @param[in] A1 - * On entry, the M-by-N tile A1. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M). - * - * @param[in,out] A2 - * On entry, the M-by-N tile A2. - * On exit, if OP is ChameleonA2 A2 is overwritten - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M). - * - * @param[in] V - * The matrix V as described above. 
- * If SIDE is ChamLeft : op( V ) is M-by-K - * If SIDE is ChamRight: op( V ) is K-by-N - * - * @param[in] LDV - * The leading dimension of the array V. - * - * @param[in,out] W - * On entry, the M-by-N matrix W. - * On exit, W is overwritten either if OP is ChameleonA2 or ChameleonW. - * If OP is ChameleonA2, W is an input and is used as a workspace. - * - * @param[in] LDW - * The leading dimension of array WORK. - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void -INSERT_TASK_zpamm(const RUNTIME_option_t *options, - int op, cham_side_t side, cham_store_t storev, - int m, int n, int k, int l, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *W, int Wm, int Wn, int ldw) -{ - QUARK_Insert_Task(opt->quark, CORE_zpamm_quark, (Quark_Task_Flags*)opt, - sizeof(int), &op, VALUE, - sizeof(int), &side, VALUE, - sizeof(int), &storev, VALUE, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &l, VALUE, - sizeof(CHAMELEON_Complex64_t)*m*k, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INPUT, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*k*n, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*m*n, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), INPUT, - sizeof(int), &ldv, VALUE, - sizeof(CHAMELEON_Complex64_t)*m*n, RTBLKADDR(W, CHAMELEON_Complex64_t, Wm, Wn), INOUT, - sizeof(int), &ldw, VALUE, - 0); -} diff --git a/runtime/quark/codelets/codelet_zplssq.c b/runtime/quark/codelets/codelet_zplssq.c index 79067050a5b8094a95e7695f1cde016cd2f61709..14418c58fdf30c1c4fa35e71d892c9cceb000cde 100644 --- a/runtime/quark/codelets/codelet_zplssq.c +++ 
b/runtime/quark/codelets/codelet_zplssq.c @@ -46,7 +46,7 @@ void CORE_zplssq_quark(Quark *quark) * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * @brief Compute sum( a_ij ^ 2 ) = scl * sqrt(ssq) * * with scl and ssq such that * @@ -74,8 +74,8 @@ void CORE_zplssq_quark(Quark *quark) * */ void INSERT_TASK_zplssq( const RUNTIME_option_t *options, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, - const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, + const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); QUARK_Insert_Task(opt->quark, CORE_zplssq_quark, (Quark_Task_Flags*)opt, @@ -94,7 +94,7 @@ void CORE_zplssq2_quark(Quark *quark) } void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); QUARK_Insert_Task(opt->quark, CORE_zplssq2_quark, (Quark_Task_Flags*)opt, diff --git a/runtime/quark/codelets/codelet_zssssm.c b/runtime/quark/codelets/codelet_zssssm.c index ea18309646c645047a0f72efad7c343033474585..878b17a56d17bfa78b0d832d3990ef13867dd1c0 100644 --- a/runtime/quark/codelets/codelet_zssssm.c +++ b/runtime/quark/codelets/codelet_zssssm.c @@ -115,9 +115,8 @@ void CORE_zssssm_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ void INSERT_TASK_zssssm(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_ztile_zero.c b/runtime/quark/codelets/codelet_ztile_zero.c deleted file mode 100644 index 
68f52d47cecd4f1ec8e63bd392f396c51e768549..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_ztile_zero.c +++ /dev/null @@ -1,58 +0,0 @@ -/** - * - * @file quark/codelet_ztile_zero.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztile_zero Quark codelet - * - * @version 1.0.0 - * @author Hatem Ltaief - * @author Mathieu Faverge - * @author Jakub Kurzak - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_ztile_zero_quark(Quark *quark) -{ - int X1; - int X2; - int Y1; - int Y2; - CHAMELEON_Complex64_t *A; - int lda; - - int x, y; - - quark_unpack_args_6(quark, X1, X2, Y1, Y2, A, lda); - - for (x = X1; x < X2; x++) - for (y = Y1; y < Y2; y++) - A[lda*x+y] = 0.0; - -} - -void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, - int X1, int X2, int Y1, int Y2, - const CHAM_desc_t *A, int Am, int An, int lda ) -{ - quark_option_t *opt = (quark_option_t*)(options->schedopt); - QUARK_Insert_Task(opt->quark, CORE_ztile_zero_quark, (Quark_Task_Flags*)opt, - sizeof(int), &X1, VALUE, - sizeof(int), &X2, VALUE, - sizeof(int), &Y1, VALUE, - sizeof(int), &Y2, VALUE, - sizeof(CHAMELEON_Complex64_t)*A->bsiz, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), OUTPUT | LOCALITY, - sizeof(int), &lda, VALUE, - 0); -} diff --git a/runtime/quark/codelets/codelet_ztradd.c b/runtime/quark/codelets/codelet_ztradd.c index d18aa1db8bb9dde42532b5526fdd5121b4f11488..8c42160bcfc46948e83417150d49eb6bd292a137 100644 --- a/runtime/quark/codelets/codelet_ztradd.c +++ b/runtime/quark/codelets/codelet_ztradd.c @@ -46,7 +46,7 @@ void CORE_ztradd_quark(Quark *quark) * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_ztradd 
adds two trapezoidal matrices together as in PBLAS pzgeadd. + * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd. * * B <- alpha * op(A) + beta * B, * @@ -96,15 +96,14 @@ void CORE_ztradd_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_ztradd(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_ztradd( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_GEADD; diff --git a/runtime/quark/codelets/codelet_ztstrf.c b/runtime/quark/codelets/codelet_ztstrf.c index d44bb81a1e401d11f0ec0fab51a258490f45acdb..1c31704e225257ce1ca589a7b5c64957a78c0462 100644 --- a/runtime/quark/codelets/codelet_ztstrf.c +++ b/runtime/quark/codelets/codelet_ztstrf.c @@ -115,10 +115,9 @@ void CORE_ztstrf_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. 
The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c index f87e193ad5959e7f0f48b46ec8089bc76c7bade5..af020eaec18244930eda9dd4691b7e54a29d78cb 100644 --- a/runtime/quark/codelets/codelet_zunmlq.c +++ b/runtime/quark/codelets/codelet_zunmlq.c @@ -129,9 +129,8 @@ void CORE_zunmlq_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c index d78a12faadaa6f1b7b0fc746604ce49374f540cf..8407d8967bb53834c3ae219cf2429f7faea41c6a 100644 --- a/runtime/quark/codelets/codelet_zunmqr.c +++ b/runtime/quark/codelets/codelet_zunmqr.c @@ -129,9 +129,8 @@ void CORE_zunmqr_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zasum.c b/runtime/starpu/codelets/codelet_zasum.c index e8aa0aeca41392e2b01a298e82ef63886b61ad1b..6bfaf2c136f960c7cb7286f3eb81cae9c40fd263 100644 --- a/runtime/starpu/codelets/codelet_zasum.c +++ b/runtime/starpu/codelets/codelet_zasum.c @@ -22,10 +22,33 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_dzasum(const RUNTIME_option_t 
*options, - cham_store_t storev, cham_uplo_t uplo, int M, int N, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn) +#if !defined(CHAMELEON_SIMULATION) +static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) +{ + cham_store_t storev; + cham_uplo_t uplo; + int M; + int N; + CHAMELEON_Complex64_t *A; + int lda; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda); + CORE_dzasum(storev, uplo, M, N, A, lda, work); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func) + +void INSERT_TASK_dzasum( const RUNTIME_option_t *options, + cham_store_t storev, cham_uplo_t uplo, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet *codelet = &cl_zasum; void (*callback)(void*) = options->profiling ? 
cl_zasum_callback : NULL; @@ -51,27 +74,3 @@ void INSERT_TASK_dzasum(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) -{ - cham_store_t storev; - cham_uplo_t uplo; - int M; - int N; - CHAMELEON_Complex64_t *A; - int lda; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda); - CORE_dzasum(storev, uplo, M, N, A, lda, work); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index 0b70bb6f9f6ecf46599184cb56d4797c401383dd..5280eae12a4c72871b2db533b344689a0b8a100f 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -20,10 +20,32 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, - int M, CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An, int incA, - const CHAM_desc_t *B, int Bm, int Bn, int incB) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg) +{ + int M; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int incA; + CHAMELEON_Complex64_t *B; + int incB; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB); + CORE_zaxpy(M, alpha, A, incA, B, incB); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func) + +void INSERT_TASK_zaxpy( const RUNTIME_option_t *options, + int M, CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int incA, + const 
CHAM_desc_t *B, int Bm, int Bn, int incB ) { struct starpu_codelet *codelet = &cl_zaxpy; void (*callback)(void*) = options->profiling ? cl_zaxpy_callback : NULL; @@ -48,26 +70,3 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg) -{ - int M; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t *A; - int incA; - CHAMELEON_Complex64_t *B; - int incB; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB); - CORE_zaxpy(M, alpha, A, incA, B, incB); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c index 4aa45bf5bc9900b6f996e0cb275d4339268d83be..a2610a9bc8aa96b73a370c7703a2eefad17b90a9 100644 --- a/runtime/starpu/codelets/codelet_zbuild.c +++ b/runtime/starpu/codelets/codelet_zbuild.c @@ -27,9 +27,35 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_Complex64_t *A; + int ld; + void *user_data; + void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ; + int row_min, row_max, col_min, col_max; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback ); + + /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max] + * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran) + * and store it at the address 'buffer' with leading 
dimension 'ld' + */ + user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data); + +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func) + void INSERT_TASK_zbuild( const RUNTIME_option_t *options, - const CHAM_desc_t *A, int Am, int An, int lda, - void *user_data, void* user_build_callback ) + const CHAM_desc_t *A, int Am, int An, int lda, + void *user_data, void* user_build_callback ) { struct starpu_codelet *codelet = &cl_zbuild; @@ -61,30 +87,3 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_Complex64_t *A; - int ld; - void *user_data; - void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ; - int row_min, row_max, col_min, col_max; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback ); - - /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max] - * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran) - * and store it at the address 'buffer' with leading dimension 'ld' - */ - user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data); - -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index 7c296d750bc4fa32d425a877f1b8731389e4e41e..eb0adebe51e8fe29593cd50d8d59437a59f37778 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -24,12 +24,76 @@ #include "chameleon_starpu.h" #include 
"runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) +{ + cham_trans_t trans; + int M; + int N; + CHAMELEON_Complex64_t alpha; + const CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t beta; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); + CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); + return; +} + +#ifdef CHAMELEON_USE_CUBLAS_V2 +static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) +{ + cham_trans_t trans; + int M; + int N; + cuDoubleComplex alpha; + const cuDoubleComplex *A; + int lda; + cuDoubleComplex beta; + cuDoubleComplex *B; + int ldb; + + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb); + + RUNTIME_getStream( stream ); + + CUDA_zgeadd( + trans, + M, N, + &alpha, A, lda, + &beta, B, ldb, + stream); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); +#endif + + return; +} +#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */ +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +#if defined(CHAMELEON_USE_CUBLAS_V2) +CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC) +#else +CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) +#endif + /** ****************************************************************************** * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * @brief Adds two general matrices together as in PBLAS pzgeadd. 
* * B <- alpha * op(A) + beta * B, * @@ -73,15 +137,14 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_zgeadd; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; @@ -111,68 +174,3 @@ void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, (void)nb; } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) -{ - cham_trans_t trans; - int M; - int N; - CHAMELEON_Complex64_t alpha; - const CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t beta; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); - CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); - return; -} - -#ifdef CHAMELEON_USE_CUBLAS_V2 -static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) -{ - cham_trans_t trans; - int M; - int N; - cuDoubleComplex alpha; - const cuDoubleComplex *A; - int lda; - cuDoubleComplex beta; - cuDoubleComplex *B; - int ldb; - - A = (const cuDoubleComplex 
*)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb); - - RUNTIME_getStream( stream ); - - CUDA_zgeadd( - trans, - M, N, - &alpha, A, lda, - &beta, B, ldb, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -#if defined(CHAMELEON_USE_CUBLAS_V2) -CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC) -#else -CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index 8ffad6e1ada200916e25ef6e685898e1c8c53c08..abdf1954c9f9a5d587e5baf8360915fc160dbe58 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -112,9 +112,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 205da5e35aa7be306588b42be3471a9cc302cc6a..fed1350a71df6d7c1b0881dd92639b88e4363356 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zgemm(const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const 
CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zgemm; - void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &transB, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zgemm", -#endif - 0); -} - #if !defined(CHAMELEON_SIMULATION) static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) { @@ -144,3 +99,48 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zgemm(const RUNTIME_option_t *options, + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zgemm; + void 
(*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &transA, sizeof(int), + STARPU_VALUE, &transB, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zgemm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index bee5168f95baef5c9e0b4ea8da3d4adbe6c43625..1ff57d18546e4bec5670e25dcd00e6a32368ba06 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -114,9 +114,8 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c index 2dac8366a196e4baab0f3f4a671891bac11df686..72736b064f50833b271001bec25499d6636cc1fd 100644 --- a/runtime/starpu/codelets/codelet_zgessm.c +++ 
b/runtime/starpu/codelets/codelet_zgessm.c @@ -26,6 +26,32 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgessm_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + int k; + int ib; + int *IPIV; + int ldl; + CHAMELEON_Complex64_t *D; + int ldd; + CHAMELEON_Complex64_t *A; + int lda; + + D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda); + CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -66,18 +92,17 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ -void INSERT_TASK_zgessm(const RUNTIME_option_t *options, - int m, int n, int k, int ib, int nb, - int *IPIV, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, - const CHAM_desc_t *D, int Dm, int Dn, int ldd, - const CHAM_desc_t *A, int Am, int An, int lda) +void INSERT_TASK_zgessm( const RUNTIME_option_t *options, + int m, int n, int k, int ib, int nb, + int *IPIV, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + const CHAM_desc_t *D, int Dm, int Dn, int ldd, + const CHAM_desc_t *A, int Am, int An, int lda ) { (void)nb; struct starpu_codelet *codelet = &cl_zgessm; @@ -109,30 +134,3 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgessm_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int k; - int ib; - int *IPIV; - int ldl; - CHAMELEON_Complex64_t *D; - 
int ldd; - CHAMELEON_Complex64_t *A; - int lda; - - D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda); - CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c index a9cdc2ff812c03338f9d45af2cfe8846163315db..e22f803bc469664416e80c8cd3dd0ab7f579652b 100644 --- a/runtime/starpu/codelets/codelet_zgessq.c +++ b/runtime/starpu/codelets/codelet_zgessq.c @@ -22,10 +22,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgessq_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + double *SCALESUMSQ; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &m, &n, &lda); + CORE_zgessq( m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func) + void INSERT_TASK_zgessq( const RUNTIME_option_t *options, - int m, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_zgessq; void (*callback)(void*) = options->profiling ? 
cl_zgessq_callback : NULL; @@ -49,25 +70,3 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgessq_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - double *SCALESUMSQ; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda); - CORE_zgessq( m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c index b6bf892c4c6d72c84165c67c2802f19a86d5ab14..977e8c2c5bd10b8d87116559ec2717a063321b74 100644 --- a/runtime/starpu/codelets/codelet_zgetrf.c +++ b/runtime/starpu/codelets/codelet_zgetrf.c @@ -24,6 +24,36 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int *IPIV; + cham_bool_t check_info; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request); + CORE_zgetrf( m, n, A, lda, IPIV, &info ); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func) + void INSERT_TASK_zgetrf( const RUNTIME_option_t *options, int m, int n, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -56,34 +86,3 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t 
*options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int *IPIV; - cham_bool_t check_info; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request); - CORE_zgetrf( m, n, A, lda, IPIV, &info ); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c index 94113ee7d83960a08b18bcecc2fd880754d8d99e..66a5201ca6bbc77567500ac5f5d987048e92aa13 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c @@ -26,6 +26,38 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_starpu_ws_t *h_work; + int m; + int n; + int ib; + CHAMELEON_Complex64_t *A; + int lda, ldl; + int *IPIV; + cham_bool_t check_info; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request); + CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ 
+CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -71,10 +103,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. @@ -122,36 +153,3 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_starpu_ws_t *h_work; - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A; - int lda, ldl; - int *IPIV; - cham_bool_t check_info; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request); - CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c index 9f3a0a8d2466943ca239759c8a93228a120b256b..3efbe362e5ffeb6cd86998c26ac7528809379c11 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c +++ 
b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c @@ -23,6 +23,38 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +/* + * Codelet CPU + */ +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + int ib; + CHAMELEON_Complex64_t *A; + int lda; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request); + CORE_zgetrf_nopiv(m, n, ib, A, lda, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -61,10 +93,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
@@ -101,35 +132,3 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, #endif 0); } - -/* - * Codelet CPU - */ -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A; - int lda; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request); - CORE_zgetrf_nopiv(m, n, ib, A, lda, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c index 4c562fbeb6fbc0e32f1976a475de43c8cb98941c..11ce2183419c09dd3d9e89ee53d9801d1206d986 100644 --- a/runtime/starpu/codelets/codelet_zhe2ge.c +++ b/runtime/starpu/codelets/codelet_zhe2ge.c @@ -18,6 +18,29 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + const CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB); + CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -54,26 +77,3 @@ void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) 
-static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - const CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB); - CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 9396feebc0a72b723eef2b0ac1120e4cb80d46f3..4f769835253ae243d2c48eb0f88dccb2f9c26008 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zhemm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zhemm; - void (*callback)(void*) = options->profiling ? 
cl_zhemm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zhemm", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zhemm_cpu_func(void *descr[], void *cl_arg) { @@ -142,3 +97,47 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zhemm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zhemm; + void (*callback)(void*) = options->profiling ? 
cl_zhemm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(int), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zhemm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 668ee4246fc3c3225f0d800c21df39735159df93..24553aba498fb368fc9cdd238a79af32e87bb670 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zher2k(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zher2k; - void (*callback)(void*) = options->profiling ? 
cl_zher2k_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(double), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zher2k", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zher2k_cpu_func(void *descr[], void *cl_arg) { @@ -135,3 +90,47 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zher2k(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zher2k; + void (*callback)(void*) = options->profiling ? 
cl_zher2k_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &trans, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(double), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zher2k", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c index 21c97dcf8855b72d64dba0cc3154131fd3d9e313..d833149236288da24c4dd978d4ec3bf199f39b72 100644 --- a/runtime/starpu/codelets/codelet_zherfb.c +++ b/runtime/starpu/codelets/codelet_zherfb.c @@ -20,51 +20,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zherfb(const RUNTIME_option_t *options, - cham_uplo_t uplo, - int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - struct starpu_codelet *codelet = &cl_zherfb; - void (*callback)(void*) = options->profiling ? 
cl_zherfb_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(T, Tm, Tn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_VALUE, &nb, sizeof(int), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_SCRATCH, options->ws_worker, - STARPU_VALUE, &nb, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zherfb", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zherfb_cpu_func(void *descr[], void *cl_arg) { @@ -131,3 +86,47 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zherfb(const RUNTIME_option_t *options, + cham_uplo_t uplo, + int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + struct starpu_codelet *codelet = &cl_zherfb; + void (*callback)(void*) = options->profiling ? 
cl_zherfb_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(T, Tm, Tn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), + STARPU_VALUE, &nb, sizeof(int), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), + STARPU_VALUE, &ldt, sizeof(int), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_SCRATCH, options->ws_worker, + STARPU_VALUE, &nb, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zherfb", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 101eef19c7be79c5cd63bf76529769f8d0615c2a..d8709bac154d2fc755b8328ab054e273d0db26ca 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -26,46 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zherk(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - double alpha, const CHAM_desc_t *A, int Am, int An, int lda, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zherk; - void (*callback)(void*) = options->profiling ? 
cl_zherk_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(double), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_VALUE, &beta, sizeof(double), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zherk", -#endif - 0); -} - #if !defined(CHAMELEON_SIMULATION) static void cl_zherk_cpu_func(void *descr[], void *cl_arg) { @@ -129,3 +89,43 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zherk(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + double alpha, const CHAM_desc_t *A, int Am, int An, int lda, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zherk; + void (*callback)(void*) = options->profiling ? 
cl_zherk_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &trans, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(double), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_VALUE, &beta, sizeof(double), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zherk", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c index c47e0871d113394d877f8e5ef4e5e82c2854c531..c2ebde6af7fcbf0a94f1da2fac293318fb25204f 100644 --- a/runtime/starpu/codelets/codelet_zhessq.c +++ b/runtime/starpu/codelets/codelet_zhessq.c @@ -22,6 +22,27 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zhessq_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int n; + CHAMELEON_Complex64_t *A; + int lda; + double *SCALESUMSQ; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); + CORE_zhessq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zhessq, 2, cl_zhessq_cpu_func) + void INSERT_TASK_zhessq( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, const CHAM_desc_t *A, int Am, int An, int lda, @@ -49,25 +70,3 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void 
cl_zhessq_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int n; - CHAMELEON_Complex64_t *A; - int lda; - double *SCALESUMSQ; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); - CORE_zhessq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zhessq, 2, cl_zhessq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index 0d2426400e3eb0216d694b4563543fb3c642aabb..9c53e6e4efd3a0981a970b6307d6daafcf91b4f8 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -26,15 +26,40 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + int displA; + int displB; + const CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB); + CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const 
CHAM_desc_t *B, int Bm, int Bn, int ldb ) { (void)nb; struct starpu_codelet *codelet = &cl_zlacpy; @@ -64,37 +89,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, 0); } -void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, 0, A, Am, An, lda, 0, B, Bm, Bn, ldb ); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - int displA; - int displB; - const CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB); - CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c index 43b4314c6b58b9a3bea0d39b667e6006883e2e6f..46eea274085fcd6e422dba232af80961419e8cd3 100644 --- a/runtime/starpu/codelets/codelet_zlag2c.c +++ b/runtime/starpu/codelets/codelet_zlag2c.c @@ -24,6 +24,28 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + CHAMELEON_Complex32_t *B; + int ldb; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex32_t 
*)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); + CORE_zlag2c( m, n, A, lda, B, ldb); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -60,22 +82,27 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, } #if !defined(CHAMELEON_SIMULATION) -static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) +static void cl_clag2z_cpu_func(void *descr[], void *cl_arg) { int m; int n; - CHAMELEON_Complex64_t *A; + CHAMELEON_Complex32_t *A; int lda; - CHAMELEON_Complex32_t *B; + CHAMELEON_Complex64_t *B; int ldb; - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]); + A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); - CORE_zlag2c( m, n, A, lda, B, ldb); + CORE_clag2z( m, n, A, lda, B, ldb); } #endif /* !defined(CHAMELEON_SIMULATION) */ +/* + * Codelet definition + */ +CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func) + void INSERT_TASK_clag2z(const RUNTIME_option_t *options, int m, int n, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -105,30 +132,3 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_clag2z_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - CHAMELEON_Complex32_t *A; - int lda; - CHAMELEON_Complex64_t *B; - int ldb; - - A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); - CORE_clag2z( m, n, A, lda, B, ldb); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func) -/* - * Codelet definition - */ 
-CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c index 9ab611908607b4440277005c057598e35d530825..4b389bbb2821528c6ea6379b389c90db6bc1e05b 100644 --- a/runtime/starpu/codelets/codelet_zlange.c +++ b/runtime/starpu/codelets/codelet_zlange.c @@ -24,6 +24,30 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlange_cpu_func(void *descr[], void *cl_arg) +{ + double *normA; + cham_normtype_t norm; + int M; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA); + CORE_zlange( norm, M, N, A, LDA, work, normA ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlange, 3, cl_zlange_cpu_func) + void INSERT_TASK_zlange( const RUNTIME_option_t *options, cham_normtype_t norm, int M, int N, int NB, const CHAM_desc_t *A, int Am, int An, int LDA, @@ -56,28 +80,25 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options, } #if !defined(CHAMELEON_SIMULATION) -static void cl_zlange_cpu_func(void *descr[], void *cl_arg) +static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg) { - double *normA; - cham_normtype_t norm; - int M; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - double *work; + double *A; + double *B; - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA); - CORE_zlange( norm, M, N, A, LDA, work, normA ); + A = (double *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + + if ( *A > *B ) { + *B = *A; + } + (void)cl_arg; } 
#endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlange, 3, cl_zlange_cpu_func) +CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func) void INSERT_TASK_zlange_max(const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, @@ -102,24 +123,3 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg) -{ - double *A; - double *B; - - A = (double *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - - if ( *A > *B ) { - *B = *A; - } - (void)cl_arg; -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c index d29b5b3d53be49d3d24b758c5139b5ea08aafa36..4fc51d3d0c0de634e66a0a190d4b64d5cd7963e0 100644 --- a/runtime/starpu/codelets/codelet_zlanhe.c +++ b/runtime/starpu/codelets/codelet_zlanhe.c @@ -24,6 +24,30 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg) +{ + double *normA; + cham_normtype_t norm; + cham_uplo_t uplo; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); + CORE_zlanhe( norm, uplo, N, A, LDA, work, normA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func) + void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, const CHAM_desc_t *A, int Am, int An, int LDA, @@ -55,27 +79,3 @@ void INSERT_TASK_zlanhe(const 
RUNTIME_option_t *options, (void)NB; } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg) -{ - double *normA; - cham_normtype_t norm; - cham_uplo_t uplo; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); - CORE_zlanhe( norm, uplo, N, A, LDA, work, normA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c index 6fd7cae043869316a5e8527c637f273815b4c261..fdea83309d17af28ab3dd368a87d3cd48bbdf3a0 100644 --- a/runtime/starpu/codelets/codelet_zlansy.c +++ b/runtime/starpu/codelets/codelet_zlansy.c @@ -24,10 +24,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zlansy(const RUNTIME_option_t *options, - cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlansy_cpu_func(void *descr[], void *cl_arg) +{ + double *normA; + cham_normtype_t norm; + cham_uplo_t uplo; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); + CORE_zlansy( norm, uplo, N, A, LDA, work, normA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func) + +void INSERT_TASK_zlansy( const RUNTIME_option_t *options, + cham_normtype_t norm, cham_uplo_t uplo, int N, 
int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ) { (void)NB; struct starpu_codelet *codelet = &cl_zlansy; @@ -54,27 +78,3 @@ void INSERT_TASK_zlansy(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlansy_cpu_func(void *descr[], void *cl_arg) -{ - double *normA; - cham_normtype_t norm; - cham_uplo_t uplo; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); - CORE_zlansy( norm, uplo, N, A, LDA, work, normA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c index b4a5da8051780a99d1c60fa53f0aa8d20ad74313..078b81d6bcc73de624e4922bef9853ddb4bed5a7 100644 --- a/runtime/starpu/codelets/codelet_zlantr.c +++ b/runtime/starpu/codelets/codelet_zlantr.c @@ -22,11 +22,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zlantr(const RUNTIME_option_t *options, - cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, - int M, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlantr_cpu_func(void *descr[], void *cl_arg) +{ + double *normA; + cham_normtype_t norm, uplo, diag; + int M, N; + CHAMELEON_Complex64_t *A; + int LDA; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA); + CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, 
normA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func) + +void INSERT_TASK_zlantr( const RUNTIME_option_t *options, + cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, + int M, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet *codelet = &cl_zlantr; void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL; @@ -56,26 +79,3 @@ void INSERT_TASK_zlantr(const RUNTIME_option_t *options, (void)NB; } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlantr_cpu_func(void *descr[], void *cl_arg) -{ - double *normA; - cham_normtype_t norm, uplo, diag; - int M, N; - CHAMELEON_Complex64_t *A; - int LDA; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA); - CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index 61d63bcfcc49a6ebac95fd16fa9beb1659883bf6..f625d883067e3aeb99fda6989bfbd3390dc064cb 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -22,6 +22,28 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int LDA; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); + CORE_zlascal(uplo, M, N, alpha, A, LDA); + return; 
+} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -49,12 +71,10 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ - void INSERT_TASK_zlascal(const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, @@ -84,26 +104,3 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t *A; - int LDA; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); - CORE_zlascal(uplo, M, N, alpha, A, LDA); - return; -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c index 80ab8c2c158860a4525f57e04bf76fbeac0c58cd..9108167de991387c9ac6d0fc1c6d409ede9b4ba5 100644 --- a/runtime/starpu/codelets/codelet_zlaset.c +++ b/runtime/starpu/codelets/codelet_zlaset.c @@ -25,6 +25,27 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t beta; + CHAMELEON_Complex64_t *A; + int LDA; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA); + CORE_zlaset(uplo, M, N, 
alpha, beta, A, LDA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func) /** * @@ -90,26 +111,3 @@ void INSERT_TASK_zlaset(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t beta; - CHAMELEON_Complex64_t *A; - int LDA; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA); - CORE_zlaset(uplo, M, N, alpha, beta, A, LDA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c index c3514735dac3ac4fad2c510ed5a0a73a947ca12f..0e67ee39d2fdbc81cbd43fbadf65d11444d5401f 100644 --- a/runtime/starpu/codelets/codelet_zlaset2.c +++ b/runtime/starpu/codelets/codelet_zlaset2.c @@ -25,6 +25,26 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int LDA; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); + CORE_zlaset2(uplo, M, N, alpha, A, LDA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func) /** * @@ -86,25 +106,3 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t *A; - int LDA; - 
- A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); - CORE_zlaset2(uplo, M, N, alpha, A, LDA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c index d92ddaf478722a11e9b958958a8d65c2ed620cf6..063b7aea8cd4d32783e69f81a079eea7f2381ff3 100644 --- a/runtime/starpu/codelets/codelet_zlatro.c +++ b/runtime/starpu/codelets/codelet_zlatro.c @@ -26,16 +26,40 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlatro_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + cham_trans_t trans; + int M; + int N; + const CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB); + CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_zlatro(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlatro( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_zlatro; void (*callback)(void*) = NULL; @@ -63,27 +87,3 @@ void INSERT_TASK_zlatro(const RUNTIME_option_t *options, 0); (void)mb; } - -#if !defined(CHAMELEON_SIMULATION) 
-static void cl_zlatro_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - cham_trans_t trans; - int M; - int N; - const CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB); - CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index 166b138812159cabd2faafc2359b4be7fb471636..2344c5171b897ddf8da452b9f7c047fa1b6fc44b 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -26,14 +26,33 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA); + CORE_zlauum(uplo, N, A, LDA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_zlauum(const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda) +void INSERT_TASK_zlauum( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda ) { (void)nb; struct starpu_codelet *codelet = &cl_zlauum; @@ -56,23 +75,3 @@ void INSERT_TASK_zlauum(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) -{ - 
cham_uplo_t uplo; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA); - CORE_zlauum(uplo, N, A, LDA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c index ae014c297e8a9bec69359c93aee8684cd488234d..345d18a24e39f606431c9e2d71783982e2117905 100644 --- a/runtime/starpu/codelets/codelet_zplghe.c +++ b/runtime/starpu/codelets/codelet_zplghe.c @@ -26,13 +26,36 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ +/* cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ -void INSERT_TASK_zplghe( const RUNTIME_option_t *options, - double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) { + double bump; + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int bigM; + int m0; + int n0; + unsigned long long int seed; + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); + CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func) + +void INSERT_TASK_zplghe( const RUNTIME_option_t *options, + double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) +{ struct starpu_codelet *codelet = &cl_zplghe; void (*callback)(void*) = options->profiling ? 
cl_zplghe_callback : NULL; @@ -58,29 +81,3 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options, #endif 0); } - -/* cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) -{ - double bump; - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int bigM; - int m0; - int n0; - unsigned long long int seed; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c index de00e603352526149032aa4982686d0af59dffb3..9141ecd0fce9abd5b18c0d841eb64bec4da10afa 100644 --- a/runtime/starpu/codelets/codelet_zplgsy.c +++ b/runtime/starpu/codelets/codelet_zplgsy.c @@ -26,7 +26,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */ +/* cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. 
*/ + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_Complex64_t bump; + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int bigM; + int m0; + int n0; + unsigned long long int seed; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); + CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func) void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, @@ -58,29 +82,3 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, #endif 0); } - -/* cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_Complex64_t bump; - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int bigM; - int m0; - int n0; - unsigned long long int seed; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c index 24f7c9159e92127f9cfa76377a3073faa12b0cf0..d824485dac38087d5bf3189bdf24a49bbd6360b0 100644 --- a/runtime/starpu/codelets/codelet_zplrnt.c +++ b/runtime/starpu/codelets/codelet_zplrnt.c @@ -26,11 +26,32 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* INSERT_TASK_zplrnt - Generate a tile for random matrix. 
*/ +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int bigM; + int m0; + int n0; + unsigned long long int seed; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed ); + CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func) void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) + int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) { struct starpu_codelet *codelet = &cl_zplrnt; @@ -57,28 +78,3 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, #endif 0); } - -/* cl_zplrnt_cpu_func - Generate a tile for random matrix. 
*/ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int bigM; - int m0; - int n0; - unsigned long long int seed; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c index 4fdbaf6c3d402da601267f52788ce6eeedd82653..2fe5d2a7fdd0ea9d671a5a785d6a25141714d510 100644 --- a/runtime/starpu/codelets/codelet_zplssq.c +++ b/runtime/starpu/codelets/codelet_zplssq.c @@ -23,11 +23,39 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) +{ + double *SCLSSQ_IN; + double *SCLSSQ_OUT; + + SCLSSQ_IN = (double *)STARPU_MATRIX_GET_PTR(descr[0]); + SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + + assert( SCLSSQ_OUT[0] >= 0. 
); + if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { + SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); + SCLSSQ_OUT[0] = SCLSSQ_IN[0]; + } else { + if ( SCLSSQ_OUT[0] > 0 ) { + SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); + } + } + + (void)cl_arg; +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * @brief Compute sum( a_ij ^ 2 ) = scl * sqrt(ssq) * * with scl and ssq such that * @@ -78,25 +106,14 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options, 0); } - #if !defined(CHAMELEON_SIMULATION) -static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) +static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg) { - double *SCLSSQ_IN; - double *SCLSSQ_OUT; + double *RESULT; - SCLSSQ_IN = (double *)STARPU_MATRIX_GET_PTR(descr[0]); - SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]); - assert( SCLSSQ_OUT[0] >= 0. 
); - if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { - SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); - SCLSSQ_OUT[0] = SCLSSQ_IN[0]; - } else { - if ( SCLSSQ_OUT[0] > 0 ) { - SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); - } - } + RESULT[0] = RESULT[0] * sqrt( RESULT[1] ); (void)cl_arg; } @@ -105,10 +122,10 @@ static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func) +CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func) void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) { struct starpu_codelet *codelet = &cl_zplssq2; void (*callback)(void*) = options->profiling ? cl_zplssq2_callback : NULL; @@ -127,22 +144,3 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg) -{ - double *RESULT; - - RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]); - - RESULT[0] = RESULT[0] * sqrt( RESULT[1] ); - - (void)cl_arg; -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index a43f31723f90b93265deadc10fa5e291f1c5097d..bbfe817742d59742d2945aeb77ca71b0d399ad4f 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -26,6 +26,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + 
int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request); + CORE_zpotrf(uplo, n, A, lda, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -61,33 +89,3 @@ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request); - CORE_zpotrf(uplo, n, A, lda, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func) - diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c index 9efbd985f0451cac32b07623016d2ccb6b489d77..ecae613ee3f5b83fb78c25cf5ac9282609c3429b 100644 --- a/runtime/starpu/codelets/codelet_zssssm.c +++ b/runtime/starpu/codelets/codelet_zssssm.c @@ -26,6 +26,39 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zssssm_cpu_func(void *descr[], void *cl_arg) +{ + int m1; + int n1; + int m2; + int n2; + int k; + int ib; + CHAMELEON_Complex64_t *A1; + int lda1; + CHAMELEON_Complex64_t *A2; + int lda2; + CHAMELEON_Complex64_t *L1; + int ldl1; + CHAMELEON_Complex64_t *L2; 
+ int ldl2; + int *IPIV; + + A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV); + CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -91,19 +124,17 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ - -void INSERT_TASK_zssssm(const RUNTIME_option_t *options, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *L1, int L1m, int L1n, int ldl1, - const CHAM_desc_t *L2, int L2m, int L2n, int ldl2, - const int *IPIV) +void INSERT_TASK_zssssm( const RUNTIME_option_t *options, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *L1, int L1m, int L1n, int ldl1, + const CHAM_desc_t *L2, int L2m, int L2n, int ldl2, + const int *IPIV ) { (void)nb; struct starpu_codelet *codelet = &cl_zssssm; @@ -140,38 +171,3 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zssssm_cpu_func(void *descr[], void *cl_arg) -{ - int m1; - int n1; - int m2; - int n2; - int k; - int ib; 
- CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *L1; - int ldl1; - CHAMELEON_Complex64_t *L2; - int ldl2; - int *IPIV; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV); - CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func) - diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 455d118f19c5426a9c017b93a24fb5d99e684ef4..49d3af5d9700e90f9b01056514195bf276cd3450 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zsymm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zsymm; - void (*callback)(void*) = options->profiling ? 
cl_zsymm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zsymm", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zsymm_cpu_func(void *descr[], void *cl_arg) { @@ -142,3 +97,47 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zsymm, 3, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zsymm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zsymm; + void (*callback)(void*) = options->profiling ? 
cl_zsymm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(int), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zsymm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index bafefa0f7e73c76f8e11f120026d8f5a087dae1a..27b63010a7daea7d8632cc835a7135ff6876e542 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zsyr2k; - void (*callback)(void*) = options->profiling ? 
cl_zsyr2k_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zsyr2k", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg) { @@ -135,3 +90,47 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zsyr2k, 3, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zsyr2k; + void (*callback)(void*) = options->profiling ? 
cl_zsyr2k_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &trans, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zsyr2k", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 6f72802e5e0f3432d2f20ae97035617843a2e3e0..e089904530812ab76b4828b0123021b1e8f09131 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -26,47 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zsyrk; - void (*callback)(void*) = options->profiling ? 
cl_zsyrk_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zsyrk", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) { @@ -130,3 +89,43 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zsyrk, 2, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zsyrk; + void (*callback)(void*) = options->profiling ? 
cl_zsyrk_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &trans, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zsyrk", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c index 2e2ae7676cc456ab5ff078ad36e378dc6e9f2c49..3f07e618d18c5ca86ddaa65cce78bc10ca0f1227 100644 --- a/runtime/starpu/codelets/codelet_zsyssq.c +++ b/runtime/starpu/codelets/codelet_zsyssq.c @@ -22,10 +22,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int n; + CHAMELEON_Complex64_t *A; + int lda; + double *SCALESUMSQ; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); + CORE_zsyssq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func) + void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) + cham_uplo_t uplo, int n, + const CHAM_desc_t *A, int 
Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_zsyssq; void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL; @@ -49,25 +70,3 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int n; - CHAMELEON_Complex64_t *A; - int lda; - double *SCALESUMSQ; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); - CORE_zsyssq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c index b1f741affa82fa9388a5b41d8d243d22215aa7cd..06c4775e74f53dc4f288fa31b85e3993be9839da 100644 --- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c @@ -26,10 +26,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int iinfo; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo); + CORE_zsytf2_nopiv(uplo, n, A, lda); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func) + +void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, + cham_uplo_t 
uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo ) { (void)nb; struct starpu_codelet *codelet = &cl_zsytrf_nopiv; @@ -54,25 +75,3 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int iinfo; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo); - CORE_zsytf2_nopiv(uplo, n, A, lda); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztile_zero.c b/runtime/starpu/codelets/codelet_ztile_zero.c deleted file mode 100644 index c59115b410222dd329cdd0b794ab72223b8f2ef8..0000000000000000000000000000000000000000 --- a/runtime/starpu/codelets/codelet_ztile_zero.c +++ /dev/null @@ -1,84 +0,0 @@ -/** - * - * @file starpu/codelet_ztile_zero.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztile_zero StarPU codelet - * - * @version 1.0.0 - * @author Hatem Ltaief - * @author Mathieu Faverge - * @author Jakub Kurzak - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - */ -void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, - int X1, int X2, int Y1, int Y2, - const CHAM_desc_t *A, int Am, int An, int lda ) -{ - struct starpu_codelet *codelet; - codelet = &cl_ztile_zero; - void (*callback)(void*) = options->profiling ? 
cl_zlacpy_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_W(A, Am, An); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &X1, sizeof(int), - STARPU_VALUE, &X2, sizeof(int), - STARPU_VALUE, &Y1, sizeof(int), - STARPU_VALUE, &Y2, sizeof(int), - STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, NULL, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztile_zero", -#endif - 0); -} - -/** - * - */ -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztile_zero_cpu_func(void *descr[], void *cl_arg) -{ - int X1; - int X2; - int Y1; - int Y2; - CHAMELEON_Complex64_t *A; - int lda; - - int x, y; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &X1, &X2, &Y1, &Y2, &lda); - - for (x = X1; x < X2; x++) - for (y = Y1; y < Y2; y++) - A[lda*x+y] = 0.0; - -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztile_zero, 1, cl_ztile_zero_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c index 44615d5c3bef1f0b958de2c89cfbf41181ae9a49..8132a27dde9d032918c933b45da63493ae72568e 100644 --- a/runtime/starpu/codelets/codelet_ztplqt.c +++ b/runtime/starpu/codelets/codelet_ztplqt.c @@ -54,12 +54,11 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg) */ CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func) -void -INSERT_TASK_ztplqt( const RUNTIME_option_t *options, - int M, int N, int L, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + 
const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { struct starpu_codelet *codelet = &cl_ztplqt; void (*callback)(void*) = options->profiling ? cl_ztplqt_callback : NULL; diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c index 8dffa4ff2292c448c710713a7556bec3920f3d73..54a24a070c666a25b366ad5af07bd101bc297390 100644 --- a/runtime/starpu/codelets/codelet_ztpmlqt.c +++ b/runtime/starpu/codelets/codelet_ztpmlqt.c @@ -103,14 +103,13 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg) */ CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC) -void -INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) +void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_ztpmlqt; void (*callback)(void*) = options->profiling ? 
cl_ztpmlqt_callback : NULL; diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index 6684e59f82d579cd129a1ff40c25dd377bc6166e..c94a33b43b0b4ed4d983b0d36384f962f1cbb413 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -104,14 +104,13 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) */ CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC) -void -INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) +void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_ztpmqrt; void (*callback)(void*) = options->profiling ? 
cl_ztpmqrt_callback : NULL; diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index 6fbd0afe65501a497ceda71c9c6f40444a50369a..143d613eb360ff65cb1ec52489c6791a2ed23cf9 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -54,12 +54,11 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg) */ CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func) -void -INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, - int M, int N, int L, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { struct starpu_codelet *codelet = &cl_ztpqrt; void (*callback)(void*) = options->profiling ? 
cl_ztpqrt_callback : NULL; diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index f6265c28c28b50a2750abfd87c6344bd8ff54ba5..57fa58e178e0771e45c2788c270a442dff96bb53 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -22,12 +22,39 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + cham_trans_t trans; + int M; + int N; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t beta; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); + CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); + return; +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func) + /** ****************************************************************************** * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd. 
* * B <- alpha * op(A) + beta * B, * @@ -77,15 +104,14 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_ztradd(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_ztradd( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_ztradd; void (*callback)(void*) = options->profiling ? 
cl_zgeadd_callback : NULL; @@ -116,31 +142,3 @@ void INSERT_TASK_ztradd(const RUNTIME_option_t *options, (void)nb; } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - cham_trans_t trans; - int M; - int N; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t beta; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); - CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); - return; -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c index c1d154aadc361f391f388514ec155fd798fc1749..1ca5a1a6a0ebf413c4bf8d5a77cc7d028592e6a6 100644 --- a/runtime/starpu/codelets/codelet_ztrasm.c +++ b/runtime/starpu/codelets/codelet_ztrasm.c @@ -22,10 +22,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_ztrasm(const RUNTIME_option_t *options, - cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn) +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg) +{ + cham_store_t storev; + cham_uplo_t uplo; + cham_diag_t diag; + int M; + int N; + CHAMELEON_Complex64_t *A; + int lda; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda); + CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ 
+CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func) + +void INSERT_TASK_ztrasm( const RUNTIME_option_t *options, + cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet *codelet = &cl_ztrasm; void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL; @@ -48,32 +72,7 @@ void INSERT_TASK_ztrasm(const RUNTIME_option_t *options, STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrasm", + STARPU_NAME, "ztrasm", #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg) -{ - cham_store_t storev; - cham_uplo_t uplo; - cham_diag_t diag; - int M; - int N; - CHAMELEON_Complex64_t *A; - int lda; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda); - CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index b125de67fedf22555b58f0abb5bb6227fddf1507..b9f553b645f2c250f717ec150d52f61cdea5b59d 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -26,48 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) -{ - (void)nb; - struct starpu_codelet *codelet 
= &cl_ztrmm; - void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_RW(B, Bm, Bn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrmm", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) { @@ -136,3 +94,44 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(ztrmm, 2, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_ztrmm; + void (*callback)(void*) = options->profiling ? 
cl_ztrmm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_RW(B, Bm, Bn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(int), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &transA, sizeof(int), + STARPU_VALUE, &diag, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "ztrmm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index e48a4eb1609cb0697de2af913197f06a12c5c795..83310ab1bbffd079e5fa8932d859fa48a1be9fe2 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -26,48 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_ztrsm; - void (*callback)(void*) = options->profiling ? 
cl_ztrsm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_RW(B, Bm, Bn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrsm", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) { @@ -134,3 +92,44 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(ztrsm, 2, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_ztrsm; + void (*callback)(void*) = options->profiling ? 
cl_ztrsm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_RW(B, Bm, Bn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(int), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &transA, sizeof(int), + STARPU_VALUE, &diag, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "ztrsm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c index aac4b9fbd963ba9a3bf67ff248d2b3729129c845..e7d0de9985f22b19bfb12561b28ce00935067296 100644 --- a/runtime/starpu/codelets/codelet_ztrssq.c +++ b/runtime/starpu/codelets/codelet_ztrssq.c @@ -22,11 +22,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + cham_diag_t diag; + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + double *SCALESUMSQ; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n, &lda); + CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func) + void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_diag_t diag, - int m, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t 
*SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) + cham_uplo_t uplo, cham_diag_t diag, + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_ztrssq; void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL; @@ -52,27 +75,3 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - cham_diag_t diag; - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - double *SCALESUMSQ; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n, &lda); - CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index 81ee2923e2e2130c67fa250e625abfd48d678d6d..804d21b0204d0b0f2700ec59c7372a4e7a355593 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -26,16 +26,45 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + cham_diag_t diag; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request); + CORE_ztrtri(uplo, diag, N, A, LDA, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, 
sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_ztrtri(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_diag_t diag, - int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo) +void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_diag_t diag, + int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo ) { (void)nb; struct starpu_codelet *codelet = &cl_ztrtri; @@ -62,33 +91,3 @@ void INSERT_TASK_ztrtri(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - cham_diag_t diag; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request); - CORE_ztrtri(uplo, diag, N, A, LDA, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c index d68e2bebf803ac1c449c30435dec5ea1883ba86e..4e82f101c56bdf38e67e5eaf86ac242873f5bae7 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c @@ -22,18 +22,60 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t 
trans; + int m1; + int n1; + int m2; + int n2; + int k; + int ib; + int nb; + CHAMELEON_Complex64_t *A1; + int lda1; + CHAMELEON_Complex64_t *A2; + int lda2; + CHAMELEON_Complex64_t *V; + int ldv; + CHAMELEON_Complex64_t *T; + int ldt; + + CHAMELEON_Complex64_t *WORK; + int ldwork; + + A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ + + starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, + &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork); + CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, + ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) +void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { struct starpu_codelet *codelet = &cl_ztsmlq_hetra1; void (*callback)(void*) = options->profiling ? 
cl_ztsmlq_hetra1_callback : NULL; @@ -75,45 +117,3 @@ void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - int nb; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, - &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork); - CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, - ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c index af9f2adcc3197cf3b6f6e3648d6f25b645a2de23..66fa69daba82900221e05c6161ff5cc78ebb1c0f 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c @@ -22,18 +22,60 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m1; + int n1; + int m2; + int n2; + int k; + int ib; + CHAMELEON_Complex64_t *A1; + int lda1; + CHAMELEON_Complex64_t *A2; + int lda2; + CHAMELEON_Complex64_t *V; + int 
ldv; + CHAMELEON_Complex64_t *T; + int ldt; + + /* TODO: manage workspace */ + CHAMELEON_Complex64_t *WORK; + int ldwork; + + A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, + &ib, &lda1, &lda2, &ldv, &ldt, &ldwork); + CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, + ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) +void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { struct starpu_codelet *codelet = &cl_ztsmqr_hetra1; void (*callback)(void*) = options->profiling ? 
cl_ztsmqr_hetra1_callback : NULL; @@ -74,45 +116,3 @@ void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - - /* TODO: manage workspace */ - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, - &ib, &lda1, &lda2, &ldv, &ldt, &ldwork); - CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, - ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c index e139931a6c90ab49e0a19396004213fda3dd9c4f..7e1dfd92a31d58f5c03f57759f484b8b14aabd12 100644 --- a/runtime/starpu/codelets/codelet_ztstrf.c +++ b/runtime/starpu/codelets/codelet_ztstrf.c @@ -26,6 +26,51 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_starpu_ws_t *d_work; + int m; + int n; + int ib; + int nb; + CHAMELEON_Complex64_t *U; + int ldu; + CHAMELEON_Complex64_t *A; + int lda; + CHAMELEON_Complex64_t *L; + int ldl; + int *IPIV; + CHAMELEON_Complex64_t *WORK; + int ldwork; + 
cham_bool_t check_info; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + U = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, + &IPIV, &d_work, &ldwork, &check_info, &iinfo, + &sequence, &request); + + CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -83,23 +128,21 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
* */ - -void INSERT_TASK_ztstrf(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *U, int Um, int Un, int ldu, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, - int *IPIV, - cham_bool_t check_info, int iinfo) +void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *U, int Um, int Un, int ldu, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + int *IPIV, + cham_bool_t check_info, int iinfo ) { (void)nb; struct starpu_codelet *codelet = &cl_ztstrf; @@ -139,50 +182,3 @@ void INSERT_TASK_ztstrf(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_starpu_ws_t *d_work; - int m; - int n; - int ib; - int nb; - CHAMELEON_Complex64_t *U; - int ldu; - CHAMELEON_Complex64_t *A; - int lda; - CHAMELEON_Complex64_t *L; - int ldl; - int *IPIV; - CHAMELEON_Complex64_t *WORK; - int ldwork; - cham_bool_t check_info; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - U = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, - &IPIV, &d_work, &ldwork, &check_info, &iinfo, - &sequence, &request); - - CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) - diff --git 
a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index 89ef1c851bde0804aaa9b1901a0e6664aa0ec28f..046b4e568756e9cc36e2249bc6d485a8d9843e28 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -27,6 +27,75 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m; + int n; + int k; + int ib; + const CHAMELEON_Complex64_t *A; + int lda; + const CHAMELEON_Complex64_t *T; + int ldt; + CHAMELEON_Complex64_t *C; + int ldc; + CHAMELEON_Complex64_t *WORK; + int ldwork; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, + &lda, &ldt, &ldc, &ldwork); + + CORE_zunmlq(side, trans, m, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork); +} + +#if defined(CHAMELEON_USE_CUDA) +static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m; + int n; + int k; + int ib; + const cuDoubleComplex *A, *T; + cuDoubleComplex *C, *WORK; + int lda, ldt, ldc, ldwork; + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, + &lda, &ldt, &ldc, &ldwork); + + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); + C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + RUNTIME_getStream(stream); + + CUDA_zunmlqt( + side, trans, m, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( 
stream ); +#endif +} +#endif /* defined(CHAMELEON_USE_CUDA) */ +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -105,18 +174,16 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ - -void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m, int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc) +void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc ) { struct starpu_codelet *codelet = &cl_zunmlq; void (*callback)(void*) = options->profiling ? 
cl_zunmlq_callback : NULL; @@ -151,73 +218,3 @@ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m; - int n; - int k; - int ib; - const CHAMELEON_Complex64_t *A; - int lda; - const CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *C; - int ldc; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); - - CORE_zunmlq(side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork); -} - -#if defined(CHAMELEON_USE_CUDA) -static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m; - int n; - int k; - int ib; - const cuDoubleComplex *A, *T; - cuDoubleComplex *C, *WORK; - int lda, ldt, ldc, ldwork; - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); - - A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - RUNTIME_getStream(stream); - - CUDA_zunmlqt( - side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif -} -#endif /* defined(CHAMELEON_USE_CUDA) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) diff --git 
a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index e6f97c032a4bb5910aa3a3e767a41318f6983851..afa04149b405938eb777ef409fb801a907ebf9f0 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -26,6 +26,75 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m; + int n; + int k; + int ib; + const CHAMELEON_Complex64_t *A; + int lda; + const CHAMELEON_Complex64_t *T; + int ldt; + CHAMELEON_Complex64_t *C; + int ldc; + CHAMELEON_Complex64_t *WORK; + int ldwork; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, + &lda, &ldt, &ldc, &ldwork); + + CORE_zunmqr(side, trans, m, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork); +} + +#if defined(CHAMELEON_USE_CUDA) +static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m; + int n; + int k; + int ib; + const cuDoubleComplex *A, *T; + cuDoubleComplex *C, *WORK; + int lda, ldt, ldc, ldwork; + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, + &lda, &ldt, &ldc, &ldwork); + + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); + C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + RUNTIME_getStream(stream); + + CUDA_zunmqrt( + side, trans, m, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( 
stream ); +#endif +} +#endif /* defined(CHAMELEON_USE_CUDA) */ +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -105,18 +174,16 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ - -void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m, int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc) +void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc ) { struct starpu_codelet *codelet = &cl_zunmqr; void (*callback)(void*) = options->profiling ? 
cl_zunmqr_callback : NULL; @@ -151,73 +218,3 @@ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m; - int n; - int k; - int ib; - const CHAMELEON_Complex64_t *A; - int lda; - const CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *C; - int ldc; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); - - CORE_zunmqr(side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork); -} - -#if defined(CHAMELEON_USE_CUDA) -static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m; - int n; - int k; - int ib; - const cuDoubleComplex *A, *T; - cuDoubleComplex *C, *WORK; - int lda, ldt, ldc, ldwork; - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); - - A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - RUNTIME_getStream(stream); - - CUDA_zunmqrt( - side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif -} -#endif /* defined(CHAMELEON_USE_CUDA) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) diff --git 
a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index 509abacfc9a3dd9c2fd09729f8a7e7a351778476..b97e06ba890fc9806131e4446a6e2dfd8268204d 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -33,11 +33,6 @@ #endif #endif -/* - * Management functions - */ -ZCODELETS_HEADER(tile_zero) - /* * BLAS 1 functions */