From 978a5d43489c004514a76e0f56b8fb0e446a3d4e Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Fri, 8 Feb 2019 13:41:44 +0100 Subject: [PATCH] Cleanup OpenMP codelet and doc --- compute/zbuild.c | 8 +- compute/zgeadd.c | 6 +- compute/zgelqf.c | 8 +- compute/zgelqf_param.c | 8 +- compute/zgelqs.c | 8 +- compute/zgelqs_param.c | 8 +- compute/zgels.c | 8 +- compute/zgels_param.c | 8 +- compute/zgemm.c | 6 +- compute/zgeqrf.c | 8 +- compute/zgeqrf_param.c | 8 +- compute/zgeqrs.c | 8 +- compute/zgeqrs_param.c | 8 +- compute/zgesv_incpiv.c | 12 +- compute/zgesv_nopiv.c | 12 +- compute/zgesvd.c | 8 +- compute/zgetrf_incpiv.c | 12 +- compute/zgetrf_nopiv.c | 12 +- compute/zgetrs_incpiv.c | 8 +- compute/zgetrs_nopiv.c | 8 +- compute/zheevd.c | 14 +- compute/zhemm.c | 6 +- compute/zher2k.c | 6 +- compute/zherk.c | 6 +- compute/zhetrd.c | 14 +- compute/zlacpy.c | 3 +- compute/zlange.c | 6 +- compute/zlanhe.c | 6 +- compute/zlansy.c | 6 +- compute/zlantr.c | 6 +- compute/zlascal.c | 6 +- compute/zlaset.c | 3 +- compute/zlauum.c | 8 +- compute/zplghe.c | 8 +- compute/zplgsy.c | 8 +- compute/zplrnt.c | 8 +- compute/zposv.c | 12 +- compute/zpotrf.c | 12 +- compute/zpotri.c | 12 +- compute/zpotrimm.c | 12 +- compute/zpotrs.c | 8 +- compute/zsymm.c | 6 +- compute/zsyr2k.c | 6 +- compute/zsyrk.c | 6 +- compute/zsysv.c | 8 +- compute/zsytrf.c | 10 +- compute/zsytrs.c | 8 +- compute/ztile.c | 6 +- compute/ztpgqrt.c | 8 +- compute/ztpqrt.c | 8 +- compute/ztradd.c | 6 +- compute/ztrmm.c | 8 +- compute/ztrsm.c | 8 +- compute/ztrsmpl.c | 8 +- compute/ztrtri.c | 12 +- compute/zunglq.c | 8 +- compute/zunglq_param.c | 8 +- compute/zungqr.c | 8 +- compute/zungqr_param.c | 8 +- compute/zunmlq.c | 8 +- compute/zunmlq_param.c | 8 +- compute/zunmqr.c | 8 +- compute/zunmqr_param.c | 8 +- control/async.c | 12 +- control/auxiliary.c | 9 +- control/context.c | 12 +- control/control.c | 15 +- control/descriptor.c | 12 +- control/tile.c | 6 +- control/workspace.c | 3 +- 
control/workspace_z.c | 57 ++--- coreblas/compute/core_zaxpy.c | 5 +- coreblas/compute/core_zgeadd.c | 5 +- coreblas/compute/core_zgelqt.c | 5 +- coreblas/compute/core_zgeqrt.c | 5 +- coreblas/compute/core_zgesplit.c | 5 +- coreblas/compute/core_zgessm.c | 5 +- coreblas/compute/core_zgessq.c | 5 +- coreblas/compute/core_zgetf2_nopiv.c | 7 +- coreblas/compute/core_zgetrf_incpiv.c | 7 +- coreblas/compute/core_zgetrf_nopiv.c | 7 +- coreblas/compute/core_zherfb.c | 5 +- coreblas/compute/core_zhessq.c | 5 +- coreblas/compute/core_zlascal.c | 5 +- coreblas/compute/core_zlatro.c | 5 +- coreblas/compute/core_zpamm.c | 5 +- coreblas/compute/core_zparfb.c | 5 +- coreblas/compute/core_zpemv.c | 5 +- coreblas/compute/core_zssssm.c | 5 +- coreblas/compute/core_zsyssq.c | 5 +- coreblas/compute/core_ztplqt.c | 5 +- coreblas/compute/core_ztpmqrt.c | 5 +- coreblas/compute/core_ztpqrt.c | 5 +- coreblas/compute/core_ztradd.c | 5 +- coreblas/compute/core_ztrssq.c | 5 +- coreblas/compute/core_ztslqt.c | 5 +- coreblas/compute/core_ztsmlq.c | 5 +- coreblas/compute/core_ztsmlq_hetra1.c | 5 +- coreblas/compute/core_ztsmqr.c | 5 +- coreblas/compute/core_ztsmqr_hetra1.c | 5 +- coreblas/compute/core_ztsqrt.c | 5 +- coreblas/compute/core_ztstrf.c | 7 +- coreblas/compute/core_zttlqt.c | 5 +- coreblas/compute/core_zttmlq.c | 5 +- coreblas/compute/core_zttmqr.c | 5 +- coreblas/compute/core_zttqrt.c | 5 +- coreblas/compute/core_zunmlq.c | 5 +- coreblas/compute/core_zunmqr.c | 5 +- .../eztrace_module/coreblas_eztrace_module | 6 - cudablas/compute/cuda_zgeadd.c | 5 +- include/chameleon/tasks_z.h | 181 +++----------- runtime/CMakeLists.txt | 1 - runtime/openmp/codelets/codelet_zgeadd.c | 15 +- runtime/openmp/codelets/codelet_zgelqt.c | 5 +- runtime/openmp/codelets/codelet_zgemm.c | 2 +- runtime/openmp/codelets/codelet_zgeqrt.c | 5 +- runtime/openmp/codelets/codelet_zgessm.c | 7 +- runtime/openmp/codelets/codelet_zgetrf.c | 2 +- .../openmp/codelets/codelet_zgetrf_incpiv.c | 9 +- 
.../openmp/codelets/codelet_zgetrf_nopiv.c | 9 +- runtime/openmp/codelets/codelet_zhemm.c | 2 +- runtime/openmp/codelets/codelet_zhessq.c | 2 +- runtime/openmp/codelets/codelet_zlacpy.c | 20 +- runtime/openmp/codelets/codelet_zlag2c.c | 23 +- runtime/openmp/codelets/codelet_zlascal.c | 7 +- runtime/openmp/codelets/codelet_zlatro.c | 12 +- runtime/openmp/codelets/codelet_zlauum.c | 2 +- runtime/openmp/codelets/codelet_zplghe.c | 8 +- runtime/openmp/codelets/codelet_zplgsy.c | 6 +- runtime/openmp/codelets/codelet_zplrnt.c | 6 +- runtime/openmp/codelets/codelet_zplssq.c | 4 +- runtime/openmp/codelets/codelet_zssssm.c | 13 +- runtime/openmp/codelets/codelet_zsymm.c | 2 +- runtime/openmp/codelets/codelet_zsyssq.c | 2 +- .../openmp/codelets/codelet_zsytrf_nopiv.c | 2 +- runtime/openmp/codelets/codelet_ztile_zero.c | 38 --- runtime/openmp/codelets/codelet_ztplqt.c | 15 +- runtime/openmp/codelets/codelet_ztpmlqt.c | 23 +- runtime/openmp/codelets/codelet_ztpmqrt.c | 17 +- runtime/openmp/codelets/codelet_ztpqrt.c | 17 +- runtime/openmp/codelets/codelet_ztradd.c | 17 +- runtime/openmp/codelets/codelet_ztstrf.c | 7 +- runtime/openmp/codelets/codelet_zunmlq.c | 5 +- runtime/openmp/codelets/codelet_zunmqr.c | 5 +- runtime/parsec/codelets/codelet_zgeadd.c | 15 +- runtime/parsec/codelets/codelet_zgelqt.c | 5 +- runtime/parsec/codelets/codelet_zgeqrt.c | 5 +- runtime/parsec/codelets/codelet_zgessm.c | 5 +- .../parsec/codelets/codelet_zgetrf_incpiv.c | 7 +- .../parsec/codelets/codelet_zgetrf_nopiv.c | 7 +- runtime/parsec/codelets/codelet_zlacpy.c | 19 +- runtime/parsec/codelets/codelet_zpamm.c | 224 ------------------ runtime/parsec/codelets/codelet_zplssq.c | 58 ++--- runtime/parsec/codelets/codelet_ztile_zero.c | 61 ----- runtime/parsec/codelets/codelet_ztradd.c | 15 +- runtime/quark/codelets/codelet_zgeadd.c | 15 +- runtime/quark/codelets/codelet_zgelqt.c | 5 +- runtime/quark/codelets/codelet_zgeqrt.c | 5 +- runtime/quark/codelets/codelet_zgessm.c | 5 +- 
.../quark/codelets/codelet_zgetrf_incpiv.c | 7 +- runtime/quark/codelets/codelet_zgetrf_nopiv.c | 7 +- runtime/quark/codelets/codelet_zlacpy.c | 20 +- runtime/quark/codelets/codelet_zpamm.c | 220 ----------------- runtime/quark/codelets/codelet_zplssq.c | 8 +- runtime/quark/codelets/codelet_zssssm.c | 5 +- runtime/quark/codelets/codelet_ztile_zero.c | 58 ----- runtime/quark/codelets/codelet_ztradd.c | 15 +- runtime/quark/codelets/codelet_ztstrf.c | 7 +- runtime/quark/codelets/codelet_zunmlq.c | 5 +- runtime/quark/codelets/codelet_zunmqr.c | 5 +- runtime/starpu/codelets/codelet_zasum.c | 55 +++-- runtime/starpu/codelets/codelet_zaxpy.c | 53 ++--- runtime/starpu/codelets/codelet_zbuild.c | 57 +++-- runtime/starpu/codelets/codelet_zgeadd.c | 144 ++++++----- runtime/starpu/codelets/codelet_zgelqt.c | 5 +- runtime/starpu/codelets/codelet_zgemm.c | 90 +++---- runtime/starpu/codelets/codelet_zgeqrt.c | 5 +- runtime/starpu/codelets/codelet_zgessm.c | 70 +++--- runtime/starpu/codelets/codelet_zgessq.c | 49 ++-- runtime/starpu/codelets/codelet_zgetrf.c | 61 +++-- .../starpu/codelets/codelet_zgetrf_incpiv.c | 72 +++--- .../starpu/codelets/codelet_zgetrf_nopiv.c | 71 +++--- runtime/starpu/codelets/codelet_zhe2ge.c | 46 ++-- runtime/starpu/codelets/codelet_zhemm.c | 89 ++++--- runtime/starpu/codelets/codelet_zher2k.c | 89 ++++--- runtime/starpu/codelets/codelet_zherfb.c | 89 ++++--- runtime/starpu/codelets/codelet_zherk.c | 80 +++---- runtime/starpu/codelets/codelet_zhessq.c | 43 ++-- runtime/starpu/codelets/codelet_zlacpy.c | 66 +++--- runtime/starpu/codelets/codelet_zlag2c.c | 66 +++--- runtime/starpu/codelets/codelet_zlange.c | 70 +++--- runtime/starpu/codelets/codelet_zlanhe.c | 48 ++-- runtime/starpu/codelets/codelet_zlansy.c | 56 ++--- runtime/starpu/codelets/codelet_zlantr.c | 56 ++--- runtime/starpu/codelets/codelet_zlascal.c | 51 ++-- runtime/starpu/codelets/codelet_zlaset.c | 44 ++-- runtime/starpu/codelets/codelet_zlaset2.c | 42 ++-- 
runtime/starpu/codelets/codelet_zlatro.c | 58 ++--- runtime/starpu/codelets/codelet_zlauum.c | 45 ++-- runtime/starpu/codelets/codelet_zplghe.c | 57 +++-- runtime/starpu/codelets/codelet_zplgsy.c | 52 ++-- runtime/starpu/codelets/codelet_zplrnt.c | 52 ++-- runtime/starpu/codelets/codelet_zplssq.c | 72 +++--- runtime/starpu/codelets/codelet_zpotrf.c | 58 +++-- runtime/starpu/codelets/codelet_zssssm.c | 88 ++++--- runtime/starpu/codelets/codelet_zsymm.c | 89 ++++--- runtime/starpu/codelets/codelet_zsyr2k.c | 89 ++++--- runtime/starpu/codelets/codelet_zsyrk.c | 81 ++++--- runtime/starpu/codelets/codelet_zsyssq.c | 49 ++-- .../starpu/codelets/codelet_zsytrf_nopiv.c | 51 ++-- runtime/starpu/codelets/codelet_ztile_zero.c | 84 ------- runtime/starpu/codelets/codelet_ztplqt.c | 11 +- runtime/starpu/codelets/codelet_ztpmlqt.c | 15 +- runtime/starpu/codelets/codelet_ztpmqrt.c | 15 +- runtime/starpu/codelets/codelet_ztpqrt.c | 11 +- runtime/starpu/codelets/codelet_ztradd.c | 70 +++--- runtime/starpu/codelets/codelet_ztrasm.c | 59 +++-- runtime/starpu/codelets/codelet_ztrmm.c | 83 ++++--- runtime/starpu/codelets/codelet_ztrsm.c | 83 ++++--- runtime/starpu/codelets/codelet_ztrssq.c | 55 +++-- runtime/starpu/codelets/codelet_ztrtri.c | 69 +++--- .../starpu/codelets/codelet_ztsmlq_hetra1.c | 98 ++++---- .../starpu/codelets/codelet_ztsmqr_hetra1.c | 98 ++++---- runtime/starpu/codelets/codelet_ztstrf.c | 114 +++++---- runtime/starpu/codelets/codelet_zunmlq.c | 157 ++++++------ runtime/starpu/codelets/codelet_zunmqr.c | 157 ++++++------ runtime/starpu/include/runtime_codelet_z.h | 5 - 227 files changed, 2359 insertions(+), 3476 deletions(-) delete mode 100644 runtime/openmp/codelets/codelet_ztile_zero.c delete mode 100644 runtime/parsec/codelets/codelet_zpamm.c delete mode 100644 runtime/parsec/codelets/codelet_ztile_zero.c delete mode 100644 runtime/quark/codelets/codelet_zpamm.c delete mode 100644 runtime/quark/codelets/codelet_ztile_zero.c delete mode 100644 
runtime/starpu/codelets/codelet_ztile_zero.c diff --git a/compute/zbuild.c b/compute/zbuild.c index 22f2676df..6ec2419ca 100644 --- a/compute/zbuild.c +++ b/compute/zbuild.c @@ -66,9 +66,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -175,8 +174,7 @@ int CHAMELEON_zbuild( cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeadd.c b/compute/zgeadd.c index cc14238e9..e2674316a 100644 --- a/compute/zgeadd.c +++ b/compute/zgeadd.c @@ -75,8 +75,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -211,8 +210,7 @@ int CHAMELEON_zgeadd( cham_trans_t trans, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgelqf.c b/compute/zgelqf.c index 8c0cc4bec..975250334 100644 --- a/compute/zgelqf.c +++ b/compute/zgelqf.c @@ -56,9 +56,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval 
<0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -165,8 +164,7 @@ int CHAMELEON_zgelqf( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c index fecf0f20f..c507463ef 100644 --- a/compute/zgelqf_param.c +++ b/compute/zgelqf_param.c @@ -54,9 +54,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -164,8 +163,7 @@ int CHAMELEON_zgelqf_param( const libhqr_tree_t *qrtree, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgelqs.c b/compute/zgelqs.c index 08ec0dd3d..cc9b89fef 100644 --- a/compute/zgelqs.c +++ b/compute/zgelqs.c @@ -62,9 +62,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * 
******************************************************************************* * @@ -186,8 +185,7 @@ int CHAMELEON_zgelqs( int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c index b594dd602..720ab5d4d 100644 --- a/compute/zgelqs_param.c +++ b/compute/zgelqs_param.c @@ -64,9 +64,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -192,8 +191,7 @@ int CHAMELEON_zgelqs_param( const libhqr_tree_t *qrtree, int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgels.c b/compute/zgels.c index 99d7914da..276c4e3a6 100644 --- a/compute/zgels.c +++ b/compute/zgels.c @@ -89,9 +89,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -244,8 +243,7 @@ int CHAMELEON_zgels( cham_trans_t trans, int M, int N, int NRHS, * 
******************************************************************************* * - * @return - * \return CHAMELEON_SUCCESS successful exit + * @return CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgels_param.c b/compute/zgels_param.c index ea23c9a49..34ab5c609 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -92,9 +92,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -250,8 +249,7 @@ int CHAMELEON_zgels_param( const libhqr_tree_t *qrtree, cham_trans_t trans, int * ******************************************************************************* * - * @return - * \return CHAMELEON_SUCCESS successful exit + * @return CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgemm.c b/compute/zgemm.c index 8d7cfba19..e266039ad 100644 --- a/compute/zgemm.c +++ b/compute/zgemm.c @@ -114,8 +114,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -283,8 +282,7 @@ int CHAMELEON_zgemm( cham_trans_t transA, cham_trans_t transB, int M, int N, int * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff 
--git a/compute/zgeqrf.c b/compute/zgeqrf.c index f365fc510..9ccd619c9 100644 --- a/compute/zgeqrf.c +++ b/compute/zgeqrf.c @@ -55,9 +55,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -164,8 +163,7 @@ int CHAMELEON_zgeqrf( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c index 36cb65d2a..d83e3f447 100644 --- a/compute/zgeqrf_param.c +++ b/compute/zgeqrf_param.c @@ -59,9 +59,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -173,8 +172,7 @@ int CHAMELEON_zgeqrf_param( const libhqr_tree_t *qrtree, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c index ee2d2bc6d..7af82e43f 100644 --- a/compute/zgeqrs.c +++ b/compute/zgeqrs.c @@ -62,9 +62,8 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -185,8 +184,7 @@ int CHAMELEON_zgeqrs( int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c index 15a5ff0d6..7fe000a50 100644 --- a/compute/zgeqrs_param.c +++ b/compute/zgeqrs_param.c @@ -58,9 +58,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -182,8 +181,7 @@ int CHAMELEON_zgeqrs_param( const libhqr_tree_t *qrtree, int M, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgesv_incpiv.c b/compute/zgesv_incpiv.c index 6dd3073f6..275b6dc6a 100644 --- a/compute/zgesv_incpiv.c +++ b/compute/zgesv_incpiv.c @@ -67,10 +67,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if 
-i, the i-th argument had an illegal value - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. * ******************************************************************************* @@ -189,9 +188,8 @@ int CHAMELEON_zgesv_incpiv( int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. * ******************************************************************************* diff --git a/compute/zgesv_nopiv.c b/compute/zgesv_nopiv.c index 7dbf73caa..f7dfbb880 100644 --- a/compute/zgesv_nopiv.c +++ b/compute/zgesv_nopiv.c @@ -66,10 +66,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. 
* ******************************************************************************* @@ -179,9 +178,8 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. * ******************************************************************************* diff --git a/compute/zgesvd.c b/compute/zgesvd.c index a9ba03d41..5b3cf9bd6 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -129,9 +129,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -318,8 +317,7 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, * ******************************************************************************* * - * @return - * \return CHAMELEON_SUCCESS successful exit + * @return CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgetrf_incpiv.c b/compute/zgetrf_incpiv.c index f990126eb..273b247f3 100644 --- a/compute/zgetrf_incpiv.c +++ b/compute/zgetrf_incpiv.c @@ -56,10 +56,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, U(i,i) is exactly zero. 
The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, and division by zero will occur * if it is used to solve a system of equations. * @@ -166,9 +165,8 @@ int CHAMELEON_zgetrf_incpiv( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, and division by zero will occur * if it is used to solve a system of equations. * diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c index f99d3d9e1..0e1004c43 100644 --- a/compute/zgetrf_nopiv.c +++ b/compute/zgetrf_nopiv.c @@ -50,10 +50,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, U(i,i) is exactly zero. The factorization has been + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been * completed, but the factor U is exactly singular, and division * by zero will occur if it is used to solve a system of * equations. @@ -151,9 +150,8 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, U(i,i) is exactly zero. 
The factorization has been completed, + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, and division by zero will occur * if it is used to solve a system of equations. * diff --git a/compute/zgetrs_incpiv.c b/compute/zgetrs_incpiv.c index f0cf32cb5..8d9aa3630 100644 --- a/compute/zgetrs_incpiv.c +++ b/compute/zgetrs_incpiv.c @@ -69,9 +69,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \return <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @return <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -194,8 +193,7 @@ int CHAMELEON_zgetrs_incpiv( cham_trans_t trans, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zgetrs_nopiv.c b/compute/zgetrs_nopiv.c index fb8ac0722..33b3cf70b 100644 --- a/compute/zgetrs_nopiv.c +++ b/compute/zgetrs_nopiv.c @@ -64,9 +64,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \return <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @return <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -181,8 +180,7 @@ int CHAMELEON_zgetrs_nopiv( cham_trans_t trans, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS 
successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zheevd.c b/compute/zheevd.c index 7f1a8b497..1291e43a8 100644 --- a/compute/zheevd.c +++ b/compute/zheevd.c @@ -79,10 +79,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if INFO = i, the algorithm failed to converge; i + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if INFO = i, the algorithm failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. * @@ -219,10 +218,9 @@ int CHAMELEON_zheevd( cham_job_t jobz, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if INFO = i, the algorithm failed to converge; i + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if INFO = i, the algorithm failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. 
* diff --git a/compute/zhemm.c b/compute/zhemm.c index fd968b42d..43f123975 100644 --- a/compute/zhemm.c +++ b/compute/zhemm.c @@ -90,8 +90,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -248,8 +247,7 @@ int CHAMELEON_zhemm( cham_side_t side, cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zher2k.c b/compute/zher2k.c index 216de17a5..fc8a746a3 100644 --- a/compute/zher2k.c +++ b/compute/zher2k.c @@ -92,8 +92,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -249,8 +248,7 @@ int CHAMELEON_zher2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zherk.c b/compute/zherk.c index 13e59ce4c..ff3b21ddc 100644 --- a/compute/zherk.c +++ b/compute/zherk.c @@ -82,8 +82,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -225,8 +224,7 @@ 
int CHAMELEON_zherk( cham_uplo_t uplo, cham_trans_t trans, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zhetrd.c b/compute/zhetrd.c index 0815e1dd6..f0686a16c 100644 --- a/compute/zhetrd.c +++ b/compute/zhetrd.c @@ -92,10 +92,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if INFO = i, the algorithm failed to converge; i + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if INFO = i, the algorithm failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. * @@ -245,10 +244,9 @@ int CHAMELEON_zhetrd( cham_job_t jobz, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if INFO = i, the algorithm failed to converge; i + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if INFO = i, the algorithm failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. 
* diff --git a/compute/zlacpy.c b/compute/zlacpy.c index 7bd169637..73ad779e6 100644 --- a/compute/zlacpy.c +++ b/compute/zlacpy.c @@ -180,8 +180,7 @@ int CHAMELEON_zlacpy( cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlange.c b/compute/zlange.c index 522c8c204..b1e9269d2 100644 --- a/compute/zlange.c +++ b/compute/zlange.c @@ -66,8 +66,7 @@ * ******************************************************************************* * - * @return - * \retval the norm described above. + * @return the norm described above. * ******************************************************************************* * @@ -176,8 +175,7 @@ double CHAMELEON_zlange(cham_normtype_t norm, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlanhe.c b/compute/zlanhe.c index e2dad154c..50f3d1f92 100644 --- a/compute/zlanhe.c +++ b/compute/zlanhe.c @@ -66,8 +66,7 @@ * ******************************************************************************* * - * @return - * \retval the norm described above. + * @return the norm described above.
* ******************************************************************************* * @@ -180,8 +179,7 @@ double CHAMELEON_zlanhe(cham_normtype_t norm, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlansy.c b/compute/zlansy.c index dc9b1236e..c7e39a453 100644 --- a/compute/zlansy.c +++ b/compute/zlansy.c @@ -66,8 +66,7 @@ * ******************************************************************************* * - * @return - * \retval the norm described above. + * @return the norm described above. * ******************************************************************************* * @@ -180,8 +179,7 @@ double CHAMELEON_zlansy(cham_normtype_t norm, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlantr.c b/compute/zlantr.c index 6721a9b9f..005fea133 100644 --- a/compute/zlantr.c +++ b/compute/zlantr.c @@ -78,8 +78,7 @@ * ******************************************************************************* * - * @return - * \retval the norm described above. + * @return the norm described above.
* ******************************************************************************* * @@ -202,8 +201,7 @@ double CHAMELEON_zlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlascal.c b/compute/zlascal.c index bae3815fb..0d0ff18b6 100644 --- a/compute/zlascal.c +++ b/compute/zlascal.c @@ -57,8 +57,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -168,8 +167,7 @@ int CHAMELEON_zlascal( cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlaset.c b/compute/zlaset.c index 7001e66a2..0ab77a34e 100644 --- a/compute/zlaset.c +++ b/compute/zlaset.c @@ -167,8 +167,7 @@ int CHAMELEON_zlaset( cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zlauum.c b/compute/zlauum.c index 9907d0b08..254eb2b18 100644 --- a/compute/zlauum.c +++ b/compute/zlauum.c @@ -58,9 +58,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * 
\retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -165,8 +164,7 @@ int CHAMELEON_zlauum( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zplghe.c b/compute/zplghe.c index 3fd07d511..ceb0a138b 100644 --- a/compute/zplghe.c +++ b/compute/zplghe.c @@ -56,9 +56,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -160,8 +159,7 @@ int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zplgsy.c b/compute/zplgsy.c index 809e2a224..ff033d819 100644 --- a/compute/zplgsy.c +++ b/compute/zplgsy.c @@ -56,9 +56,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * 
******************************************************************************* * @@ -160,8 +159,7 @@ int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zplrnt.c b/compute/zplrnt.c index 3e15ea36a..56a3cedaf 100644 --- a/compute/zplrnt.c +++ b/compute/zplrnt.c @@ -49,9 +49,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -150,8 +149,7 @@ int CHAMELEON_zplrnt( int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zposv.c b/compute/zposv.c index 055f17e88..668fec3c5 100644 --- a/compute/zposv.c +++ b/compute/zposv.c @@ -75,10 +75,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the 
solution has not been computed. * ******************************************************************************* @@ -206,9 +205,8 @@ int CHAMELEON_zposv( cham_uplo_t uplo, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. * ******************************************************************************* diff --git a/compute/zpotrf.c b/compute/zpotrf.c index bb8485337..d7054e42d 100644 --- a/compute/zpotrf.c +++ b/compute/zpotrf.c @@ -62,10 +62,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. 
* ******************************************************************************* @@ -175,9 +174,8 @@ int CHAMELEON_zpotrf( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. * ******************************************************************************* diff --git a/compute/zpotri.c b/compute/zpotri.c index d903bda64..2de905c8d 100644 --- a/compute/zpotri.c +++ b/compute/zpotri.c @@ -53,10 +53,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the (i,i) element of the factor U or L is + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the (i,i) element of the factor U or L is * zero, and the inverse could not be computed. * ******************************************************************************* @@ -162,9 +161,8 @@ int CHAMELEON_zpotri( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, the leading minor of order i of A is not + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not * positive definite, so the factorization could not be * completed, and the solution has not been computed. 
* diff --git a/compute/zpotrimm.c b/compute/zpotrimm.c index 0f3d8146e..ca57f4962 100644 --- a/compute/zpotrimm.c +++ b/compute/zpotrimm.c @@ -53,10 +53,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the (i,i) element of the factor U or L is + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the (i,i) element of the factor U or L is * zero, and the inverse could not be computed. * ******************************************************************************* @@ -184,9 +183,8 @@ int CHAMELEON_zpotrimm( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, the leading minor of order i of A is not + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not * positive definite, so the factorization could not be * completed, and the solution has not been computed. 
* diff --git a/compute/zpotrs.c b/compute/zpotrs.c index 3e242d114..7cce83910 100644 --- a/compute/zpotrs.c +++ b/compute/zpotrs.c @@ -61,9 +61,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -183,8 +182,7 @@ int CHAMELEON_zpotrs( cham_uplo_t uplo, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsymm.c b/compute/zsymm.c index 4a64f907e..13221e335 100644 --- a/compute/zsymm.c +++ b/compute/zsymm.c @@ -90,8 +90,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -248,8 +247,7 @@ int CHAMELEON_zsymm( cham_side_t side, cham_uplo_t uplo, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsyr2k.c b/compute/zsyr2k.c index 9ef352142..0fe3e6f9e 100644 --- a/compute/zsyr2k.c +++ b/compute/zsyr2k.c @@ -92,8 +92,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful 
exit * ******************************************************************************* * @@ -249,8 +248,7 @@ int CHAMELEON_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsyrk.c b/compute/zsyrk.c index e1c6db986..91f4627b8 100644 --- a/compute/zsyrk.c +++ b/compute/zsyrk.c @@ -82,8 +82,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -225,8 +224,7 @@ int CHAMELEON_zsyrk( cham_uplo_t uplo, cham_trans_t trans, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsysv.c b/compute/zsysv.c index 5b40d66e5..baf78e90e 100644 --- a/compute/zsysv.c +++ b/compute/zsysv.c @@ -76,9 +76,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -203,8 +202,7 @@ int CHAMELEON_zsysv( cham_uplo_t uplo, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * 
@retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsytrf.c b/compute/zsytrf.c index 508f93a9a..44ea078c6 100644 --- a/compute/zsytrf.c +++ b/compute/zsytrf.c @@ -57,10 +57,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, the leading minor of order i of A is not positive definite, so the + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. * ******************************************************************************* @@ -169,8 +168,7 @@ int CHAMELEON_zsytrf( cham_uplo_t uplo, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zsytrs.c b/compute/zsytrs.c index 90256661b..4eeb3d7d3 100644 --- a/compute/zsytrs.c +++ b/compute/zsytrs.c @@ -63,9 +63,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -182,8 +181,7 @@ int CHAMELEON_zsytrs( cham_uplo_t uplo, int N, int NRHS, * ******************************************************************************* * - * @return - * \retval 
CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztile.c b/compute/ztile.c index d95a729b7..3cafc9b7b 100644 --- a/compute/ztile.c +++ b/compute/ztile.c @@ -45,8 +45,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -122,8 +121,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c index 4be3c8293..b9d07b870 100644 --- a/compute/ztpgqrt.c +++ b/compute/ztpgqrt.c @@ -115,9 +115,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -258,8 +257,7 @@ int CHAMELEON_ztpgqrt( int M, int N, int K, int L, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c index 8847d9235..04e7ddfa6 100644 --- a/compute/ztpqrt.c +++ b/compute/ztpqrt.c @@ -110,9 +110,8 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -235,8 +234,7 @@ int CHAMELEON_ztpqrt( int M, int N, int L, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztradd.c b/compute/ztradd.c index f5f2d8217..b5e85ec81 100644 --- a/compute/ztradd.c +++ b/compute/ztradd.c @@ -81,8 +81,7 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * @@ -227,8 +226,7 @@ int CHAMELEON_ztradd( cham_uplo_t uplo, cham_trans_t trans, int M, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztrmm.c b/compute/ztrmm.c index 96ef0f7e3..3380900f6 100644 --- a/compute/ztrmm.c +++ b/compute/ztrmm.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * 
******************************************************************************* * @@ -249,8 +248,7 @@ int CHAMELEON_ztrmm( cham_side_t side, cham_uplo_t uplo, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztrsm.c b/compute/ztrsm.c index abcdf8e9e..cc76ab7bd 100644 --- a/compute/ztrsm.c +++ b/compute/ztrsm.c @@ -83,9 +83,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -247,8 +246,7 @@ int CHAMELEON_ztrsm( cham_side_t side, cham_uplo_t uplo, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztrsmpl.c b/compute/ztrsmpl.c index dd7859cf8..2cac2da62 100644 --- a/compute/ztrsmpl.c +++ b/compute/ztrsmpl.c @@ -61,9 +61,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -179,8 +178,7 @@ int CHAMELEON_ztrsmpl( int N, int NRHS, * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/ztrtri.c b/compute/ztrtri.c index cb19dffa9..6a2f8f321 100644 --- a/compute/ztrtri.c +++ b/compute/ztrtri.c @@ -61,10 +61,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * \retval >0 if i, A(i,i) is exactly zero. The triangular + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, A(i,i) is exactly zero. The triangular * matrix is singular and its inverse can not be computed. * ******************************************************************************* @@ -182,9 +181,8 @@ int CHAMELEON_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval >0 if i, A(i,i) is exactly zero. The triangular + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, A(i,i) is exactly zero. The triangular * matrix is singular and its inverse can not be computed. 
* ******************************************************************************* diff --git a/compute/zunglq.c b/compute/zunglq.c index 410154646..ef284b861 100644 --- a/compute/zunglq.c +++ b/compute/zunglq.c @@ -62,9 +62,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -181,8 +180,7 @@ int CHAMELEON_zunglq( int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c index e6d369542..09f40a29f 100644 --- a/compute/zunglq_param.c +++ b/compute/zunglq_param.c @@ -60,9 +60,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -180,8 +179,7 @@ int CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zungqr.c
b/compute/zungqr.c index c51539616..6ae056b2d 100644 --- a/compute/zungqr.c +++ b/compute/zungqr.c @@ -62,9 +62,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -180,8 +179,7 @@ int CHAMELEON_zungqr( int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c index 5b46d66ff..9ed032da7 100644 --- a/compute/zungqr_param.c +++ b/compute/zungqr_param.c @@ -60,9 +60,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -181,8 +180,7 @@ int CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zunmlq.c b/compute/zunmlq.c index f460e12e4..f3948bf39 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -86,9 +86,8 @@ * ******************************************************************************* * - * 
@return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -234,8 +233,7 @@ int CHAMELEON_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c index 4c0a72358..46372cef9 100644 --- a/compute/zunmlq_param.c +++ b/compute/zunmlq_param.c @@ -86,9 +86,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -233,8 +232,7 @@ int CHAMELEON_zunmlq_param( const libhqr_tree_t *qrtree, cham_side_t side, cham_ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zunmqr.c b/compute/zunmqr.c index 6271ed98e..78be51f52 100644 --- a/compute/zunmqr.c +++ b/compute/zunmqr.c @@ -88,9 +88,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS 
successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -236,8 +235,7 @@ int CHAMELEON_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c index 5674ba090..434c16a04 100644 --- a/compute/zunmqr_param.c +++ b/compute/zunmqr_param.c @@ -89,9 +89,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -239,8 +238,7 @@ int CHAMELEON_zunmqr_param( const libhqr_tree_t *qrtree, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ******************************************************************************* * diff --git a/control/async.c b/control/async.c index 5e65c2b09..55351f6b6 100644 --- a/control/async.c +++ b/control/async.c @@ -86,8 +86,7 @@ int chameleon_sequence_wait(CHAM_context_t *chamctxt, RUNTIME_sequence_t *sequen * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Sequence_Create(RUNTIME_sequence_t **sequence) @@ -117,8 +116,7 @@ int CHAMELEON_Sequence_Create(RUNTIME_sequence_t 
**sequence) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Sequence_Destroy(RUNTIME_sequence_t *sequence) @@ -152,8 +150,7 @@ int CHAMELEON_Sequence_Destroy(RUNTIME_sequence_t *sequence) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Sequence_Wait(RUNTIME_sequence_t *sequence) @@ -190,8 +187,7 @@ int CHAMELEON_Sequence_Wait(RUNTIME_sequence_t *sequence) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Sequence_Flush(RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) diff --git a/control/auxiliary.c b/control/auxiliary.c index 032dc0684..5f90b85af 100644 --- a/control/auxiliary.c +++ b/control/auxiliary.c @@ -139,8 +139,7 @@ int chameleon_tune(cham_tasktype_t func, int M, int N, int NRHS) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro) @@ -180,8 +179,7 @@ int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro) * ****************************************************************************** * - * @return - * \retval Element size in bytes + * @return Element size in bytes * */ int CHAMELEON_Element_Size(int type) @@ -209,8 +207,7 @@ int CHAMELEON_Element_Size(int type) * ****************************************************************************** * - * @return - * \retval MPI rank + * @return MPI rank * */ int CHAMELEON_My_Mpi_Rank(void) diff --git a/control/context.c
b/control/context.c index 881abe974..fa0dcd250 100644 --- a/control/context.c +++ b/control/context.c @@ -123,8 +123,7 @@ int chameleon_context_destroy(){ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Enable(int option) @@ -192,8 +191,7 @@ int CHAMELEON_Enable(int option) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Disable(int option) @@ -256,8 +254,7 @@ int CHAMELEON_Disable(int option) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Set( int param, int value ) @@ -350,8 +347,7 @@ int CHAMELEON_Set( int param, int value ) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Get(int param, int *value) diff --git a/control/control.c b/control/control.c index 8a8de8717..08765ca2d 100644 --- a/control/control.c +++ b/control/control.c @@ -154,8 +154,7 @@ int __chameleon_finalize(void) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Pause(void) @@ -178,8 +177,7 @@ int CHAMELEON_Pause(void) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Resume(void) @@ -201,8 +199,7 @@ int CHAMELEON_Resume(void) * 
****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Distributed_start(void) @@ -224,8 +221,7 @@ int CHAMELEON_Distributed_start(void) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Distributed_stop(void) @@ -294,8 +290,7 @@ int CHAMELEON_Comm_rank() * ****************************************************************************** * - * @return - * \retval The number of CPU workers started + * @return The number of CPU workers started * */ int CHAMELEON_GetThreadNbr( ) diff --git a/control/descriptor.c b/control/descriptor.c index 06e52cdec..f32800a28 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -413,8 +413,7 @@ int chameleon_desc_check(const CHAM_desc_t *desc) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, @@ -476,8 +475,7 @@ int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, @@ -560,8 +558,7 @@ int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */
int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz, @@ -621,8 +618,7 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz, diff --git a/control/tile.c b/control/tile.c index 960c88f5d..560f5dd7d 100644 --- a/control/tile.c +++ b/control/tile.c @@ -44,8 +44,7 @@ * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Lapack_to_Tile(void *Af77, int LDA, CHAM_desc_t *A) @@ -86,8 +85,7 @@ int CHAMELEON_Lapack_to_Tile(void *Af77, int LDA, CHAM_desc_t *A) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Tile_to_Lapack(CHAM_desc_t *A, void *Af77, int LDA) diff --git a/control/workspace.c b/control/workspace.c index 8039447fb..4a8b078e3 100644 --- a/control/workspace.c +++ b/control/workspace.c @@ -138,8 +138,7 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Dealloc_Workspace(CHAM_desc_t **desc) diff --git a/control/workspace_z.c b/control/workspace_z.c index 6009bac50..732d86fe0 100644 --- a/control/workspace_z.c +++ b/control/workspace_z.c @@ -45,8 +45,7 @@ * ****************************************************************************** * - * @return - * \retval 
CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgeev(int N, CHAM_desc_t **descT, int p, int q) { @@ -70,8 +69,7 @@ int CHAMELEON_Alloc_Workspace_zgeev(int N, CHAM_desc_t **descT, int p, int q) { * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgehrd(int N, CHAM_desc_t **descT, int p, int q) { @@ -97,8 +95,7 @@ int CHAMELEON_Alloc_Workspace_zgehrd(int N, CHAM_desc_t **descT, int p, int q) { * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgebrd(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -125,8 +122,7 @@ int CHAMELEON_Alloc_Workspace_zgebrd(int M, int N, CHAM_desc_t **descT, int p, i * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgels(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -159,8 +155,7 @@ int CHAMELEON_Alloc_Workspace_zgels(int M, int N, CHAM_desc_t **descT, int p, in * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgels_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -193,8 +188,7 @@ int CHAMELEON_Alloc_Workspace_zgels_Tile(int M, int N, CHAM_desc_t **descT, int * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int 
CHAMELEON_Alloc_Workspace_zgeqrf(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -227,8 +221,7 @@ int CHAMELEON_Alloc_Workspace_zgeqrf(int M, int N, CHAM_desc_t **descT, int p, i * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgeqrf_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -261,8 +254,7 @@ int CHAMELEON_Alloc_Workspace_zgeqrf_Tile(int M, int N, CHAM_desc_t **descT, int * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgelqf(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -294,8 +286,7 @@ int CHAMELEON_Alloc_Workspace_zgelqf(int M, int N, CHAM_desc_t **descT, int p, i * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgelqf_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -324,8 +315,7 @@ int CHAMELEON_Alloc_Workspace_zgelqf_Tile(int M, int N, CHAM_desc_t **descT, int * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgesv_incpiv(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) { @@ -354,8 +344,7 @@ int CHAMELEON_Alloc_Workspace_zgesv_incpiv(int N, CHAM_desc_t **descL, int **IPI * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgesv_incpiv_Tile(int N, CHAM_desc_t **descL, 
int **IPIV, int p, int q) @@ -388,8 +377,7 @@ int CHAMELEON_Alloc_Workspace_zgesv_incpiv_Tile(int N, CHAM_desc_t **descL, int * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgesvd(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -421,8 +409,7 @@ int CHAMELEON_Alloc_Workspace_zgesvd(int M, int N, CHAM_desc_t **descT, int p, i * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * ****************************************************************************** * @@ -456,8 +443,7 @@ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, i * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv_Tile(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) { @@ -488,8 +474,7 @@ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv_Tile(int N, CHAM_desc_t **descL, int * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zheev(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -520,8 +505,7 @@ int CHAMELEON_Alloc_Workspace_zheev(int M, int N, CHAM_desc_t **descT, int p, in * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zheevd(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -552,8 +536,7 @@ int 
CHAMELEON_Alloc_Workspace_zheevd(int M, int N, CHAM_desc_t **descT, int p, i * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zhegv(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -584,8 +567,7 @@ int CHAMELEON_Alloc_Workspace_zhegv(int M, int N, CHAM_desc_t **descT, int p, in * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zhegvd(int M, int N, CHAM_desc_t **descT, int p, int q) { @@ -616,8 +598,7 @@ int CHAMELEON_Alloc_Workspace_zhegvd(int M, int N, CHAM_desc_t **descT, int p, i * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Alloc_Workspace_zhetrd(int M, int N, CHAM_desc_t **descT, int p, int q) { diff --git a/coreblas/compute/core_zaxpy.c b/coreblas/compute/core_zaxpy.c index d3477032a..a982aaafe 100644 --- a/coreblas/compute/core_zaxpy.c +++ b/coreblas/compute/core_zaxpy.c @@ -47,9 +47,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgeadd.c b/coreblas/compute/core_zgeadd.c index a85bec68a..5afb5a770 100644 --- a/coreblas/compute/core_zgeadd.c +++ b/coreblas/compute/core_zgeadd.c @@ -71,9 +71,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if 
-i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_zgelqt.c b/coreblas/compute/core_zgelqt.c index 7a2a74ca0..7793a76df 100644 --- a/coreblas/compute/core_zgelqt.c +++ b/coreblas/compute/core_zgelqt.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgeqrt.c b/coreblas/compute/core_zgeqrt.c index 76fcfdfc2..ab5681866 100644 --- a/coreblas/compute/core_zgeqrt.c +++ b/coreblas/compute/core_zgeqrt.c @@ -83,9 +83,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgesplit.c b/coreblas/compute/core_zgesplit.c index 5255442c5..0f30ae816 100644 --- a/coreblas/compute/core_zgesplit.c +++ b/coreblas/compute/core_zgesplit.c @@ -56,9 +56,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgessm.c b/coreblas/compute/core_zgessm.c index 9757800cd..c395a30ff 100644 --- a/coreblas/compute/core_zgessm.c +++ b/coreblas/compute/core_zgessm.c @@ -68,9 +68,8 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zgessq.c b/coreblas/compute/core_zgessq.c index 55bbf091f..e6462f979 100644 --- a/coreblas/compute/core_zgessq.c +++ b/coreblas/compute/core_zgessq.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval -k, the k-th argument had an illegal value * */ int CORE_zgessq(int M, int N, diff --git a/coreblas/compute/core_zgetf2_nopiv.c b/coreblas/compute/core_zgetf2_nopiv.c index 91c313430..18836b6e1 100644 --- a/coreblas/compute/core_zgetf2_nopiv.c +++ b/coreblas/compute/core_zgetf2_nopiv.c @@ -58,10 +58,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/coreblas/compute/core_zgetrf_incpiv.c b/coreblas/compute/core_zgetrf_incpiv.c index b47084b3c..b1355e645 100644 --- a/coreblas/compute/core_zgetrf_incpiv.c +++ b/coreblas/compute/core_zgetrf_incpiv.c @@ -71,10 +71,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/coreblas/compute/core_zgetrf_nopiv.c b/coreblas/compute/core_zgetrf_nopiv.c index b7661ba5c..fbd34a128 100644 --- a/coreblas/compute/core_zgetrf_nopiv.c +++ b/coreblas/compute/core_zgetrf_nopiv.c @@ -60,10 +60,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/coreblas/compute/core_zherfb.c b/coreblas/compute/core_zherfb.c index d3653d55b..d1f952bf7 100644 --- a/coreblas/compute/core_zherfb.c +++ b/coreblas/compute/core_zherfb.c @@ -85,9 +85,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_zhessq.c b/coreblas/compute/core_zhessq.c index 250962aba..d5b968515 100644 --- a/coreblas/compute/core_zhessq.c +++ b/coreblas/compute/core_zhessq.c @@ -88,9 +88,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zlascal.c b/coreblas/compute/core_zlascal.c index 645bc6714..50654a63b 100644 --- a/coreblas/compute/core_zlascal.c +++ b/coreblas/compute/core_zlascal.c @@ -52,9 +52,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int diff --git a/coreblas/compute/core_zlatro.c b/coreblas/compute/core_zlatro.c index 2bdcbfc31..c22ac72ab 100644 --- a/coreblas/compute/core_zlatro.c +++ b/coreblas/compute/core_zlatro.c @@ -72,9 +72,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * 
@retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_zpamm.c b/coreblas/compute/core_zpamm.c index 35c8e0490..2dd190e9c 100644 --- a/coreblas/compute/core_zpamm.c +++ b/coreblas/compute/core_zpamm.c @@ -174,9 +174,8 @@ static inline int CORE_zpamm_w(cham_side_t side, cham_trans_t trans, cham_uplo_t * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int diff --git a/coreblas/compute/core_zparfb.c b/coreblas/compute/core_zparfb.c index a359402d6..05d07f72e 100644 --- a/coreblas/compute/core_zparfb.c +++ b/coreblas/compute/core_zparfb.c @@ -132,9 +132,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ /* This kernel is never traced so return type on previous line for convert2eztrace.pl script */ diff --git a/coreblas/compute/core_zpemv.c b/coreblas/compute/core_zpemv.c index 62213c723..6b8fc9ed6 100644 --- a/coreblas/compute/core_zpemv.c +++ b/coreblas/compute/core_zpemv.c @@ -113,9 +113,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zssssm.c b/coreblas/compute/core_zssssm.c index 87d18d295..ef5bd6a17 100644 --- 
a/coreblas/compute/core_zssssm.c +++ b/coreblas/compute/core_zssssm.c @@ -91,9 +91,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zsyssq.c b/coreblas/compute/core_zsyssq.c index a2c19544b..8bce64cec 100644 --- a/coreblas/compute/core_zsyssq.c +++ b/coreblas/compute/core_zsyssq.c @@ -88,9 +88,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztplqt.c b/coreblas/compute/core_ztplqt.c index b2794dc6d..e80f80a96 100644 --- a/coreblas/compute/core_ztplqt.c +++ b/coreblas/compute/core_ztplqt.c @@ -77,9 +77,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int CORE_ztplqt( int M, int N, int L, int IB, diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c index 5909f19ee..6584e2ba5 100644 --- a/coreblas/compute/core_ztpmqrt.c +++ b/coreblas/compute/core_ztpmqrt.c @@ -129,9 +129,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal 
value * */ diff --git a/coreblas/compute/core_ztpqrt.c b/coreblas/compute/core_ztpqrt.c index ddfbb49ab..a251bed84 100644 --- a/coreblas/compute/core_ztpqrt.c +++ b/coreblas/compute/core_ztpqrt.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int CORE_ztpqrt( int M, int N, int L, int IB, diff --git a/coreblas/compute/core_ztradd.c b/coreblas/compute/core_ztradd.c index 831ad069e..3242ae53a 100644 --- a/coreblas/compute/core_ztradd.c +++ b/coreblas/compute/core_ztradd.c @@ -74,9 +74,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_ztrssq.c b/coreblas/compute/core_ztrssq.c index f01e63663..61cc2994d 100644 --- a/coreblas/compute/core_ztrssq.c +++ b/coreblas/compute/core_ztrssq.c @@ -82,9 +82,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval -k, the k-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztslqt.c b/coreblas/compute/core_ztslqt.c index da5b27078..156429d2b 100644 --- a/coreblas/compute/core_ztslqt.c +++ b/coreblas/compute/core_ztslqt.c @@ -94,9 +94,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, 
the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c index a9324fa04..c2238aed6 100644 --- a/coreblas/compute/core_ztsmlq.c +++ b/coreblas/compute/core_ztsmlq.c @@ -121,9 +121,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztsmlq_hetra1.c b/coreblas/compute/core_ztsmlq_hetra1.c index ff7123317..fc0a5abda 100644 --- a/coreblas/compute/core_ztsmlq_hetra1.c +++ b/coreblas/compute/core_ztsmlq_hetra1.c @@ -108,9 +108,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_ztsmqr.c b/coreblas/compute/core_ztsmqr.c index e4f681581..aeb35c924 100644 --- a/coreblas/compute/core_ztsmqr.c +++ b/coreblas/compute/core_ztsmqr.c @@ -121,9 +121,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztsmqr_hetra1.c b/coreblas/compute/core_ztsmqr_hetra1.c index cfba422e7..40dcf9270 100644 --- a/coreblas/compute/core_ztsmqr_hetra1.c +++ b/coreblas/compute/core_ztsmqr_hetra1.c @@ -110,9 
+110,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ #if defined(CHAMELEON_HAVE_WEAK) diff --git a/coreblas/compute/core_ztsqrt.c b/coreblas/compute/core_ztsqrt.c index 7564c4edf..3bbbd8f1b 100644 --- a/coreblas/compute/core_ztsqrt.c +++ b/coreblas/compute/core_ztsqrt.c @@ -83,9 +83,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_ztstrf.c b/coreblas/compute/core_ztstrf.c index c0f5c9eca..6f03a2664 100644 --- a/coreblas/compute/core_ztstrf.c +++ b/coreblas/compute/core_ztstrf.c @@ -84,10 +84,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/coreblas/compute/core_zttlqt.c b/coreblas/compute/core_zttlqt.c index db12242e3..b331b2871 100644 --- a/coreblas/compute/core_zttlqt.c +++ b/coreblas/compute/core_zttlqt.c @@ -95,9 +95,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c index 5b6ee0261..b2fd88691 100644 --- a/coreblas/compute/core_zttmlq.c +++ b/coreblas/compute/core_zttmlq.c @@ -113,9 +113,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zttmqr.c b/coreblas/compute/core_zttmqr.c index 9342ecbe3..850f27599 100644 --- a/coreblas/compute/core_zttmqr.c +++ b/coreblas/compute/core_zttmqr.c @@ -112,9 +112,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zttqrt.c b/coreblas/compute/core_zttqrt.c index c024dc959..4f127334a 100644 --- a/coreblas/compute/core_zttqrt.c +++ b/coreblas/compute/core_zttqrt.c @@ -95,9 +95,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS 
successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zunmlq.c b/coreblas/compute/core_zunmlq.c index c7ac26f55..3f1593883 100644 --- a/coreblas/compute/core_zunmlq.c +++ b/coreblas/compute/core_zunmlq.c @@ -105,9 +105,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/compute/core_zunmqr.c b/coreblas/compute/core_zunmqr.c index 59fb4c525..347512a01 100644 --- a/coreblas/compute/core_zunmqr.c +++ b/coreblas/compute/core_zunmqr.c @@ -106,9 +106,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/coreblas/eztrace_module/coreblas_eztrace_module b/coreblas/eztrace_module/coreblas_eztrace_module index dca1e36d9..4a8688192 100644 --- a/coreblas/eztrace_module/coreblas_eztrace_module +++ b/coreblas/eztrace_module/coreblas_eztrace_module @@ -1172,12 +1172,6 @@ int CORE_zlatro(int uplo, int trans, void *A, int LDA, void *B, int LDB); void CORE_zlauum(int uplo, int N, void *A, int LDA); -int CORE_zpamm(int op, int side, int storev, - int M, int N, int K, int L, - void *A1, int LDA1, - void *A2, int LDA2, - void *V, int LDV, - void *W, int LDW); int CORE_zparfb(int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, int L, void *A1, int LDA1, diff --git a/cudablas/compute/cuda_zgeadd.c b/cudablas/compute/cuda_zgeadd.c index d7f86784c..e520dfb0a 100644 --- a/cudablas/compute/cuda_zgeadd.c +++ b/cudablas/compute/cuda_zgeadd.c 
@@ -72,9 +72,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ int CUDA_zgeadd(cham_trans_t trans, diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index 4fa07c2b4..954f67570 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h @@ -38,23 +38,13 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options, int M, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int incA, const CHAM_desc_t *B, int Bm, int Bn, int incB ); +void INSERT_TASK_zbuild( const RUNTIME_option_t *options, + const CHAM_desc_t *A, int Am, int An, int lda, + void *user_data, void* user_build_callback ); void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); -void INSERT_TASK_zlascal( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An, int lda ); -void INSERT_TASK_zbrdalg( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int N, int NB, - const CHAM_desc_t *A, - const CHAM_desc_t *C, int Cm, int Cn, - const CHAM_desc_t *S, int Sm, int Sn, - int i, int j, int m, int grsiz, int BAND, - int *PCOL, int *ACOL, int *MCOL ); void INSERT_TASK_zgelqt( const RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -65,39 +55,6 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, CHAMELEON_Complex64_t beta, const 
CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zgemm2( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zgemm_f2( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); -void INSERT_TASK_zgemm_p2( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAMELEON_Complex64_t **B, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zgemm_p2f1( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAMELEON_Complex64_t **B, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1 ); -void INSERT_TASK_zgemm_p3( const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t **C, int ldc ); void INSERT_TASK_zgeqrt( const 
RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -126,28 +83,6 @@ void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_nopiv( const RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, int iinfo ); -void INSERT_TASK_zgetrf_reclap( const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int *IPIV, - - cham_bool_t check_info, int iinfo, - int nbthread ); -void INSERT_TASK_zgetrf_rectil( const RUNTIME_option_t *options, - const CHAM_desc_t A, const CHAM_desc_t *Amn, int Amnm, int Amnn, int size, - int *IPIV, - - cham_bool_t check_info, int iinfo, - int nbthread ); -void INSERT_TASK_zgetrip( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA ); -void INSERT_TASK_zgetrip_f1( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA, - const CHAM_desc_t *fake, int fakem, int faken, int szeF, int paramF ); -void INSERT_TASK_zgetrip_f2( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2 ); void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int mb, @@ -159,16 +94,6 @@ void INSERT_TASK_zhemm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zhegst( const RUNTIME_option_t *options, - int itype, cham_uplo_t uplo, int N, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn, int LDB, - int iinfo ); -void INSERT_TASK_zherk( const 
RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - double alpha, const CHAM_desc_t *A, int Am, int An, int lda, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zher2k( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, @@ -181,6 +106,15 @@ void INSERT_TASK_zherfb( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *T, int Tm, int Tn, int ldt, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zherk( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + double alpha, const CHAM_desc_t *A, int Am, int An, int lda, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zhessq( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int mb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -196,10 +130,6 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options, void INSERT_TASK_zlange_max( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *B, int Bm, int Bn ); -void INSERT_TASK_zhessq( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ); void INSERT_TASK_zlanhe( const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, const CHAM_desc_t *A, int Am, int An, int LDA, @@ -213,31 +143,18 @@ void INSERT_TASK_zlantr( const RUNTIME_option_t *options, int M, int N, int NB, const CHAM_desc_t *A, int Am, int An, int LDA, const CHAM_desc_t *B, int Bm, int Bn ); +void INSERT_TASK_zlascal( const RUNTIME_option_t *options, + 
cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int lda ); void INSERT_TASK_zlaset( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); + cham_uplo_t uplo, int n1, int n2, + CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, + const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); void INSERT_TASK_zlaset2( const RUNTIME_option_t *options, cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea ); -void INSERT_TASK_zlaswp( const RUNTIME_option_t *options, - int n, const CHAM_desc_t *A, int Am, int An, int lda, - int i1, int i2, int *ipiv, int inc ); -void INSERT_TASK_zlaswp_f2( const RUNTIME_option_t *options, - int n, const CHAM_desc_t *A, int Am, int An, int lda, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); -void INSERT_TASK_zlaswp_ontile( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, - int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel ); -void INSERT_TASK_zlaswp_ontile_f2( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1, - const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 ); -void INSERT_TASK_zlaswpc_ontile( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An, - int i1, int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel ); void INSERT_TASK_zlatro( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, 
int n, int mb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -254,17 +171,15 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, int bigM, int m0, int n0, unsigned long long int seed ); +void INSERT_TASK_zplssq( const RUNTIME_option_t *options, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, + const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ); +void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ); void INSERT_TASK_zpotrf( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo ); -void INSERT_TASK_zshift( const RUNTIME_option_t *options, - int s, int m, int n, int L, - CHAMELEON_Complex64_t *A ); -void INSERT_TASK_zshiftw( const RUNTIME_option_t *options, - int s, int cl, int m, int n, int L, - const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t *W ); void INSERT_TASK_zssssm( const RUNTIME_option_t *options, int m1, int n1, int m2, int n2, int k, int ib, int nb, const CHAM_desc_t *A1, int A1m, int A1n, int lda1, @@ -278,17 +193,17 @@ void INSERT_TASK_zsymm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int 
Bm, int Bn, int LDB, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); +void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, const CHAM_desc_t *A, int Am, int An, int lda, @@ -297,13 +212,6 @@ void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, const CHAM_desc_t *A, int Am, int An, int lda, int iinfo ); -void INSERT_TASK_zswpab( const RUNTIME_option_t *options, - int i, int n1, int n2, - const CHAM_desc_t *A, int Am, int An, int szeA ); -void INSERT_TASK_zswptr_ontile( const RUNTIME_option_t *options, - const CHAM_desc_t descA, const CHAM_desc_t *Aij, int Aijm, int Aijn, - int i1, int i2, int *ipiv, int inc, - const CHAM_desc_t *Akk, int Akkm, int Akkn, int ldak ); void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, int m, int n, int l, int ib, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -328,14 +236,6 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb, const CHAM_desc_t *T, int Tm, int Tn, int ldt ); -void INSERT_TASK_ztrdalg( const RUNTIME_option_t *options, - cham_uplo_t uplo, - int N, int NB, - const CHAM_desc_t *A, - const CHAM_desc_t *C, int Cm, int Cn, - const CHAM_desc_t *S, int Sm, int Sn, - int i, int j, int m, int grsiz, int BAND, - int *PCOL, int *ACOL, int *MCOL ); void INSERT_TASK_ztradd( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, @@ -349,11 +249,6 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, int m, int n, int nb, 
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn, int ldb ); -void INSERT_TASK_ztrmm_p2( const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t **B, int ldb ); void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, int m, int n, int nb, @@ -390,18 +285,6 @@ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, const CHAM_desc_t *L, int Lm, int Ln, int ldl, int *IPIV, cham_bool_t check_info, int iinfo ); -void INSERT_TASK_zpamm( const RUNTIME_option_t *options, - int op, cham_side_t side, cham_store_t storev, - int m, int n, int k, int l, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *W, int Wm, int Wn, int ldw ); -void INSERT_TASK_zplssq( const RUNTIME_option_t *options, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, - const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ); -void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ); void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, int m, int n, int ib, int nb, int k, @@ -414,10 +297,6 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *T, int Tm, int Tn, int ldt, const CHAM_desc_t *C, int Cm, int Cn, int ldc ); -void INSERT_TASK_zbuild( const RUNTIME_option_t *options, - const CHAM_desc_t *A, int Am, int An, int lda, - void *user_data, void* user_build_callback ); - /** * Keep these insert_task for retro-compatibility diff --git a/runtime/CMakeLists.txt 
b/runtime/CMakeLists.txt index cabe559c2..309dea896 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -28,7 +28,6 @@ # List of codelets required by all runtimes # ----------------------------------------- set(CODELETS_ZSRC - codelets/codelet_ztile_zero.c codelets/codelet_zasum.c ################## # BLAS 1 diff --git a/runtime/openmp/codelets/codelet_zgeadd.c b/runtime/openmp/codelets/codelet_zgeadd.c index 1d18ff18f..2ceeb8159 100644 --- a/runtime/openmp/codelets/codelet_zgeadd.c +++ b/runtime/openmp/codelets/codelet_zgeadd.c @@ -31,7 +31,7 @@ * * @ingroup CORE_CHAMELEON_Complex64_t * - * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * @brief Adds two general matrices together as in PBLAS pzgeadd. * * B <- alpha * op(A) + beta * B, * @@ -75,15 +75,14 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c index 3341a8f01..8dd282d62 100644 --- a/runtime/openmp/codelets/codelet_zgelqt.c +++ 
b/runtime/openmp/codelets/codelet_zgelqt.c @@ -84,9 +84,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/runtime/openmp/codelets/codelet_zgemm.c b/runtime/openmp/codelets/codelet_zgemm.c index 68aec8de4..b2737c388 100644 --- a/runtime/openmp/codelets/codelet_zgemm.c +++ b/runtime/openmp/codelets/codelet_zgemm.c @@ -43,7 +43,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); -#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn]) +#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) CORE_zgemm(transA, transB, m, n, k, alpha, ptrA, lda, diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c index 6428375b2..f8bf811af 100644 --- a/runtime/openmp/codelets/codelet_zgeqrt.c +++ b/runtime/openmp/codelets/codelet_zgeqrt.c @@ -85,9 +85,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/runtime/openmp/codelets/codelet_zgessm.c b/runtime/openmp/codelets/codelet_zgessm.c index cd24a4ac0..2ed15696a 100644 --- 
a/runtime/openmp/codelets/codelet_zgessm.c +++ b/runtime/openmp/codelets/codelet_zgessm.c @@ -68,9 +68,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ @@ -83,6 +82,6 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrD = RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn); CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0:Dm*Dn]) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0]) depend(inout:ptrA[0]) CORE_zgessm(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda); } diff --git a/runtime/openmp/codelets/codelet_zgetrf.c b/runtime/openmp/codelets/codelet_zgetrf.c index d7cc9fe75..ab9869f7e 100644 --- a/runtime/openmp/codelets/codelet_zgetrf.c +++ b/runtime/openmp/codelets/codelet_zgetrf.c @@ -34,6 +34,6 @@ void INSERT_TASK_zgetrf(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); int info = 0; -#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0]) +#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0]) CORE_zgetrf( m, n, ptrA, lda, IPIV, &info ); } diff --git a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c index 20b5e92d3..9f26a7064 100644 --- a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c @@ -73,10 +73,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * 
\retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. @@ -92,6 +91,6 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); int info = 0; -#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0]) CORE_zgetrf_incpiv(m, n, ib, ptrA, lda, IPIV, &info); } diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c index 5f26b76e9..829b5473e 100644 --- a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c @@ -63,10 +63,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
@@ -80,6 +79,6 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); int info = 0; -#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0]) CORE_zgetrf_nopiv(m, n, ib, ptrA, lda, &info); } diff --git a/runtime/openmp/codelets/codelet_zhemm.c b/runtime/openmp/codelets/codelet_zhemm.c index 331459e47..4d632655a 100644 --- a/runtime/openmp/codelets/codelet_zhemm.c +++ b/runtime/openmp/codelets/codelet_zhemm.c @@ -43,7 +43,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); -#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn]) +#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) CORE_zhemm(side, uplo, m, n, alpha, ptrA, lda, diff --git a/runtime/openmp/codelets/codelet_zhessq.c b/runtime/openmp/codelets/codelet_zhessq.c index 46cd0f5c7..409d413dd 100644 --- a/runtime/openmp/codelets/codelet_zhessq.c +++ b/runtime/openmp/codelets/codelet_zhessq.c @@ -31,6 +31,6 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); -#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrScaleSum[0:SCALESUMSQm*SCALESUMSQn]) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0]) CORE_zhessq( uplo, n,
ptrA, lda, &ptrScaleSum[0], &ptrScaleSum[1] ); } diff --git a/runtime/openmp/codelets/codelet_zlacpy.c b/runtime/openmp/codelets/codelet_zlacpy.c index 74e420c31..44ea300eb 100644 --- a/runtime/openmp/codelets/codelet_zlacpy.c +++ b/runtime/openmp/codelets/codelet_zlacpy.c @@ -33,10 +33,10 @@ * @ingroup CORE_CHAMELEON_Complex64_t * */ -void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A + displA, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B + displB, CHAMELEON_Complex64_t, Bm, Bn); @@ -44,12 +44,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, CORE_zlacpy(uplo, m, n, ptrA, lda, ptrB, ldb); } -void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, - 0, A, Am, An, lda, - 0, B, Bm, Bn, ldb ); + 0, A, Am, An, lda, + 0, B, Bm, Bn, ldb ); } diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c index b65a938fe..26a024cd0 100644 --- a/runtime/openmp/codelets/codelet_zlag2c.c +++ b/runtime/openmp/codelets/codelet_zlag2c.c @@ -31,13 +31,24 @@ * @ingroup CORE_CHAMELEON_Complex64_t * */ -void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, - int m, int n, int nb, - const 
CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); - CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); -#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn]) + CHAMELEON_Complex32_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn); +#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) CORE_zlag2c( m, n, ptrA, lda, ptrB, ldb); } + +void INSERT_TASK_clag2z( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex32_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_clag2z( m, n, ptrA, lda, ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_zlascal.c b/runtime/openmp/codelets/codelet_zlascal.c index d579bb39a..2aa990418 100644 --- a/runtime/openmp/codelets/codelet_zlascal.c +++ b/runtime/openmp/codelets/codelet_zlascal.c @@ -51,9 +51,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ @@ -64,6 +63,6 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, 
Am, An); -#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0]) CORE_zlascal(uplo, m, n, alpha, ptrA, lda); } diff --git a/runtime/openmp/codelets/codelet_zlatro.c b/runtime/openmp/codelets/codelet_zlatro.c index 6f7ba5fa5..ec50bb9cf 100644 --- a/runtime/openmp/codelets/codelet_zlatro.c +++ b/runtime/openmp/codelets/codelet_zlatro.c @@ -33,14 +33,14 @@ * @ingroup CORE_CHAMELEON_Complex64_t * */ -void INSERT_TASK_zlatro(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlatro( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); -#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn]) +#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) CORE_zlatro(uplo, trans, m, n, ptrA, lda, ptrB, ldb); } diff --git a/runtime/openmp/codelets/codelet_zlauum.c b/runtime/openmp/codelets/codelet_zlauum.c index 7ab7c8b99..70030fc19 100644 --- a/runtime/openmp/codelets/codelet_zlauum.c +++ b/runtime/openmp/codelets/codelet_zlauum.c @@ -38,6 +38,6 @@ void INSERT_TASK_zlauum(const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int lda) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0]) CORE_zlauum(uplo, n, 
ptrA, lda); } diff --git a/runtime/openmp/codelets/codelet_zplghe.c b/runtime/openmp/codelets/codelet_zplghe.c index 06e890a45..a785b19c2 100644 --- a/runtime/openmp/codelets/codelet_zplghe.c +++ b/runtime/openmp/codelets/codelet_zplghe.c @@ -28,13 +28,11 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_z.h" -/* INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ - void INSERT_TASK_zplghe( const RUNTIME_option_t *options, - double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) + double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) CORE_zplghe( bump, m, n, ptrA, lda, bigM, m0, n0, seed ); } diff --git a/runtime/openmp/codelets/codelet_zplgsy.c b/runtime/openmp/codelets/codelet_zplgsy.c index 5269d5276..4a3cea2f6 100644 --- a/runtime/openmp/codelets/codelet_zplgsy.c +++ b/runtime/openmp/codelets/codelet_zplgsy.c @@ -28,11 +28,9 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_z.h" -/* INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. 
*/ - void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, - CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) + CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); #pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) diff --git a/runtime/openmp/codelets/codelet_zplrnt.c b/runtime/openmp/codelets/codelet_zplrnt.c index 35cb6300c..139f44c8b 100644 --- a/runtime/openmp/codelets/codelet_zplrnt.c +++ b/runtime/openmp/codelets/codelet_zplrnt.c @@ -28,11 +28,9 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_z.h" -/* INSERT_TASK_zplrnt - Generate a tile for random matrix. */ - void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) + int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); #pragma omp task firstprivate(m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) diff --git a/runtime/openmp/codelets/codelet_zplssq.c b/runtime/openmp/codelets/codelet_zplssq.c index 7ee45f66d..cec083dca 100644 --- a/runtime/openmp/codelets/codelet_zplssq.c +++ b/runtime/openmp/codelets/codelet_zplssq.c @@ -29,7 +29,7 @@ * * @ingroup CORE_CHAMELEON_Complex64_t * - * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * @brief Compute sum( a_ij ^ 2 ) = scl * sqrt(ssq) * * with scl and ssq such that * @@ -74,7 +74,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options, } void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) + const 
CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) { CHAMELEON_Complex64_t *res = RTBLKADDR(RESULT, CHAMELEON_Complex64_t, RESULTm, RESULTn); diff --git a/runtime/openmp/codelets/codelet_zssssm.c b/runtime/openmp/codelets/codelet_zssssm.c index 38d9ad5e3..db82b480e 100644 --- a/runtime/openmp/codelets/codelet_zssssm.c +++ b/runtime/openmp/codelets/codelet_zssssm.c @@ -91,9 +91,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ @@ -109,10 +108,8 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); CHAMELEON_Complex64_t *ptrL1 = RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n); CHAMELEON_Complex64_t *ptrL2 = RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n); -#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV)\ - depend(inout:ptrA1[0])\ - depend(inout:ptrA2[0])\ - depend(in:ptrL1[0])\ - depend(in:ptrL2[0]) + +#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV) \ + depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrL1[0], ptrL2[0]) CORE_zssssm(m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrL1, ldl1, ptrL2, ldl2, IPIV); } diff --git a/runtime/openmp/codelets/codelet_zsymm.c b/runtime/openmp/codelets/codelet_zsymm.c index efe71b425..76d6ec7b7 100644 --- a/runtime/openmp/codelets/codelet_zsymm.c +++ b/runtime/openmp/codelets/codelet_zsymm.c @@ -41,7 +41,7 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrC = 
RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); -#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn]) +#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) CORE_zsymm(side, uplo, m, n, alpha, ptrA, lda, diff --git a/runtime/openmp/codelets/codelet_zsyssq.c b/runtime/openmp/codelets/codelet_zsyssq.c index c2d69dc57..86b58eb00 100644 --- a/runtime/openmp/codelets/codelet_zsyssq.c +++ b/runtime/openmp/codelets/codelet_zsyssq.c @@ -29,6 +29,6 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); -#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0:Am*An]) depend(inout:ptrSCALESUMSQ[0]) +#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0]) CORE_zsyssq( uplo, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1] ); } diff --git a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c index 1ebd1aa08..73032cf98 100644 --- a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c +++ b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c @@ -32,6 +32,6 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options, int iinfo) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); -#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An]) +#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0]) CORE_zsytf2_nopiv(uplo, n, ptrA, lda); } diff --git a/runtime/openmp/codelets/codelet_ztile_zero.c b/runtime/openmp/codelets/codelet_ztile_zero.c deleted file mode 100644 index 96ef911bf..000000000 --- a/runtime/openmp/codelets/codelet_ztile_zero.c +++ 
/dev/null @@ -1,38 +0,0 @@ -/** - * - * @file openmp/codelet_ztile_zero.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztile_zero StarPU codelet - * - * @version 1.0.0 - * @author Hatem Ltaief - * @author Mathieu Faverge - * @author Jakub Kurzak - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ - -#include "chameleon_openmp.h" -#include "chameleon/tasks_z.h" -#include "coreblas.h" -/** - * - */ -void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, - int X1, int X2, int Y1, int Y2, - const CHAM_desc_t *A, int Am, int An, int lda ) -{ - CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); - int x, y; - for (x = X1; x < X2; x++) - for (y = Y1; y < Y2; y++) - ptrA[lda*x+y] = 0.0; -} diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c index 4bb4f16f0..367e437a7 100644 --- a/runtime/openmp/codelets/codelet_ztplqt.c +++ b/runtime/openmp/codelets/codelet_ztplqt.c @@ -20,12 +20,12 @@ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" -void -INSERT_TASK_ztplqt( const RUNTIME_option_t *options, - int M, int N, int L, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) + +void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); @@ -36,8 +36,7 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options, { 
CHAMELEON_Complex64_t work[ws_size]; - CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt); - + CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt ); CORE_ztplqt( M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt, work ); } diff --git a/runtime/openmp/codelets/codelet_ztpmlqt.c b/runtime/openmp/codelets/codelet_ztpmlqt.c index 543704822..a5da0f533 100644 --- a/runtime/openmp/codelets/codelet_ztpmlqt.c +++ b/runtime/openmp/codelets/codelet_ztpmlqt.c @@ -17,24 +17,25 @@ */ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" -void -INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + +void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); int ws_size = options->ws_wsize; + #pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) { - CHAMELEON_Complex64_t work[ws_size]; - CORE_ztpmlqt( side, trans, M, N, K, L, ib, - ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); + CHAMELEON_Complex64_t work[ws_size]; + CORE_ztpmlqt( side, trans, M, N, K, L, ib, + ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, 
ldb, work ); } } diff --git a/runtime/openmp/codelets/codelet_ztpmqrt.c b/runtime/openmp/codelets/codelet_ztpmqrt.c index 4f3262221..5378a2a5b 100644 --- a/runtime/openmp/codelets/codelet_ztpmqrt.c +++ b/runtime/openmp/codelets/codelet_ztpmqrt.c @@ -17,20 +17,21 @@ */ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" -void -INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + +void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); int ws_size = options->ws_wsize; + #pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, nb, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) { CHAMELEON_Complex64_t tmp[ws_size]; diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c index 7381f6ebd..755de21bd 100644 --- a/runtime/openmp/codelets/codelet_ztpqrt.c +++ b/runtime/openmp/codelets/codelet_ztpqrt.c @@ -19,12 +19,12 @@ */ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" -void -INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, - int M, int N, int L, int ib, int nb, - const CHAM_desc_t 
*A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) + +void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); @@ -35,9 +35,8 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, { CHAMELEON_Complex64_t tmp[ws_size]; - CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt); - + CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt ); CORE_ztpqrt( M, N, L, ib, - ptrA, lda, ptrB, ldb, ptrT, ldt, tmp ); + ptrA, lda, ptrB, ldb, ptrT, ldt, tmp ); } } diff --git a/runtime/openmp/codelets/codelet_ztradd.c b/runtime/openmp/codelets/codelet_ztradd.c index 9a39aaf56..384ba192d 100644 --- a/runtime/openmp/codelets/codelet_ztradd.c +++ b/runtime/openmp/codelets/codelet_ztradd.c @@ -22,12 +22,13 @@ #include "chameleon_openmp.h" #include "chameleon/tasks_z.h" + /** ****************************************************************************** * * @ingroup CORE_CHAMELEON_Complex64_t * - * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd. 
* * B <- alpha * op(A) + beta * B, * @@ -77,18 +78,18 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_ztradd(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_ztradd( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) { CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + #pragma omp task firstprivate(uplo, trans, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) CORE_ztradd(uplo, trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb); } diff --git a/runtime/openmp/codelets/codelet_ztstrf.c b/runtime/openmp/codelets/codelet_ztstrf.c index cb612cb6f..4072b5d68 100644 --- a/runtime/openmp/codelets/codelet_ztstrf.c +++ b/runtime/openmp/codelets/codelet_ztstrf.c @@ -83,10 +83,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. 
The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c index 348b290a1..0dd8f263d 100644 --- a/runtime/openmp/codelets/codelet_zunmlq.c +++ b/runtime/openmp/codelets/codelet_zunmlq.c @@ -105,9 +105,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c index 427654703..ed40c4211 100644 --- a/runtime/openmp/codelets/codelet_zunmqr.c +++ b/runtime/openmp/codelets/codelet_zunmqr.c @@ -105,9 +105,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ diff --git a/runtime/parsec/codelets/codelet_zgeadd.c b/runtime/parsec/codelets/codelet_zgeadd.c index 7ad41db2f..7d937857f 100644 --- a/runtime/parsec/codelets/codelet_zgeadd.c +++ b/runtime/parsec/codelets/codelet_zgeadd.c @@ -52,7 +52,7 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context, * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * @brief Adds two general matrices together as in PBLAS pzgeadd. 
* * B <- alpha * op(A) + beta * B, * @@ -96,15 +96,14 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); diff --git a/runtime/parsec/codelets/codelet_zgelqt.c b/runtime/parsec/codelets/codelet_zgelqt.c index 4ef5b5b7a..32a7dfaa9 100644 --- a/runtime/parsec/codelets/codelet_zgelqt.c +++ b/runtime/parsec/codelets/codelet_zgelqt.c @@ -76,9 +76,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ static inline int diff --git a/runtime/parsec/codelets/codelet_zgeqrt.c b/runtime/parsec/codelets/codelet_zgeqrt.c index 53ac8ac04..3aaaf84cf 100644 --- a/runtime/parsec/codelets/codelet_zgeqrt.c +++ b/runtime/parsec/codelets/codelet_zgeqrt.c @@ -77,9 +77,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit 
- * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ static inline int diff --git a/runtime/parsec/codelets/codelet_zgessm.c b/runtime/parsec/codelets/codelet_zgessm.c index a7f62dc4b..a4762cfa2 100644 --- a/runtime/parsec/codelets/codelet_zgessm.c +++ b/runtime/parsec/codelets/codelet_zgessm.c @@ -60,9 +60,8 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ static inline int diff --git a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c index 09ef6c401..55a1fe635 100644 --- a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c @@ -65,10 +65,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c index ab7f49bb1..0aadb3c90 100644 --- a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c @@ -58,10 +58,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/runtime/parsec/codelets/codelet_zlacpy.c b/runtime/parsec/codelets/codelet_zlacpy.c index 64c777e38..d79617ccb 100644 --- a/runtime/parsec/codelets/codelet_zlacpy.c +++ b/runtime/parsec/codelets/codelet_zlacpy.c @@ -49,11 +49,10 @@ CORE_zlacpyx_parsec( parsec_execution_stream_t *context, } void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); parsec_dtd_taskpool_insert_task( @@ -71,12 +70,12 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, (void)nb; } -void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void 
INSERT_TASK_zlacpy( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, - 0, A, Am, An, lda, - 0, B, Bm, Bn, ldb ); + 0, A, Am, An, lda, + 0, B, Bm, Bn, ldb ); } diff --git a/runtime/parsec/codelets/codelet_zpamm.c b/runtime/parsec/codelets/codelet_zpamm.c deleted file mode 100644 index 3d075b014..000000000 --- a/runtime/parsec/codelets/codelet_zpamm.c +++ /dev/null @@ -1,224 +0,0 @@ -/** - * - * @file parsec/codelet_zpamm.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zpamm PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * ZPAMM performs one of the matrix-matrix operations - * - * LEFT RIGHT - * OP ChameleonW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) - * OP ChameleonA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V) - * - * where op( V ) is one of - * - * op( V ) = V or op( V ) = V**T or op( V ) = V**H, - * - * A1, A2 and W are general matrices, and V is: - * - * l = k: rectangle + triangle - * l < k: rectangle + trapezoid - * l = 0: rectangle - * - * Size of V, both rowwise and columnwise, is: - * - * ---------------------- - * side trans size - * ---------------------- - * left N M x K - * T K x M - * right N K x N - * T N x K - * ---------------------- - * - * LEFT (columnwise and rowwise): - * - * | K | | M | - * _ __________ _ _______________ _ - * | | | | | \ - * V: | | | V': |_____________|___\ K - * | | | M-L | | - * M | | | |__________________| _ - * 
|____| | _ - * \ | | | M - L | L | - * \ | | L - * _ \|____| _ - * - * RIGHT (columnwise and rowwise): - * - * | K | | N | - * _______________ _ _ __________ _ - * | | \ | | | - * V': |_____________|___\ N V: | | | - * | | | | | K-L - * |__________________| _ K | | | - * |____| | _ - * | K - L | L | \ | | - * \ | | L - * _ \|____| _ - * - * Arguments - * ========== - * - * @param[in] op - * - * OP specifies which operation to perform: - * - * @arg ChameleonW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) - * @arg ChameleonA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V) - * - * @param[in] side - * - * SIDE specifies whether op( V ) multiplies A2 - * or W from the left or right as follows: - * - * @arg ChamLeft : multiply op( V ) from the left - * OP ChameleonW : W = A1 + op(V) * A2 - * OP ChameleonA2 : A2 = A2 - op(V) * W - * - * @arg ChamRight : multiply op( V ) from the right - * OP ChameleonW : W = A1 + A2 * op(V) - * OP ChameleonA2 : A2 = A2 - W * op(V) - * - * @param[in] storev - * - * Indicates how the vectors which define the elementary - * reflectors are stored in V: - * - * @arg ChamColumnwise - * @arg ChamRowwise - * - * @param[in] M - * The number of rows of the A1, A2 and W - * If SIDE is ChamLeft, the number of rows of op( V ) - * - * @param[in] N - * The number of columns of the A1, A2 and W - * If SIDE is ChamRight, the number of columns of op( V ) - * - * @param[in] K - * If SIDE is ChamLeft, the number of columns of op( V ) - * If SIDE is ChamRight, the number of rows of op( V ) - * - * @param[in] L - * The size of the triangular part of V - * - * @param[in] A1 - * On entry, the M-by-N tile A1. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M). - * - * @param[in,out] A2 - * On entry, the M-by-N tile A2. - * On exit, if OP is ChameleonA2 A2 is overwritten - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M). - * - * @param[in] V - * The matrix V as described above. 
- * If SIDE is ChamLeft : op( V ) is M-by-K - * If SIDE is ChamRight: op( V ) is K-by-N - * - * @param[in] LDV - * The leading dimension of the array V. - * - * @param[in,out] W - * On entry, the M-by-N matrix W. - * On exit, W is overwritten either if OP is ChameleonA2 or ChameleonW. - * If OP is ChameleonA2, W is an input and is used as a workspace. - * - * @param[in] LDW - * The leading dimension of array WORK. - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ - - -/**/ - -static inline int -CORE_zpamm_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - int op; - cham_side_t side; - cham_store_t storev; - int M; - int N; - int K; - int L; - CHAMELEON_Complex64_t *A1; - int LDA1; - CHAMELEON_Complex64_t *A2; - int LDA2; - CHAMELEON_Complex64_t *V; - int LDV; - CHAMELEON_Complex64_t *W; - int LDW; - - parsec_dtd_unpack_args( - this_task, &op, &side, &storev, &M, &N, &K, &L, &A1, &LDA1, &A2, &LDA2, &V, &LDV, &W, &LDW ); - - CORE_zpamm( op, side, storev, M, N, K, L, A1, LDA1, A2, LDA2, V, LDV, W, LDW ); - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void -INSERT_TASK_zpamm(const RUNTIME_option_t *options, - int op, cham_side_t side, cham_store_t storev, - int m, int n, int k, int l, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *W, int Wm, int Wn, int ldw) -{ - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_zpamm_parsec, options->priority, "pamm", - sizeof(int), &op, VALUE, - sizeof(int), &side, VALUE, - sizeof(int), &storev, VALUE, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &l, 
VALUE, - PASSED_BY_REF, RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INPUT, - sizeof(int), &lda1, VALUE, - PASSED_BY_REF, RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY, - sizeof(int), &lda2, VALUE, - PASSED_BY_REF, RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT, - sizeof(int), &ldv, VALUE, - PASSED_BY_REF, RTBLKADDR( W, CHAMELEON_Complex64_t, Wm, Wn ), chameleon_parsec_get_arena_index( W ) | INOUT, - sizeof(int), &ldw, VALUE, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_zplssq.c b/runtime/parsec/codelets/codelet_zplssq.c index ed1fde76f..99006c131 100644 --- a/runtime/parsec/codelets/codelet_zplssq.c +++ b/runtime/parsec/codelets/codelet_zplssq.c @@ -21,11 +21,35 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_z.h" +static inline int +CORE_zplssq_parsec( parsec_execution_stream_t *context, + parsec_task_t *this_task ) +{ + double *SCLSSQ_IN; + double *SCLSSQ_OUT; + + parsec_dtd_unpack_args( + this_task, &SCLSSQ_IN, &SCLSSQ_OUT ); + + assert( SCLSSQ_OUT[0] >= 0. 
); + if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { + SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); + SCLSSQ_OUT[0] = SCLSSQ_IN[0]; + } else { + if ( SCLSSQ_OUT[0] > 0 ) { + SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); + } + } + + (void)context; + return PARSEC_HOOK_RETURN_DONE; +} + /** * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * @brief Compute sum( a_ij ^ 2 ) = scl * sqrt(ssq) * * with scl and ssq such that * @@ -52,33 +76,9 @@ * On exit, result contains scl * sqrt( ssq ) * */ -static inline int -CORE_zplssq_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - double *SCLSSQ_IN; - double *SCLSSQ_OUT; - - parsec_dtd_unpack_args( - this_task, &SCLSSQ_IN, &SCLSSQ_OUT ); - - assert( SCLSSQ_OUT[0] >= 0. ); - if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { - SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); - SCLSSQ_OUT[0] = SCLSSQ_IN[0]; - } else { - if ( SCLSSQ_OUT[0] > 0 ) { - SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); - } - } - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - void INSERT_TASK_zplssq( const RUNTIME_option_t *options, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, - const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, + const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); @@ -91,7 +91,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options, static inline int CORE_zplssq2_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) + parsec_task_t *this_task ) { double *RESULT; @@ -105,7 +105,7 @@ 
CORE_zplssq2_parsec( parsec_execution_stream_t *context, } void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); diff --git a/runtime/parsec/codelets/codelet_ztile_zero.c b/runtime/parsec/codelets/codelet_ztile_zero.c deleted file mode 100644 index e07175cb1..000000000 --- a/runtime/parsec/codelets/codelet_ztile_zero.c +++ /dev/null @@ -1,61 +0,0 @@ -/** - * - * @file parsec/codelet_ztile_zero.c - * - * @copyright 2009-2015 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztile_zero PaRSEC codelet - * - * @version 1.0.0 - * @author Reazul Hoque - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -static inline int -CORE_ztile_zero_parsec( parsec_execution_stream_t *context, - parsec_task_t *this_task ) -{ - int X1; - int X2; - int Y1; - int Y2; - CHAMELEON_Complex64_t *A; - int lda; - int x, y; - - parsec_dtd_unpack_args( - this_task, &X1, &X2, &Y1, &Y2, &A, &lda ); - - for (x = X1; x < X2; x++) - for (y = Y1; y < Y2; y++) - A[lda * x + y] = 0.0; - - (void)context; - return PARSEC_HOOK_RETURN_DONE; -} - -void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, - int X1, int X2, int Y1, int Y2, - const CHAM_desc_t *A, int Am, int An, int lda ) -{ - parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); - - parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_ztile_zero_parsec, options->priority, "tile zero", - sizeof(int), &X1, VALUE, - sizeof(int), &X2, VALUE, - sizeof(int), &Y1, VALUE, - sizeof(int), &Y2, VALUE, - 
PASSED_BY_REF, RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY, - sizeof(int), &lda, VALUE, - PARSEC_DTD_ARG_END ); -} diff --git a/runtime/parsec/codelets/codelet_ztradd.c b/runtime/parsec/codelets/codelet_ztradd.c index b431f983a..267ac386a 100644 --- a/runtime/parsec/codelets/codelet_ztradd.c +++ b/runtime/parsec/codelets/codelet_ztradd.c @@ -52,7 +52,7 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context, * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pztradd. + * @brief Adds two trapezoidal matrices together as in PBLAS pztradd. * * B <- alpha * op(A) + beta * B, * @@ -102,15 +102,14 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context, * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_ztradd(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_ztradd( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); diff --git a/runtime/quark/codelets/codelet_zgeadd.c b/runtime/quark/codelets/codelet_zgeadd.c index 16287953c..935bc0ba1 100644 --- a/runtime/quark/codelets/codelet_zgeadd.c +++ b/runtime/quark/codelets/codelet_zgeadd.c @@ -47,7 +47,7 @@ void 
CORE_zgeadd_quark(Quark *quark) * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * @brief Adds two general matrices together as in PBLAS pzgeadd. * * B <- alpha * op(A) + beta * B, * @@ -91,15 +91,14 @@ void CORE_zgeadd_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_GEADD; diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c index 240773c98..45db83e55 100644 --- a/runtime/quark/codelets/codelet_zgelqt.c +++ b/runtime/quark/codelets/codelet_zgelqt.c @@ -100,9 +100,8 @@ void CORE_zgelqt_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c index 09ed24eef..33ad21a52 100644 
--- a/runtime/quark/codelets/codelet_zgeqrt.c +++ b/runtime/quark/codelets/codelet_zgeqrt.c @@ -101,9 +101,8 @@ void CORE_zgeqrt_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_zgessm.c b/runtime/quark/codelets/codelet_zgessm.c index d31d3dc9b..63b59a664 100644 --- a/runtime/quark/codelets/codelet_zgessm.c +++ b/runtime/quark/codelets/codelet_zgessm.c @@ -86,9 +86,8 @@ void CORE_zgessm_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ void INSERT_TASK_zgessm(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_zgetrf_incpiv.c b/runtime/quark/codelets/codelet_zgetrf_incpiv.c index 7ba0886d1..9b9d29a7f 100644 --- a/runtime/quark/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/quark/codelets/codelet_zgetrf_incpiv.c @@ -94,10 +94,9 @@ void CORE_zgetrf_incpiv_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. 
The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. diff --git a/runtime/quark/codelets/codelet_zgetrf_nopiv.c b/runtime/quark/codelets/codelet_zgetrf_nopiv.c index dfee169e6..c7115e7d5 100644 --- a/runtime/quark/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/quark/codelets/codelet_zgetrf_nopiv.c @@ -81,10 +81,9 @@ void CORE_zgetrf_nopiv_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c index 99a0dc89a..8aa18403b 100644 --- a/runtime/quark/codelets/codelet_zlacpy.c +++ b/runtime/quark/codelets/codelet_zlacpy.c @@ -43,10 +43,10 @@ static inline void CORE_zlacpy_quark(Quark *quark) CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB); } -void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LACPY; @@ -63,12 +63,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, 0); } -void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, - 0, A, Am, An, lda, - 0, B, Bm, Bn, ldb ); + 0, A, Am, An, lda, + 0, B, Bm, Bn, ldb ); } diff --git a/runtime/quark/codelets/codelet_zpamm.c b/runtime/quark/codelets/codelet_zpamm.c deleted file mode 100644 index ba9de2a29..000000000 --- a/runtime/quark/codelets/codelet_zpamm.c +++ /dev/null @@ -1,220 +0,0 @@ -/** - * - * @file quark/codelet_zpamm.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. 
All rights reserved. - * - *** - * - * @brief Chameleon zpamm Quark codelet - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 1.0.0 - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede - * @date 2011-06-14 - * @precisions normal z -> c d s - * - */ -#include "coreblas/cblas.h" -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void -CORE_zpamm_quark(Quark *quark) -{ - int op; - cham_side_t side; - cham_store_t storev; - int M; - int N; - int K; - int L; - CHAMELEON_Complex64_t *A1; - int LDA1; - CHAMELEON_Complex64_t *A2; - int LDA2; - CHAMELEON_Complex64_t *V; - int LDV; - CHAMELEON_Complex64_t *W; - int LDW; - - quark_unpack_args_15(quark, op, side, storev, M, N, K, L, - A1, LDA1, A2, LDA2, V, LDV, W, LDW); - - CORE_zpamm( op, side, storev, M, N, K, L, A1, LDA1, A2, LDA2, V, LDV, W, LDW); -} - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - * ZPAMM performs one of the matrix-matrix operations - * - * LEFT RIGHT - * OP ChameleonW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) - * OP ChameleonA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V) - * - * where op( V ) is one of - * - * op( V ) = V or op( V ) = V**T or op( V ) = V**H, - * - * A1, A2 and W are general matrices, and V is: - * - * l = k: rectangle + triangle - * l < k: rectangle + trapezoid - * l = 0: rectangle - * - * Size of V, both rowwise and columnwise, is: - * - * ---------------------- - * side trans size - * ---------------------- - * left N M x K - * T K x M - * right N K x N - * T N x K - * ---------------------- - * - * LEFT (columnwise and rowwise): - * - * | K | | M | - * _ __________ _ _______________ _ - * | | | | | \ - * V: | | | V': |_____________|___\ K - * | | | M-L | | - * M | | | |__________________| _ - * |____| | _ - * \ | | | M - L | L | - * \ | | L - * _ \|____| _ - * - * RIGHT (columnwise and rowwise): - * - * 
| K | | N | - * _______________ _ _ __________ _ - * | | \ | | | - * V': |_____________|___\ N V: | | | - * | | | | | K-L - * |__________________| _ K | | | - * |____| | _ - * | K - L | L | \ | | - * \ | | L - * _ \|____| _ - * - * Arguments - * ========== - * - * @param[in] op - * - * OP specifies which operation to perform: - * - * @arg ChameleonW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) - * @arg ChameleonA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V) - * - * @param[in] side - * - * SIDE specifies whether op( V ) multiplies A2 - * or W from the left or right as follows: - * - * @arg ChamLeft : multiply op( V ) from the left - * OP ChameleonW : W = A1 + op(V) * A2 - * OP ChameleonA2 : A2 = A2 - op(V) * W - * - * @arg ChamRight : multiply op( V ) from the right - * OP ChameleonW : W = A1 + A2 * op(V) - * OP ChameleonA2 : A2 = A2 - W * op(V) - * - * @param[in] storev - * - * Indicates how the vectors which define the elementary - * reflectors are stored in V: - * - * @arg ChamColumnwise - * @arg ChamRowwise - * - * @param[in] M - * The number of rows of the A1, A2 and W - * If SIDE is ChamLeft, the number of rows of op( V ) - * - * @param[in] N - * The number of columns of the A1, A2 and W - * If SIDE is ChamRight, the number of columns of op( V ) - * - * @param[in] K - * If SIDE is ChamLeft, the number of columns of op( V ) - * If SIDE is ChamRight, the number of rows of op( V ) - * - * @param[in] L - * The size of the triangular part of V - * - * @param[in] A1 - * On entry, the M-by-N tile A1. - * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M). - * - * @param[in,out] A2 - * On entry, the M-by-N tile A2. - * On exit, if OP is ChameleonA2 A2 is overwritten - * - * @param[in] LDA2 - * The leading dimension of the tile A2. LDA2 >= max(1,M). - * - * @param[in] V - * The matrix V as described above. 
- * If SIDE is ChamLeft : op( V ) is M-by-K - * If SIDE is ChamRight: op( V ) is K-by-N - * - * @param[in] LDV - * The leading dimension of the array V. - * - * @param[in,out] W - * On entry, the M-by-N matrix W. - * On exit, W is overwritten either if OP is ChameleonA2 or ChameleonW. - * If OP is ChameleonA2, W is an input and is used as a workspace. - * - * @param[in] LDW - * The leading dimension of array WORK. - * - ******************************************************************************* - * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value - * - */ -void -INSERT_TASK_zpamm(const RUNTIME_option_t *options, - int op, cham_side_t side, cham_store_t storev, - int m, int n, int k, int l, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *W, int Wm, int Wn, int ldw) -{ - QUARK_Insert_Task(opt->quark, CORE_zpamm_quark, (Quark_Task_Flags*)opt, - sizeof(int), &op, VALUE, - sizeof(int), &side, VALUE, - sizeof(int), &storev, VALUE, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(int), &k, VALUE, - sizeof(int), &l, VALUE, - sizeof(CHAMELEON_Complex64_t)*m*k, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INPUT, - sizeof(int), &lda1, VALUE, - sizeof(CHAMELEON_Complex64_t)*k*n, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT, - sizeof(int), &lda2, VALUE, - sizeof(CHAMELEON_Complex64_t)*m*n, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), INPUT, - sizeof(int), &ldv, VALUE, - sizeof(CHAMELEON_Complex64_t)*m*n, RTBLKADDR(W, CHAMELEON_Complex64_t, Wm, Wn), INOUT, - sizeof(int), &ldw, VALUE, - 0); -} diff --git a/runtime/quark/codelets/codelet_zplssq.c b/runtime/quark/codelets/codelet_zplssq.c index 79067050a..14418c58f 100644 --- a/runtime/quark/codelets/codelet_zplssq.c +++ b/runtime/quark/codelets/codelet_zplssq.c @@ -46,7 +46,7 @@ void CORE_zplssq_quark(Quark 
*quark) * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * @brief Compute sum( a_ij ^ 2 ) = scl * sqrt(ssq) * * with scl and ssq such that * @@ -74,8 +74,8 @@ void CORE_zplssq_quark(Quark *quark) * */ void INSERT_TASK_zplssq( const RUNTIME_option_t *options, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, - const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, + const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); QUARK_Insert_Task(opt->quark, CORE_zplssq_quark, (Quark_Task_Flags*)opt, @@ -94,7 +94,7 @@ void CORE_zplssq2_quark(Quark *quark) } void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); QUARK_Insert_Task(opt->quark, CORE_zplssq2_quark, (Quark_Task_Flags*)opt, diff --git a/runtime/quark/codelets/codelet_zssssm.c b/runtime/quark/codelets/codelet_zssssm.c index ea1830964..878b17a56 100644 --- a/runtime/quark/codelets/codelet_zssssm.c +++ b/runtime/quark/codelets/codelet_zssssm.c @@ -115,9 +115,8 @@ void CORE_zssssm_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ void INSERT_TASK_zssssm(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_ztile_zero.c b/runtime/quark/codelets/codelet_ztile_zero.c deleted file mode 100644 index 68f52d47c..000000000 --- a/runtime/quark/codelets/codelet_ztile_zero.c +++ /dev/null @@ -1,58 +0,0 @@ -/** - * - * @file quark/codelet_ztile_zero.c - * - * 
@copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztile_zero Quark codelet - * - * @version 1.0.0 - * @author Hatem Ltaief - * @author Mathieu Faverge - * @author Jakub Kurzak - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" - -void CORE_ztile_zero_quark(Quark *quark) -{ - int X1; - int X2; - int Y1; - int Y2; - CHAMELEON_Complex64_t *A; - int lda; - - int x, y; - - quark_unpack_args_6(quark, X1, X2, Y1, Y2, A, lda); - - for (x = X1; x < X2; x++) - for (y = Y1; y < Y2; y++) - A[lda*x+y] = 0.0; - -} - -void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, - int X1, int X2, int Y1, int Y2, - const CHAM_desc_t *A, int Am, int An, int lda ) -{ - quark_option_t *opt = (quark_option_t*)(options->schedopt); - QUARK_Insert_Task(opt->quark, CORE_ztile_zero_quark, (Quark_Task_Flags*)opt, - sizeof(int), &X1, VALUE, - sizeof(int), &X2, VALUE, - sizeof(int), &Y1, VALUE, - sizeof(int), &Y2, VALUE, - sizeof(CHAMELEON_Complex64_t)*A->bsiz, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), OUTPUT | LOCALITY, - sizeof(int), &lda, VALUE, - 0); -} diff --git a/runtime/quark/codelets/codelet_ztradd.c b/runtime/quark/codelets/codelet_ztradd.c index d18aa1db8..8c42160bc 100644 --- a/runtime/quark/codelets/codelet_ztradd.c +++ b/runtime/quark/codelets/codelet_ztradd.c @@ -46,7 +46,7 @@ void CORE_ztradd_quark(Quark *quark) * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd. 
* * B <- alpha * op(A) + beta * B, * @@ -96,15 +96,14 @@ void CORE_ztradd_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_ztradd(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_ztradd( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_GEADD; diff --git a/runtime/quark/codelets/codelet_ztstrf.c b/runtime/quark/codelets/codelet_ztstrf.c index d44bb81a1..1c31704e2 100644 --- a/runtime/quark/codelets/codelet_ztstrf.c +++ b/runtime/quark/codelets/codelet_ztstrf.c @@ -115,10 +115,9 @@ void CORE_ztstrf_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c index f87e193ad..af020eaec 100644 --- a/runtime/quark/codelets/codelet_zunmlq.c +++ b/runtime/quark/codelets/codelet_zunmlq.c @@ -129,9 +129,8 @@ void CORE_zunmlq_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c index d78a12faa..8407d8967 100644 --- a/runtime/quark/codelets/codelet_zunmqr.c +++ b/runtime/quark/codelets/codelet_zunmqr.c @@ -129,9 +129,8 @@ void CORE_zunmqr_quark(Quark *quark) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zasum.c b/runtime/starpu/codelets/codelet_zasum.c index e8aa0aeca..6bfaf2c13 100644 --- a/runtime/starpu/codelets/codelet_zasum.c +++ b/runtime/starpu/codelets/codelet_zasum.c @@ -22,10 +22,33 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_dzasum(const RUNTIME_option_t *options, - cham_store_t storev, cham_uplo_t uplo, int M, int N, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn) +#if !defined(CHAMELEON_SIMULATION) +static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) +{ + cham_store_t storev; + cham_uplo_t uplo; + int M; + int N; + CHAMELEON_Complex64_t *A; + int 
lda; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda); + CORE_dzasum(storev, uplo, M, N, A, lda, work); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func) + +void INSERT_TASK_dzasum( const RUNTIME_option_t *options, + cham_store_t storev, cham_uplo_t uplo, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet *codelet = &cl_zasum; void (*callback)(void*) = options->profiling ? cl_zasum_callback : NULL; @@ -51,27 +74,3 @@ void INSERT_TASK_dzasum(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) -{ - cham_store_t storev; - cham_uplo_t uplo; - int M; - int N; - CHAMELEON_Complex64_t *A; - int lda; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda); - CORE_dzasum(storev, uplo, M, N, A, lda, work); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index 0b70bb6f9..5280eae12 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -20,10 +20,32 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, - int M, CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An, int incA, - const CHAM_desc_t *B, int Bm, int Bn, int incB) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg) +{ + int M; + 
CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int incA; + CHAMELEON_Complex64_t *B; + int incB; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB); + CORE_zaxpy(M, alpha, A, incA, B, incB); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func) + +void INSERT_TASK_zaxpy( const RUNTIME_option_t *options, + int M, CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int incA, + const CHAM_desc_t *B, int Bm, int Bn, int incB ) { struct starpu_codelet *codelet = &cl_zaxpy; void (*callback)(void*) = options->profiling ? cl_zaxpy_callback : NULL; @@ -48,26 +70,3 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg) -{ - int M; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t *A; - int incA; - CHAMELEON_Complex64_t *B; - int incB; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB); - CORE_zaxpy(M, alpha, A, incA, B, incB); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c index 4aa45bf5b..a2610a9bc 100644 --- a/runtime/starpu/codelets/codelet_zbuild.c +++ b/runtime/starpu/codelets/codelet_zbuild.c @@ -27,9 +27,35 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_Complex64_t *A; + int ld; + void *user_data; + void (*user_build_callback)(int row_min, int row_max, int col_min, int 
col_max, void *buffer, int ld, void *user_data) ; + int row_min, row_max, col_min, col_max; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback ); + + /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max] + * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran) + * and store it at the address 'buffer' with leading dimension 'ld' + */ + user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data); + +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func) + void INSERT_TASK_zbuild( const RUNTIME_option_t *options, - const CHAM_desc_t *A, int Am, int An, int lda, - void *user_data, void* user_build_callback ) + const CHAM_desc_t *A, int Am, int An, int lda, + void *user_data, void* user_build_callback ) { struct starpu_codelet *codelet = &cl_zbuild; @@ -61,30 +87,3 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_Complex64_t *A; - int ld; - void *user_data; - void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ; - int row_min, row_max, col_min, col_max; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback ); - - /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max] - * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran) - * and store it at the address 'buffer' with leading dimension 'ld' - */ 
- user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data); - -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index 7c296d750..eb0adebe5 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -24,12 +24,76 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) +{ + cham_trans_t trans; + int M; + int N; + CHAMELEON_Complex64_t alpha; + const CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t beta; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); + CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); + return; +} + +#ifdef CHAMELEON_USE_CUBLAS_V2 +static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) +{ + cham_trans_t trans; + int M; + int N; + cuDoubleComplex alpha; + const cuDoubleComplex *A; + int lda; + cuDoubleComplex beta; + cuDoubleComplex *B; + int ldb; + + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb); + + RUNTIME_getStream( stream ); + + CUDA_zgeadd( + trans, + M, N, + &alpha, A, lda, + &beta, B, ldb, + stream); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); +#endif + + return; +} +#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */ +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +#if defined(CHAMELEON_USE_CUBLAS_V2) +CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC) 
+#else +CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) +#endif + /** ****************************************************************************** * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * @brief Adds two general matrices together as in PBLAS pzgeadd. * * B <- alpha * op(A) + beta * B, * @@ -73,15 +137,14 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, - cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_zgeadd; void (*callback)(void*) = options->profiling ? 
cl_zgeadd_callback : NULL; @@ -111,68 +174,3 @@ void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, (void)nb; } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) -{ - cham_trans_t trans; - int M; - int N; - CHAMELEON_Complex64_t alpha; - const CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t beta; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); - CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); - return; -} - -#ifdef CHAMELEON_USE_CUBLAS_V2 -static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) -{ - cham_trans_t trans; - int M; - int N; - cuDoubleComplex alpha; - const cuDoubleComplex *A; - int lda; - cuDoubleComplex beta; - cuDoubleComplex *B; - int ldb; - - A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb); - - RUNTIME_getStream( stream ); - - CUDA_zgeadd( - trans, - M, N, - &alpha, A, lda, - &beta, B, ldb, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -#if defined(CHAMELEON_USE_CUBLAS_V2) -CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC) -#else -CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index 8ffad6e1a..abdf1954c 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -112,9 +112,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 205da5e35..fed1350a7 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zgemm(const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zgemm; - void (*callback)(void*) = options->profiling ? 
cl_zgemm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &transB, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zgemm", -#endif - 0); -} - #if !defined(CHAMELEON_SIMULATION) static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) { @@ -144,3 +99,48 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zgemm(const RUNTIME_option_t *options, + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zgemm; + void (*callback)(void*) = options->profiling ? 
cl_zgemm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &transA, sizeof(int), + STARPU_VALUE, &transB, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zgemm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index bee5168f9..1ff57d185 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -114,9 +114,8 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func) * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c index 2dac8366a..72736b064 100644 --- a/runtime/starpu/codelets/codelet_zgessm.c +++ b/runtime/starpu/codelets/codelet_zgessm.c @@ -26,6 +26,32 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void 
cl_zgessm_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + int k; + int ib; + int *IPIV; + int ldl; + CHAMELEON_Complex64_t *D; + int ldd; + CHAMELEON_Complex64_t *A; + int lda; + + D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda); + CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -66,18 +92,17 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ -void INSERT_TASK_zgessm(const RUNTIME_option_t *options, - int m, int n, int k, int ib, int nb, - int *IPIV, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, - const CHAM_desc_t *D, int Dm, int Dn, int ldd, - const CHAM_desc_t *A, int Am, int An, int lda) +void INSERT_TASK_zgessm( const RUNTIME_option_t *options, + int m, int n, int k, int ib, int nb, + int *IPIV, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + const CHAM_desc_t *D, int Dm, int Dn, int ldd, + const CHAM_desc_t *A, int Am, int An, int lda ) { (void)nb; struct starpu_codelet *codelet = &cl_zgessm; @@ -109,30 +134,3 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgessm_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int k; - int ib; - int *IPIV; - int ldl; - CHAMELEON_Complex64_t *D; - int ldd; - CHAMELEON_Complex64_t *A; - int lda; - - D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - A = (CHAMELEON_Complex64_t 
*)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda); - CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c index a9cdc2ff8..e22f803bc 100644 --- a/runtime/starpu/codelets/codelet_zgessq.c +++ b/runtime/starpu/codelets/codelet_zgessq.c @@ -22,10 +22,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgessq_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + double *SCALESUMSQ; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &m, &n, &lda); + CORE_zgessq( m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func) + void INSERT_TASK_zgessq( const RUNTIME_option_t *options, - int m, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_zgessq; void (*callback)(void*) = options->profiling ? 
cl_zgessq_callback : NULL; @@ -49,25 +70,3 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgessq_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - double *SCALESUMSQ; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda); - CORE_zgessq( m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c index b6bf892c4..977e8c2c5 100644 --- a/runtime/starpu/codelets/codelet_zgetrf.c +++ b/runtime/starpu/codelets/codelet_zgetrf.c @@ -24,6 +24,36 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int *IPIV; + cham_bool_t check_info; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request); + CORE_zgetrf( m, n, A, lda, IPIV, &info ); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func) + void INSERT_TASK_zgetrf( const RUNTIME_option_t *options, int m, int n, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -56,34 +86,3 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) 
-static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int *IPIV; - cham_bool_t check_info; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request); - CORE_zgetrf( m, n, A, lda, IPIV, &info ); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c index 94113ee7d..66a5201ca 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c @@ -26,6 +26,38 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_starpu_ws_t *h_work; + int m; + int n; + int ib; + CHAMELEON_Complex64_t *A; + int lda, ldl; + int *IPIV; + cham_bool_t check_info; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request); + CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -71,10 +103,9 @@ * 
******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. @@ -122,36 +153,3 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_starpu_ws_t *h_work; - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A; - int lda, ldl; - int *IPIV; - cham_bool_t check_info; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request); - CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c index 9f3a0a8d2..3efbe362e 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c @@ -23,6 +23,38 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +/* + * Codelet CPU + */ +#if !defined(CHAMELEON_SIMULATION) +static void 
cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + int ib; + CHAMELEON_Complex64_t *A; + int lda; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request); + CORE_zgetrf_nopiv(m, n, ib, A, lda, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -61,10 +93,9 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
@@ -101,35 +132,3 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, #endif 0); } - -/* - * Codelet CPU - */ -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int ib; - CHAMELEON_Complex64_t *A; - int lda; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request); - CORE_zgetrf_nopiv(m, n, ib, A, lda, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c index 4c562fbeb..11ce21834 100644 --- a/runtime/starpu/codelets/codelet_zhe2ge.c +++ b/runtime/starpu/codelets/codelet_zhe2ge.c @@ -18,6 +18,29 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + const CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB); + CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -54,26 +77,3 @@ void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg) -{ - 
cham_uplo_t uplo; - int M; - int N; - const CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB); - CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 9396feebc..4f7698352 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zhemm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zhemm; - void (*callback)(void*) = options->profiling ? 
cl_zhemm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zhemm", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zhemm_cpu_func(void *descr[], void *cl_arg) { @@ -142,3 +97,47 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zhemm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zhemm; + void (*callback)(void*) = options->profiling ? 
cl_zhemm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(int), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zhemm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 668ee4246..24553aba4 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zher2k(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zher2k; - void (*callback)(void*) = options->profiling ? 
cl_zher2k_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(double), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zher2k", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zher2k_cpu_func(void *descr[], void *cl_arg) { @@ -135,3 +90,47 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zher2k(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zher2k; + void (*callback)(void*) = options->profiling ? 
cl_zher2k_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &trans, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(double), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zher2k", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c index 21c97dcf8..d83314923 100644 --- a/runtime/starpu/codelets/codelet_zherfb.c +++ b/runtime/starpu/codelets/codelet_zherfb.c @@ -20,51 +20,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zherfb(const RUNTIME_option_t *options, - cham_uplo_t uplo, - int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - struct starpu_codelet *codelet = &cl_zherfb; - void (*callback)(void*) = options->profiling ? 
cl_zherfb_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(T, Tm, Tn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_VALUE, &nb, sizeof(int), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_SCRATCH, options->ws_worker, - STARPU_VALUE, &nb, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zherfb", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zherfb_cpu_func(void *descr[], void *cl_arg) { @@ -131,3 +86,47 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zherfb(const RUNTIME_option_t *options, + cham_uplo_t uplo, + int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + struct starpu_codelet *codelet = &cl_zherfb; + void (*callback)(void*) = options->profiling ? 
cl_zherfb_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(T, Tm, Tn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), + STARPU_VALUE, &nb, sizeof(int), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), + STARPU_VALUE, &ldt, sizeof(int), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_SCRATCH, options->ws_worker, + STARPU_VALUE, &nb, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zherfb", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 101eef19c..d8709bac1 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -26,46 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zherk(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - double alpha, const CHAM_desc_t *A, int Am, int An, int lda, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zherk; - void (*callback)(void*) = options->profiling ? 
cl_zherk_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(double), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_VALUE, &beta, sizeof(double), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zherk", -#endif - 0); -} - #if !defined(CHAMELEON_SIMULATION) static void cl_zherk_cpu_func(void *descr[], void *cl_arg) { @@ -129,3 +89,43 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zherk(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + double alpha, const CHAM_desc_t *A, int Am, int An, int lda, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zherk; + void (*callback)(void*) = options->profiling ? 
cl_zherk_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &trans, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(double), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_VALUE, &beta, sizeof(double), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zherk", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c index c47e0871d..c2ebde6af 100644 --- a/runtime/starpu/codelets/codelet_zhessq.c +++ b/runtime/starpu/codelets/codelet_zhessq.c @@ -22,6 +22,27 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zhessq_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int n; + CHAMELEON_Complex64_t *A; + int lda; + double *SCALESUMSQ; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); + CORE_zhessq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zhessq, 2, cl_zhessq_cpu_func) + void INSERT_TASK_zhessq( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, const CHAM_desc_t *A, int Am, int An, int lda, @@ -49,25 +70,3 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zhessq_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int n; 
- CHAMELEON_Complex64_t *A; - int lda; - double *SCALESUMSQ; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); - CORE_zhessq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zhessq, 2, cl_zhessq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index 0d2426400..9c53e6e4e 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -26,15 +26,40 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + int displA; + int displB; + const CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB); + CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { (void)nb; struct starpu_codelet *codelet = &cl_zlacpy; @@ -64,37 +89,12 @@ void INSERT_TASK_zlacpyx(const 
RUNTIME_option_t *options, 0); } -void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, - cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, 0, A, Am, An, lda, 0, B, Bm, Bn, ldb ); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - int displA; - int displB; - const CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB); - CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c index 43b4314c6..46eea2740 100644 --- a/runtime/starpu/codelets/codelet_zlag2c.c +++ b/runtime/starpu/codelets/codelet_zlag2c.c @@ -24,6 +24,28 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + CHAMELEON_Complex32_t *B; + int ldb; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); + CORE_zlag2c( m, n, A, lda, B, ldb); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ 
+CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -60,22 +82,27 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, } #if !defined(CHAMELEON_SIMULATION) -static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) +static void cl_clag2z_cpu_func(void *descr[], void *cl_arg) { int m; int n; - CHAMELEON_Complex64_t *A; + CHAMELEON_Complex32_t *A; int lda; - CHAMELEON_Complex32_t *B; + CHAMELEON_Complex64_t *B; int ldb; - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]); + A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); - CORE_zlag2c( m, n, A, lda, B, ldb); + CORE_clag2z( m, n, A, lda, B, ldb); } #endif /* !defined(CHAMELEON_SIMULATION) */ +/* + * Codelet definition + */ +CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func) + void INSERT_TASK_clag2z(const RUNTIME_option_t *options, int m, int n, int nb, const CHAM_desc_t *A, int Am, int An, int lda, @@ -105,30 +132,3 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_clag2z_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - CHAMELEON_Complex32_t *A; - int lda; - CHAMELEON_Complex64_t *B; - int ldb; - - A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); - CORE_clag2z( m, n, A, lda, B, ldb); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func) -/* - * Codelet definition - */ -CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c index 9ab611908..4b389bbb2 100644 --- 
a/runtime/starpu/codelets/codelet_zlange.c +++ b/runtime/starpu/codelets/codelet_zlange.c @@ -24,6 +24,30 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlange_cpu_func(void *descr[], void *cl_arg) +{ + double *normA; + cham_normtype_t norm; + int M; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA); + CORE_zlange( norm, M, N, A, LDA, work, normA ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlange, 3, cl_zlange_cpu_func) + void INSERT_TASK_zlange( const RUNTIME_option_t *options, cham_normtype_t norm, int M, int N, int NB, const CHAM_desc_t *A, int Am, int An, int LDA, @@ -56,28 +80,25 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options, } #if !defined(CHAMELEON_SIMULATION) -static void cl_zlange_cpu_func(void *descr[], void *cl_arg) +static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg) { - double *normA; - cham_normtype_t norm; - int M; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - double *work; + double *A; + double *B; - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA); - CORE_zlange( norm, M, N, A, LDA, work, normA ); + A = (double *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + + if ( *A > *B ) { + *B = *A; + } + (void)cl_arg; } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlange, 3, cl_zlange_cpu_func) +CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func) void INSERT_TASK_zlange_max(const RUNTIME_option_t *options, const 
CHAM_desc_t *A, int Am, int An, @@ -102,24 +123,3 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg) -{ - double *A; - double *B; - - A = (double *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - - if ( *A > *B ) { - *B = *A; - } - (void)cl_arg; -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c index d29b5b3d5..4fc51d3d0 100644 --- a/runtime/starpu/codelets/codelet_zlanhe.c +++ b/runtime/starpu/codelets/codelet_zlanhe.c @@ -24,6 +24,30 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg) +{ + double *normA; + cham_normtype_t norm; + cham_uplo_t uplo; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); + CORE_zlanhe( norm, uplo, N, A, LDA, work, normA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func) + void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, const CHAM_desc_t *A, int Am, int An, int LDA, @@ -55,27 +79,3 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, (void)NB; } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg) -{ - double *normA; - cham_normtype_t norm; - cham_uplo_t uplo; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - double *work; - - A = (CHAMELEON_Complex64_t 
*)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); - CORE_zlanhe( norm, uplo, N, A, LDA, work, normA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c index 6fd7cae04..fdea83309 100644 --- a/runtime/starpu/codelets/codelet_zlansy.c +++ b/runtime/starpu/codelets/codelet_zlansy.c @@ -24,10 +24,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zlansy(const RUNTIME_option_t *options, - cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlansy_cpu_func(void *descr[], void *cl_arg) +{ + double *normA; + cham_normtype_t norm; + cham_uplo_t uplo; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); + CORE_zlansy( norm, uplo, N, A, LDA, work, normA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func) + +void INSERT_TASK_zlansy( const RUNTIME_option_t *options, + cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ) { (void)NB; struct starpu_codelet *codelet = &cl_zlansy; @@ -54,27 +78,3 @@ void INSERT_TASK_zlansy(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlansy_cpu_func(void *descr[], void *cl_arg) -{ - double 
*normA; - cham_normtype_t norm; - cham_uplo_t uplo; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); - CORE_zlansy( norm, uplo, N, A, LDA, work, normA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c index b4a5da805..078b81d6b 100644 --- a/runtime/starpu/codelets/codelet_zlantr.c +++ b/runtime/starpu/codelets/codelet_zlantr.c @@ -22,11 +22,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zlantr(const RUNTIME_option_t *options, - cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, - int M, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, - const CHAM_desc_t *B, int Bm, int Bn) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlantr_cpu_func(void *descr[], void *cl_arg) +{ + double *normA; + cham_normtype_t norm, uplo, diag; + int M, N; + CHAMELEON_Complex64_t *A; + int LDA; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA); + CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func) + +void INSERT_TASK_zlantr( const RUNTIME_option_t *options, + cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, + int M, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet *codelet = &cl_zlantr; 
void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL; @@ -56,26 +79,3 @@ void INSERT_TASK_zlantr(const RUNTIME_option_t *options, (void)NB; } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlantr_cpu_func(void *descr[], void *cl_arg) -{ - double *normA; - cham_normtype_t norm, uplo, diag; - int M, N; - CHAMELEON_Complex64_t *A; - int LDA; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA); - CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index 61d63bcfc..f625d8830 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -22,6 +22,28 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int LDA; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); + CORE_zlascal(uplo, M, N, alpha, A, LDA); + return; +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -49,12 +71,10 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th 
argument had an illegal value * */ - void INSERT_TASK_zlascal(const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, @@ -84,26 +104,3 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t *A; - int LDA; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); - CORE_zlascal(uplo, M, N, alpha, A, LDA); - return; -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c index 80ab8c2c1..9108167de 100644 --- a/runtime/starpu/codelets/codelet_zlaset.c +++ b/runtime/starpu/codelets/codelet_zlaset.c @@ -25,6 +25,27 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t beta; + CHAMELEON_Complex64_t *A; + int LDA; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA); + CORE_zlaset(uplo, M, N, alpha, beta, A, LDA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func) /** * @@ -90,26 +111,3 @@ void INSERT_TASK_zlaset(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t beta; - CHAMELEON_Complex64_t *A; - int LDA; - - A = (CHAMELEON_Complex64_t 
*)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA); - CORE_zlaset(uplo, M, N, alpha, beta, A, LDA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c index c3514735d..0e67ee39d 100644 --- a/runtime/starpu/codelets/codelet_zlaset2.c +++ b/runtime/starpu/codelets/codelet_zlaset2.c @@ -25,6 +25,26 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int M; + int N; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int LDA; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); + CORE_zlaset2(uplo, M, N, alpha, A, LDA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func) /** * @@ -86,25 +106,3 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int M; - int N; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t *A; - int LDA; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); - CORE_zlaset2(uplo, M, N, alpha, A, LDA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c index d92ddaf47..063b7aea8 100644 --- a/runtime/starpu/codelets/codelet_zlatro.c +++ b/runtime/starpu/codelets/codelet_zlatro.c @@ -26,16 +26,40 @@ #include "chameleon_starpu.h" 
#include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlatro_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + cham_trans_t trans; + int M; + int N; + const CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB); + CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_zlatro(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_zlatro( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_zlatro; void (*callback)(void*) = NULL; @@ -63,27 +87,3 @@ void INSERT_TASK_zlatro(const RUNTIME_option_t *options, 0); (void)mb; } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlatro_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - cham_trans_t trans; - int M; - int N; - const CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB); - CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func) diff --git 
a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index 166b13881..2344c5171 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -26,14 +26,33 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA); + CORE_zlauum(uplo, N, A, LDA); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_zlauum(const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda) +void INSERT_TASK_zlauum( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda ) { (void)nb; struct starpu_codelet *codelet = &cl_zlauum; @@ -56,23 +75,3 @@ void INSERT_TASK_zlauum(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA); - CORE_zlauum(uplo, N, A, LDA); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c index ae014c297..345d18a24 100644 --- a/runtime/starpu/codelets/codelet_zplghe.c +++ b/runtime/starpu/codelets/codelet_zplghe.c @@ -26,13 +26,36 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* 
INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ +/* cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ -void INSERT_TASK_zplghe( const RUNTIME_option_t *options, - double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) { + double bump; + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int bigM; + int m0; + int n0; + unsigned long long int seed; + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); + CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func) + +void INSERT_TASK_zplghe( const RUNTIME_option_t *options, + double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) +{ struct starpu_codelet *codelet = &cl_zplghe; void (*callback)(void*) = options->profiling ? cl_zplghe_callback : NULL; @@ -58,29 +81,3 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options, #endif 0); } - -/* cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. 
*/ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) -{ - double bump; - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int bigM; - int m0; - int n0; - unsigned long long int seed; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c index de00e6033..9141ecd0f 100644 --- a/runtime/starpu/codelets/codelet_zplgsy.c +++ b/runtime/starpu/codelets/codelet_zplgsy.c @@ -26,7 +26,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */ +/* cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. 
*/ + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_Complex64_t bump; + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int bigM; + int m0; + int n0; + unsigned long long int seed; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); + CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func) void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, @@ -58,29 +82,3 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, #endif 0); } - -/* cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_Complex64_t bump; - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int bigM; - int m0; - int n0; - unsigned long long int seed; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c index 24f7c9159..d824485da 100644 --- a/runtime/starpu/codelets/codelet_zplrnt.c +++ b/runtime/starpu/codelets/codelet_zplrnt.c @@ -26,11 +26,32 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* INSERT_TASK_zplrnt - Generate a tile for random matrix. 
*/ +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) +{ + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int bigM; + int m0; + int n0; + unsigned long long int seed; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed ); + CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func) void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, - int bigM, int m0, int n0, unsigned long long int seed ) + int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) { struct starpu_codelet *codelet = &cl_zplrnt; @@ -57,28 +78,3 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, #endif 0); } - -/* cl_zplrnt_cpu_func - Generate a tile for random matrix. 
*/ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) -{ - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int bigM; - int m0; - int n0; - unsigned long long int seed; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c index 4fdbaf6c3..2fe5d2a7f 100644 --- a/runtime/starpu/codelets/codelet_zplssq.c +++ b/runtime/starpu/codelets/codelet_zplssq.c @@ -23,11 +23,39 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) +{ + double *SCLSSQ_IN; + double *SCLSSQ_OUT; + + SCLSSQ_IN = (double *)STARPU_MATRIX_GET_PTR(descr[0]); + SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + + assert( SCLSSQ_OUT[0] >= 0. 
); + if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { + SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); + SCLSSQ_OUT[0] = SCLSSQ_IN[0]; + } else { + if ( SCLSSQ_OUT[0] > 0 ) { + SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); + } + } + + (void)cl_arg; +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * @brief Compute sum( a_ij ^ 2 ) = scl * sqrt(ssq) * * with scl and ssq such that * @@ -78,25 +106,14 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options, 0); } - #if !defined(CHAMELEON_SIMULATION) -static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) +static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg) { - double *SCLSSQ_IN; - double *SCLSSQ_OUT; + double *RESULT; - SCLSSQ_IN = (double *)STARPU_MATRIX_GET_PTR(descr[0]); - SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]); - assert( SCLSSQ_OUT[0] >= 0. 
); - if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { - SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); - SCLSSQ_OUT[0] = SCLSSQ_IN[0]; - } else { - if ( SCLSSQ_OUT[0] > 0 ) { - SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); - } - } + RESULT[0] = RESULT[0] * sqrt( RESULT[1] ); (void)cl_arg; } @@ -105,10 +122,10 @@ static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func) +CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func) void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, - const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) { struct starpu_codelet *codelet = &cl_zplssq2; void (*callback)(void*) = options->profiling ? cl_zplssq2_callback : NULL; @@ -127,22 +144,3 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg) -{ - double *RESULT; - - RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]); - - RESULT[0] = RESULT[0] * sqrt( RESULT[1] ); - - (void)cl_arg; -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index a43f31723..bbfe81774 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -26,6 +26,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t 
*)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request); + CORE_zpotrf(uplo, n, A, lda, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -61,33 +89,3 @@ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request); - CORE_zpotrf(uplo, n, A, lda, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func) - diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c index 9efbd985f..ecae613ee 100644 --- a/runtime/starpu/codelets/codelet_zssssm.c +++ b/runtime/starpu/codelets/codelet_zssssm.c @@ -26,6 +26,39 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zssssm_cpu_func(void *descr[], void *cl_arg) +{ + int m1; + int n1; + int m2; + int n2; + int k; + int ib; + CHAMELEON_Complex64_t *A1; + int lda1; + CHAMELEON_Complex64_t *A2; + int lda2; + CHAMELEON_Complex64_t *L1; + int ldl1; + CHAMELEON_Complex64_t *L2; + int ldl2; + int *IPIV; + + A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + A2 = 
(CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV); + CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -91,19 +124,17 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value * */ - -void INSERT_TASK_zssssm(const RUNTIME_option_t *options, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *L1, int L1m, int L1n, int ldl1, - const CHAM_desc_t *L2, int L2m, int L2n, int ldl2, - const int *IPIV) +void INSERT_TASK_zssssm( const RUNTIME_option_t *options, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *L1, int L1m, int L1n, int ldl1, + const CHAM_desc_t *L2, int L2m, int L2n, int ldl2, + const int *IPIV ) { (void)nb; struct starpu_codelet *codelet = &cl_zssssm; @@ -140,38 +171,3 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zssssm_cpu_func(void *descr[], void *cl_arg) -{ - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - 
CHAMELEON_Complex64_t *L1; - int ldl1; - CHAMELEON_Complex64_t *L2; - int ldl2; - int *IPIV; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV); - CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func) - diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 455d118f1..49d3af5d9 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zsymm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zsymm; - void (*callback)(void*) = options->profiling ? 
cl_zsymm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zsymm", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zsymm_cpu_func(void *descr[], void *cl_arg) { @@ -142,3 +97,47 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zsymm, 3, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zsymm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zsymm; + void (*callback)(void*) = options->profiling ? 
cl_zsymm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(int), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zsymm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index bafefa0f7..27b63010a 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -26,51 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zsyr2k; - void (*callback)(void*) = options->profiling ? 
cl_zsyr2k_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_R(B, Bm, Bn); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zsyr2k", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg) { @@ -135,3 +90,47 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zsyr2k, 3, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zsyr2k; + void (*callback)(void*) = options->profiling ? 
cl_zsyr2k_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(B, Bm, Bn); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &trans, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zsyr2k", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 6f72802e5..e08990453 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -26,47 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_zsyrk; - void (*callback)(void*) = options->profiling ? 
cl_zsyrk_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_RW(C, Cm, Cn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zsyrk", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) { @@ -130,3 +89,43 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(zsyrk, 2, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_zsyrk; + void (*callback)(void*) = options->profiling ? 
cl_zsyrk_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_RW(C, Cm, Cn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &trans, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &k, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + STARPU_VALUE, &ldc, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zsyrk", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c index 2e2ae7676..3f07e618d 100644 --- a/runtime/starpu/codelets/codelet_zsyssq.c +++ b/runtime/starpu/codelets/codelet_zsyssq.c @@ -22,10 +22,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int n; + CHAMELEON_Complex64_t *A; + int lda; + double *SCALESUMSQ; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); + CORE_zsyssq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func) + void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) + cham_uplo_t uplo, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int 
SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_zsyssq; void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL; @@ -49,25 +70,3 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int n; - CHAMELEON_Complex64_t *A; - int lda; - double *SCALESUMSQ; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda); - CORE_zsyssq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c index b1f741aff..06c4775e7 100644 --- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c @@ -26,10 +26,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo) +#if !defined(CHAMELEON_SIMULATION) +static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + int n; + CHAMELEON_Complex64_t *A; + int lda; + int iinfo; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo); + CORE_zsytf2_nopiv(uplo, n, A, lda); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func) + +void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo ) { (void)nb; struct starpu_codelet 
*codelet = &cl_zsytrf_nopiv; @@ -54,25 +75,3 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - int n; - CHAMELEON_Complex64_t *A; - int lda; - int iinfo; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo); - CORE_zsytf2_nopiv(uplo, n, A, lda); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztile_zero.c b/runtime/starpu/codelets/codelet_ztile_zero.c deleted file mode 100644 index c59115b41..000000000 --- a/runtime/starpu/codelets/codelet_ztile_zero.c +++ /dev/null @@ -1,84 +0,0 @@ -/** - * - * @file starpu/codelet_ztile_zero.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon ztile_zero StarPU codelet - * - * @version 1.0.0 - * @author Hatem Ltaief - * @author Mathieu Faverge - * @author Jakub Kurzak - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include "chameleon_starpu.h" -#include "runtime_codelet_z.h" - -/** - * - */ -void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, - int X1, int X2, int Y1, int Y2, - const CHAM_desc_t *A, int Am, int An, int lda ) -{ - struct starpu_codelet *codelet; - codelet = &cl_ztile_zero; - void (*callback)(void*) = options->profiling ? 
cl_zlacpy_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_W(A, Am, An); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &X1, sizeof(int), - STARPU_VALUE, &X2, sizeof(int), - STARPU_VALUE, &Y1, sizeof(int), - STARPU_VALUE, &Y2, sizeof(int), - STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, NULL, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztile_zero", -#endif - 0); -} - -/** - * - */ -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztile_zero_cpu_func(void *descr[], void *cl_arg) -{ - int X1; - int X2; - int Y1; - int Y2; - CHAMELEON_Complex64_t *A; - int lda; - - int x, y; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &X1, &X2, &Y1, &Y2, &lda); - - for (x = X1; x < X2; x++) - for (y = Y1; y < Y2; y++) - A[lda*x+y] = 0.0; - -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztile_zero, 1, cl_ztile_zero_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c index 44615d5c3..8132a27dd 100644 --- a/runtime/starpu/codelets/codelet_ztplqt.c +++ b/runtime/starpu/codelets/codelet_ztplqt.c @@ -54,12 +54,11 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg) */ CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func) -void -INSERT_TASK_ztplqt( const RUNTIME_option_t *options, - int M, int N, int L, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { struct 
starpu_codelet *codelet = &cl_ztplqt; void (*callback)(void*) = options->profiling ? cl_ztplqt_callback : NULL; diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c index 8dffa4ff2..54a24a070 100644 --- a/runtime/starpu/codelets/codelet_ztpmlqt.c +++ b/runtime/starpu/codelets/codelet_ztpmlqt.c @@ -103,14 +103,13 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg) */ CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC) -void -INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) +void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_ztpmlqt; void (*callback)(void*) = options->profiling ? 
cl_ztpmlqt_callback : NULL; diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index 6684e59f8..c94a33b43 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -104,14 +104,13 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) */ CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC) -void -INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) +void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_ztpmqrt; void (*callback)(void*) = options->profiling ? 
cl_ztpmqrt_callback : NULL; diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index 6fbd0afe6..143d613eb 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -54,12 +54,11 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg) */ CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func) -void -INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, - int M, int N, int L, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { struct starpu_codelet *codelet = &cl_ztpqrt; void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL; diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index f6265c28c..57fa58e17 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -22,12 +22,39 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + cham_trans_t trans; + int M; + int N; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int LDA; + CHAMELEON_Complex64_t beta; + CHAMELEON_Complex64_t *B; + int LDB; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); + CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); + return; +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztradd, 2, 
cl_ztradd_cpu_func) + /** ****************************************************************************** * * @ingroup INSERT_TASK_Complex64_t * - * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd. * * B <- alpha * op(A) + beta * B, * @@ -77,15 +104,14 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ -void INSERT_TASK_ztradd(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +void INSERT_TASK_ztradd( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) { struct starpu_codelet *codelet = &cl_ztradd; void (*callback)(void*) = options->profiling ? 
cl_zgeadd_callback : NULL; @@ -116,31 +142,3 @@ void INSERT_TASK_ztradd(const RUNTIME_option_t *options, (void)nb; } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - cham_trans_t trans; - int M; - int N; - CHAMELEON_Complex64_t alpha; - CHAMELEON_Complex64_t *A; - int LDA; - CHAMELEON_Complex64_t beta; - CHAMELEON_Complex64_t *B; - int LDB; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); - CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); - return; -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c index c1d154aad..1ca5a1a6a 100644 --- a/runtime/starpu/codelets/codelet_ztrasm.c +++ b/runtime/starpu/codelets/codelet_ztrasm.c @@ -22,10 +22,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void INSERT_TASK_ztrasm(const RUNTIME_option_t *options, - cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn) +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg) +{ + cham_store_t storev; + cham_uplo_t uplo; + cham_diag_t diag; + int M; + int N; + CHAMELEON_Complex64_t *A; + int lda; + double *work; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda); + CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func) + +void 
INSERT_TASK_ztrasm( const RUNTIME_option_t *options, + cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet *codelet = &cl_ztrasm; void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL; @@ -48,32 +72,7 @@ void INSERT_TASK_ztrasm(const RUNTIME_option_t *options, STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrasm", + STARPU_NAME, "ztrasm", #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg) -{ - cham_store_t storev; - cham_uplo_t uplo; - cham_diag_t diag; - int M; - int N; - CHAMELEON_Complex64_t *A; - int lda; - double *work; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda); - CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index b125de67f..b9f553b64 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -26,48 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_ztrmm; - void (*callback)(void*) = options->profiling ? 
cl_ztrmm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_RW(B, Bm, Bn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrmm", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) { @@ -136,3 +94,44 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(ztrmm, 2, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_ztrmm; + void (*callback)(void*) = options->profiling ? 
cl_ztrmm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_RW(B, Bm, Bn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(int), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &transA, sizeof(int), + STARPU_VALUE, &diag, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "ztrmm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index e48a4eb16..83310ab1b 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -26,48 +26,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) -{ - (void)nb; - struct starpu_codelet *codelet = &cl_ztrsm; - void (*callback)(void*) = options->profiling ? 
cl_ztrsm_callback : NULL; - - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(A, Am, An); - CHAMELEON_ACCESS_RW(B, Bm, Bn); - CHAMELEON_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrsm", -#endif - 0); -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) { @@ -134,3 +92,44 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ CODELETS(ztrsm, 2, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + (void)nb; + struct starpu_codelet *codelet = &cl_ztrsm; + void (*callback)(void*) = options->profiling ? 
cl_ztrsm_callback : NULL; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_RW(B, Bm, Bn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(int), + STARPU_VALUE, &uplo, sizeof(int), + STARPU_VALUE, &transA, sizeof(int), + STARPU_VALUE, &diag, sizeof(int), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "ztrsm", +#endif + 0); +} diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c index aac4b9fbd..e7d0de998 100644 --- a/runtime/starpu/codelets/codelet_ztrssq.c +++ b/runtime/starpu/codelets/codelet_ztrssq.c @@ -22,11 +22,34 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + cham_diag_t diag; + int m; + int n; + CHAMELEON_Complex64_t *A; + int lda; + double *SCALESUMSQ; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); + starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n, &lda); + CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func) + void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_diag_t diag, - int m, int n, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) + cham_uplo_t uplo, 
cham_diag_t diag, + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_ztrssq; void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL; @@ -52,27 +75,3 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - cham_diag_t diag; - int m; - int n; - CHAMELEON_Complex64_t *A; - int lda; - double *SCALESUMSQ; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n, &lda); - CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index 81ee2923e..804d21b02 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -26,16 +26,45 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) +{ + cham_uplo_t uplo; + cham_diag_t diag; + int N; + CHAMELEON_Complex64_t *A; + int LDA; + int iinfo; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + int info = 0; + + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request); + CORE_ztrtri(uplo, diag, N, A, LDA, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ 
+CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_ztrtri(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_diag_t diag, - int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - int iinfo) +void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_diag_t diag, + int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo ) { (void)nb; struct starpu_codelet *codelet = &cl_ztrtri; @@ -62,33 +91,3 @@ void INSERT_TASK_ztrtri(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) -{ - cham_uplo_t uplo; - cham_diag_t diag; - int N; - CHAMELEON_Complex64_t *A; - int LDA; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request); - CORE_ztrtri(uplo, diag, N, A, LDA, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c index d68e2bebf..4e82f101c 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c @@ -22,18 +22,60 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m1; + int n1; + int m2; + int n2; + int k; + int ib; + int nb; + CHAMELEON_Complex64_t *A1; + int lda1; + CHAMELEON_Complex64_t *A2; + int lda2; + CHAMELEON_Complex64_t 
*V; + int ldv; + CHAMELEON_Complex64_t *T; + int ldt; + + CHAMELEON_Complex64_t *WORK; + int ldwork; + + A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ + + starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, + &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork); + CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, + ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) +void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { struct starpu_codelet *codelet = &cl_ztsmlq_hetra1; void (*callback)(void*) = options->profiling ? 
cl_ztsmlq_hetra1_callback : NULL; @@ -75,45 +117,3 @@ void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - int nb; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, - &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork); - CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, - ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c index af9f2adcc..66fa69dab 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c @@ -22,18 +22,60 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m1; + int n1; + int m2; + int n2; + int k; + int ib; + CHAMELEON_Complex64_t *A1; + int lda1; + CHAMELEON_Complex64_t *A2; + int lda2; + CHAMELEON_Complex64_t *V; + int ldv; + CHAMELEON_Complex64_t *T; + int ldt; + + /* TODO: manage 
workspace */ + CHAMELEON_Complex64_t *WORK; + int ldwork; + + A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, + &ib, &lda1, &lda2, &ldv, &ldt, &ldwork); + CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, + ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) +void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) { struct starpu_codelet *codelet = &cl_ztsmqr_hetra1; void (*callback)(void*) = options->profiling ? 
cl_ztsmqr_hetra1_callback : NULL; @@ -74,45 +116,3 @@ void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options, #endif 0); } - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - CHAMELEON_Complex64_t *A1; - int lda1; - CHAMELEON_Complex64_t *A2; - int lda2; - CHAMELEON_Complex64_t *V; - int ldv; - CHAMELEON_Complex64_t *T; - int ldt; - - /* TODO: manage workspace */ - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, - &ib, &lda1, &lda2, &ldv, &ldt, &ldwork); - CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, - ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c index e139931a6..7e1dfd92a 100644 --- a/runtime/starpu/codelets/codelet_ztstrf.c +++ b/runtime/starpu/codelets/codelet_ztstrf.c @@ -26,6 +26,51 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) +{ + CHAMELEON_starpu_ws_t *d_work; + int m; + int n; + int ib; + int nb; + CHAMELEON_Complex64_t *U; + int ldu; + CHAMELEON_Complex64_t *A; + int lda; + CHAMELEON_Complex64_t *L; + int ldl; + int *IPIV; + CHAMELEON_Complex64_t *WORK; + int ldwork; + cham_bool_t check_info; + int iinfo; + RUNTIME_sequence_t *sequence; 
+ RUNTIME_request_t *request; + int info = 0; + + U = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, + &IPIV, &d_work, &ldwork, &check_info, &iinfo, + &sequence, &request); + + CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info); + + if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -83,23 +128,21 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if INFO = -k, the k-th argument had an illegal value - * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if INFO = -k, the k-th argument had an illegal value + * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. 
* */ - -void INSERT_TASK_ztstrf(const RUNTIME_option_t *options, - int m, int n, int ib, int nb, - const CHAM_desc_t *U, int Um, int Un, int ldu, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, - int *IPIV, - cham_bool_t check_info, int iinfo) +void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *U, int Um, int Un, int ldu, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + int *IPIV, + cham_bool_t check_info, int iinfo ) { (void)nb; struct starpu_codelet *codelet = &cl_ztstrf; @@ -139,50 +182,3 @@ void INSERT_TASK_ztstrf(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) -{ - CHAMELEON_starpu_ws_t *d_work; - int m; - int n; - int ib; - int nb; - CHAMELEON_Complex64_t *U; - int ldu; - CHAMELEON_Complex64_t *A; - int lda; - CHAMELEON_Complex64_t *L; - int ldl; - int *IPIV; - CHAMELEON_Complex64_t *WORK; - int ldwork; - cham_bool_t check_info; - int iinfo; - RUNTIME_sequence_t *sequence; - RUNTIME_request_t *request; - int info = 0; - - U = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, - &IPIV, &d_work, &ldwork, &check_info, &iinfo, - &sequence, &request); - - CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info); - - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); - } -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) - diff --git 
a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index 89ef1c851..046b4e568 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -27,6 +27,75 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m; + int n; + int k; + int ib; + const CHAMELEON_Complex64_t *A; + int lda; + const CHAMELEON_Complex64_t *T; + int ldt; + CHAMELEON_Complex64_t *C; + int ldc; + CHAMELEON_Complex64_t *WORK; + int ldwork; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, + &lda, &ldt, &ldc, &ldwork); + + CORE_zunmlq(side, trans, m, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork); +} + +#if defined(CHAMELEON_USE_CUDA) +static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m; + int n; + int k; + int ib; + const cuDoubleComplex *A, *T; + cuDoubleComplex *C, *WORK; + int lda, ldt, ldc, ldwork; + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, + &lda, &ldt, &ldc, &ldwork); + + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); + C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + RUNTIME_getStream(stream); + + CUDA_zunmlqt( + side, trans, m, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); +#endif +} +#endif /* defined(CHAMELEON_USE_CUDA) */ 
+#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -105,18 +174,16 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ - -void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m, int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc) +void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc ) { struct starpu_codelet *codelet = &cl_zunmlq; void (*callback)(void*) = options->profiling ? 
cl_zunmlq_callback : NULL; @@ -151,73 +218,3 @@ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m; - int n; - int k; - int ib; - const CHAMELEON_Complex64_t *A; - int lda; - const CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *C; - int ldc; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); - - CORE_zunmlq(side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork); -} - -#if defined(CHAMELEON_USE_CUDA) -static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m; - int n; - int k; - int ib; - const cuDoubleComplex *A, *T; - cuDoubleComplex *C, *WORK; - int lda, ldt, ldc, ldwork; - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); - - A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - RUNTIME_getStream(stream); - - CUDA_zunmlqt( - side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif -} -#endif /* defined(CHAMELEON_USE_CUDA) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) diff --git 
a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index e6f97c032..afa04149b 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -26,6 +26,75 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +#if !defined(CHAMELEON_SIMULATION) +static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m; + int n; + int k; + int ib; + const CHAMELEON_Complex64_t *A; + int lda; + const CHAMELEON_Complex64_t *T; + int ldt; + CHAMELEON_Complex64_t *C; + int ldc; + CHAMELEON_Complex64_t *WORK; + int ldwork; + + A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, + &lda, &ldt, &ldc, &ldwork); + + CORE_zunmqr(side, trans, m, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork); +} + +#if defined(CHAMELEON_USE_CUDA) +static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) +{ + cham_side_t side; + cham_trans_t trans; + int m; + int n; + int k; + int ib; + const cuDoubleComplex *A, *T; + cuDoubleComplex *C, *WORK; + int lda, ldt, ldc, ldwork; + + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, + &lda, &ldt, &ldc, &ldwork); + + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); + C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + RUNTIME_getStream(stream); + + CUDA_zunmqrt( + side, trans, m, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); +#endif +} +#endif /* defined(CHAMELEON_USE_CUDA) */ 
+#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) + /** * * @ingroup INSERT_TASK_Complex64_t @@ -105,18 +174,16 @@ * ******************************************************************************* * - * @return - * \retval CHAMELEON_SUCCESS successful exit - * \retval <0 if -i, the i-th argument had an illegal value + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value * */ - -void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, - cham_side_t side, cham_trans_t trans, - int m, int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc) +void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc ) { struct starpu_codelet *codelet = &cl_zunmqr; void (*callback)(void*) = options->profiling ? 
cl_zunmqr_callback : NULL; @@ -151,73 +218,3 @@ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, #endif 0); } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m; - int n; - int k; - int ib; - const CHAMELEON_Complex64_t *A; - int lda; - const CHAMELEON_Complex64_t *T; - int ldt; - CHAMELEON_Complex64_t *C; - int ldc; - CHAMELEON_Complex64_t *WORK; - int ldwork; - - A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); - - CORE_zunmqr(side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork); -} - -#if defined(CHAMELEON_USE_CUDA) -static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) -{ - cham_side_t side; - cham_trans_t trans; - int m; - int n; - int k; - int ib; - const cuDoubleComplex *A, *T; - cuDoubleComplex *C, *WORK; - int lda, ldt, ldc, ldwork; - - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); - - A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - RUNTIME_getStream(stream); - - CUDA_zunmqrt( - side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif -} -#endif /* defined(CHAMELEON_USE_CUDA) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - -/* - * Codelet definition - */ -CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) diff --git 
a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index 509abacfc..b97e06ba8 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -33,11 +33,6 @@ #endif #endif -/* - * Management functions - */ -ZCODELETS_HEADER(tile_zero) - /* * BLAS 1 functions */ -- GitLab