diff --git a/runtime/quark/codelets/codelet_zaxpy.c b/runtime/quark/codelets/codelet_zaxpy.c index 04cc7fad949fa01e50a62563081ce4b8cc1617f8..bb3357b8c18c70d3d55ba926782ad1458c33c5ff 100644 --- a/runtime/quark/codelets/codelet_zaxpy.c +++ b/runtime/quark/codelets/codelet_zaxpy.c @@ -39,6 +39,10 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, int incA, const CHAM_desc_t *B, int Bm, int Bn, int incB) { + if ( alpha == 0. ) { + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_AXPY; QUARK_Insert_Task(opt->quark, CORE_zaxpy_quark, (Quark_Task_Flags*)opt, diff --git a/runtime/quark/codelets/codelet_zgeadd.c b/runtime/quark/codelets/codelet_zgeadd.c index d95e443814b94c21a81990aceceae59751155f2b..6fa2cfb2b32c8063e66e584ca42e51ddbbe2e1b6 100644 --- a/runtime/quark/codelets/codelet_zgeadd.c +++ b/runtime/quark/codelets/codelet_zgeadd.c @@ -38,65 +38,19 @@ void CORE_zgeadd_quark(Quark *quark) return; } -/** - ****************************************************************************** - * - * @ingroup INSERT_TASK_Complex64_t - * - * @brief Adds two general matrices together as in PBLAS pzgeadd. - * - * B <- alpha * op(A) + beta * B, - * - * where op(X) = X, X', or conj(X') - * - ******************************************************************************* - * - * @param[in] trans - * Specifies whether the matrix A is non-transposed, transposed, or - * conjugate transposed - * = ChamNoTrans: op(A) = A - * = ChamTrans: op(A) = A' - * = ChamConjTrans: op(A) = conj(A') - * - * @param[in] M - * Number of rows of the matrices op(A) and B. - * - * @param[in] N - * Number of columns of the matrices op(A) and B. - * - * @param[in] alpha - * Scalar factor of A. - * - * @param[in] A - * Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M - * otherwise. - * - * @param[in] LDA - * Leading dimension of the array A. LDA >= max(1,k), with k=M, if - * trans = ChamNoTrans, and k=N otherwise. 
- * - * @param[in] beta - * Scalar factor of B. - * - * @param[in,out] B - * Matrix of size LDB-by-N. - * On exit, B = alpha * op(A) + beta * B - * - * @param[in] LDB - * Leading dimension of the array B. LDB >= max(1,M) - * - ******************************************************************************* - * - * @retval CHAMELEON_SUCCESS successful exit - * @retval <0 if -i, the i-th argument had an illegal value - * - */ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn ) { + if ( alpha == 0. ) { + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, B, Bm, Bn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessB = ( beta == 0. ) ? OUTPUT : INOUT; + DAG_CORE_GEADD; QUARK_Insert_Task(opt->quark, CORE_zgeadd_quark, (Quark_Task_Flags*)opt, sizeof(int), &trans, VALUE, @@ -105,7 +59,7 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, sizeof(CHAMELEON_Complex64_t), &alpha, VALUE, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(CHAMELEON_Complex64_t), &beta, VALUE, - sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT, + sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), accessB, 0); (void)nb; diff --git a/runtime/quark/codelets/codelet_zgemm.c b/runtime/quark/codelets/codelet_zgemm.c index 6def09b52ccce39402397f42c9faaa069329e5e7..9b513766324a29f3c01ccc6e71d80e5875070b5d 100644 --- a/runtime/quark/codelets/codelet_zgemm.c +++ b/runtime/quark/codelets/codelet_zgemm.c @@ -41,8 +41,7 @@ void CORE_zgemm_quark(Quark *quark) quark_unpack_args_10(quark, transA, transB, m, n, k, alpha, tileA, tileB, beta, tileC); TCORE_zgemm( transA, transB, m, n, k, - alpha, tileA, - tileB, + alpha, tileA, tileB, beta, tileC ); } @@ -50,10 +49,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB, int m, int n, int k, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) + const CHAM_desc_t *B, int Bm, int Bn, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, C, Cm, Cn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessC = ( beta == 0. ) ? OUTPUT : INOUT; + DAG_CORE_GEMM; QUARK_Insert_Task(opt->quark, CORE_zgemm_quark, (Quark_Task_Flags*)opt, sizeof(int), &transA, VALUE, @@ -65,6 +71,6 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INPUT, sizeof(CHAMELEON_Complex64_t), &beta, VALUE, - sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT, + sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, 0); } diff --git a/runtime/quark/codelets/codelet_zhe2ge.c b/runtime/quark/codelets/codelet_zhe2ge.c index 7b4a425665848e1c19035b7a46a5c1d1991ff9e9..e8aefce457b3d255e15fa87e1877b1cc1ab56b9f 100644 --- a/runtime/quark/codelets/codelet_zhe2ge.c +++ b/runtime/quark/codelets/codelet_zhe2ge.c @@ -21,11 +21,6 @@ #include "chameleon/tasks_z.h" #include "coreblas/coreblas_ztile.h" -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ static inline void CORE_zhe2ge_quark(Quark *quark) { cham_uplo_t uplo; @@ -38,12 +33,11 @@ static inline void CORE_zhe2ge_quark(Quark *quark) TCORE_zhe2ge(uplo, M, N, tileA, tileB); } - -void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options, + cham_uplo_t uplo, + int m, int n, int mb, +
const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LACPY; diff --git a/runtime/quark/codelets/codelet_zhemm.c b/runtime/quark/codelets/codelet_zhemm.c index 5ab6412228c9aa9ab0e8e891b436368684356a73..c55fa6901d5f0361faa0e4455733e541c80f1b4c 100644 --- a/runtime/quark/codelets/codelet_zhemm.c +++ b/runtime/quark/codelets/codelet_zhemm.c @@ -52,7 +52,14 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, C, Cm, Cn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessC = ( beta == 0. ) ? OUTPUT : INOUT; + DAG_CORE_HEMM; QUARK_Insert_Task(opt->quark, CORE_zhemm_quark, (Quark_Task_Flags*)opt, sizeof(int), &side, VALUE, @@ -63,7 +70,6 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INPUT, sizeof(CHAMELEON_Complex64_t), &beta, VALUE, - sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT, + sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, 0); } - diff --git a/runtime/quark/codelets/codelet_zher2k.c b/runtime/quark/codelets/codelet_zher2k.c index bd6437c53ec7861534693eb58655fe487f3b65b9..05b46cd1fbb1e0073e13cec5d5a0d57c6a399cc9 100644 --- a/runtime/quark/codelets/codelet_zher2k.c +++ b/runtime/quark/codelets/codelet_zher2k.c @@ -42,14 +42,22 @@ void CORE_zher2k_quark(Quark *quark) n, k, alpha, tileA, tileB, beta, tileC); } -void INSERT_TASK_zher2k(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn, - double beta, const
CHAM_desc_t *C, int Cm, int Cn) +void +INSERT_TASK_zher2k( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn, + double beta, const CHAM_desc_t *C, int Cm, int Cn ) { + if ( alpha == 0. ) { + INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessC = ( beta == 0. ) ? OUTPUT : INOUT; + DAG_CORE_HER2K; QUARK_Insert_Task(opt->quark, CORE_zher2k_quark, (Quark_Task_Flags*)opt, sizeof(int), &uplo, VALUE, @@ -60,6 +68,6 @@ void INSERT_TASK_zher2k(const RUNTIME_option_t *options, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INPUT, sizeof(double), &beta, VALUE, - sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT, + sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, 0); } diff --git a/runtime/quark/codelets/codelet_zherk.c b/runtime/quark/codelets/codelet_zherk.c index 3d47a8e59f9f12df5fd2282c473e46e04e34f45a..7d11dfb52c1f181befa3f7c917e3699a7cd010f7 100644 --- a/runtime/quark/codelets/codelet_zherk.c +++ b/runtime/quark/codelets/codelet_zherk.c @@ -49,7 +49,14 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options, double alpha, const CHAM_desc_t *A, int Am, int An, double beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessC = ( beta == 0. ) ?
OUTPUT : INOUT; + DAG_CORE_HERK; QUARK_Insert_Task(opt->quark, CORE_zherk_quark, (Quark_Task_Flags*)opt, sizeof(int), &uplo, VALUE, @@ -59,6 +66,6 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options, sizeof(double), &alpha, VALUE, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(double), &beta, VALUE, - sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT, + sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, 0); } diff --git a/runtime/quark/codelets/codelet_zlascal.c b/runtime/quark/codelets/codelet_zlascal.c index 716c85c6bf2560bd5c70e6027e509fe68a4a023d..67cdcb149bb78d466dc8ec5c411c701c176ed1a7 100644 --- a/runtime/quark/codelets/codelet_zlascal.c +++ b/runtime/quark/codelets/codelet_zlascal.c @@ -43,6 +43,14 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An) { + if ( alpha == 0. ) { + INSERT_TASK_zlaset( options, uplo, m, n, alpha, alpha, A, Am, An ); + return; + } + else if ( alpha == 1. ) { + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LASCAL; QUARK_Insert_Task(opt->quark, CORE_zlascal_quark, (Quark_Task_Flags*)opt, @@ -53,5 +61,3 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT, 0); } - - diff --git a/runtime/quark/codelets/codelet_zsymm.c b/runtime/quark/codelets/codelet_zsymm.c index 6bccc1deebdca6e376843a60c66c28c5027aef02..71658b68c44370afe45810220bfee3794c67b0b6 100644 --- a/runtime/quark/codelets/codelet_zsymm.c +++ b/runtime/quark/codelets/codelet_zsymm.c @@ -52,7 +52,14 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0.
) { + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, C, Cm, Cn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessC = ( beta == 0. ) ? OUTPUT : INOUT; + DAG_CORE_SYMM; QUARK_Insert_Task(opt->quark, CORE_zsymm_quark, (Quark_Task_Flags*)opt, sizeof(int), &side, VALUE, @@ -63,6 +70,6 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INPUT, sizeof(CHAMELEON_Complex64_t), &beta, VALUE, - sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT, + sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, 0); } diff --git a/runtime/quark/codelets/codelet_zsyr2k.c b/runtime/quark/codelets/codelet_zsyr2k.c index 0e41e44fa6c9f1e2241d2fb1490fb606b8056a25..d172bc35971ffa261fb37914fd7c19eda2980623 100644 --- a/runtime/quark/codelets/codelet_zsyr2k.c +++ b/runtime/quark/codelets/codelet_zsyr2k.c @@ -39,7 +39,7 @@ void CORE_zsyr2k_quark(Quark *quark) quark_unpack_args_9(quark, uplo, trans, n, k, alpha, tileA, tileB, beta, tileC); TCORE_zsyr2k(uplo, trans, - n, k, alpha, tileA, tileB, beta, tileC); + n, k, alpha, tileA, tileB, beta, tileC); } void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, @@ -49,7 +49,14 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessC = ( beta == 0. ) ?
OUTPUT : INOUT; + DAG_CORE_SYR2K; QUARK_Insert_Task(opt->quark, CORE_zsyr2k_quark, (Quark_Task_Flags*)opt, sizeof(int), &uplo, VALUE, @@ -60,6 +67,6 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INPUT, sizeof(CHAMELEON_Complex64_t), &beta, VALUE, - sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT, + sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, 0); } diff --git a/runtime/quark/codelets/codelet_zsyrk.c b/runtime/quark/codelets/codelet_zsyrk.c index d8c272f5013aba1eff3d67fcbbedc69add9c83a0..b58c022d35993f0874ffe64da6cbfc9d814774ac 100644 --- a/runtime/quark/codelets/codelet_zsyrk.c +++ b/runtime/quark/codelets/codelet_zsyrk.c @@ -49,7 +49,14 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessC = ( beta == 0. ) ?
OUTPUT : INOUT; + DAG_CORE_SYRK; QUARK_Insert_Task(opt->quark, CORE_zsyrk_quark, (Quark_Task_Flags*)opt, sizeof(int), &uplo, VALUE, @@ -59,6 +66,6 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, sizeof(CHAMELEON_Complex64_t), &alpha, VALUE, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(CHAMELEON_Complex64_t), &beta, VALUE, - sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT, + sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, 0); } diff --git a/runtime/quark/codelets/codelet_ztradd.c b/runtime/quark/codelets/codelet_ztradd.c index f3a9e0d24dd2d72dfb866295dfaabee528f12086..6532b16008bfc22e1b5288d8cfe5289eaa7406d9 100644 --- a/runtime/quark/codelets/codelet_ztradd.c +++ b/runtime/quark/codelets/codelet_ztradd.c @@ -37,71 +37,19 @@ void CORE_ztradd_quark(Quark *quark) return; } -/** - ****************************************************************************** - * - * @ingroup INSERT_TASK_Complex64_t - * - * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd. - * - * B <- alpha * op(A) + beta * B, - * - * where op(X) = X, X', or conj(X') - * - ******************************************************************************* - * - * @param[in] uplo - * Specifies the shape of A and B matrices: - * = ChamUpperLower: A and B are general matrices. - * = ChamUpper: op(A) and B are upper trapezoidal matrices. - * = ChamLower: op(A) and B are lower trapezoidal matrices. - * - * @param[in] trans - * Specifies whether the matrix A is non-transposed, transposed, or - * conjugate transposed - * = ChamNoTrans: op(A) = A - * = ChamTrans: op(A) = A' - * = ChamConjTrans: op(A) = conj(A') - * - * @param[in] M - * Number of rows of the matrices op(A) and B. - * - * @param[in] N - * Number of columns of the matrices op(A) and B. - * - * @param[in] alpha - * Scalar factor of A. - * - * @param[in] A - * Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M - * otherwise. 
- * - * @param[in] LDA - * Leading dimension of the array A. LDA >= max(1,k), with k=M, if - * trans = ChamNoTrans, and k=N otherwise. - * - * @param[in] beta - * Scalar factor of B. - * - * @param[in,out] B - * Matrix of size LDB-by-N. - * On exit, B = alpha * op(A) + beta * B - * - * @param[in] LDB - * Leading dimension of the array B. LDB >= max(1,M) - * - ******************************************************************************* - * - * @retval CHAMELEON_SUCCESS successful exit - * @retval <0 if -i, the i-th argument had an illegal value - * - */ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn ) { + if ( alpha == 0. ) { + INSERT_TASK_zlascal( options, uplo, m, n, nb, beta, B, Bm, Bn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessB = ( beta == 0. ) ?
OUTPUT : INOUT; + DAG_CORE_GEADD; QUARK_Insert_Task(opt->quark, CORE_ztradd_quark, (Quark_Task_Flags*)opt, sizeof(int), &uplo, VALUE, @@ -111,7 +59,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, sizeof(CHAMELEON_Complex64_t), &alpha, VALUE, sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(CHAMELEON_Complex64_t), &beta, VALUE, - sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT, + sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), accessB, 0); (void)nb; diff --git a/runtime/quark/codelets/codelet_ztrmm.c b/runtime/quark/codelets/codelet_ztrmm.c index 56d6afada16d189dcf06463cc2c93ef73cb958b3..df18b77bebac6a95f34d3aff32353d16fbf5d241 100644 --- a/runtime/quark/codelets/codelet_ztrmm.c +++ b/runtime/quark/codelets/codelet_ztrmm.c @@ -45,12 +45,17 @@ void CORE_ztrmm_quark(Quark *quark) tileB); } -void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { + if ( alpha == 0. 
) { + INSERT_TASK_zlaset( options, ChamUpperLower, m, n, alpha, alpha, B, Bm, Bn ); + return; + } + quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_TRMM; QUARK_Insert_Task(opt->quark, CORE_ztrmm_quark, (Quark_Task_Flags*)opt, diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index bd027eff028f460333ab83651600783c3d085389..65c82231e8205f3e666fb579135ae92f1ad9c660 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -12,8 +12,6 @@ * @brief Chameleon zgeadd StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede @@ -86,59 +84,6 @@ CODELETS(zgeadd, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC) CODELETS_CPU(zgeadd, cl_zgeadd_cpu_func) #endif -/** - ****************************************************************************** - * - * @ingroup INSERT_TASK_Complex64_t - * - * @brief Adds two general matrices together as in PBLAS pzgeadd. - * - * B <- alpha * op(A) + beta * B, - * - * where op(X) = X, X', or conj(X') - * - ******************************************************************************* - * - * @param[in] trans - * Specifies whether the matrix A is non-transposed, transposed, or - * conjugate transposed - * = ChamNoTrans: op(A) = A - * = ChamTrans: op(A) = A' - * = ChamConjTrans: op(A) = conj(A') - * - * @param[in] M - * Number of rows of the matrices op(A) and B. - * - * @param[in] N - * Number of columns of the matrices op(A) and B. - * - * @param[in] alpha - * Scalar factor of A. - * - * @param[in] A - * Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M - * otherwise. - * - * @param[in] ldA - * Leading dimension of the array A. ldA >= max(1,k), with k=M, if - * trans = ChamNoTrans, and k=N otherwise. - * - * @param[in] beta - * Scalar factor of B.
- * - * @param[in,out] B - * Matrix of size ldB-by-N. - * On exit, B = alpha * op(A) + beta * B - * - * @param[in] ldB - * Leading dimension of the array B. ldB >= max(1,M) - * - ******************************************************************************* - * - * @retval CHAMELEON_SUCCESS successful exit - * @retval <0 if -i, the i-th argument had an illegal value - * - */ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 54e6256b57d2b0447d579bfbaf59d075f4c33bc4..42bd6609ab848313fce0892dd463f4c94f502a08 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -12,8 +12,6 @@ * @brief Chameleon zgemm StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c index fe1f9eb291209851ecfb84e1dd7039cfdd560d1a..203544170939a742ccaef1aa05b9066e4185d0a9 100644 --- a/runtime/starpu/codelets/codelet_zhe2ge.c +++ b/runtime/starpu/codelets/codelet_zhe2ge.c @@ -44,11 +44,6 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg) */ CODELETS_CPU(zhe2ge, cl_zhe2ge_cpu_func) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int mb, diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 5c90271ece923555a05f3c3d9d2374b9ba41b000..f4963cacfd70f21410f743fc182eff072886976f 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -12,8 +12,6 @@ * @brief Chameleon zhemm StarPU codelet * * @version 1.0.0 
- * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 0e93a35c99f0b9469839b5c5e8e17d8428598bcd..e652db505a1da7519059c5a52ad6ffd154f900fe 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -12,8 +12,6 @@ * @brief Chameleon zher2k StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 915cc9b77d4a13cdc43b4ba14c67c4871a49dd37..ec0f985b533c5e39a340095ab5301afd660e8cab 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -12,8 +12,6 @@ * @brief Chameleon zherk StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge @@ -88,11 +86,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) */ CODELETS(zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ void INSERT_TASK_zherk(const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index d1bfc3fd35a70e56451b16e70e714433a49dbd1d..0142c39ec967856284333e28c97c5b7b8df3b6e9 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -12,8 +12,6 @@ * @brief Chameleon zlascal StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from 
Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Dalal Sukkari * @author Lucas Barros de Assis * @date 2020-03-03 diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 40ed44bcbb3e44904160bc28faa1d9d58dfccf32..b87b3bef619ec969a7d11e88318bd45522e5679b 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -12,8 +12,6 @@ * @brief Chameleon zsymm StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 51f013036ddde9370e06e62de325c32c259cb01a..82209455847a0c76a6d6297b2537ddab0187c717 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -12,8 +12,6 @@ * @brief Chameleon zsyr2k StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 83c51f5997d6195cc5542ab63386e235b7241f21..9795d0a982665b29d1c1d5a3e20a44898fff1275 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -12,8 +12,6 @@ * @brief Chameleon zsyrk StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index ac3dc8bfaecc14657dc48399b2c10a010babe83f..fbd6a0f8e8f89e58d1f8795c82698325d88f8440 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -12,8 
+12,6 @@ * @brief Chameleon ztradd StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Mathieu Faverge * @author Lucas Barros de Assis * @date 2020-03-03 @@ -48,65 +46,6 @@ static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) */ CODELETS_CPU(ztradd, cl_ztradd_cpu_func) -/** - ****************************************************************************** - * - * @ingroup INSERT_TASK_Complex64_t - * - * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd. - * - * B <- alpha * op(A) + beta * B, - * - * where op(X) = X, X', or conj(X') - * - ******************************************************************************* - * - * @param[in] uplo - * Specifies the shape of A and B matrices: - * = ChamUpperLower: A and B are general matrices. - * = ChamUpper: op(A) and B are upper trapezoidal matrices. - * = ChamLower: op(A) and B are lower trapezoidal matrices. - * - * @param[in] trans - * Specifies whether the matrix A is non-transposed, transposed, or - * conjugate transposed - * = ChamNoTrans: op(A) = A - * = ChamTrans: op(A) = A' - * = ChamConjTrans: op(A) = conj(A') - * - * @param[in] M - * Number of rows of the matrices op(A) and B. - * - * @param[in] N - * Number of columns of the matrices op(A) and B. - * - * @param[in] alpha - * Scalar factor of A. - * - * @param[in] A - * Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M - * otherwise. - * - * @param[in] ldA - * Leading dimension of the array A. ldA >= max(1,k), with k=M, if - * trans = ChamNoTrans, and k=N otherwise. - * - * @param[in] beta - * Scalar factor of B. - * - * @param[in,out] B - * Matrix of size ldB-by-N. - * On exit, B = alpha * op(A) + beta * B - * - * @param[in] ldB - * Leading dimension of the array B. 
ldB >= max(1,M) - * - ******************************************************************************* - * - * @retval CHAMELEON_SUCCESS successful exit - * @retval <0 if -i, the i-th argument had an illegal value - * - */ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index d1404ba960ac1991c06b84dca012c2e2b930e248..e820a6d6b6b2bf65be16e5f6e06ea0a7a643ee57 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -12,8 +12,6 @@ * @brief Chameleon ztrmm StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge