From 11b9f87af90f36ef16b283bff9ec094673fbeb2d Mon Sep 17 00:00:00 2001 From: Florent Pruvost <florent.pruvost@inria.fr> Date: Tue, 15 Sep 2015 13:55:16 +0000 Subject: [PATCH] update the runtimes interface to call only CORE_ functions from coreblas and not cblas --- coreblas/CMakeLists.txt | 1 + coreblas/compute/CMakeLists.txt | 1 + coreblas/compute/core_zaxpy.c | 81 ++++++++++++++++++++++++ coreblas/include/coreblas_z.h | 3 + runtime/quark/codelets/codelet_zaxpy.c | 2 +- runtime/quark/codelets/codelet_zgemm.c | 8 +-- runtime/quark/codelets/codelet_zhemm.c | 8 +-- runtime/quark/codelets/codelet_zher2k.c | 4 +- runtime/quark/codelets/codelet_zherk.c | 4 +- runtime/quark/codelets/codelet_zsymm.c | 8 +-- runtime/quark/codelets/codelet_zsyr2k.c | 4 +- runtime/quark/codelets/codelet_zsyrk.c | 8 +-- runtime/quark/codelets/codelet_ztrmm.c | 8 +-- runtime/quark/codelets/codelet_ztrsm.c | 8 +-- runtime/starpu/codelets/codelet_zaxpy.c | 35 ++++------ runtime/starpu/codelets/codelet_zgemm.c | 8 +-- runtime/starpu/codelets/codelet_zhemm.c | 8 +-- runtime/starpu/codelets/codelet_zher2k.c | 4 +- runtime/starpu/codelets/codelet_zherk.c | 4 +- runtime/starpu/codelets/codelet_zsymm.c | 8 +-- runtime/starpu/codelets/codelet_zsyr2k.c | 4 +- runtime/starpu/codelets/codelet_zsyrk.c | 8 +-- runtime/starpu/codelets/codelet_ztrmm.c | 8 +-- runtime/starpu/codelets/codelet_ztrsm.c | 8 +-- 24 files changed, 145 insertions(+), 98 deletions(-) create mode 100644 coreblas/compute/core_zaxpy.c diff --git a/coreblas/CMakeLists.txt b/coreblas/CMakeLists.txt index 80917850f..ede4ac243 100644 --- a/coreblas/CMakeLists.txt +++ b/coreblas/CMakeLists.txt @@ -28,6 +28,7 @@ add_subdirectory(include) add_subdirectory(compute) +add_subdirectory(eztrace_module) ### ### END CMakeLists.txt diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index 17f35edc0..dc0f6f519 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -31,6 +31,7 @@ 
set(COREBLAS_SRCS_GENERATED "") set(ZSRC core_dzasum.c + core_zaxpy.c core_zgeadd.c core_zgelqt.c core_zgemm.c diff --git a/coreblas/compute/core_zaxpy.c b/coreblas/compute/core_zaxpy.c new file mode 100644 index 000000000..373ff859a --- /dev/null +++ b/coreblas/compute/core_zaxpy.c @@ -0,0 +1,81 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2015 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file core_zaxpy.c + * + * @author Florent Pruvost + * @date 2015-09-15 + * @precisions normal z -> c d s + * + **/ +#include "coreblas/include/coreblas.h" + +/***************************************************************************//** + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_zaxpy adds two vectors together. + * + * B <- alpha * A + B + * + ******************************************************************************* + * + * @param[in] M + * Number of rows of the vectors A and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Vector of size M. + * + * @param[in] incA + * Offset. incA > 0 + * + * @param[in,out] B + * Vector of size M. + * + * @param[in] incB + * Offset. 
incB > 0 + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ + +int CORE_zaxpy(int M, MORSE_Complex64_t alpha, + const MORSE_Complex64_t *A, int incA, + MORSE_Complex64_t *B, int incB) +{ + if (M < 0) { + coreblas_error(1, "Illegal value of M"); + return -1; + } + if (incA < 0) { + coreblas_error(4, "Illegal value of incA"); + return -4; + } + if (incB < 0) { + coreblas_error(6, "Illegal value of incB"); + return -6; + } + + cblas_zaxpy(M, CBLAS_SADDR(alpha), A, incA, B, incB); + + return MORSE_SUCCESS; +} + + + diff --git a/coreblas/include/coreblas_z.h b/coreblas/include/coreblas_z.h index d972b93a1..8b8c993a6 100644 --- a/coreblas/include/coreblas_z.h +++ b/coreblas/include/coreblas_z.h @@ -55,6 +55,9 @@ int CORE_zgbrce(MORSE_enum uplo, int N, int CORE_zgblrx(MORSE_enum uplo, int N, MORSE_desc_t *A, MORSE_Complex64_t *V, MORSE_Complex64_t *TAU, int st, int ed, int eltsize); +int CORE_zaxpy(int M, MORSE_Complex64_t alpha, + const MORSE_Complex64_t *A, int incA, + MORSE_Complex64_t *B, int incB); int CORE_zgeadd(int M, int N, MORSE_Complex64_t alpha, const MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); diff --git a/runtime/quark/codelets/codelet_zaxpy.c b/runtime/quark/codelets/codelet_zaxpy.c index 1135660cb..1fd7c69f5 100644 --- a/runtime/quark/codelets/codelet_zaxpy.c +++ b/runtime/quark/codelets/codelet_zaxpy.c @@ -54,5 +54,5 @@ void CORE_zaxpy_quark(Quark *quark) int incB; quark_unpack_args_6(quark, M, alpha, A, incA, B, incB); - cblas_zaxpy(M, CBLAS_SADDR(alpha), A, incA, B, incB); + CORE_zaxpy(M, alpha, A, incA, B, incB); } diff --git a/runtime/quark/codelets/codelet_zgemm.c b/runtime/quark/codelets/codelet_zgemm.c index 29c8a9cac..1ffa8c0e0 100644 --- a/runtime/quark/codelets/codelet_zgemm.c +++ 
b/runtime/quark/codelets/codelet_zgemm.c @@ -81,13 +81,11 @@ void CORE_zgemm_quark(Quark *quark) int ldc; quark_unpack_args_13(quark, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - cblas_zgemm( - CblasColMajor, - (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, + CORE_zgemm(transA, transB, m, n, k, - CBLAS_SADDR(alpha), A, lda, + alpha, A, lda, B, ldb, - CBLAS_SADDR(beta), C, ldc); + beta, C, ldc); } diff --git a/runtime/quark/codelets/codelet_zhemm.c b/runtime/quark/codelets/codelet_zhemm.c index a52b9d82f..90671f8a9 100644 --- a/runtime/quark/codelets/codelet_zhemm.c +++ b/runtime/quark/codelets/codelet_zhemm.c @@ -81,12 +81,10 @@ void CORE_zhemm_quark(Quark *quark) int LDC; quark_unpack_args_12(quark, side, uplo, M, N, alpha, A, LDA, B, LDB, beta, C, LDC); - cblas_zhemm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + CORE_zhemm(side, uplo, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB, - CBLAS_SADDR(beta), C, LDC); + beta, C, LDC); } #endif diff --git a/runtime/quark/codelets/codelet_zher2k.c b/runtime/quark/codelets/codelet_zher2k.c index 7d31f3828..9b01efe74 100644 --- a/runtime/quark/codelets/codelet_zher2k.c +++ b/runtime/quark/codelets/codelet_zher2k.c @@ -81,7 +81,7 @@ void CORE_zher2k_quark(Quark *quark) int ldc; quark_unpack_args_12(quark, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, - n, k, CBLAS_SADDR(alpha), A, lda, B, ldb, beta, C, ldc); + CORE_zher2k(uplo, trans, + n, k, alpha, A, lda, B, ldb, beta, C, ldc); } #endif diff --git a/runtime/quark/codelets/codelet_zherk.c b/runtime/quark/codelets/codelet_zherk.c index 16c6fe9b7..82838b6cd 100644 --- a/runtime/quark/codelets/codelet_zherk.c +++ b/runtime/quark/codelets/codelet_zherk.c @@ -76,9 +76,7 @@ void CORE_zherk_quark(Quark *quark) int ldc; quark_unpack_args_10(quark, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); - cblas_zherk( - CblasColMajor, - (CBLAS_UPLO)uplo, 
(CBLAS_TRANSPOSE)trans, + CORE_zherk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); diff --git a/runtime/quark/codelets/codelet_zsymm.c b/runtime/quark/codelets/codelet_zsymm.c index 9a00b1e78..40bf58963 100644 --- a/runtime/quark/codelets/codelet_zsymm.c +++ b/runtime/quark/codelets/codelet_zsymm.c @@ -78,11 +78,9 @@ void CORE_zsymm_quark(Quark *quark) int LDC; quark_unpack_args_12(quark, side, uplo, M, N, alpha, A, LDA, B, LDB, beta, C, LDC); - cblas_zsymm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + CORE_zsymm(side, uplo, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB, - CBLAS_SADDR(beta), C, LDC); + beta, C, LDC); } diff --git a/runtime/quark/codelets/codelet_zsyr2k.c b/runtime/quark/codelets/codelet_zsyr2k.c index accdff0ef..1567db1e0 100644 --- a/runtime/quark/codelets/codelet_zsyr2k.c +++ b/runtime/quark/codelets/codelet_zsyr2k.c @@ -78,6 +78,6 @@ void CORE_zsyr2k_quark(Quark *quark) int ldc; quark_unpack_args_12(quark, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - cblas_zsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, - n, k, CBLAS_SADDR(alpha), A, lda, B, ldb, CBLAS_SADDR(beta), C, ldc); + CORE_zsyr2k(uplo, trans, + n, k, alpha, A, lda, B, ldb, beta, C, ldc); } diff --git a/runtime/quark/codelets/codelet_zsyrk.c b/runtime/quark/codelets/codelet_zsyrk.c index b9c67b9eb..30ca13f50 100644 --- a/runtime/quark/codelets/codelet_zsyrk.c +++ b/runtime/quark/codelets/codelet_zsyrk.c @@ -73,10 +73,8 @@ void CORE_zsyrk_quark(Quark *quark) int ldc; quark_unpack_args_10(quark, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); - cblas_zsyrk( - CblasColMajor, - (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + CORE_zsyrk(uplo, trans, n, k, - CBLAS_SADDR(alpha), A, lda, - CBLAS_SADDR(beta), C, ldc); + alpha, A, lda, + beta, C, ldc); } diff --git a/runtime/quark/codelets/codelet_ztrmm.c b/runtime/quark/codelets/codelet_ztrmm.c index 284f47e11..21fc8dc25 100644 --- a/runtime/quark/codelets/codelet_ztrmm.c +++ 
b/runtime/quark/codelets/codelet_ztrmm.c @@ -75,12 +75,10 @@ void CORE_ztrmm_quark(Quark *quark) int LDB; quark_unpack_args_11(quark, side, uplo, transA, diag, M, N, alpha, A, LDA, B, LDB); - cblas_ztrmm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, - (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, + CORE_ztrmm(side, uplo, + transA, diag, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB); } diff --git a/runtime/quark/codelets/codelet_ztrsm.c b/runtime/quark/codelets/codelet_ztrsm.c index 59164c2c0..b14ed16e3 100644 --- a/runtime/quark/codelets/codelet_ztrsm.c +++ b/runtime/quark/codelets/codelet_ztrsm.c @@ -75,11 +75,9 @@ void CORE_ztrsm_quark(Quark *quark) int ldb; quark_unpack_args_11(quark, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); - cblas_ztrsm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, - (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, + CORE_ztrsm(side, uplo, + transA, diag, m, n, - CBLAS_SADDR(alpha), A, lda, + alpha, A, lda, B, ldb); } diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index ff26364b5..75251e5fe 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -35,17 +35,17 @@ void MORSE_TASK_zaxpy(MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) || morse_desc_islocal( B, Bm, Bn ) ){ - starpu_insert_task( - codelet, - STARPU_VALUE, &M, sizeof(int), - STARPU_VALUE, alpha, sizeof(MORSE_Complex64_t), - STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), - STARPU_VALUE, &incA, sizeof(int), - STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), - STARPU_VALUE, &incB, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, - 0); + starpu_insert_task( + codelet, + STARPU_VALUE, &M, sizeof(int), + STARPU_VALUE, alpha, sizeof(MORSE_Complex64_t), + STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &incA, sizeof(int), + STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), + 
STARPU_VALUE, &incB, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, + 0); } } @@ -63,18 +63,7 @@ static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB); - cblas_zaxpy(M, CBLAS_SADDR(alpha), A, incA, B, incB); -// printf("\nCBLAS_SADDR(alpha) %e\n",alpha); -// printf("A\n"); -// for(m = 0; m < M; m++) { -// printf("%e ",A[m]); -// } -// printf("\n"); -// printf("B\n"); -// for(m = 0; m < M; m++) { -// printf("%e ",B[m]); -// } -// printf("\n"); + CORE_zaxpy(M, alpha, A, incA, B, incB); } /* diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index c265809e6..e2c3ec9ef 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -120,13 +120,11 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - cblas_zgemm( - CblasColMajor, - (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, + CORE_zgemm(transA, transB, m, n, k, - CBLAS_SADDR(alpha), A, lda, + alpha, A, lda, B, ldb, - CBLAS_SADDR(beta), C, ldc); + beta, C, ldc); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 4c700b698..c1748ca22 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -92,13 +92,11 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - cblas_zhemm( 
- CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + CORE_zhemm(side, uplo, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB, - CBLAS_SADDR(beta), C, LDC); + beta, C, LDC); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index de2b09367..b2b34a90d 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -92,8 +92,8 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, - n, k, CBLAS_SADDR(alpha), A, lda, B, ldb, beta, C, ldc); + CORE_zher2k(uplo, trans, + n, k, alpha, A, lda, B, ldb, beta, C, ldc); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 5d3a7e301..0f8334a74 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -84,9 +84,7 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - cblas_zherk( - CblasColMajor, - (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + CORE_zherk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index dfa54ca0e..8ad1659d9 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -92,13 +92,11 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t 
*)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - cblas_zsymm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + CORE_zsymm(side, uplo, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB, - CBLAS_SADDR(beta), C, LDC); + beta, C, LDC); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 161caa119..71002a104 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -92,8 +92,8 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - cblas_zsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, - n, k, CBLAS_SADDR(alpha), A, lda, B, ldb, CBLAS_SADDR(beta), C, ldc); + CORE_zsyr2k(uplo, trans, + n, k, alpha, A, lda, B, ldb, beta, C, ldc); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 9388048ee..74432a413 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -85,12 +85,10 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - cblas_zsyrk( - CblasColMajor, - (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + CORE_zsyrk(uplo, trans, n, k, - CBLAS_SADDR(alpha), A, lda, - CBLAS_SADDR(beta), C, ldc); + alpha, A, lda, + beta, C, ldc); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index 5fe76f76b..4c9586da5 100644 --- 
a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -87,12 +87,10 @@ static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB); - cblas_ztrmm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, - (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, + CORE_ztrmm(side, uplo, + transA, diag, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB); } diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index c5278d267..36568d03d 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -108,12 +108,10 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); - cblas_ztrsm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, - (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, + CORE_ztrsm(side, uplo, + transA, diag, m, n, - CBLAS_SADDR(alpha), A, lda, + alpha, A, lda, B, ldb); } -- GitLab