diff --git a/coreblas/CMakeLists.txt b/coreblas/CMakeLists.txt index 80917850f1e691dd56207362f303a62f5efbdff4..ede4ac243537dcc0916b0c534df243833e45e550 100644 --- a/coreblas/CMakeLists.txt +++ b/coreblas/CMakeLists.txt @@ -28,6 +28,7 @@ add_subdirectory(include) add_subdirectory(compute) +add_subdirectory(eztrace_module) ### ### END CMakeLists.txt diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index 17f35edc0dd20573f1003b46143eeca2aca00159..dc0f6f519ff23ee14ad0dd83858c74d389bf4982 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -31,6 +31,7 @@ set(COREBLAS_SRCS_GENERATED "") set(ZSRC core_dzasum.c + core_zaxpy.c core_zgeadd.c core_zgelqt.c core_zgemm.c diff --git a/coreblas/compute/core_zaxpy.c b/coreblas/compute/core_zaxpy.c new file mode 100644 index 0000000000000000000000000000000000000000..373ff859a0aaf2e3dc6d420f0b4bad14f02d0462 --- /dev/null +++ b/coreblas/compute/core_zaxpy.c @@ -0,0 +1,81 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2015 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file core_zaxpy.c + * + * @author Florent Pruvost + * @date 2015-09-15 + * @precisions normal z -> c d s + * + **/ +#include "coreblas/include/coreblas.h" + +/***************************************************************************//** + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_zaxpy adds two vectors together. + * + * B <- alpha * A + B + * + ******************************************************************************* + * + * @param[in] M + * Number of rows of the vectors A and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Vector of size M. + * + * @param[in] incA + * Stride between consecutive elements of A. 
incA > 0 + * + * @param[in,out] B + * Vector of size M. + * + * @param[in] incB + * Offset. incB > 0 + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ + +int CORE_zaxpy(int M, MORSE_Complex64_t alpha, + const MORSE_Complex64_t *A, int incA, + MORSE_Complex64_t *B, int incB) +{ + if (M < 0) { + coreblas_error(1, "Illegal value of M"); + return -1; + } + if (incA < 0) { + coreblas_error(5, "Illegal value of incA"); + return -4; + } + if (incB < 0) { + coreblas_error(5, "Illegal value of incB"); + return -6; + } + + cblas_zaxpy(M, CBLAS_SADDR(alpha), A, incA, B, incB); + + return MORSE_SUCCESS; +} + + + diff --git a/coreblas/include/coreblas_z.h b/coreblas/include/coreblas_z.h index d972b93a18e3c1eef04a1f460aefdf0d8468a2d0..8b8c993a69f3be1bd8105589a34782cd89f835ad 100644 --- a/coreblas/include/coreblas_z.h +++ b/coreblas/include/coreblas_z.h @@ -55,6 +55,9 @@ int CORE_zgbrce(MORSE_enum uplo, int N, int CORE_zgblrx(MORSE_enum uplo, int N, MORSE_desc_t *A, MORSE_Complex64_t *V, MORSE_Complex64_t *TAU, int st, int ed, int eltsize); +int CORE_zaxpy(int M, MORSE_Complex64_t alpha, + const MORSE_Complex64_t *A, int incA, + MORSE_Complex64_t *B, int incB); int CORE_zgeadd(int M, int N, MORSE_Complex64_t alpha, const MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); diff --git a/runtime/quark/codelets/codelet_zaxpy.c b/runtime/quark/codelets/codelet_zaxpy.c index 1135660cb75af5d42d53742ba7d99ffaf4e12ce1..1fd7c69f57648f1da24803f2d631bb22132fa18f 100644 --- a/runtime/quark/codelets/codelet_zaxpy.c +++ b/runtime/quark/codelets/codelet_zaxpy.c @@ -54,5 +54,5 @@ void CORE_zaxpy_quark(Quark *quark) int incB; quark_unpack_args_6(quark, M, alpha, A, incA, B, incB); - cblas_zaxpy(M, CBLAS_SADDR(alpha), A, incA, B, incB); + 
CORE_zaxpy(M, alpha, A, incA, B, incB); } diff --git a/runtime/quark/codelets/codelet_zgemm.c b/runtime/quark/codelets/codelet_zgemm.c index 29c8a9cac747d2517bf2c234e9e57451d93b9c64..1ffa8c0e0834ced73b2dc1910e482aaaf5d51594 100644 --- a/runtime/quark/codelets/codelet_zgemm.c +++ b/runtime/quark/codelets/codelet_zgemm.c @@ -81,13 +81,11 @@ void CORE_zgemm_quark(Quark *quark) int ldc; quark_unpack_args_13(quark, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - cblas_zgemm( - CblasColMajor, - (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, + CORE_zgemm(transA, transB, m, n, k, - CBLAS_SADDR(alpha), A, lda, + alpha, A, lda, B, ldb, - CBLAS_SADDR(beta), C, ldc); + beta, C, ldc); } diff --git a/runtime/quark/codelets/codelet_zhemm.c b/runtime/quark/codelets/codelet_zhemm.c index a52b9d82f7a9c0ec8a6342783144b9af9e340261..90671f8a9676c35f71dc86a77965205a86a868ff 100644 --- a/runtime/quark/codelets/codelet_zhemm.c +++ b/runtime/quark/codelets/codelet_zhemm.c @@ -81,12 +81,10 @@ void CORE_zhemm_quark(Quark *quark) int LDC; quark_unpack_args_12(quark, side, uplo, M, N, alpha, A, LDA, B, LDB, beta, C, LDC); - cblas_zhemm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + CORE_zhemm(side, uplo, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB, - CBLAS_SADDR(beta), C, LDC); + beta, C, LDC); } #endif diff --git a/runtime/quark/codelets/codelet_zher2k.c b/runtime/quark/codelets/codelet_zher2k.c index 7d31f382831c0191d140f795bfd325ee97b8616a..9b01efe748a57c1b2ea8f0a670f9d02b6769aa6d 100644 --- a/runtime/quark/codelets/codelet_zher2k.c +++ b/runtime/quark/codelets/codelet_zher2k.c @@ -81,7 +81,7 @@ void CORE_zher2k_quark(Quark *quark) int ldc; quark_unpack_args_12(quark, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, - n, k, CBLAS_SADDR(alpha), A, lda, B, ldb, beta, C, ldc); + CORE_zher2k(uplo, trans, + n, k, alpha, A, lda, B, ldb, beta, C, ldc); } #endif diff --git 
a/runtime/quark/codelets/codelet_zherk.c b/runtime/quark/codelets/codelet_zherk.c index 16c6fe9b769278ddacd8ae1efe4764b388d0580b..82838b6cd9bd777123ac5f6642264d580542740c 100644 --- a/runtime/quark/codelets/codelet_zherk.c +++ b/runtime/quark/codelets/codelet_zherk.c @@ -76,9 +76,7 @@ void CORE_zherk_quark(Quark *quark) int ldc; quark_unpack_args_10(quark, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); - cblas_zherk( - CblasColMajor, - (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + CORE_zherk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); diff --git a/runtime/quark/codelets/codelet_zsymm.c b/runtime/quark/codelets/codelet_zsymm.c index 9a00b1e789d157fabac762e4013c329615fa00e9..40bf589636c35cd559a1b286b59cd25f8242fa41 100644 --- a/runtime/quark/codelets/codelet_zsymm.c +++ b/runtime/quark/codelets/codelet_zsymm.c @@ -78,11 +78,9 @@ void CORE_zsymm_quark(Quark *quark) int LDC; quark_unpack_args_12(quark, side, uplo, M, N, alpha, A, LDA, B, LDB, beta, C, LDC); - cblas_zsymm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + CORE_zsymm(side, uplo, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB, - CBLAS_SADDR(beta), C, LDC); + beta, C, LDC); } diff --git a/runtime/quark/codelets/codelet_zsyr2k.c b/runtime/quark/codelets/codelet_zsyr2k.c index accdff0efbe991094dcc41b38683e9e2aa1091b8..1567db1e04a692b4f93e1daa7a8dfb2b8805c2ef 100644 --- a/runtime/quark/codelets/codelet_zsyr2k.c +++ b/runtime/quark/codelets/codelet_zsyr2k.c @@ -78,6 +78,6 @@ void CORE_zsyr2k_quark(Quark *quark) int ldc; quark_unpack_args_12(quark, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - cblas_zsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, - n, k, CBLAS_SADDR(alpha), A, lda, B, ldb, CBLAS_SADDR(beta), C, ldc); + CORE_zsyr2k(uplo, trans, + n, k, alpha, A, lda, B, ldb, beta, C, ldc); } diff --git a/runtime/quark/codelets/codelet_zsyrk.c b/runtime/quark/codelets/codelet_zsyrk.c index 
b9c67b9ebf5920b5ae17220f16ad2194f41bcab5..30ca13f50bbfdea07da02ce45c989b3305048e00 100644 --- a/runtime/quark/codelets/codelet_zsyrk.c +++ b/runtime/quark/codelets/codelet_zsyrk.c @@ -73,10 +73,8 @@ void CORE_zsyrk_quark(Quark *quark) int ldc; quark_unpack_args_10(quark, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); - cblas_zsyrk( - CblasColMajor, - (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + CORE_zsyrk(uplo, trans, n, k, - CBLAS_SADDR(alpha), A, lda, - CBLAS_SADDR(beta), C, ldc); + alpha, A, lda, + beta, C, ldc); } diff --git a/runtime/quark/codelets/codelet_ztrmm.c b/runtime/quark/codelets/codelet_ztrmm.c index 284f47e11f9d99990c3685139a0b368b0d8651ad..21fc8dc25f2acf9f4d9227f4d8bf8724bbb72c7b 100644 --- a/runtime/quark/codelets/codelet_ztrmm.c +++ b/runtime/quark/codelets/codelet_ztrmm.c @@ -75,12 +75,10 @@ void CORE_ztrmm_quark(Quark *quark) int LDB; quark_unpack_args_11(quark, side, uplo, transA, diag, M, N, alpha, A, LDA, B, LDB); - cblas_ztrmm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, - (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, + CORE_ztrmm(side, uplo, + transA, diag, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB); } diff --git a/runtime/quark/codelets/codelet_ztrsm.c b/runtime/quark/codelets/codelet_ztrsm.c index 59164c2c09cc20551bd7f7ccc781a7b43977e840..b14ed16e3fbbc83e04daeaaf85a6bf6e192d6552 100644 --- a/runtime/quark/codelets/codelet_ztrsm.c +++ b/runtime/quark/codelets/codelet_ztrsm.c @@ -75,11 +75,9 @@ void CORE_ztrsm_quark(Quark *quark) int ldb; quark_unpack_args_11(quark, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); - cblas_ztrsm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, - (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, + CORE_ztrsm(side, uplo, + transA, diag, m, n, - CBLAS_SADDR(alpha), A, lda, + alpha, A, lda, B, ldb); } diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index 
ff26364b55c9d4e3398fef6502b926d166cf2209..75251e5fec761fe1e190a6abc89f07acb19ec5cb 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -35,17 +35,17 @@ void MORSE_TASK_zaxpy(MORSE_option_t *options, if ( morse_desc_islocal( A, Am, An ) || morse_desc_islocal( B, Bm, Bn ) ){ - starpu_insert_task( - codelet, - STARPU_VALUE, &M, sizeof(int), - STARPU_VALUE, alpha, sizeof(MORSE_Complex64_t), - STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), - STARPU_VALUE, &incA, sizeof(int), - STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), - STARPU_VALUE, &incB, sizeof(int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, - 0); + starpu_insert_task( + codelet, + STARPU_VALUE, &M, sizeof(int), + STARPU_VALUE, alpha, sizeof(MORSE_Complex64_t), + STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &incA, sizeof(int), + STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), + STARPU_VALUE, &incB, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, + 0); } } @@ -63,18 +63,7 @@ static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB); - cblas_zaxpy(M, CBLAS_SADDR(alpha), A, incA, B, incB); -// printf("\nCBLAS_SADDR(alpha) %e\n",alpha); -// printf("A\n"); -// for(m = 0; m < M; m++) { -// printf("%e ",A[m]); -// } -// printf("\n"); -// printf("B\n"); -// for(m = 0; m < M; m++) { -// printf("%e ",B[m]); -// } -// printf("\n"); + CORE_zaxpy(M, alpha, A, incA, B, incB); } /* diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index c265809e6bbd68e7f9ff737342247433e6c50a2a..e2c3ec9ef3cdf5f138ada5cd0708f24273d9d21d 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -120,13 +120,11 @@ static void 
cl_zgemm_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - cblas_zgemm( - CblasColMajor, - (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, + CORE_zgemm(transA, transB, m, n, k, - CBLAS_SADDR(alpha), A, lda, + alpha, A, lda, B, ldb, - CBLAS_SADDR(beta), C, ldc); + beta, C, ldc); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 4c700b698e266d7fb036c0b629257f237e7eadd1..c1748ca22cd95976ea850c0b77619b5df49054ac 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -92,13 +92,11 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - cblas_zhemm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + CORE_zhemm(side, uplo, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB, - CBLAS_SADDR(beta), C, LDC); + beta, C, LDC); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index de2b0936730af86e0ac90a33f1e15e75dbfcfff2..b2b34a90d85730ef012b9fc87b6bc5d9536eae47 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -92,8 +92,8 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, - n, k, CBLAS_SADDR(alpha), A, lda, B, 
ldb, beta, C, ldc); + CORE_zher2k(uplo, trans, + n, k, alpha, A, lda, B, ldb, beta, C, ldc); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 5d3a7e301c06c2f2594e62e3e8499413e446a6ab..0f8334a74a7edab5bf7e079d99cdd582bcf1362c 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -84,9 +84,7 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - cblas_zherk( - CblasColMajor, - (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + CORE_zherk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index dfa54ca0ea9105c3fa531fbcacb02f1010d58c42..8ad1659d90d4fefe5f510ca0cf360fd9518aa587 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -92,13 +92,11 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg) B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - cblas_zsymm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + CORE_zsymm(side, uplo, M, N, - CBLAS_SADDR(alpha), A, LDA, + alpha, A, LDA, B, LDB, - CBLAS_SADDR(beta), C, LDC); + beta, C, LDC); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 161caa1192c005796a3ddda10bc7fe11de5422aa..71002a1040ca7dd73147d70f0049661375c10303 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -92,8 +92,8 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg) B = 
(MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - cblas_zsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, - n, k, CBLAS_SADDR(alpha), A, lda, B, ldb, CBLAS_SADDR(beta), C, ldc); + CORE_zsyr2k(uplo, trans, + n, k, alpha, A, lda, B, ldb, beta, C, ldc); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 9388048eea5f66bc8f4e48fc3eabcf877694066d..74432a4138ce70ef5ed838269cf9d5b1ddd971f6 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -85,12 +85,10 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - cblas_zsyrk( - CblasColMajor, - (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + CORE_zsyrk(uplo, trans, n, k, - CBLAS_SADDR(alpha), A, lda, - CBLAS_SADDR(beta), C, ldc); + alpha, A, lda, + beta, C, ldc); } #ifdef CHAMELEON_USE_CUDA diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index 5fe76f76b21d3c6f175f48989336875ee2d56bcf..4c9586da569711559fe374a6f369c74ed2f58de0 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -87,12 +87,10 @@ static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB); - cblas_ztrmm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, - (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, + CORE_ztrmm(side, uplo, + transA, diag, M, N, - CBLAS_SADDR(alpha), 
A, LDA, + alpha, A, LDA, B, LDB); } diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index c5278d2675d055e2c5840983b14953d7b68f6bca..36568d03de0ae16de609c13408ebb16cd8887e35 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -108,12 +108,10 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); - cblas_ztrsm( - CblasColMajor, - (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, - (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, + CORE_ztrsm(side, uplo, + transA, diag, m, n, - CBLAS_SADDR(alpha), A, lda, + alpha, A, lda, B, ldb); }