From 3283225b1bb24dee2757594ccb1abb701a42638a Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Tue, 6 Oct 2020 11:48:40 +0200 Subject: [PATCH] Add BLAS 2 xgemv kernel --- coreblas/compute/CMakeLists.txt | 1 + coreblas/compute/core_zgemv.c | 98 +++++++++++++++ coreblas/compute/core_ztile.c | 13 ++ coreblas/include/coreblas/coreblas_z.h | 4 + coreblas/include/coreblas/coreblas_ztile.h | 1 + include/chameleon/tasks_z.h | 5 + runtime/CMakeLists.txt | 4 + runtime/openmp/codelets/codelet_zgemv.c | 37 ++++++ runtime/parsec/codelets/codelet_zgemv.c | 76 +++++++++++ runtime/quark/codelets/codelet_zgemv.c | 66 ++++++++++ runtime/starpu/codelets/codelet_zcallback.c | 1 + runtime/starpu/codelets/codelet_zgemv.c | 133 ++++++++++++++++++++ runtime/starpu/include/runtime_codelet_z.h | 5 + 13 files changed, 444 insertions(+) create mode 100644 coreblas/compute/core_zgemv.c create mode 100644 runtime/openmp/codelets/codelet_zgemv.c create mode 100644 runtime/parsec/codelets/codelet_zgemv.c create mode 100644 runtime/quark/codelets/codelet_zgemv.c create mode 100644 runtime/starpu/codelets/codelet_zgemv.c diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index dd70cd7c7..f0d4976c2 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -37,6 +37,7 @@ set(ZSRC core_zlascal.c core_zlatm1.c core_zgelqt.c + core_zgemv.c core_zgemm.c core_zgeqrt.c core_zgesplit.c diff --git a/coreblas/compute/core_zgemv.c b/coreblas/compute/core_zgemv.c new file mode 100644 index 000000000..8c25d3dba --- /dev/null +++ b/coreblas/compute/core_zgemv.c @@ -0,0 +1,98 @@ +/** + * + * @file core_zgemv.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon core_zgemv CPU kernel + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> c d s + * + */ +#include "coreblas.h" + +/** + ******************************************************************************* + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * Performs one of the matrix-vector operations + * + * \f[ y = \alpha [op( A ) \times x] + \beta y \f], + * + * where op( A ) is one of: + * \f[ op( A ) = A, \f] + * \f[ op( A ) = A^T, \f] + * \f[ op( A ) = A^H, \f] + * + * alpha and beta are scalars, op(A) an m-by-n matrix, and x and y are two vectors. + * + ******************************************************************************* + * + * @param[in] trans + * - ChamNoTrans: A is not transposed, + * - ChamTrans: A is transposed, + * - ChamConjTrans: A is conjugate transposed. + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in] alpha + * The scalar alpha. + * + * @param[in] A + * An lda-by-n matrix, where only the m-by-n leading entries are references. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[in] x + * X is COMPLEX*16 array, dimension at least + * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + * and at least + * ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + * Before entry, the incremented array X must contain the + * vector x. + * + * @param[in] incX + * On entry, INCX specifies the increment for the elements of + * X. INCX must not be zero. + * + * @param[in] beta + * The scalar beta. + * + * @param[in] y + * Y is COMPLEX*16 array, dimension at least + * ( 1 + ( n - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + * and at least + * ( 1 + ( m - 1 )*abs( INCY ) ) otherwise. + * Before entry, the incremented array Y must contain the vector y with + * beta != 0. On exit, Y is overwritten by the updated vector. + * + * @param[in] incY + * On entry, INCY specifies the increment for the elements of + * Y. INCY must not be zero. + * + */ +void CORE_zgemv( cham_trans_t trans, int M, int N, + CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA, + const CHAMELEON_Complex64_t *x, int incX, + CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *y, int incY ) +{ + cblas_zgemv( + CblasColMajor, (CBLAS_TRANSPOSE)trans, M, N, + CBLAS_SADDR(alpha), A, LDA, + x, incX, + CBLAS_SADDR(beta), y, incY ); +} diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c index 377c47253..f27cebfd2 100644 --- a/coreblas/compute/core_ztile.c +++ b/coreblas/compute/core_ztile.c @@ -83,6 +83,19 @@ TCORE_zgelqt( int M, return CORE_zgelqt( M, N, IB, A->mat, A->ld, T->mat, T->ld, TAU, WORK ); } +void +TCORE_zgemv( cham_trans_t trans, int M, int N, + CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, + const CHAM_tile_t *x, int incX, + CHAMELEON_Complex64_t beta, CHAM_tile_t *y, int incY ) +{ + assert( A->format & CHAMELEON_TILE_FULLRANK ); + assert( x->format & CHAMELEON_TILE_FULLRANK ); + assert( y->format & CHAMELEON_TILE_FULLRANK ); + CORE_zgemv( + trans, M, N, alpha, A->mat, A->ld, x->mat, incX, beta, y->mat, incY ); +} + void TCORE_zgemm( cham_trans_t transA, cham_trans_t transB, diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h index 806337af2..9f317d4e5 100644 --- a/coreblas/include/coreblas/coreblas_z.h +++ b/coreblas/include/coreblas/coreblas_z.h @@ -55,6 +55,10 @@ void CORE_zgemm(cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA, const CHAMELEON_Complex64_t *B, int LDB, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *C, int LDC); +void CORE_zgemv(cham_trans_t trans, int M, int N, + CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA, + const CHAMELEON_Complex64_t *x, int incX, + CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *y, int incY); int CORE_zgeqrt(int M, int N, int IB, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *T, int LDT, diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h index ccb6b83b2..c754393a0 100644 --- a/coreblas/include/coreblas/coreblas_ztile.h +++ b/coreblas/include/coreblas/coreblas_ztile.h @@ -23,6 +23,7 @@ void TCORE_dzasum( cham_store_t storev, cham_uplo_t uplo, int M, int N, const CH int TCORE_zaxpy( int M, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, int incA, CHAM_tile_t *B, int incB ); int TCORE_zgeadd( cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, CHAMELEON_Complex64_t beta, CHAM_tile_t *B ); int TCORE_zgelqt( int M, int N, int IB, CHAM_tile_t *A, CHAM_tile_t *T, CHAMELEON_Complex64_t *TAU, CHAMELEON_Complex64_t *WORK ); +void TCORE_zgemv( cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *x, int incx, CHAMELEON_Complex64_t beta, CHAM_tile_t *y, int incy ); void TCORE_zgemm( cham_trans_t transA, cham_trans_t transB, int M, int N, int K, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, CHAMELEON_Complex64_t beta, CHAM_tile_t *C ); int TCORE_zgeqrt( int M, int N, int IB, CHAM_tile_t *A, CHAM_tile_t *T, CHAMELEON_Complex64_t *TAU, CHAMELEON_Complex64_t *WORK ); int TCORE_zgessm( int M, int N, int K, int IB, const int *IPIV, const CHAM_tile_t *L, CHAM_tile_t *A ); diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index 3cb7c5bc2..a78d30041 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h @@ -53,6 +53,11 @@ void INSERT_TASK_zgelqt( const RUNTIME_option_t *options, int m, int n, int ib, int nb, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *T, int Tm, int Tn ); +void INSERT_TASK_zgemv( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *X, int Xm, int Xn, int incX, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *Y, int Ym, int Yn, int incY ); void INSERT_TASK_zgemm( const RUNTIME_option_t *options, cham_trans_t transA, cham_trans_t transB, int m, int n, int k, int nb, diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 76a0bce72..1caa32fef 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -35,6 +35,10 @@ set(CODELETS_ZSRC ################## codelets/codelet_zaxpy.c ################## + # BLAS 2 + ################## + codelets/codelet_zgemv.c + ################## # BLAS 3 ################## codelets/codelet_zgemm.c diff --git a/runtime/openmp/codelets/codelet_zgemv.c b/runtime/openmp/codelets/codelet_zgemv.c new file mode 100644 index 000000000..c3d70b0b4 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgemv.c @@ -0,0 +1,37 @@ +/** + * + * @file openmp/codelet_zgemv.c + * + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgemv OpenMP codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_ztile.h" + +void +INSERT_TASK_zgemv( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *X, int Xm, int Xn, int incX, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *Y, int Ym, int Yn, int incY ) +{ + CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); + CHAM_tile_t *tileX = X->get_blktile( X, Xm, Xn ); + CHAM_tile_t *tileY = Y->get_blktile( Y, Ym, Yn ); + +#pragma omp task firstprivate( trans, m, n, alpha, tileA, tileX, incX, beta, tileY, incY ) depend( in:tileA[0], tileX[0] ) depend( inout:tileY[0] ) + TCORE_zgemv( trans, m, n, + alpha, tileA, tileX, incX, + beta, tileY, incY ); +} diff --git a/runtime/parsec/codelets/codelet_zgemv.c b/runtime/parsec/codelets/codelet_zgemv.c new file mode 100644 index 000000000..0579743d5 --- /dev/null +++ b/runtime/parsec/codelets/codelet_zgemv.c @@ -0,0 +1,76 @@ +/** + * + * @file parsec/codelet_zgemv.c + * + * @copyright 2009-2015 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgemv PaRSEC codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> c d s + * + */ +#include "chameleon_parsec.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +static inline int +CORE_zgemv_parsec( parsec_execution_stream_t *context, + parsec_task_t *this_task ) +{ + cham_trans_t trans; + int m; + int n; + CHAMELEON_Complex64_t alpha; + CHAMELEON_Complex64_t *A; + int lda; + CHAMELEON_Complex64_t *X; + int incX; + CHAMELEON_Complex64_t beta; + CHAMELEON_Complex64_t *Y; + int incY; + + parsec_dtd_unpack_args( + this_task, &trans, &m, &n, &alpha, &A, &lda, &X, &incX, &beta, &Y, &incY ); + + CORE_zgemv( trans, m, n, + alpha, A, lda, + X, incX, + beta, Y, incY ); + + (void)context; + return PARSEC_HOOK_RETURN_DONE; +} + +void +INSERT_TASK_zgemv( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *X, int Xm, int Xn, int incX, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *Y, int Ym, int Yn, int incY ) +{ + parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); + CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); + + parsec_dtd_taskpool_insert_task( + PARSEC_dtd_taskpool, CORE_zgemv_parsec, options->priority, "zgemv", + sizeof(cham_trans_t), &trans, VALUE, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(CHAMELEON_Complex64_t), &alpha, VALUE, + PASSED_BY_REF, RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT, + sizeof(int), &(tileA->ld), VALUE, + PASSED_BY_REF, RTBLKADDR( X, CHAMELEON_Complex64_t, Xm, Xn ), chameleon_parsec_get_arena_index( X ) | INPUT, + sizeof(int), &incX, VALUE, + sizeof(CHAMELEON_Complex64_t), &beta, VALUE, + PASSED_BY_REF, RTBLKADDR( Y, CHAMELEON_Complex64_t, Ym, Yn ), chameleon_parsec_get_arena_index( Y ) | INOUT | AFFINITY, + sizeof(int), &incY, VALUE, + PARSEC_DTD_ARG_END ); +} diff --git a/runtime/quark/codelets/codelet_zgemv.c b/runtime/quark/codelets/codelet_zgemv.c new file mode 100644 index 000000000..4dcde9bf5 --- /dev/null +++ b/runtime/quark/codelets/codelet_zgemv.c @@ -0,0 +1,66 @@ +/** + * + * @file quark/codelet_zgemv.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgemv Quark codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> c d s + * + */ +#include "chameleon_quark.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_ztile.h" + +void CORE_zgemv_quark(Quark *quark) +{ + cham_trans_t trans; + int m; + int n; + CHAMELEON_Complex64_t alpha; + CHAM_tile_t *tileA; + CHAM_tile_t *tileX; + int incX; + CHAMELEON_Complex64_t beta; + CHAM_tile_t *tileY; + int incY; + + quark_unpack_args_10( quark, trans, m, n, alpha, tileA, tileX, incX, beta, tileY, incY ); + TCORE_zgemv( trans, m, n, + alpha, tileA, tileX, incX, + beta, tileY, incY ); +} + +void INSERT_TASK_zgemv( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *X, int Xm, int Xn, int incX, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *Y, int Ym, int Yn, int incY ) +{ + quark_option_t *opt = (quark_option_t*)(options->schedopt); + int accessY = ( beta == 0. ) ? OUTPUT : INOUT; + + /* DAG_CORE_GEMV; */ + QUARK_Insert_Task( + opt->quark, CORE_zgemv_quark, (Quark_Task_Flags*)opt, + sizeof(cham_trans_t), &trans, VALUE, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(CHAMELEON_Complex64_t), &alpha, VALUE, + sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, + sizeof(void*), RTBLKADDR(X, CHAMELEON_Complex64_t, Xm, Xn), INPUT, + sizeof(int), &incX, VALUE, + sizeof(CHAMELEON_Complex64_t), &beta, VALUE, + sizeof(void*), RTBLKADDR(Y, CHAMELEON_Complex64_t, Ym, Yn), accessY, + sizeof(int), &incY, VALUE, + 0); +} diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c index ef24f40dc..cb381cfc1 100644 --- a/runtime/starpu/codelets/codelet_zcallback.c +++ b/runtime/starpu/codelets/codelet_zcallback.c @@ -30,6 +30,7 @@ CHAMELEON_CL_CB(zaxpy, cti_handle_get_m(task->handles[0]), cti_handle_ge CHAMELEON_CL_CB(zgeadd, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N) CHAMELEON_CL_CB(zlascal, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N) CHAMELEON_CL_CB(zgelqt, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, (4./3.)*M*N*K) +CHAMELEON_CL_CB(zgemv, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 2. *M*N ) CHAMELEON_CL_CB(zgemm, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */ CHAMELEON_CL_CB(zgeqrt, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, (4./3.)*M*M*N) CHAMELEON_CL_CB(zgessm, cti_handle_get_m(task->handles[2]), cti_handle_get_m(task->handles[2]), cti_handle_get_m(task->handles[2]), 2. *M*N*K) diff --git a/runtime/starpu/codelets/codelet_zgemv.c b/runtime/starpu/codelets/codelet_zgemv.c new file mode 100644 index 000000000..62b887247 --- /dev/null +++ b/runtime/starpu/codelets/codelet_zgemv.c @@ -0,0 +1,133 @@ +/** + * + * @file starpu/codelet_zgemv.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgemv StarPU codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> c d s + * + */ +#include "chameleon_starpu.h" +#include "runtime_codelet_z.h" + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgemv_cpu_func(void *descr[], void *cl_arg) +{ + cham_trans_t trans; + int m; + int n; + CHAMELEON_Complex64_t alpha; + CHAM_tile_t *tileA; + CHAM_tile_t *tileX; + int incX; + CHAMELEON_Complex64_t beta; + CHAM_tile_t *tileY; + int incY; + + tileA = cti_interface_get(descr[0]); + tileX = cti_interface_get(descr[1]); + tileY = cti_interface_get(descr[2]); + + starpu_codelet_unpack_args(cl_arg, &trans, &m, &n, &alpha, &incX, &beta, &incY ); + TCORE_zgemv( trans, m, n, + alpha, tileA, tileX, incX, + beta, tileY, incY ); +} + +#if defined(CHAMELEON_USE_CUDA) & 0 +static void cl_zgemv_cuda_func(void *descr[], void *cl_arg) +{ + cham_trans_t transA; + cham_trans_t transB; + int m; + int n; + int k; + cuDoubleComplex alpha; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + cuDoubleComplex beta; + CHAM_tile_t *tileC; + + tileA = cti_interface_get(descr[0]); + tileB = cti_interface_get(descr[1]); + tileC = cti_interface_get(descr[2]); + + starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta); + + RUNTIME_getStream( stream ); + + CUDA_zgemv( + transA, transB, + m, n, k, + &alpha, tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + &beta, tileC->mat, tileC->ld, + stream); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); +#endif + + return; +} +#endif /* defined(CHAMELEON_USE_CUDA) */ +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgemv, cl_zgemv_cpu_func) + +/** + * + * @ingroup INSERT_TASK_Complex64_t + * + */ +void INSERT_TASK_zgemv( const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *X, int Xm, int Xn, int incX, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *Y, int Ym, int Yn, int incY ) +{ + struct starpu_codelet *codelet = &cl_zgemv; + void (*callback)(void*) = options->profiling ? cl_zgemv_callback : NULL; + starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); + int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessY = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_R(X, Xm, Xn); + CHAMELEON_ACCESS_RW(Y, Ym, Yn); + CHAMELEON_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &trans, sizeof(cham_trans_t), + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_R, RTBLKADDR(X, CHAMELEON_Complex64_t, Xm, Xn), + STARPU_VALUE, &incX, sizeof(int), + STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + accessY, RTBLKADDR(Y, CHAMELEON_Complex64_t, Ym, Yn), + STARPU_VALUE, &incY, sizeof(int), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, + STARPU_EXECUTE_ON_WORKER, workerid, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zgemv", +#endif + 0); +} diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index 95bc484a4..e5ed6704c 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -39,6 +39,11 @@ */ CODELETS_HEADER(zaxpy); +/* + * BLAS 2 functions + */ +CODELETS_HEADER(zgemv); + /* * BLAS 3 functions */ -- GitLab