Commit 7034ee8f authored by Mathieu Faverge

Add geadd GPU kernel

parent a39e03bc
......@@ -48,6 +48,13 @@ set(ZSRC
cuda_zunmqrt.c
)
# cuda_zgeadd.c is added to the source list only when CHAMELEON_USE_CUBLAS_V2 is set.
if( CHAMELEON_USE_CUBLAS_V2 )
  set(ZSRC ${ZSRC} cuda_zgeadd.c)
endif()
if( CHAMELEON_USE_MAGMA )
set(ZSRC
${ZSRC}
......
......@@ -29,21 +29,20 @@
#error "This file requires cublas api v2 support"
#endif
int CUDA_zgeadd(MORSE_enum transa, MORSE_enum transb,
int m, int n, int k,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
cuDoubleComplex *beta,
cuDoubleComplex *C, int ldc,
CUBLAS_STREAM_PARAM)
int CUDA_zgeadd(MORSE_enum trans,
int m, int n,
const cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *beta,
cuDoubleComplex *B, int ldb,
CUBLAS_STREAM_PARAM)
{
cublasZgeam(CUBLAS_HANDLE
morse_cublas_const(transa), morse_cublas_const(transb),
m, n, k,
morse_cublas_const(trans), morse_cublas_const(MorseNoTrans),
m, n,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
CUBLAS_VALUE(beta), C, ldc);
CUBLAS_VALUE(beta), B, ldb,
B, ldb);
assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
......
This diff is collapsed.
......@@ -137,9 +137,47 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
return;
}
#ifdef CHAMELEON_USE_CUBLAS_V2
static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum trans;
int M;
int N;
cuDoubleComplex alpha;
const cuDoubleComplex *A;
int lda;
cuDoubleComplex beta;
cuDoubleComplex *B;
int ldb;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb);
RUNTIME_getStream( stream );
CUDA_zgeadd(
trans,
M, N,
&alpha, A, lda,
&beta, B, ldb,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
#if !defined(CHAMELEON_USE_CUBLAS_V2)
/* CHAMELEON_USE_CUBLAS_V2 not set: define the codelet with the CPU function only. */
CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
#else
/* CHAMELEON_USE_CUBLAS_V2 set: define the codelet with both the CPU and CUDA functions. */
CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment