Commit f08137a8 authored by Mathieu Faverge's avatar Mathieu Faverge

Merge branch 'cuda/geadd' into 'master'

CUDA geadd kernel

Closes #22

See merge request !39
parents fd7564f2 959850d6
......@@ -48,6 +48,13 @@ set(ZSRC
cuda_zunmqrt.c
)
# cuda_zgeadd.c refuses to compile (#error) unless CHAMELEON_USE_CUBLAS_V2 is
# defined, so it is appended to the source list only when that option is set.
if( CHAMELEON_USE_CUBLAS_V2 )
set(ZSRC
${ZSRC}
cuda_zgeadd.c
)
endif( CHAMELEON_USE_CUBLAS_V2 )
if( CHAMELEON_USE_MAGMA )
set(ZSRC
${ZSRC}
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_zgeadd.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Mathieu Faverge
* @date 2015-09-17
* @precisions normal z -> c d s
*
**/
#include "cudablas/include/cudablas.h"
#include "cudablas/include/cudablas_z.h"
#if !defined(CHAMELEON_USE_CUBLAS_V2)
#error "This file requires cublas api v2 support"
#endif
/**
 ******************************************************************************
 *
 * @ingroup CUDA_MORSE_Complex64_t
 *
 *  CUDA_zgeadd adds two matrices together as in PBLAS pzgeadd.
 *
 *       B <- alpha * op(A) + beta * B,
 *
 *  where op(X) = X, X', or conj(X')
 *
 *  The operation is delegated to cublasZgeam, with B passed both as the
 *  second input operand and as the output matrix.
 *
 *******************************************************************************
 *
 * @param[in] trans
 *          Specifies whether the matrix A is non-transposed, transposed, or
 *          conjugate transposed
 *          = MorseNoTrans:   op(A) = A
 *          = MorseTrans:     op(A) = A'
 *          = MorseConjTrans: op(A) = conj(A')
 *
 * @param[in] m
 *          Number of rows of the matrices op(A) and B.
 *
 * @param[in] n
 *          Number of columns of the matrices op(A) and B.
 *
 * @param[in] alpha
 *          Pointer to the scalar factor of A.
 *
 * @param[in] A
 *          Matrix of size LDA-by-N, if trans = MorseNoTrans, LDA-by-M
 *          otherwise.
 *
 * @param[in] lda
 *          Leading dimension of the array A. LDA >= max(1,k), with k=M, if
 *          trans = MorseNoTrans, and k=N otherwise.
 *
 * @param[in] beta
 *          Pointer to the scalar factor of B.
 *
 * @param[in,out] B
 *          Matrix of size LDB-by-N.
 *          On exit, B = alpha * op(A) + beta * B
 *
 * @param[in] ldb
 *          Leading dimension of the array B. LDB >= max(1,M)
 *
 *******************************************************************************
 *
 * @return
 *          \retval MORSE_SUCCESS always. The arguments are not validated by
 *          this wrapper; a cuBLAS failure is caught by an assert() on the
 *          status returned by cublasZgeam, which is active only when NDEBUG
 *          is not defined.
 *
 ******************************************************************************/
int CUDA_zgeadd(MORSE_enum trans,
                int m, int n,
                const cuDoubleComplex *alpha,
                const cuDoubleComplex *A, int lda,
                const cuDoubleComplex *beta,
                cuDoubleComplex *B, int ldb,
                CUBLAS_STREAM_PARAM)
{
    cublasStatus_t status;

    /*
     * B is given twice on purpose: once as the second operand (never
     * transposed) and once as the destination, so the sum overwrites B.
     */
    status = cublasZgeam(CUBLAS_HANDLE
                         morse_cublas_const(trans), morse_cublas_const(MorseNoTrans),
                         m, n,
                         CUBLAS_VALUE(alpha), A, lda,
                         CUBLAS_VALUE(beta),  B, ldb,
                         B, ldb);

    /*
     * With the v2 API the status is the return value of the call itself;
     * check that one rather than the legacy cublasGetError() register.
     */
    assert( CUBLAS_STATUS_SUCCESS == status );
    (void)status; /* silences the unused-variable warning when NDEBUG is set */

    return MORSE_SUCCESS;
}
This diff is collapsed.
......@@ -137,9 +137,47 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
return;
}
#ifdef CHAMELEON_USE_CUBLAS_V2
/*
 * CUDA implementation of the zgeadd codelet.
 *
 * descr[0] provides the tile A and descr[1] the tile B; the remaining
 * arguments are unpacked from cl_arg and forwarded to CUDA_zgeadd().
 */
static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
{
    /* Scalar arguments, filled in by starpu_codelet_unpack_args() below. */
    MORSE_enum trans;
    int M, N;
    int lda, ldb;
    cuDoubleComplex alpha, beta;

    /* Tile pointers extracted from the two StarPU matrix interfaces. */
    const cuDoubleComplex *A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
    cuDoubleComplex       *B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);

    /* Presumably this order has to mirror the packing order used at task
     * insertion time - verify against the caller. */
    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb);

    /* `stream` is not declared in this function, so the macro is expected to
     * declare it.
     * NOTE(review): it is handed both to CUDA_zgeadd() and to
     * cudaStreamSynchronize(); confirm its type suits both uses. */
    RUNTIME_getStream( stream );

    CUDA_zgeadd(trans, M, N, &alpha, A, lda, &beta, B, ldb, stream);

    /* Wait for completion here only when STARPU_CUDA_ASYNC is not defined. */
#ifndef STARPU_CUDA_ASYNC
    cudaStreamSynchronize( stream );
#endif
}
#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
 * Codelet definition
 *
 * cl_zgeadd_cuda_func is only compiled when CHAMELEON_USE_CUBLAS_V2 is
 * defined, so the CUDA variant is registered in that configuration only;
 * otherwise the codelet is declared with its CPU implementation alone.
 * NOTE(review): the literal 2 is presumably the number of data buffers
 * (A and B) - confirm against the CODELETS macro definition.
 */
#if defined(CHAMELEON_USE_CUBLAS_V2)
CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
#else
CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment