Commit ca3d1e6f authored by PRUVOST Florent's avatar PRUVOST Florent

add cudablas library to make calls to cuda kernels (cublas here), no more...

add cudablas library to make calls to cuda kernels (cublas here), no more calls to cublas in runtime/starpu codelets
parent 9191230c
......@@ -28,7 +28,7 @@
add_subdirectory(include)
add_subdirectory(compute)
#add_subdirectory(eztrace_module)
add_subdirectory(eztrace_module)
###
### END CMakeLists.txt
......
......@@ -28,14 +28,23 @@
set(CUDABLAS_SRCS_GENERATED "")
set(ZSRC
cuda_zgelqt.c
#cuda_zgemerge.c
cuda_zgemerge.c
cuda_zgemm.c
cuda_zgeqrt.c
cuda_zgessm.c
cuda_zgetrf.c
cuda_zhemm.c
cuda_zher2k.c
cuda_zherk.c
cuda_zlauum.c
#cuda_zparfb.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_zsymm.c
cuda_zsyr2k.c
cuda_zsyrk.c
cuda_ztrmm.c
cuda_ztrsm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsmlq.c
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2015 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_zgemm.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-17
* @precisions normal z -> c d s
*
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUBLAS_V2)
int CUDA_zgemm_V2(
MORSE_enum transa, MORSE_enum transb,
int m, int n, int k,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
cuDoubleComplex *beta,
cuDoubleComplex *C, int ldc,
CUstream stream)
{
cublasHandle_t handle;
cublasStatus_t stat;
cublasOperation_t cublasTransA;
cublasOperation_t cublasTransB;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
if (transa == MorseNoTrans){
cublasTransA = CUBLAS_OP_N;
}else if(transa == MorseTrans){
cublasTransA = CUBLAS_OP_T;
}else if(transa == MorseConjTrans){
cublasTransA = CUBLAS_OP_C;
}else{
fprintf(stderr, "Error in cl_zgemm_cuda_func: bad transA parameter %d\n", transa);
}
if (transb == MorseNoTrans){
cublasTransB = CUBLAS_OP_N;
}else if(transb == MorseTrans){
cublasTransB = CUBLAS_OP_T;
}else if(transb == MorseConjTrans){
cublasTransB = CUBLAS_OP_C;
}else{
fprintf(stderr, "Error in cl_zgemm_cuda_func: bad transB parameter %d\n", transb);
}
stat = cublasZgemm(handle,
cublasTransA, cublasTransB,
m, n, k,
(const cuDoubleComplex *) alpha, A, lda,
B, ldb,
(const cuDoubleComplex *) beta, C, ldc);
if (stat != CUBLAS_STATUS_SUCCESS){
printf ("cublasZgemm failed");
cublasDestroy(handle);
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasDestroy(handle);
return MORSE_SUCCESS;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
int CUDA_zgemm(
int transa, int transb,
int m, int n, int k,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
cuDoubleComplex *beta,
cuDoubleComplex *C, int ldc,
CUstream stream
)
{
cublasSetKernelStream( stream );
cublasZgemm(
morse_lapack_const(transa), morse_lapack_const(transb),
m, n, k,
*alpha, A, lda,
B, ldb,
*beta, C, ldc);
assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
return MORSE_SUCCESS;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif
......@@ -179,114 +179,4 @@ int CUDA_zgeqrt(
return MORSE_SUCCESS;
}
#if defined(CHAMELEON_USE_CUBLAS_V2)
int CUDA_zgemerge(
magma_side_t side, magma_diag_t diag,
magma_int_t M, magma_int_t N,
magmaDoubleComplex *A, magma_int_t LDA,
magmaDoubleComplex *B, magma_int_t LDB,
CUstream stream)
{
int i, j;
magmaDoubleComplex *cola, *colb;
cublasHandle_t handle;
cublasStatus_t stat;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
if (M < 0) {
return -1;
}
if (N < 0) {
return -2;
}
if ( (LDA < max(1,M)) && (M > 0) ) {
return -5;
}
if ( (LDB < max(1,M)) && (M > 0) ) {
return -7;
}
if (side == MagmaLeft){
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(handle, i+1, cola, 1, colb, 1);
cudaMemcpyAsync(colb , cola,
(i+1)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
}
}else{
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(handle, M-i, cola + i, 1, colb + i, 1);
cudaMemcpyAsync(colb+i , cola+i,
(M-i)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
}
}
cublasDestroy(handle);
return MORSE_SUCCESS;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
int CUDA_zgemerge(
magma_side_t side, magma_diag_t diag,
magma_int_t M, magma_int_t N,
magmaDoubleComplex *A, magma_int_t LDA,
magmaDoubleComplex *B, magma_int_t LDB,
CUstream stream)
{
int i, j;
magmaDoubleComplex *cola, *colb;
if (M < 0) {
return -1;
}
if (N < 0) {
return -2;
}
if ( (LDA < max(1,M)) && (M > 0) ) {
return -5;
}
if ( (LDB < max(1,M)) && (M > 0) ) {
return -7;
}
if (side == MagmaLeft){
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(i+1, cola, 1, colb, 1);
cudaMemcpyAsync(colb , cola,
(i+1)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
}
}else{
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(M-i, cola + i, 1, colb + i, 1);
cudaMemcpyAsync(colb+i , cola+i,
(M-i)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
}
}
return MORSE_SUCCESS;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2015 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_zhemm.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-17
* @precisions normal z -> c
*
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUBLAS_V2)
int CUDA_zhemm_V2(
MORSE_enum side, MORSE_enum uplo,
int m, int n,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
cuDoubleComplex *beta,
cuDoubleComplex *C, int ldc,
CUstream stream)
{
cublasHandle_t handle;
cublasStatus_t stat;
cublasSideMode_t cublasSide;
cublasFillMode_t cublasUplo;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
if (side == MorseLeft){
cublasSide = CUBLAS_SIDE_LEFT;
}else if (side == MorseRight){
cublasSide = CUBLAS_SIDE_RIGHT;
}else{
fprintf(stderr, "Error in cl_zhemm_cuda_func: bad side parameter %d\n", side);
}
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
cublasUplo = CUBLAS_FILL_MODE_LOWER;
}else if(uplo == MorseUpperLower){
cublasUplo = 0;
}else{
fprintf(stderr, "Error in cl_zhemm_cuda_func: bad uplo parameter %d\n", uplo);
}
stat = cublasZhemm(handle,
cublasSide, cublasUplo,
m, n,
(const cuDoubleComplex *) alpha, A, lda,
B, ldb,
(const cuDoubleComplex *) beta, C, ldc);
if (stat != CUBLAS_STATUS_SUCCESS){
printf ("cublasZhemm failed");
cublasDestroy(handle);
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasDestroy(handle);
return MORSE_SUCCESS;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
int CUDA_zhemm(
MORSE_enum side, MORSE_enum uplo,
int m, int n,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
cuDoubleComplex *beta,
cuDoubleComplex *C, int ldc,
CUstream stream)
{
cublasSetKernelStream( stream );
cublasZhemm(
morse_lapack_const(side), morse_lapack_const(uplo),
m, n,
*alpha, A, lda,
B, ldb,
*beta, C, ldc);
assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
return MORSE_SUCCESS;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2015 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_zher2k.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-17
* @precisions normal z -> c
*
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUBLAS_V2)
int CUDA_zher2k_V2(
MORSE_enum uplo, MORSE_enum trans,
int n, int k,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
double *beta,
cuDoubleComplex *C, int ldc,
CUstream stream)
{
cublasHandle_t handle;
cublasStatus_t stat;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTrans;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
cublasUplo = CUBLAS_FILL_MODE_LOWER;
}else if(uplo == MorseUpperLower){
cublasUplo = 0;
}else{
fprintf(stderr, "Error in cl_zher2k_cuda_func: bad uplo parameter %d\n", uplo);
}
if (trans == MorseNoTrans){
cublasTrans = CUBLAS_OP_N;
}else if(trans == MorseTrans){
cublasTrans = CUBLAS_OP_T;
}else if(trans == MorseConjTrans){
cublasTrans = CUBLAS_OP_C;
}else{
fprintf(stderr, "Error in cl_zher2k_cuda_func: bad trans parameter %d\n", trans);
}
stat = cublasZher2k( handle, cublasUplo, cublasTrans,
n, k, (const cuDoubleComplex *) alpha, A, lda, B, ldb,
(const double *) beta, C, ldc);
if (stat != CUBLAS_STATUS_SUCCESS){
printf ("cublasZher2k failed");
cublasDestroy(handle);
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasDestroy(handle);
return MORSE_SUCCESS;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
int CUDA_zher2k(
MORSE_enum uplo, MORSE_enum trans,
int n, int k,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
double *beta,
cuDoubleComplex *C, int ldc,
CUstream stream)
{
cublasSetKernelStream( stream );
cublasZher2k(
morse_lapack_const(uplo), morse_lapack_const(trans),
n, k,
*alpha, A, lda,
B, ldb,
*beta, C, ldc);
assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
return MORSE_SUCCESS;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2015 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_zherk.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-17
* @precisions normal z -> c
*
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUBLAS_V2)
int CUDA_zherk_V2(
MORSE_enum uplo, MORSE_enum trans,
int n, int k,
double *alpha,
const cuDoubleComplex *A, int lda,
double *beta,
cuDoubleComplex *B, int ldb,
CUstream stream)
{
cublasHandle_t handle;
cublasStatus_t stat;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTrans;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
cublasUplo = CUBLAS_FILL_MODE_LOWER;
}else if(uplo == MorseUpperLower){
cublasUplo = 0;
}else{
fprintf(stderr, "Error in cl_zherk_cuda_func: bad uplo parameter %d\n", uplo);
}
if (trans == MorseNoTrans){
cublasTrans = CUBLAS_OP_N;
}else if(trans == MorseTrans){
cublasTrans = CUBLAS_OP_T;
}else if(trans == MorseConjTrans){
cublasTrans = CUBLAS_OP_C;
}else{
fprintf(stderr, "Error in cl_zherk_cuda_func: bad trans parameter %d\n", trans);
}
stat = cublasZherk(handle,
cublasUplo, cublasTrans,
n, k,
(const double *) &alpha, A, lda,
(const double *) &beta, B, ldb);
if (stat != CUBLAS_STATUS_SUCCESS){
printf ("cublasZherk failed");
cublasDestroy(handle);
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasDestroy(handle);
return MORSE_SUCCESS;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
int CUDA_zherk(
MORSE_enum uplo, MORSE_enum trans,
int n, int k,
double *alpha,
const cuDoubleComplex *A, int lda,
double *beta,
cuDoubleComplex *B, int ldb,
CUstream stream)
{
cublasSetKernelStream( stream );
cublasZherk(
morse_lapack_const(uplo), morse_lapack_const(trans),
n, k,
*alpha, A, lda,
*beta, B, ldb);
assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
return MORSE_SUCCESS;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2015 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_zsymm.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-17
* @precisions normal z -> c d s
*
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUBLAS_V2)
int CUDA_zsymm_V2(
MORSE_enum side, MORSE_enum uplo,
int m, int n,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
cuDoubleComplex *beta,
cuDoubleComplex *C, int ldc,
CUstream stream)
{
cublasHandle_t handle;
cublasStatus_t stat;
cublasSideMode_t cublasSide;
cublasFillMode_t cublasUplo;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
if (side == MorseLeft){
cublasSide = CUBLAS_SIDE_LEFT;
}else if (side == MorseRight){
cublasSide = CUBLAS_SIDE_RIGHT;
}else{
fprintf(stderr, "Error in cl_zsymm_cuda_func: bad side parameter %d\n", side);
}
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
cublasUplo = CUBLAS_FILL_MODE_LOWER;
}else if(uplo == MorseUpperLower){
cublasUplo = 0;
}else{
fprintf(stderr, "Error in cl_zsymm_cuda_func: bad uplo parameter %d\n", uplo);
}
stat = cublasZsymm(handle,
cublasSide, cublasUplo,
m, n,
(const cuDoubleComplex *) alpha, A, lda,
B, ldb,
(const cuDoubleComplex *) beta, C, ldc);
if (stat != CUBLAS_STATUS_SUCCESS){
printf ("cublasZsymm failed");
cublasDestroy(handle);
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasDestroy(handle);
return MORSE_SUCCESS;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
int CUDA_zsymm(
MORSE_enum side, MORSE_enum uplo,
int m, int n,
cuDoubleComplex *alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
cuDoubleComplex *beta,
cuDoubleComplex *C, int ldc,
CUstream stream)
{
cublasSetKernelStream( stream );
cublasZsymm(
morse_lapack_const(side), morse_lapack_const(uplo),
m, n,
*alpha, A, lda,
B, ldb,
*beta, C, ldc);
assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
return MORSE_SUCCESS;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif