Commit c0294044 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Remove V2 since it is integrated in the prototype

parent e0d0c8af
......@@ -131,7 +131,6 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum transA;
......@@ -156,45 +155,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
stream = starpu_cuda_get_local_stream();
CUDA_zgemm_V2(
transA, transB,
m, n, k,
&alpha, A, lda,
B, ldb,
&beta, C, ldc,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum transA;
MORSE_enum transB;
int m;
int n;
int k;
cuDoubleComplex alpha;
cuDoubleComplex *A;
int lda;
cuDoubleComplex *B;
int ldb;
cuDoubleComplex beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
stream = starpu_cuda_get_local_stream();
CUDA_zgemm(
transA, transB,
m, n, k,
......@@ -209,7 +169,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -101,7 +101,6 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -125,46 +124,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
stream = starpu_cuda_get_local_stream();
CUDA_zhemm_V2(handle,
side, uplo,
M, N,
&alpha,
A, LDA,
B, LDB,
&beta,
C, LDC,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum uplo;
int M;
int N;
cuDoubleComplex alpha;
cuDoubleComplex *A;
int LDA;
cuDoubleComplex *B;
int LDB;
cuDoubleComplex beta;
cuDoubleComplex *C;
int LDC;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
stream = starpu_cuda_get_local_stream();
CUDA_zhemm(
side, uplo,
M, N,
......@@ -179,7 +138,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -98,7 +98,6 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -122,41 +121,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
stream = starpu_cuda_get_local_stream();
CUDA_zher2k_V2( uplo, trans,
n, k, &alpha, A, lda, B, ldb,
&beta, C, ldc,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
MORSE_enum trans;
int n;
int k;
cuDoubleComplex alpha;
cuDoubleComplex *A;
int lda;
cuDoubleComplex *B;
int ldb;
double beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
stream = starpu_cuda_get_local_stream();
CUDA_zher2k( uplo, trans,
n, k, &alpha, A, lda, B, ldb, &beta, C, ldc,
stream);
......@@ -167,7 +131,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -92,7 +92,6 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -106,10 +105,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
cuDoubleComplex *C;
int ldc;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTrans;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
......@@ -117,40 +112,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
stream = starpu_cuda_get_local_stream();
CUDA_zherk_V2(
uplo, trans,
n, k,
&alpha, A, lda,
&beta, C, ldc,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
MORSE_enum trans;
int n;
int k;
double alpha;
cuDoubleComplex *A;
int lda;
double beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
stream = starpu_cuda_get_local_stream();
CUDA_zherk(
uplo, trans,
n, k,
......@@ -164,7 +125,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -101,7 +101,6 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -125,44 +124,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
stream = starpu_cuda_get_local_stream();
CUDA_zsymm_V2(
side, uplo,
M, N,
&alpha, A, LDA,
B, LDB,
&beta, C, LDC,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum uplo;
int M;
int N;
cuDoubleComplex alpha;
cuDoubleComplex *A;
int LDA;
cuDoubleComplex *B;
int LDB;
cuDoubleComplex beta;
cuDoubleComplex *C;
int LDC;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
stream = starpu_cuda_get_local_stream();
CUDA_zsymm(
side, uplo,
M, N,
......@@ -177,7 +138,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -98,7 +98,6 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -122,41 +121,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
stream = starpu_cuda_get_local_stream();
CUDA_zsyr2k_V2( uplo, trans,
n, k, &alpha, A, lda, B, ldb,
&beta, C, ldc,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
MORSE_enum trans;
int n;
int k;
cuDoubleComplex alpha;
cuDoubleComplex *A;
int lda;
cuDoubleComplex *B;
int ldb;
cuDoubleComplex beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
stream = starpu_cuda_get_local_stream();
CUDA_zsyr2k( uplo, trans,
n, k, &alpha, A, lda, B, ldb, &beta, C, ldc,
stream);
......@@ -167,7 +131,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -93,7 +93,6 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -108,37 +107,7 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
int ldc;
CUstream stream;
stream = starpu_cuda_get_local_stream();
CUDA_zsyrk_V2(
uplo, trans,
n, k,
&alpha, A, lda,
&beta, C, ldc,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
MORSE_enum trans;
int n;
int k;
cuDoubleComplex alpha;
cuDoubleComplex *A;
int lda;
cuDoubleComplex beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
......@@ -157,7 +126,6 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -96,7 +96,6 @@ static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -118,41 +117,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
stream = starpu_cuda_get_local_stream();
CUDA_ztrmm_V2(
side, uplo, transA, diag,
M, N,
&alpha, A, LDA,
B, LDB, B, LDB,
stream);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum uplo;
MORSE_enum transA;
MORSE_enum diag;
int M;
int N;
cuDoubleComplex alpha;
const cuDoubleComplex *A;
int LDA;
cuDoubleComplex *B;
int LDB;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB);
stream = starpu_cuda_get_local_stream();
CUDA_ztrmm(
side, uplo,
transA, diag,
......@@ -167,7 +131,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -118,43 +118,6 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
/*
 * CUDA implementation of the ztrsm codelet (cuBLAS v2 variant).
 *
 * descr[0] provides the matrix A, accessed read-only through a
 * pointer-to-const; descr[1] provides the matrix B, passed to
 * CUDA_ztrsm_V2 through a mutable pointer.
 *
 * cl_arg holds the packed scalar arguments, unpacked in this order:
 * side, uplo, transA, diag, m, n, alpha, lda, ldb.
 *
 * The call is issued on the stream returned by
 * starpu_cuda_get_local_stream(). Unless STARPU_CUDA_ASYNC is defined,
 * the function waits for that stream before returning.
 */
static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
{
    MORSE_enum side;
    MORSE_enum uplo;
    MORSE_enum transA;
    MORSE_enum diag;
    int m;
    int n;
    /* Must NOT be const: starpu_codelet_unpack_args() stores the unpacked
     * value through &alpha, and writing to a const object is undefined
     * behaviour. */
    cuDoubleComplex alpha;
    const cuDoubleComplex *A;
    int lda;
    cuDoubleComplex *B;
    int ldb;
    CUstream stream;

    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);

    /* Fills every scalar above from the packed argument buffer. */
    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb);

    stream = starpu_cuda_get_local_stream();

    CUDA_ztrsm_V2(
        side, uplo, transA, diag,
        m, n,
        &alpha, A, lda,
        B, ldb,
        stream);

#ifndef STARPU_CUDA_ASYNC
    /* Synchronous mode: block until the work queued on the stream is done. */
    cudaStreamSynchronize( stream );
#endif

    return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -170,7 +133,7 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
int ldb;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb);
......@@ -189,7 +152,6 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif /* !defined(CHAMELEON_SIMULATION) */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment