From c02940440f49e6429cdfbbc45485a12d3599320e Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 1 Dec 2016 17:02:39 +0000 Subject: [PATCH] Remove V2 since it is integrated in the prototype --- runtime/starpu/codelets/codelet_zgemm.c | 41 ----------------------- runtime/starpu/codelets/codelet_zhemm.c | 42 ------------------------ runtime/starpu/codelets/codelet_zher2k.c | 37 --------------------- runtime/starpu/codelets/codelet_zherk.c | 40 ---------------------- runtime/starpu/codelets/codelet_zsymm.c | 40 ---------------------- runtime/starpu/codelets/codelet_zsyr2k.c | 37 --------------------- runtime/starpu/codelets/codelet_zsyrk.c | 34 +------------------ runtime/starpu/codelets/codelet_ztrmm.c | 37 --------------------- runtime/starpu/codelets/codelet_ztrsm.c | 40 +--------------------- 9 files changed, 2 insertions(+), 346 deletions(-) diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 592c9c2c4..91b9ccdf4 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -131,7 +131,6 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum transA; @@ -156,45 +155,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zgemm_V2( - transA, transB, - m, n, k, - &alpha, A, lda, - B, ldb, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum transA; - MORSE_enum transB; - int m; - int n; - int k; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int lda; - cuDoubleComplex *B; - int ldb; - cuDoubleComplex beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = 
(cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - - stream = starpu_cuda_get_local_stream(); - CUDA_zgemm( transA, transB, m, n, k, @@ -209,7 +169,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 8f47ad630..6fd670682 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -101,7 +101,6 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum side; @@ -125,46 +124,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zhemm_V2(handle, - side, uplo, - M, N, - &alpha, - A, LDA, - B, LDB, - &beta, - C, LDC, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum side; - MORSE_enum uplo; - int M; - int N; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int LDA; - cuDoubleComplex *B; - int LDB; - cuDoubleComplex beta; - cuDoubleComplex *C; - int LDC; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - - stream = starpu_cuda_get_local_stream(); - CUDA_zhemm( side, uplo, M, N, @@ -179,7 
+138,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 80c4cb7a0..fa002e212 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -98,7 +98,6 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) { MORSE_enum uplo; @@ -122,41 +121,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zher2k_V2( uplo, trans, - n, k, &alpha, A, lda, B, ldb, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum trans; - int n; - int k; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int lda; - cuDoubleComplex *B; - int ldb; - double beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - - stream = starpu_cuda_get_local_stream(); - CUDA_zher2k( uplo, trans, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, stream); @@ -167,7 +131,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index d9bfef3b5..df6d8718f 100644 --- 
a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -92,7 +92,6 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zherk_cuda_func(void *descr[], void *cl_arg) { MORSE_enum uplo; @@ -106,10 +105,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *C; int ldc; CUstream stream; - cublasHandle_t handle; - cublasStatus_t stat; - cublasFillMode_t cublasUplo; - cublasOperation_t cublasTrans; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -117,40 +112,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zherk_V2( - uplo, trans, - n, k, - &alpha, A, lda, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zherk_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum trans; - int n; - int k; - double alpha; - cuDoubleComplex *A; - int lda; - double beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - - stream = starpu_cuda_get_local_stream(); - CUDA_zherk( uplo, trans, n, k, @@ -164,7 +125,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 5356f6a03..030ccc1b7 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -101,7 +101,6 @@ static void cl_zsymm_cpu_func(void 
*descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum side; @@ -125,44 +124,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zsymm_V2( - side, uplo, - M, N, - &alpha, A, LDA, - B, LDB, - &beta, C, LDC, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum side; - MORSE_enum uplo; - int M; - int N; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int LDA; - cuDoubleComplex *B; - int LDB; - cuDoubleComplex beta; - cuDoubleComplex *C; - int LDC; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - - stream = starpu_cuda_get_local_stream(); - CUDA_zsymm( side, uplo, M, N, @@ -177,7 +138,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index afaf7ce66..b3bebf3b0 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -98,7 +98,6 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) { MORSE_enum uplo; @@ -122,41 +121,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zsyr2k_V2( uplo, trans, - n, k, &alpha, A, lda, B, ldb, - &beta, C, ldc, 
- stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum trans; - int n; - int k; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int lda; - cuDoubleComplex *B; - int ldb; - cuDoubleComplex beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - - stream = starpu_cuda_get_local_stream(); - CUDA_zsyr2k( uplo, trans, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, stream); @@ -167,7 +131,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 9b30fbad7..98e472cc4 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -93,7 +93,6 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) { MORSE_enum uplo; @@ -108,37 +107,7 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) int ldc; CUstream stream; - stream = starpu_cuda_get_local_stream(); - - CUDA_zsyrk_V2( - uplo, trans, - n, k, - &alpha, A, lda, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum trans; - int n; - int k; - cuDoubleComplex alpha; - cuDoubleComplex *A; 
- int lda; - cuDoubleComplex beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); @@ -157,7 +126,6 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index bbf932282..9878b02fc 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -96,7 +96,6 @@ static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum side; @@ -118,41 +117,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_ztrmm_V2( - side, uplo, transA, diag, - M, N, - &alpha, A, LDA, - B, LDB, B, LDB, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum side; - MORSE_enum uplo; - MORSE_enum transA; - MORSE_enum diag; - int M; - int N; - cuDoubleComplex alpha; - const cuDoubleComplex *A; - int LDA; - cuDoubleComplex *B; - int LDB; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB); - - stream = starpu_cuda_get_local_stream(); - CUDA_ztrmm( side, uplo, transA, diag, @@ -167,7 +131,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void 
*cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 2e1ec6c82..e009a14b3 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -118,43 +118,6 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) -static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum side; - MORSE_enum uplo; - MORSE_enum transA; - MORSE_enum diag; - int m; - int n; - const cuDoubleComplex alpha; - const cuDoubleComplex *A; - int lda; - cuDoubleComplex *B; - int ldb; - CUstream stream; - - A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); - - - stream = starpu_cuda_get_local_stream(); - - CUDA_ztrsm_V2( - side, uplo, transA, diag, - m, n, - &alpha, A, lda, - B, ldb, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum side; @@ -170,7 +133,7 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) int ldb; CUstream stream; - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); @@ -189,7 +152,6 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ -- GitLab