diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 592c9c2c405ea503124b31c3fbb386fc08e215d4..91b9ccdf4d89be209b3e183b2bc0f6aa570a10a9 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -131,7 +131,6 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum transA; @@ -156,45 +155,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zgemm_V2( - transA, transB, - m, n, k, - &alpha, A, lda, - B, ldb, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum transA; - MORSE_enum transB; - int m; - int n; - int k; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int lda; - cuDoubleComplex *B; - int ldb; - cuDoubleComplex beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - - stream = starpu_cuda_get_local_stream(); - CUDA_zgemm( transA, transB, m, n, k, @@ -209,7 +169,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 8f47ad6309397e79b9cc8ebb8e5c5b656ec8d223..6fd6706820ead9907f01f7bc93da6c227c75e82a 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -101,7 +101,6 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum side; @@ -125,46 +124,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zhemm_V2(handle, - side, uplo, - M, N, - &alpha, - A, LDA, - B, LDB, - &beta, - C, LDC, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum side; - MORSE_enum uplo; - int M; - int N; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int LDA; - cuDoubleComplex *B; - int LDB; - cuDoubleComplex beta; - cuDoubleComplex *C; - int LDC; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - - stream = starpu_cuda_get_local_stream(); - CUDA_zhemm( side, uplo, M, N, @@ -179,7 +138,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 80c4cb7a0113303a5959a955300764d5effddd05..fa002e2126f653651b445ccfd3173acb2c2556e5 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -98,7 +98,6 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) { MORSE_enum uplo; @@ -122,41 +121,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zher2k_V2( uplo, trans, - n, k, &alpha, A, lda, B, ldb, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum trans; - int n; - int k; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int lda; - cuDoubleComplex *B; - int ldb; - double beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - - stream = starpu_cuda_get_local_stream(); - CUDA_zher2k( uplo, trans, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, stream); @@ -167,7 +131,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index d9bfef3b526b1ae9e16ee2289f241fe8a570bd7f..df6d8718f51783d57f0b5fa5b18fa7ea77b4d1f3 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -92,7 +92,6 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zherk_cuda_func(void *descr[], void *cl_arg) { MORSE_enum uplo; @@ -106,10 +105,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *C; int ldc; CUstream stream; - cublasHandle_t handle; - cublasStatus_t stat; - cublasFillMode_t cublasUplo; - cublasOperation_t cublasTrans; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -117,40 +112,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zherk_V2( - uplo, trans, - n, k, - &alpha, A, lda, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zherk_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum trans; - int n; - int k; - double alpha; - cuDoubleComplex *A; - int lda; - double beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - - stream = starpu_cuda_get_local_stream(); - CUDA_zherk( uplo, trans, n, k, @@ -164,7 +125,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 5356f6a0321a97d1f4703b4cc47491afc035443c..030ccc1b7016d49b304c00b2b50776e82841b2eb 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -101,7 +101,6 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum side; @@ -125,44 +124,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zsymm_V2( - side, uplo, - M, N, - &alpha, A, LDA, - B, LDB, - &beta, C, LDC, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum side; - MORSE_enum uplo; - int M; - int N; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int LDA; - cuDoubleComplex *B; - int LDB; - cuDoubleComplex beta; - cuDoubleComplex *C; - int LDC; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - - stream = starpu_cuda_get_local_stream(); - CUDA_zsymm( side, uplo, M, N, @@ -177,7 +138,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index afaf7ce666cfc3262929e810d0c1e2b338783e79..b3bebf3b051647fcdafe93cafa4c48b98e062581 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -98,7 +98,6 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) { MORSE_enum uplo; @@ -122,41 +121,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_zsyr2k_V2( uplo, trans, - n, k, &alpha, A, lda, B, ldb, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum trans; - int n; - int k; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int lda; - cuDoubleComplex *B; - int ldb; - cuDoubleComplex beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - - stream = starpu_cuda_get_local_stream(); - CUDA_zsyr2k( uplo, trans, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, stream); @@ -167,7 +131,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 9b30fbad70c6fe66a5525680991e0d34b25ed904..98e472cc47cc4d92bc01e46b048c27dc39f09b2f 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -93,7 +93,6 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) { MORSE_enum uplo; @@ -108,37 +107,7 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) int ldc; CUstream stream; - stream = starpu_cuda_get_local_stream(); - - CUDA_zsyrk_V2( - uplo, trans, - n, k, - &alpha, A, lda, - &beta, C, ldc, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum trans; - int n; - int k; - cuDoubleComplex alpha; - cuDoubleComplex *A; - int lda; - cuDoubleComplex beta; - cuDoubleComplex *C; - int ldc; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); @@ -157,7 +126,6 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index bbf932282c77ebb72a41119713fb7575c49b5a23..9878b02fcd81ff9164d43a9c79b9d7b99dfdc943 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -96,7 +96,6 @@ static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum side; @@ -118,41 +117,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) stream = starpu_cuda_get_local_stream(); - CUDA_ztrmm_V2( - side, uplo, transA, diag, - M, N, - &alpha, A, LDA, - B, LDB, B, LDB, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ -static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum side; - MORSE_enum uplo; - MORSE_enum transA; - MORSE_enum diag; - int M; - int N; - cuDoubleComplex alpha; - const cuDoubleComplex *A; - int LDA; - cuDoubleComplex *B; - int LDB; - CUstream stream; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB); - - stream = starpu_cuda_get_local_stream(); - CUDA_ztrmm( side, uplo, transA, diag, @@ -167,7 +131,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 2e1ec6c82b52d7ed8acd501c1f45bfaea214e21d..e009a14b374327a452aebcc8cd829453d92a9f0d 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -118,43 +118,6 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) } #ifdef CHAMELEON_USE_CUDA -#if defined(CHAMELEON_USE_CUBLAS_V2) -static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum side; - MORSE_enum uplo; - MORSE_enum transA; - MORSE_enum diag; - int m; - int n; - const cuDoubleComplex alpha; - const cuDoubleComplex *A; - int lda; - cuDoubleComplex *B; - int ldb; - CUstream stream; - - A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); - - - stream = starpu_cuda_get_local_stream(); - - CUDA_ztrsm_V2( - side, uplo, transA, diag, - m, n, - &alpha, A, lda, - B, ldb, - stream); - -#ifndef STARPU_CUDA_ASYNC - cudaStreamSynchronize( stream ); -#endif - - return; -} -#else /* CHAMELEON_USE_CUBLAS_V2 */ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) { MORSE_enum side; @@ -170,7 +133,7 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) int ldb; CUstream stream; - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); @@ -189,7 +152,6 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUBLAS_V2 */ #endif /* CHAMELEON_USE_CUDA */ #endif /* !defined(CHAMELEON_SIMULATION) */