From 117a67f9a3ffde9ea742056549402298b8f81e44 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 6 Apr 2017 19:36:23 +0200 Subject: [PATCH] Use the new macros --- runtime/starpu/codelets/codelet_zgelqt.c | 6 ++---- runtime/starpu/codelets/codelet_zgemm.c | 3 +-- runtime/starpu/codelets/codelet_zgeqrt.c | 4 +--- runtime/starpu/codelets/codelet_zhemm.c | 3 +-- runtime/starpu/codelets/codelet_zher2k.c | 3 +-- runtime/starpu/codelets/codelet_zherfb.c | 3 +-- runtime/starpu/codelets/codelet_zherk.c | 3 +-- runtime/starpu/codelets/codelet_zsymm.c | 3 +-- runtime/starpu/codelets/codelet_zsyr2k.c | 3 +-- runtime/starpu/codelets/codelet_zsyrk.c | 3 +-- runtime/starpu/codelets/codelet_ztpmqrt.c | 3 +-- runtime/starpu/codelets/codelet_ztrmm.c | 3 +-- runtime/starpu/codelets/codelet_ztrsm.c | 3 +-- runtime/starpu/codelets/codelet_ztslqt.c | 3 +-- runtime/starpu/codelets/codelet_ztsmlq.c | 4 +--- runtime/starpu/codelets/codelet_ztsmqr.c | 4 +--- runtime/starpu/codelets/codelet_ztsqrt.c | 3 +-- runtime/starpu/codelets/codelet_zunmlq.c | 4 +--- runtime/starpu/codelets/codelet_zunmqr.c | 4 +--- 19 files changed, 20 insertions(+), 45 deletions(-) diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index af571f2ae..223559778 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -166,7 +166,6 @@ static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU; cuDoubleComplex *d_A, *d_T, *d_D, *d_W; int lda, ldt; - CUstream stream; starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); @@ -186,15 +185,14 @@ static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg) h_W = h_TAU + chameleon_max(m,n); h_D = h_W + ib*ib; - stream = starpu_cuda_get_local_stream(); - cublasSetKernelStream( stream ); + RUNTIME_getStream(stream); CUDA_zgelqt( m, n, ib, d_A, lda, h_A, ib, d_T, ldt, h_T, ib, d_D, h_D, ib, h_TAU, - h_W, d_W, stream); + h_W, d_W, stream ); cudaThreadSynchronize(); } diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 292e071de..90fe880a7 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -148,14 +148,13 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int ldc; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream( stream ); CUDA_zgemm( transA, transB, diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index 50769b3be..868c10c16 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -166,7 +166,6 @@ static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU; cuDoubleComplex *d_A, *d_T, *d_D, *d_W; int lda, ldt; - CUstream stream; starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); @@ -186,8 +185,7 @@ static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg) h_W = h_TAU + chameleon_max(m,n); h_D = h_W + ib*ib; - stream = starpu_cuda_get_local_stream(); - cublasSetKernelStream( stream ); + RUNTIME_getStream(stream); CUDA_zgeqrt( m, n, ib, diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index df4523463..97f0adcc5 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -119,14 +119,13 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int LDC; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_zhemm( side, uplo, diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 9b6b7194d..7c5509240 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -116,14 +116,13 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) double beta; cuDoubleComplex *C; int ldc; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_zher2k( uplo, trans, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c index b5ceec04b..8b6c5d8d3 100644 --- a/runtime/starpu/codelets/codelet_zherfb.c +++ b/runtime/starpu/codelets/codelet_zherfb.c @@ -113,9 +113,8 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) int ldc; cuDoubleComplex *WORK; int ldwork; - CUstream stream; - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index e5ef78021..0576cef52 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -108,13 +108,12 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) double beta; cuDoubleComplex *C; int ldc; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_zherk( uplo, trans, diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 71e6b8670..bb01bfc7b 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -119,14 +119,13 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int LDC; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_zsymm( side, uplo, diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index cafe3887e..aec9d75f5 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -116,14 +116,13 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int ldc; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_zsyr2k( uplo, trans, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index b368779f5..265a8922d 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -109,13 +109,12 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int ldc; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_zsyrk( uplo, trans, diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index 5428c9eea..c13447604 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -126,7 +126,6 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *B; int ldb; cuDoubleComplex *W; - CUstream stream; V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -137,7 +136,7 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &ldv, &ldt, &lda, &ldb ); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_ztpmqrt( side, trans, M, N, K, L, ib, diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index ca5484870..b002c8170 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -113,13 +113,12 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) int LDA; cuDoubleComplex *B; int LDB; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_ztrmm( side, uplo, diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 769032832..288427e82 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -135,13 +135,12 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) int lda; cuDoubleComplex *B; int ldb; - CUstream stream; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_ztrsm( side, uplo, transA, diag, diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c index 378030827..101feebfe 100644 --- a/runtime/starpu/codelets/codelet_ztslqt.c +++ b/runtime/starpu/codelets/codelet_ztslqt.c @@ -185,7 +185,6 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W; cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W; int lda1, lda2, ldt; - CUstream stream; starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); @@ -204,7 +203,7 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg) h_W = h_TAU + chameleon_max(m,n); h_D = h_W + ib*m; - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_ztslqt( m, n, ib, d_A1, lda1, d_A2, lda2, diff --git a/runtime/starpu/codelets/codelet_ztsmlq.c b/runtime/starpu/codelets/codelet_ztsmlq.c index 390129540..1ec507ca2 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq.c +++ b/runtime/starpu/codelets/codelet_ztsmlq.c @@ -239,7 +239,6 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *W, *WC; int ldwork; int ldworkc; - CUstream stream; A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -253,8 +252,7 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg) WC = W + ib * ldwork; ldworkc = (side == MorseLeft) ? m1 : ib; - stream = starpu_cuda_get_local_stream(); - cublasSetKernelStream( stream ); + RUNTIME_getStream(stream); CUDA_ztsmlq( side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, diff --git a/runtime/starpu/codelets/codelet_ztsmqr.c b/runtime/starpu/codelets/codelet_ztsmqr.c index ed4f7dbe9..c1c177c92 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr.c +++ b/runtime/starpu/codelets/codelet_ztsmqr.c @@ -270,7 +270,6 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *W, *WC; int ldwork; int ldworkc; - CUstream stream; A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -284,8 +283,7 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg) WC = W + ib * (side == MorseLeft ? m1 : n1); ldworkc = (side == MorseLeft) ? m2 : ib; - stream = starpu_cuda_get_local_stream(); - cublasSetKernelStream( stream ); + RUNTIME_getStream(stream); CUDA_ztsmqr( side, trans, m1, n1, m2, n2, k, ib, diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c index 880e1ff8a..4c5d03fe2 100644 --- a/runtime/starpu/codelets/codelet_ztsqrt.c +++ b/runtime/starpu/codelets/codelet_ztsqrt.c @@ -176,7 +176,6 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W; cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W; int lda1, lda2, ldt; - CUstream stream; starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); @@ -195,7 +194,7 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg) h_W = h_TAU + chameleon_max(m,n); h_D = h_W + ib*n; - stream = starpu_cuda_get_local_stream(); + RUNTIME_getStream(stream); CUDA_ztsqrt( m, n, ib, d_A1, lda1, d_A2, lda2, diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index 6525661f6..b990da74b 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -201,7 +201,6 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) const cuDoubleComplex *A, *T; cuDoubleComplex *C, *WORK; int lda, ldt, ldc, ldwork; - CUstream stream; starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &lda, &ldt, &ldc, &ldwork); @@ -211,8 +210,7 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - stream = starpu_cuda_get_local_stream(); - cublasSetKernelStream( stream ); + RUNTIME_getStream(stream); CUDA_zunmlqt( side, trans, m, n, k, ib, diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index 2b88c1d3b..d4c6d6d81 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -227,7 +227,6 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) const cuDoubleComplex *A, *T; cuDoubleComplex *C, *WORK; int lda, ldt, ldc, ldwork; - CUstream stream; starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &lda, &ldt, &ldc, &ldwork); @@ -237,8 +236,7 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - stream = starpu_cuda_get_local_stream(); - cublasSetKernelStream( stream ); + RUNTIME_getStream(stream); CUDA_zunmqrt( side, trans, m, n, k, ib, -- GitLab