diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 1b4b1241b630ef62e11c2af655dc3fd4118e7adc..3a74c1f9f890f487631f4a126a5069af066cab3d 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -193,13 +193,14 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int LDC; + CUstream stream; A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); cublasZhemm( diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index bd7da41cf61552d661b0ade9a5dc0e62277bc4ea..9eb3cb836929d443977048ca1594c4a8ccc2f4c7 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -187,13 +187,14 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) double beta; cuDoubleComplex *C; int ldc; + CUstream stream; A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); cublasZher2k( morse_lapack_const(uplo), morse_lapack_const(trans), diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 44ae4f252a1ac2edcf76ae10fcd1838a3ea696fe..6c08e6b5aece30233ce3451cbaa4f38ac3f00c48 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -180,12 +180,13 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) double beta; cuDoubleComplex *C; int ldc; + CUstream stream; A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); cublasZherk( diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 173d6d3b85562aea11b53a2f7214674e5d0ac0ec..e1f036c59f285364a92465b547acc4beec529351 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -193,13 +193,14 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int LDC; + CUstream stream; A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); cublasZsymm( diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 2922de6fc788f852d80ee13756ee553274d52247..b8f6598c643ecf93aef5de9200b6dc41aeb8e67b 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -187,13 +187,14 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int ldc; + CUstream stream; A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); cublasZsyr2k( morse_lapack_const(uplo), morse_lapack_const(trans), diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 0f69498a4e77cd61b5c9515560c4a6d7d6d770c2..6b8086b7af6901ef0797ba9f410848abab050d91 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -181,12 +181,13 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex beta; cuDoubleComplex *C; int ldc; + CUstream stream; A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); cublasZsyrk( diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index 510005d40d382230dcf86d242e4f28d69e06c5d0..92591140400bd2812c7224a7cdee479ce2801abf 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -202,12 +202,13 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) int LDA; cuDoubleComplex *B; int LDB; + CUstream stream; A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB); - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); cublasZtrmm( diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 0b100e81a01d055a030dd5f0be086ff5894d20d6..b88131bb6dbfd914007a47011bc19bdf86b6d4dd 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -202,12 +202,13 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) int lda; cuDoubleComplex *B; int ldb; + CUstream stream; A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); cublasZtrsm( diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c index c81353fa0ea4ee709640088190cee8da3862b5fb..dffd2dd6ab8205ee53b0e53d0d4d2ab364f41fa3 100644 --- a/runtime/starpu/codelets/codelet_ztslqt.c +++ b/runtime/starpu/codelets/codelet_ztslqt.c @@ -333,6 +333,7 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W; cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W; int lda1, lda2, ldt; + CUstream stream; starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); @@ -351,7 +352,7 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg) h_W = h_TAU + max(m,n); h_D = h_W + ib*m; - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); magma_ztslqt_gpu( m, n, ib, d_A1, lda1, d_A2, lda2, h_A2, ib, diff --git a/runtime/starpu/codelets/codelet_ztsmlq.c b/runtime/starpu/codelets/codelet_ztsmlq.c index f819ec03c8020dad5ec10a39b6c95bc88407fa34..308dd22cc2c00009d2c85eca706c1ece4b245230 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq.c +++ b/runtime/starpu/codelets/codelet_ztsmlq.c @@ -363,6 +363,7 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *W, *WC; int ldwork; int ldworkc; + CUstream stream; A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -376,7 +377,7 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg) WC = W + ib * ldwork; ldworkc = (side == MorseLeft) ? m1 : ib; - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); magma_ztsmlq_gpu( side, trans, m1, n1, m2, n2, k, ib, diff --git a/runtime/starpu/codelets/codelet_ztsmqr.c b/runtime/starpu/codelets/codelet_ztsmqr.c index e88ca291d05880f79ded733d97daf0c942ef48a8..93a5e24b0144f665f03d0c810b6c1926d95cd494 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr.c +++ b/runtime/starpu/codelets/codelet_ztsmqr.c @@ -912,6 +912,7 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *W, *WC; int ldwork; int ldworkc; + CUstream stream; A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -925,7 +926,7 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg) WC = W + ib * (side == MorseLeft ? n1 : m1); ldworkc = (side == MorseLeft) ? m1 : ib; - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); cublasSetKernelStream( stream ); magma_ztsmqr_gpu( side, trans, m1, n1, m2, n2, k, ib, diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c index 9eedc071b00c1bac821a21741b27df3723c8fc22..5e003b2af10ff13dab30c645a18c40aeb2d3acd4 100644 --- a/runtime/starpu/codelets/codelet_ztsqrt.c +++ b/runtime/starpu/codelets/codelet_ztsqrt.c @@ -345,6 +345,7 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg) cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W; cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W; int lda1, lda2, ldt; + CUstream stream; starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); @@ -363,7 +364,7 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg) h_W = h_TAU + max(m,n); h_D = h_W + ib*n; - CUstream stream = starpu_cuda_get_local_stream(); + stream = starpu_cuda_get_local_stream(); magma_ztsqrt2_gpu( m, n, ib, d_A1, lda1, d_A2, lda2, h_A2, lda2,