From 117a67f9a3ffde9ea742056549402298b8f81e44 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Thu, 6 Apr 2017 19:36:23 +0200
Subject: [PATCH] Use the new RUNTIME_getStream macro in the StarPU CUDA codelets

---
 runtime/starpu/codelets/codelet_zgelqt.c  | 6 ++----
 runtime/starpu/codelets/codelet_zgemm.c   | 3 +--
 runtime/starpu/codelets/codelet_zgeqrt.c  | 4 +---
 runtime/starpu/codelets/codelet_zhemm.c   | 3 +--
 runtime/starpu/codelets/codelet_zher2k.c  | 3 +--
 runtime/starpu/codelets/codelet_zherfb.c  | 3 +--
 runtime/starpu/codelets/codelet_zherk.c   | 3 +--
 runtime/starpu/codelets/codelet_zsymm.c   | 3 +--
 runtime/starpu/codelets/codelet_zsyr2k.c  | 3 +--
 runtime/starpu/codelets/codelet_zsyrk.c   | 3 +--
 runtime/starpu/codelets/codelet_ztpmqrt.c | 3 +--
 runtime/starpu/codelets/codelet_ztrmm.c   | 3 +--
 runtime/starpu/codelets/codelet_ztrsm.c   | 3 +--
 runtime/starpu/codelets/codelet_ztslqt.c  | 3 +--
 runtime/starpu/codelets/codelet_ztsmlq.c  | 4 +---
 runtime/starpu/codelets/codelet_ztsmqr.c  | 4 +---
 runtime/starpu/codelets/codelet_ztsqrt.c  | 3 +--
 runtime/starpu/codelets/codelet_zunmlq.c  | 4 +---
 runtime/starpu/codelets/codelet_zunmqr.c  | 4 +---
 19 files changed, 20 insertions(+), 45 deletions(-)

diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index af571f2ae..223559778 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -166,7 +166,6 @@ static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU;
     cuDoubleComplex *d_A, *d_T, *d_D, *d_W;
     int lda, ldt;
-    CUstream stream;
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
 
@@ -186,15 +185,14 @@ static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg)
     h_W   = h_TAU + chameleon_max(m,n);
     h_D   = h_W   + ib*ib;
 
-    stream = starpu_cuda_get_local_stream();
-    cublasSetKernelStream( stream );
+    RUNTIME_getStream(stream);
 
     CUDA_zgelqt(
             m, n, ib,
             d_A, lda, h_A, ib,
             d_T, ldt, h_T, ib,
             d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream);
+            h_W, d_W, stream );
 
     cudaThreadSynchronize();
 }
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 292e071de..90fe880a7 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -148,14 +148,13 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int ldc;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream( stream );
 
     CUDA_zgemm(
         transA, transB,
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index 50769b3be..868c10c16 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -166,7 +166,6 @@ static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU;
     cuDoubleComplex *d_A, *d_T, *d_D, *d_W;
     int lda, ldt;
-    CUstream stream;
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
 
@@ -186,8 +185,7 @@ static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
     h_W   = h_TAU + chameleon_max(m,n);
     h_D   = h_W   + ib*ib;
 
-    stream = starpu_cuda_get_local_stream();
-    cublasSetKernelStream( stream );
+    RUNTIME_getStream(stream);
 
     CUDA_zgeqrt(
             m, n, ib,
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index df4523463..97f0adcc5 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -119,14 +119,13 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int LDC;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_zhemm(
         side, uplo,
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 9b6b7194d..7c5509240 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -116,14 +116,13 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
     double beta;
     cuDoubleComplex *C;
     int ldc;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_zher2k( uplo, trans,
                  n, k, &alpha, A, lda, B, ldb, &beta, C, ldc,
diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c
index b5ceec04b..8b6c5d8d3 100644
--- a/runtime/starpu/codelets/codelet_zherfb.c
+++ b/runtime/starpu/codelets/codelet_zherfb.c
@@ -113,9 +113,8 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg)
     int ldc;
     cuDoubleComplex *WORK;
     int ldwork;
-    CUstream stream;
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index e5ef78021..0576cef52 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -108,13 +108,12 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
     double beta;
     cuDoubleComplex *C;
     int ldc;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_zherk(
         uplo, trans,
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 71e6b8670..bb01bfc7b 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -119,14 +119,13 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int LDC;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_zsymm(
         side, uplo,
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index cafe3887e..aec9d75f5 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -116,14 +116,13 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int ldc;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_zsyr2k( uplo, trans,
                  n, k, &alpha, A, lda, B, ldb, &beta, C, ldc,
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index b368779f5..265a8922d 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -109,13 +109,12 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int ldc;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_zsyrk(
         uplo, trans,
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
index 5428c9eea..c13447604 100644
--- a/runtime/starpu/codelets/codelet_ztpmqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -126,7 +126,6 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *B;
     int ldb;
     cuDoubleComplex *W;
-    CUstream stream;
 
     V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -137,7 +136,7 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
                                 &ldv, &ldt, &lda, &ldb );
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_ztpmqrt(
             side, trans, M, N, K, L, ib,
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index ca5484870..b002c8170 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -113,13 +113,12 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
     int LDA;
     cuDoubleComplex *B;
     int LDB;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_ztrmm(
         side, uplo,
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index 769032832..288427e82 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -135,13 +135,12 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
     int lda;
     cuDoubleComplex *B;
     int ldb;
-    CUstream stream;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb);
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
 
     CUDA_ztrsm(
         side, uplo, transA, diag,
diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c
index 378030827..101feebfe 100644
--- a/runtime/starpu/codelets/codelet_ztslqt.c
+++ b/runtime/starpu/codelets/codelet_ztslqt.c
@@ -185,7 +185,6 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W;
     cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W;
     int lda1, lda2, ldt;
-    CUstream stream;
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
 
@@ -204,7 +203,7 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg)
     h_W   = h_TAU + chameleon_max(m,n);
     h_D   = h_W   + ib*m;
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
     CUDA_ztslqt(
             m, n, ib,
             d_A1, lda1, d_A2, lda2,
diff --git a/runtime/starpu/codelets/codelet_ztsmlq.c b/runtime/starpu/codelets/codelet_ztsmlq.c
index 390129540..1ec507ca2 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq.c
@@ -239,7 +239,6 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *W, *WC;
     int ldwork;
     int ldworkc;
-    CUstream stream;
 
     A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -253,8 +252,7 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
     WC = W + ib * ldwork;
     ldworkc = (side == MorseLeft) ? m1 : ib;
 
-    stream = starpu_cuda_get_local_stream();
-    cublasSetKernelStream( stream );
+    RUNTIME_getStream(stream);
 
     CUDA_ztsmlq( side, trans, m1, n1, m2, n2, k, ib,
                       A1, lda1, A2, lda2, V, ldv, T, ldt,
diff --git a/runtime/starpu/codelets/codelet_ztsmqr.c b/runtime/starpu/codelets/codelet_ztsmqr.c
index ed4f7dbe9..c1c177c92 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr.c
@@ -270,7 +270,6 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *W, *WC;
     int ldwork;
     int ldworkc;
-    CUstream stream;
 
     A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -284,8 +283,7 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg)
     WC = W + ib * (side == MorseLeft ? m1 : n1);
     ldworkc = (side == MorseLeft) ? m2 : ib;
 
-    stream = starpu_cuda_get_local_stream();
-    cublasSetKernelStream( stream );
+    RUNTIME_getStream(stream);
 
     CUDA_ztsmqr(
             side, trans, m1, n1, m2, n2, k, ib,
diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c
index 880e1ff8a..4c5d03fe2 100644
--- a/runtime/starpu/codelets/codelet_ztsqrt.c
+++ b/runtime/starpu/codelets/codelet_ztsqrt.c
@@ -176,7 +176,6 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W;
     cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W;
     int lda1, lda2, ldt;
-    CUstream stream;
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
 
@@ -195,7 +194,7 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg)
     h_W   = h_TAU + chameleon_max(m,n);
     h_D   = h_W   + ib*n;
 
-    stream = starpu_cuda_get_local_stream();
+    RUNTIME_getStream(stream);
     CUDA_ztsqrt(
             m, n, ib,
             d_A1, lda1, d_A2, lda2,
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index 6525661f6..b990da74b 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -201,7 +201,6 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
     const cuDoubleComplex *A, *T;
     cuDoubleComplex *C, *WORK;
     int lda, ldt, ldc, ldwork;
-    CUstream stream;
 
     starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
                                &lda, &ldt, &ldc, &ldwork);
@@ -211,8 +210,7 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
     C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
 
-    stream = starpu_cuda_get_local_stream();
-    cublasSetKernelStream( stream );
+    RUNTIME_getStream(stream);
 
     CUDA_zunmlqt(
             side, trans, m, n, k, ib,
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index 2b88c1d3b..d4c6d6d81 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -227,7 +227,6 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
     const cuDoubleComplex *A, *T;
     cuDoubleComplex *C, *WORK;
     int lda, ldt, ldc, ldwork;
-    CUstream stream;
 
     starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
                                &lda, &ldt, &ldc, &ldwork);
@@ -237,8 +236,7 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
     C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
 
-    stream = starpu_cuda_get_local_stream();
-    cublasSetKernelStream( stream );
+    RUNTIME_getStream(stream);
 
     CUDA_zunmqrt(
             side, trans, m, n, k, ib,
-- 
GitLab