diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index 1b4b1241b630ef62e11c2af655dc3fd4118e7adc..3a74c1f9f890f487631f4a126a5069af066cab3d 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -193,13 +193,14 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int LDC;
+    CUstream stream;
 
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     cublasZhemm(
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index bd7da41cf61552d661b0ade9a5dc0e62277bc4ea..9eb3cb836929d443977048ca1594c4a8ccc2f4c7 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -187,13 +187,14 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
     double beta;
     cuDoubleComplex *C;
     int ldc;
+    CUstream stream;
 
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     cublasZher2k( morse_lapack_const(uplo), morse_lapack_const(trans),
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 44ae4f252a1ac2edcf76ae10fcd1838a3ea696fe..6c08e6b5aece30233ce3451cbaa4f38ac3f00c48 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -180,12 +180,13 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
     double beta;
     cuDoubleComplex *C;
     int ldc;
+    CUstream stream;
 
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     cublasZherk(
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 173d6d3b85562aea11b53a2f7214674e5d0ac0ec..e1f036c59f285364a92465b547acc4beec529351 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -193,13 +193,14 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int LDC;
+    CUstream stream;
 
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     cublasZsymm(
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index 2922de6fc788f852d80ee13756ee553274d52247..b8f6598c643ecf93aef5de9200b6dc41aeb8e67b 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -187,13 +187,14 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int ldc;
+    CUstream stream;
 
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     cublasZsyr2k( morse_lapack_const(uplo), morse_lapack_const(trans),
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 0f69498a4e77cd61b5c9515560c4a6d7d6d770c2..6b8086b7af6901ef0797ba9f410848abab050d91 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -181,12 +181,13 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex beta;
     cuDoubleComplex *C;
     int ldc;
+    CUstream stream;
 
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     cublasZsyrk(
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index 510005d40d382230dcf86d242e4f28d69e06c5d0..92591140400bd2812c7224a7cdee479ce2801abf 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -202,12 +202,13 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
     int LDA;
     cuDoubleComplex *B;
     int LDB;
+    CUstream stream;
 
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB);
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     cublasZtrmm(
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index 0b100e81a01d055a030dd5f0be086ff5894d20d6..b88131bb6dbfd914007a47011bc19bdf86b6d4dd 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -202,12 +202,13 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
     int lda;
     cuDoubleComplex *B;
     int ldb;
+    CUstream stream;
 
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb);
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     cublasZtrsm(
diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c
index c81353fa0ea4ee709640088190cee8da3862b5fb..dffd2dd6ab8205ee53b0e53d0d4d2ab364f41fa3 100644
--- a/runtime/starpu/codelets/codelet_ztslqt.c
+++ b/runtime/starpu/codelets/codelet_ztslqt.c
@@ -333,6 +333,7 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W;
     cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W;
     int lda1, lda2, ldt;
+    CUstream stream;
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
 
@@ -351,7 +352,7 @@ static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg)
     h_W   = h_TAU + max(m,n);
     h_D   = h_W   + ib*m;
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     magma_ztslqt_gpu( m, n, ib,
                       d_A1, lda1, d_A2, lda2,
                       h_A2, ib,
diff --git a/runtime/starpu/codelets/codelet_ztsmlq.c b/runtime/starpu/codelets/codelet_ztsmlq.c
index f819ec03c8020dad5ec10a39b6c95bc88407fa34..308dd22cc2c00009d2c85eca706c1ece4b245230 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq.c
@@ -363,6 +363,7 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *W, *WC;
     int ldwork;
     int ldworkc;
+    CUstream stream;
 
     A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -376,7 +377,7 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
     WC = W + ib * ldwork;
     ldworkc = (side == MorseLeft) ? m1 : ib;
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     magma_ztsmlq_gpu( side, trans, m1, n1, m2, n2, k, ib,
diff --git a/runtime/starpu/codelets/codelet_ztsmqr.c b/runtime/starpu/codelets/codelet_ztsmqr.c
index e88ca291d05880f79ded733d97daf0c942ef48a8..93a5e24b0144f665f03d0c810b6c1926d95cd494 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr.c
@@ -912,6 +912,7 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *W, *WC;
     int ldwork;
     int ldworkc;
+    CUstream stream;
 
     A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -925,7 +926,7 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg)
     WC = W + ib * (side == MorseLeft ? n1 : m1);
     ldworkc = (side == MorseLeft) ? m1 : ib;
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     cublasSetKernelStream( stream );
 
     magma_ztsmqr_gpu( side, trans, m1, n1, m2, n2, k, ib,
diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c
index 9eedc071b00c1bac821a21741b27df3723c8fc22..5e003b2af10ff13dab30c645a18c40aeb2d3acd4 100644
--- a/runtime/starpu/codelets/codelet_ztsqrt.c
+++ b/runtime/starpu/codelets/codelet_ztsqrt.c
@@ -345,6 +345,7 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W;
     cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W;
     int lda1, lda2, ldt;
+    CUstream stream;
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
 
@@ -363,7 +364,7 @@ static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg)
     h_W   = h_TAU + max(m,n);
     h_D   = h_W   + ib*n;
 
-    CUstream stream = starpu_cuda_get_local_stream();
+    stream = starpu_cuda_get_local_stream();
     magma_ztsqrt2_gpu( m, n, ib,
                        d_A1, lda1, d_A2, lda2,
                        h_A2, lda2,