diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 592c9c2c405ea503124b31c3fbb386fc08e215d4..91b9ccdf4d89be209b3e183b2bc0f6aa570a10a9 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -131,7 +131,6 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum transA;
@@ -156,45 +155,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
 
     stream = starpu_cuda_get_local_stream();
 
-    CUDA_zgemm_V2(
-        transA, transB,
-        m, n, k,
-        &alpha, A, lda,
-        B, ldb,
-        &beta,  C, ldc,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
-static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum transA;
-    MORSE_enum transB;
-    int m;
-    int n;
-    int k;
-    cuDoubleComplex alpha;
-    cuDoubleComplex *A;
-    int lda;
-    cuDoubleComplex *B;
-    int ldb;
-    cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int ldc;
-    CUstream stream;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
-
-    stream = starpu_cuda_get_local_stream();
-
     CUDA_zgemm(
         transA, transB,
         m, n, k,
@@ -209,7 +169,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index 8f47ad6309397e79b9cc8ebb8e5c5b656ec8d223..6fd6706820ead9907f01f7bc93da6c227c75e82a 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -101,7 +101,6 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum side;
@@ -125,46 +124,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
 
     stream = starpu_cuda_get_local_stream();
 
-    CUDA_zhemm_V2(handle,
-        side, uplo,
-        M, N,
-        &alpha,
-        A, LDA,
-        B, LDB,
-        &beta,
-        C, LDC,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
-static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum side;
-    MORSE_enum uplo;
-    int M;
-    int N;
-    cuDoubleComplex alpha;
-    cuDoubleComplex *A;
-    int LDA;
-    cuDoubleComplex *B;
-    int LDB;
-    cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int LDC;
-    CUstream stream;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
-
-    stream = starpu_cuda_get_local_stream();
-
     CUDA_zhemm(
         side, uplo,
         M, N,
@@ -179,7 +138,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 80c4cb7a0113303a5959a955300764d5effddd05..fa002e2126f653651b445ccfd3173acb2c2556e5 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -98,7 +98,6 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum uplo;
@@ -122,41 +121,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
 
     stream = starpu_cuda_get_local_stream();
 
-    CUDA_zher2k_V2( uplo, trans,
-        n, k, &alpha, A, lda, B, ldb,
-        &beta, C, ldc,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
-static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum uplo;
-    MORSE_enum trans;
-    int n;
-    int k;
-    cuDoubleComplex alpha;
-    cuDoubleComplex *A;
-    int lda;
-    cuDoubleComplex *B;
-    int ldb;
-    double beta;
-    cuDoubleComplex *C;
-    int ldc;
-    CUstream stream;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
-
-    stream = starpu_cuda_get_local_stream();
-
     CUDA_zher2k( uplo, trans,
                  n, k, &alpha, A, lda, B, ldb, &beta, C, ldc,
                  stream);
@@ -167,7 +131,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index d9bfef3b526b1ae9e16ee2289f241fe8a570bd7f..df6d8718f51783d57f0b5fa5b18fa7ea77b4d1f3 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -92,7 +92,6 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum uplo;
@@ -106,10 +105,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *C;
     int ldc;
     CUstream stream;
-    cublasHandle_t handle;
-    cublasStatus_t stat;
-    cublasFillMode_t cublasUplo;
-    cublasOperation_t cublasTrans;
 
     A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -117,40 +112,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
 
     stream = starpu_cuda_get_local_stream();
 
-    CUDA_zherk_V2(
-        uplo, trans,
-        n, k,
-        &alpha, A, lda,
-        &beta, C, ldc,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
-static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum uplo;
-    MORSE_enum trans;
-    int n;
-    int k;
-    double alpha;
-    cuDoubleComplex *A;
-    int lda;
-    double beta;
-    cuDoubleComplex *C;
-    int ldc;
-    CUstream stream;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
-
-    stream = starpu_cuda_get_local_stream();
-
     CUDA_zherk(
         uplo, trans,
         n, k,
@@ -164,7 +125,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 5356f6a0321a97d1f4703b4cc47491afc035443c..030ccc1b7016d49b304c00b2b50776e82841b2eb 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -101,7 +101,6 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum side;
@@ -125,44 +124,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
 
     stream = starpu_cuda_get_local_stream();
 
-    CUDA_zsymm_V2(
-        side, uplo,
-        M, N,
-        &alpha, A, LDA,
-        B, LDB,
-        &beta, C, LDC,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
-static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum side;
-    MORSE_enum uplo;
-    int M;
-    int N;
-    cuDoubleComplex alpha;
-    cuDoubleComplex *A;
-    int LDA;
-    cuDoubleComplex *B;
-    int LDB;
-    cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int LDC;
-    CUstream stream;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
-
-    stream = starpu_cuda_get_local_stream();
-
     CUDA_zsymm(
         side, uplo,
         M, N,
@@ -177,7 +138,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index afaf7ce666cfc3262929e810d0c1e2b338783e79..b3bebf3b051647fcdafe93cafa4c48b98e062581 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -98,7 +98,6 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum uplo;
@@ -122,41 +121,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
 
     stream = starpu_cuda_get_local_stream();
 
-    CUDA_zsyr2k_V2( uplo, trans,
-        n, k, &alpha, A, lda, B, ldb,
-        &beta, C, ldc,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
-static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum uplo;
-    MORSE_enum trans;
-    int n;
-    int k;
-    cuDoubleComplex alpha;
-    cuDoubleComplex *A;
-    int lda;
-    cuDoubleComplex *B;
-    int ldb;
-    cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int ldc;
-    CUstream stream;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
-
-    stream = starpu_cuda_get_local_stream();
-
     CUDA_zsyr2k( uplo, trans,
                  n, k, &alpha, A, lda, B, ldb, &beta, C, ldc,
                  stream);
@@ -167,7 +131,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 9b30fbad70c6fe66a5525680991e0d34b25ed904..98e472cc47cc4d92bc01e46b048c27dc39f09b2f 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -93,7 +93,6 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum uplo;
@@ -108,37 +107,7 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
     int ldc;
     CUstream stream;
 
-    stream = starpu_cuda_get_local_stream();
-
-    CUDA_zsyrk_V2(
-        uplo, trans,
-        n, k,
-        &alpha, A, lda,
-        &beta, C, ldc,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
-static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum uplo;
-    MORSE_enum trans;
-    int n;
-    int k;
-    cuDoubleComplex alpha;
-    cuDoubleComplex *A;
-    int lda;
-    cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int ldc;
-    CUstream stream;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
 
@@ -157,7 +126,6 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index bbf932282c77ebb72a41119713fb7575c49b5a23..9878b02fcd81ff9164d43a9c79b9d7b99dfdc943 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -96,7 +96,6 @@ static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum side;
@@ -118,41 +117,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
 
     stream = starpu_cuda_get_local_stream();
 
-    CUDA_ztrmm_V2(
-        side, uplo, transA, diag,
-        M, N,
-        &alpha, A, LDA,
-        B, LDB, B, LDB,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
-static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum side;
-    MORSE_enum uplo;
-    MORSE_enum transA;
-    MORSE_enum diag;
-    int M;
-    int N;
-    cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int LDA;
-    cuDoubleComplex *B;
-    int LDB;
-    CUstream stream;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB);
-
-    stream = starpu_cuda_get_local_stream();
-
     CUDA_ztrmm(
         side, uplo,
         transA, diag,
@@ -167,7 +131,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index 2e1ec6c82b52d7ed8acd501c1f45bfaea214e21d..e009a14b374327a452aebcc8cd829453d92a9f0d 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -118,43 +118,6 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
 }
 
 #ifdef CHAMELEON_USE_CUDA
-#if defined(CHAMELEON_USE_CUBLAS_V2)
-static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum side;
-    MORSE_enum uplo;
-    MORSE_enum transA;
-    MORSE_enum diag;
-    int m;
-    int n;
-    const cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int lda;
-    cuDoubleComplex *B;
-    int ldb;
-    CUstream stream;
-
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb);
-
-
-    stream = starpu_cuda_get_local_stream();
-
-    CUDA_ztrsm_V2(
-        side, uplo, transA, diag,
-        m, n,
-        &alpha, A, lda,
-        B, ldb,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#else /* CHAMELEON_USE_CUBLAS_V2 */
 static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum side;
@@ -170,7 +133,7 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
     int ldb;
     CUstream stream;
 
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb);
 
@@ -189,7 +152,6 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUBLAS_V2 */
 #endif /* CHAMELEON_USE_CUDA */
 #endif /* !defined(CHAMELEON_SIMULATION) */