diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 22355977823a4df1aa73e905cc7db4d158025d1f..e215ce20f9898969a609b186b433522efcf77126 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -155,55 +155,7 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
-#if defined(CHAMELEON_USE_MAGMA)
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU;
-    cuDoubleComplex *d_A, *d_T, *d_D, *d_W;
-    int lda, ldt;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
-
-    /* Gather pointer to data on device */
-    d_A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    d_T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    d_W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); /* m*ib + ib*ib*/
-    d_D = d_W + m*ib;
-
-    /* scratch data on host */
-    /* ib*n + ib*ib + max(m,n) + ib*ib + ib*ib */
-    h_A = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-
-    /* Gather pointer to scratch data on host */
-    h_T   = h_A   + ib*n;
-    h_TAU = h_T   + ib*ib;
-    h_W   = h_TAU + chameleon_max(m,n);
-    h_D   = h_W   + ib*ib;
-
-    RUNTIME_getStream(stream);
-
-    CUDA_zgelqt(
-            m, n, ib,
-            d_A, lda, h_A, ib,
-            d_T, ldt, h_T, ib,
-            d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream );
-
-    cudaThreadSynchronize();
-}
-#endif /* defined(CHAMELEON_USE_MAGMA) */
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(zgelqt, 3, cl_zgelqt_cpu_func, cl_zgelqt_cuda_func, 0)
-#else
 CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index 868c10c16064001632a036ec7b5a7791f6fbb0fa..595bafa1b86321d57ec47e76f2dfdd96922801d9 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -154,56 +154,9 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
     WORK = TAU + chameleon_max( m, n );
     CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
 }
-
-
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU;
-    cuDoubleComplex *d_A, *d_T, *d_D, *d_W;
-    int lda, ldt;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
-
-    /* Gather pointer to data on device */
-    d_A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    d_T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    d_W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); /* ib*n + ib * ib*/
-    d_D = d_W + ib*n;
-
-    /* scratch data on host */
-    /* m*ib + ib*ib + max(m,n) + ib*ib + ib*ib */
-    h_A = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-
-    /* Gather pointer to scratch data on host */
-    h_T   = h_A   + m*ib;
-    h_TAU = h_T   + ib*ib;
-    h_W   = h_TAU + chameleon_max(m,n);
-    h_D   = h_W   + ib*ib;
-
-    RUNTIME_getStream(stream);
-
-    CUDA_zgeqrt(
-            m, n, ib,
-            d_A, lda, h_A, m,
-            d_T, ldt, h_T, ib,
-            d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream);
-
-    cudaThreadSynchronize();
-}
-#endif /* defined(CHAMELEON_USE_MAGMA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(zgeqrt, 3, cl_zgeqrt_cpu_func, cl_zgeqrt_cuda_func, 0)
-#else
 CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c
index 547111784edd1c1e30720fbb22e6f4f44d5485dd..1b5a72908c1b76b9a1b34b46d62e3a65cbf02153 100644
--- a/runtime/starpu/codelets/codelet_zgessm.c
+++ b/runtime/starpu/codelets/codelet_zgessm.c
@@ -137,42 +137,9 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda);
     CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
 }
-
-#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
-static void cl_zgessm_cuda_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int k;
-    int ib;
-    int *IPIV;
-    cuDoubleComplex *dL, *dD, *dA;
-    int lddl, lddd, ldda;
-    int info = 0;
-    /*
-     *  hwork => nb*nb
-     */
-    dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    dD = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &lddl, &lddd, &ldda);
-
-    CUDA_zgessm(
-            MagmaColMajor, m, n, k, ib,
-            IPIV, dL, lddl, dD, lddd, dA, ldda, &info );
-
-    cudaThreadSynchronize();
-
-    return;
-}
-#endif /* defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if (defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU))
-CODELETS(zgessm, 3, cl_zgessm_cpu_func, cl_zgessm_cuda_func, 0)
-#else
 CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
index ad3475cb764564a20fac78f3afd4b6e063bfe58f..bc91e972a6d224bd783a0dc1ba02cfae6fdbf734 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
@@ -147,99 +147,10 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work);
     CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info);
-
-#if defined(CHAMELEON_USE_MAGMA)
-    {
-        MORSE_Complex64_t *L = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-        /*
-         * L stores:
-         *      L1     L2    L3     ...
-         *      L1^-1  L2^-1 L3^-1  ...
-         */
-        /* Compute L-1 in lower rectangle of L */
-        if ( ldl >= 2*ib )
-        {
-            int i, sb;
-
-            L += ib;
-            for (i=0; i<n; i+=ib) {
-                sb = chameleon_min( ib, n-i );
-                CORE_zlacpy(MorseUpperLower, sb, sb, A+(i*lda+i), lda, L+(i*ldl), ldl );
-
-                CORE_ztrtri( MorseLower, MorseUnit, sb, L+(i*ldl), ldl, &info );
-                if (info != 0 ) {
-                    fprintf(stderr, "ERROR, trtri returned with info = %d\n", info);
-                }
-            }
-        }
-    }
-#endif
-}
-
-
-/*
- * Codelet GPU
- */
-#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
-static void cl_zgetrf_incpiv_cuda_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *hA, *dA;
-    cuDoubleComplex *hL, *dL;
-    cuDoubleComplex *dwork;
-    MORSE_starpu_ws_t *h_work;
-    int lda, ldl;
-    int *IPIV;
-    MORSE_bool check_info;
-    int iinfo;
-    int info;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work);
-
-    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    /*
-     * hwork => at least (IB+NB)*IB contains all hA and hL
-     * dwork => at least IB*NB
-     */
-    hA    = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-    dwork = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-    hL = hA + lda*ib;
-
-    /* Initialize L to 0 */
-    memset(hL, 0, ib*ib*sizeof(cuDoubleComplex));
-
-    if ( ldl >= 2*ib ) {
-      /* Let's compute the inverses in the bottom part of L */
-      dL += ib;
-    } else {
-      /* We prefer to stick with TRSM */
-      dL = NULL;
-      hL = NULL;
-    }
-
-    CUDA_zgetrf_incpiv(
-            MagmaColMajor, m, n, ib,
-            hA, lda, dA, lda,
-            hL, ib,  dL, ldl,
-            IPIV,
-            dwork, lda,
-            &info );
-
-    cudaThreadSynchronize();
 }
-#endif /* defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
-
 /*
  * Codelet definition
  */
-#if defined(HAVE_MAGMA_GETRF_INCPIV_GPU) && ( defined(CHAMELEON_USE_MAGMA) )
-CODELETS(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func, cl_zgetrf_incpiv_cuda_func, 0)
-#else
 CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
index ab9d87acebd7cfafb43e597c38b3f220e5ac2133..8ca85664bf98c89e205b848e96642a6a0d859175 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
@@ -123,35 +123,9 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo);
     CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
 }
-
-/*
- * Codelet GPU
- */
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *dA;
-    int lda;
-    int iinfo;
-
-    int info = 0;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo);
-    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    CUDA_zgetrf_nopiv( m, n, dA, lda, &info );
-    cudaThreadSynchronize();
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func, cl_zgetrf_nopiv_cuda_func, 0)
-#else
 CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index f578695ff8599d3a0804d44f38d75ecf7125771a..7f23c56821d3f39b14fda2e2828da23cb118f17f 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -77,30 +77,9 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA);
     CORE_zlauum(uplo, N, A, LDA);
 }
-
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_zlauum_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum uplo;
-    int info = 0;
-    int N;
-    cuDoubleComplex *A;
-    int LDA;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA);
-    CUDA_zlauum( uplo, N, A, LDA, &info);
-    cudaThreadSynchronize();
-    return;
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(zlauum, 1, cl_zlauum_cpu_func, cl_zlauum_cuda_func, 0)
-#else
 CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index 857cf3356edda7507b788e90ce56682cdd1f4608..686814274424ef112cf8bb90ca6c44333e8af9f3 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -83,48 +83,10 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo);
     CORE_zpotrf(uplo, n, A, lda, &info);
 }
-
-#ifdef CHAMELEON_USE_MAGMA
-static void cl_zpotrf_cuda_func(void *descr[], void *cl_arg)
-{
-    cudaStream_t stream[2], currentt_stream;
-    MORSE_enum uplo;
-    int n;
-    cuDoubleComplex *A;
-    /* cuDoubleComplex *hA; */
-    int lda;
-    int iinfo;
-    int info = 0;
-
-    A  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo);
-
-    /* /\* */
-    /*  *  hwork => nb*nb */
-    /*  *\/ */
-    /* hA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); */
-
-/*      stream[0] = starpu_cuda_get_local_stream(); */
-/*      if ( cudaStreamCreate( stream+1 ) != CUDA_SUCCESS ){ */
-/*          fprintf(stderr, "Error while creating stream in codelet_zpotrf\n"); */
-/*          exit(-1); */
-/*      } */
-
-    CUDA_zpotrf( uplo, n, A, lda, &info);
-
-    cudaThreadSynchronize();
-/*      cudaStreamDestroy( stream[1] ); */
-
-    return;
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined CHAMELEON_USE_MAGMA
-CODELETS(zpotrf, 1, cl_zpotrf_cpu_func, cl_zpotrf_cuda_func, 0)
-#else
 CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func)
-#endif
+
diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c
index a1fae08bb05f5eb552ea5a2492b543009b294a71..6d1c3ee08cefee4ab99bd72354fb4c1cb6684ef9 100644
--- a/runtime/starpu/codelets/codelet_zssssm.c
+++ b/runtime/starpu/codelets/codelet_zssssm.c
@@ -174,56 +174,10 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV);
     CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
 }
-
-#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
-static void cl_zssssm_cuda_func(void *descr[], void *cl_arg)
-{
-    int m1;
-    int n1;
-    int m2;
-    int n2;
-    int k;
-    int ib;
-    cuDoubleComplex *dA1;
-    int lda1;
-    cuDoubleComplex *dA2;
-    int lda2;
-    cuDoubleComplex *dL1;
-    int ldl1;
-    cuDoubleComplex *dL2;
-    int ldl2;
-    int *IPIV;
-    int info;
-
-    starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV);
-
-    dA1  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    dA2  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    dL1  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    dL2  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
-
-    if ( ldl1 >= 2*ib ) {
-        /* dL1 stores L and invL and the kernel is just using the inverted part */
-        dL1 += ib;
-    }
-
-    CUDA_zssssm(
-        MagmaColMajor, m1, n1, m2, n2, k, ib,
-        dA1, lda1, dA2, lda2,
-        dL1, ldl1, dL2, ldl2,
-        IPIV, &info);
-
-    cudaThreadSynchronize();
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if (defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU))
-CODELETS(zssssm, 4, cl_zssssm_cpu_func, cl_zssssm_cuda_func, 0)
-#else
 CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func)
-#endif
 
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index 1f619cd097dbd1a75dbdc1f6f6b9d606ef3020be..73d1a439e4c6baca68e701d2d96645fceca82b80 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -85,32 +85,9 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo);
     CORE_ztrtri(uplo, diag, N, A, LDA, &info);
 }
-
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_ztrtri_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum uplo;
-    MORSE_enum diag;
-    int N;
-    cuDoubleComplex *A;
-    int LDA;
-    int iinfo;
-    int info = 0;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo);
-    CUDA_ztrtri( uplo, diag, N, A, LDA, &info);
-    cudaThreadSynchronize();
-    return;
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(ztrtri, 1, cl_ztrtri_cpu_func, cl_ztrtri_cuda_func, 0)
-#else
 CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c
index 101feebfe80cede6ddb1f759a5883da3a495ce79..56c278094de88b9bc90214642700e670b3312cd9 100644
--- a/runtime/starpu/codelets/codelet_ztslqt.c
+++ b/runtime/starpu/codelets/codelet_ztslqt.c
@@ -174,54 +174,9 @@ static void cl_ztslqt_cpu_func(void *descr[], void *cl_arg)
     WORK = TAU + chameleon_max( m, n );
     CORE_ztslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
 }
-
-#if defined(CHAMELEON_USE_MAGMA) && 0
-static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W;
-    cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W;
-    int lda1, lda2, ldt;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
-
-    /* Gather pointer to data on device */
-    d_A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    d_A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    d_T  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    d_W  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* 2*ib*m + ib*ib */
-    d_D  = d_W + 2*ib*m;
-
-    /* scratch data on host */
-    /* ib*n + ib*n + max(m,n) + ib*m + ib*ib */
-    h_A2  = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-    h_T   = h_A2  + ib*n;
-    h_TAU = h_T   + ib*n;
-    h_W   = h_TAU + chameleon_max(m,n);
-    h_D   = h_W   + ib*m;
-
-    RUNTIME_getStream(stream);
-    CUDA_ztslqt(
-            m, n, ib,
-            d_A1, lda1, d_A2, lda2,
-            h_A2, ib,
-            d_T, ldt, h_T, ib,
-            d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream);
-
-    cudaThreadSynchronize();
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if (defined(CHAMELEON_USE_MAGMA)) && 0
-CODELETS(ztslqt, 4, cl_ztslqt_cpu_func, cl_ztslqt_cuda_func, 0)
-#else
 CODELETS_CPU(ztslqt, 4, cl_ztslqt_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c
index 4c5d03fe2b4eeb60fa30dd14f9a250f3bfae080c..969da0611698b3c9c72b23eaa04d869b3411f105 100644
--- a/runtime/starpu/codelets/codelet_ztsqrt.c
+++ b/runtime/starpu/codelets/codelet_ztsqrt.c
@@ -165,53 +165,9 @@ static void cl_ztsqrt_cpu_func(void *descr[], void *cl_arg)
     WORK = TAU + chameleon_max( m, n );
     CORE_ztsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
 }
-
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W;
-    cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W;
-    int lda1, lda2, ldt;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
-
-    /* Gather pointer to data on device */
-    d_A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    d_A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    d_T  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    d_W  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* 2*ib*n + ib*ib */
-    d_D  = d_W + 2*ib*n;
-
-    /* scratch data on host */
-    /* m*ib + ib*ib + max(m,n) + ib*n + ib*ib */
-    h_A2  = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-    h_T   = h_A2  + m*ib;
-    h_TAU = h_T   + ib*ib;
-    h_W   = h_TAU + chameleon_max(m,n);
-    h_D   = h_W   + ib*n;
-
-    RUNTIME_getStream(stream);
-    CUDA_ztsqrt(
-            m, n, ib,
-            d_A1, lda1, d_A2, lda2,
-            h_A2, lda2,
-            d_T, ldt, h_T, ib,
-            d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream);
-    cudaThreadSynchronize();
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(ztsqrt, 4, cl_ztsqrt_cpu_func, cl_ztsqrt_cuda_func, 0)
-#else
 CODELETS_CPU(ztsqrt, 4, cl_ztsqrt_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c
index eae5108ee9c28ba1335d524f6b6a5dd9a65c060f..59f7428077125914ce178b330265057444137afb 100644
--- a/runtime/starpu/codelets/codelet_ztstrf.c
+++ b/runtime/starpu/codelets/codelet_ztstrf.c
@@ -176,102 +176,11 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, &IPIV, &d_work, &ldwork, &check_info, &iinfo);
 
     CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
-
-#if defined(CHAMELEON_USE_MAGMA)
-    /*
-     * L stores the following if enough place:
-     *      L1     L2    L3     ...
-     *      L1^-1  L2^-1 L3^-1  ...
-     */
-    /* Compute L-1 in lower rectangle of L */
-    if ( ldl >= 2*ib )
-    {
-        int i, sb;
-        for (i=0; i<n; i+=ib) {
-            sb = chameleon_min( ib, n-i );
-            CORE_zlacpy(MorseUpperLower, sb, sb, L+(i*ldl), ldl, L+(i*ldl)+ib, ldl );
-
-            CORE_ztrtri( MorseLower, MorseUnit, sb, L+(i*ldl)+ib, ldl, &info );
-            if (info != 0 ) {
-                fprintf(stderr, "ERROR, trtri returned with info = %d\n", info);
-            }
-        }
-    }
-#endif
-}
-
-
-/*
- * Codelet GPU
- */
-/* TODO/WARNING: tstrf is not working on GPU for now */
-#if defined(CHAMELEON_USE_MAGMA) && 0
-static void cl_ztstrf_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *d_work;
-    int m;
-    int n;
-    int ib;
-    int nb;
-    cuDoubleComplex *hU, *dU;
-    int ldu;
-    cuDoubleComplex *hA, *dA;
-    int lda;
-    cuDoubleComplex *hL, *dL;
-    int ldl;
-    int *ipiv;
-    cuDoubleComplex *hw2, *hw, *dw;
-    int ldwork;
-    MORSE_bool check_info;
-    int iinfo;
-    int info;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, &ipiv,
-			       &d_work, &ldwork, &check_info, &iinfo);
-
-    dU = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-    /*
-     *  hwork => 2*nb*(2*ib+2nb)
-     *  dwork => 2*ib*nb
-     */
-    hw2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
-    dw  = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(d_work);
-
-    hU = hw2;
-    hA = hU + ldu * nb;
-    hL = hA + lda * nb;
-    hw = hL + ldl * nb;
-
-    /* Download first panel from A and U */
-    cublasGetMatrix( nb, n,  sizeof(cuDoubleComplex), dU, ldu, hU, ldu );
-    cublasGetMatrix( m,  ib, sizeof(cuDoubleComplex), dA, lda, hA, lda );
-
-    /* Initialize L to 0 */
-    memset(hL, 0, ldl*nb*sizeof(cuDoubleComplex));
-
-    CUDA_ztstrf(
-            MagmaColMajor, m, n, ib, nb,
-            hU, ldu, dU, ldu,
-            hA, lda, dA, lda,
-            hL, ldl, dL, ldl,
-            ipiv,
-            hw, ldwork, dw, lda,
-            &info );
-
-    cudaThreadSynchronize();
 }
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if (defined(CHAMELEON_USE_MAGMA) && 0)
-CODELETS(ztstrf, 4, cl_ztstrf_cpu_func, cl_ztstrf_cuda_func, 0)
-#else
 CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func)
-#endif