From 4819e83d68f79d2bfb87affd52ea2a18ad58c43f Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 2 May 2017 18:02:25 +0200
Subject: [PATCH 1/8] Remove magma codelets for StarPU

---
 runtime/starpu/codelets/codelet_zgelqt.c      | 48 ----------
 runtime/starpu/codelets/codelet_zgeqrt.c      | 47 ----------
 runtime/starpu/codelets/codelet_zgessm.c      | 33 -------
 .../starpu/codelets/codelet_zgetrf_incpiv.c   | 89 ------------------
 .../starpu/codelets/codelet_zgetrf_nopiv.c    | 26 ------
 runtime/starpu/codelets/codelet_zlauum.c      | 21 -----
 runtime/starpu/codelets/codelet_zpotrf.c      | 40 +-------
 runtime/starpu/codelets/codelet_zssssm.c      | 46 ----------
 runtime/starpu/codelets/codelet_ztrtri.c      | 23 -----
 runtime/starpu/codelets/codelet_ztslqt.c      | 45 ---------
 runtime/starpu/codelets/codelet_ztsqrt.c      | 44 ---------
 runtime/starpu/codelets/codelet_ztstrf.c      | 91 -------------------
 12 files changed, 1 insertion(+), 552 deletions(-)

diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 223559778..e215ce20f 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -155,55 +155,7 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
-#if defined(CHAMELEON_USE_MAGMA)
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU;
-    cuDoubleComplex *d_A, *d_T, *d_D, *d_W;
-    int lda, ldt;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
-
-    /* Gather pointer to data on device */
-    d_A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    d_T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    d_W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); /* m*ib + ib*ib*/
-    d_D = d_W + m*ib;
-
-    /* scratch data on host */
-    /* ib*n + ib*ib + max(m,n) + ib*ib + ib*ib */
-    h_A = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-
-    /* Gather pointer to scratch data on host */
-    h_T   = h_A   + ib*n;
-    h_TAU = h_T   + ib*ib;
-    h_W   = h_TAU + chameleon_max(m,n);
-    h_D   = h_W   + ib*ib;
-
-    RUNTIME_getStream(stream);
-
-    CUDA_zgelqt(
-            m, n, ib,
-            d_A, lda, h_A, ib,
-            d_T, ldt, h_T, ib,
-            d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream );
-
-    cudaThreadSynchronize();
-}
-#endif /* defined(CHAMELEON_USE_MAGMA) */
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(zgelqt, 3, cl_zgelqt_cpu_func, cl_zgelqt_cuda_func, 0)
-#else
 CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index 868c10c16..595bafa1b 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -154,56 +154,9 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
     WORK = TAU + chameleon_max( m, n );
     CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
 }
-
-
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU;
-    cuDoubleComplex *d_A, *d_T, *d_D, *d_W;
-    int lda, ldt;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
-
-    /* Gather pointer to data on device */
-    d_A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    d_T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    d_W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); /* ib*n + ib * ib*/
-    d_D = d_W + ib*n;
-
-    /* scratch data on host */
-    /* m*ib + ib*ib + max(m,n) + ib*ib + ib*ib */
-    h_A = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-
-    /* Gather pointer to scratch data on host */
-    h_T   = h_A   + m*ib;
-    h_TAU = h_T   + ib*ib;
-    h_W   = h_TAU + chameleon_max(m,n);
-    h_D   = h_W   + ib*ib;
-
-    RUNTIME_getStream(stream);
-
-    CUDA_zgeqrt(
-            m, n, ib,
-            d_A, lda, h_A, m,
-            d_T, ldt, h_T, ib,
-            d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream);
-
-    cudaThreadSynchronize();
-}
-#endif /* defined(CHAMELEON_USE_MAGMA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(zgeqrt, 3, cl_zgeqrt_cpu_func, cl_zgeqrt_cuda_func, 0)
-#else
 CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c
index 547111784..1b5a72908 100644
--- a/runtime/starpu/codelets/codelet_zgessm.c
+++ b/runtime/starpu/codelets/codelet_zgessm.c
@@ -137,42 +137,9 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda);
     CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
 }
-
-#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
-static void cl_zgessm_cuda_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int k;
-    int ib;
-    int *IPIV;
-    cuDoubleComplex *dL, *dD, *dA;
-    int lddl, lddd, ldda;
-    int info = 0;
-    /*
-     *  hwork => nb*nb
-     */
-    dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    dD = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &lddl, &lddd, &ldda);
-
-    CUDA_zgessm(
-            MagmaColMajor, m, n, k, ib,
-            IPIV, dL, lddl, dD, lddd, dA, ldda, &info );
-
-    cudaThreadSynchronize();
-
-    return;
-}
-#endif /* defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if (defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU))
-CODELETS(zgessm, 3, cl_zgessm_cpu_func, cl_zgessm_cuda_func, 0)
-#else
 CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
index ad3475cb7..bc91e972a 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
@@ -147,99 +147,10 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work);
     CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info);
-
-#if defined(CHAMELEON_USE_MAGMA)
-    {
-        MORSE_Complex64_t *L = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-        /*
-         * L stores:
-         *      L1     L2    L3     ...
-         *      L1^-1  L2^-1 L3^-1  ...
-         */
-        /* Compute L-1 in lower rectangle of L */
-        if ( ldl >= 2*ib )
-        {
-            int i, sb;
-
-            L += ib;
-            for (i=0; i<n; i+=ib) {
-                sb = chameleon_min( ib, n-i );
-                CORE_zlacpy(MorseUpperLower, sb, sb, A+(i*lda+i), lda, L+(i*ldl), ldl );
-
-                CORE_ztrtri( MorseLower, MorseUnit, sb, L+(i*ldl), ldl, &info );
-                if (info != 0 ) {
-                    fprintf(stderr, "ERROR, trtri returned with info = %d\n", info);
-                }
-            }
-        }
-    }
-#endif
-}
-
-
-/*
- * Codelet GPU
- */
-#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
-static void cl_zgetrf_incpiv_cuda_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *hA, *dA;
-    cuDoubleComplex *hL, *dL;
-    cuDoubleComplex *dwork;
-    MORSE_starpu_ws_t *h_work;
-    int lda, ldl;
-    int *IPIV;
-    MORSE_bool check_info;
-    int iinfo;
-    int info;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work);
-
-    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    /*
-     * hwork => at least (IB+NB)*IB contains all hA and hL
-     * dwork => at least IB*NB
-     */
-    hA    = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-    dwork = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-    hL = hA + lda*ib;
-
-    /* Initialize L to 0 */
-    memset(hL, 0, ib*ib*sizeof(cuDoubleComplex));
-
-    if ( ldl >= 2*ib ) {
-      /* Let's compute the inverses in the bottom part of L */
-      dL += ib;
-    } else {
-      /* We prefer to stick with TRSM */
-      dL = NULL;
-      hL = NULL;
-    }
-
-    CUDA_zgetrf_incpiv(
-            MagmaColMajor, m, n, ib,
-            hA, lda, dA, lda,
-            hL, ib,  dL, ldl,
-            IPIV,
-            dwork, lda,
-            &info );
-
-    cudaThreadSynchronize();
 }
-#endif /* defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
-
 /*
  * Codelet definition
  */
-#if defined(HAVE_MAGMA_GETRF_INCPIV_GPU) && ( defined(CHAMELEON_USE_MAGMA) )
-CODELETS(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func, cl_zgetrf_incpiv_cuda_func, 0)
-#else
 CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
index ab9d87ace..8ca85664b 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
@@ -123,35 +123,9 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo);
     CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
 }
-
-/*
- * Codelet GPU
- */
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *dA;
-    int lda;
-    int iinfo;
-
-    int info = 0;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo);
-    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    CUDA_zgetrf_nopiv( m, n, dA, lda, &info );
-    cudaThreadSynchronize();
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func, cl_zgetrf_nopiv_cuda_func, 0)
-#else
 CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index f578695ff..7f23c5682 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -77,30 +77,9 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA);
     CORE_zlauum(uplo, N, A, LDA);
 }
-
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_zlauum_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum uplo;
-    int info = 0;
-    int N;
-    cuDoubleComplex *A;
-    int LDA;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA);
-    CUDA_zlauum( uplo, N, A, LDA, &info);
-    cudaThreadSynchronize();
-    return;
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(zlauum, 1, cl_zlauum_cpu_func, cl_zlauum_cuda_func, 0)
-#else
 CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index 857cf3356..686814274 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -83,48 +83,10 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo);
     CORE_zpotrf(uplo, n, A, lda, &info);
 }
-
-#ifdef CHAMELEON_USE_MAGMA
-static void cl_zpotrf_cuda_func(void *descr[], void *cl_arg)
-{
-    cudaStream_t stream[2], currentt_stream;
-    MORSE_enum uplo;
-    int n;
-    cuDoubleComplex *A;
-    /* cuDoubleComplex *hA; */
-    int lda;
-    int iinfo;
-    int info = 0;
-
-    A  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo);
-
-    /* /\* */
-    /*  *  hwork => nb*nb */
-    /*  *\/ */
-    /* hA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); */
-
-/*      stream[0] = starpu_cuda_get_local_stream(); */
-/*      if ( cudaStreamCreate( stream+1 ) != CUDA_SUCCESS ){ */
-/*          fprintf(stderr, "Error while creating stream in codelet_zpotrf\n"); */
-/*          exit(-1); */
-/*      } */
-
-    CUDA_zpotrf( uplo, n, A, lda, &info);
-
-    cudaThreadSynchronize();
-/*      cudaStreamDestroy( stream[1] ); */
-
-    return;
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined CHAMELEON_USE_MAGMA
-CODELETS(zpotrf, 1, cl_zpotrf_cpu_func, cl_zpotrf_cuda_func, 0)
-#else
 CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func)
-#endif
+
diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c
index a1fae08bb..6d1c3ee08 100644
--- a/runtime/starpu/codelets/codelet_zssssm.c
+++ b/runtime/starpu/codelets/codelet_zssssm.c
@@ -174,56 +174,10 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV);
     CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
 }
-
-#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
-static void cl_zssssm_cuda_func(void *descr[], void *cl_arg)
-{
-    int m1;
-    int n1;
-    int m2;
-    int n2;
-    int k;
-    int ib;
-    cuDoubleComplex *dA1;
-    int lda1;
-    cuDoubleComplex *dA2;
-    int lda2;
-    cuDoubleComplex *dL1;
-    int ldl1;
-    cuDoubleComplex *dL2;
-    int ldl2;
-    int *IPIV;
-    int info;
-
-    starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV);
-
-    dA1  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    dA2  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    dL1  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    dL2  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
-
-    if ( ldl1 >= 2*ib ) {
-        /* dL1 stores L and invL and the kernel is just using the inverted part */
-        dL1 += ib;
-    }
-
-    CUDA_zssssm(
-        MagmaColMajor, m1, n1, m2, n2, k, ib,
-        dA1, lda1, dA2, lda2,
-        dL1, ldl1, dL2, ldl2,
-        IPIV, &info);
-
-    cudaThreadSynchronize();
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if (defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU))
-CODELETS(zssssm, 4, cl_zssssm_cpu_func, cl_zssssm_cuda_func, 0)
-#else
 CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func)
-#endif
 
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index 1f619cd09..73d1a439e 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -85,32 +85,9 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo);
     CORE_ztrtri(uplo, diag, N, A, LDA, &info);
 }
-
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_ztrtri_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_enum uplo;
-    MORSE_enum diag;
-    int N;
-    cuDoubleComplex *A;
-    int LDA;
-    int iinfo;
-    int info = 0;
-
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo);
-    CUDA_ztrtri( uplo, diag, N, A, LDA, &info);
-    cudaThreadSynchronize();
-    return;
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(ztrtri, 1, cl_ztrtri_cpu_func, cl_ztrtri_cuda_func, 0)
-#else
 CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c
index 101feebfe..56c278094 100644
--- a/runtime/starpu/codelets/codelet_ztslqt.c
+++ b/runtime/starpu/codelets/codelet_ztslqt.c
@@ -174,54 +174,9 @@ static void cl_ztslqt_cpu_func(void *descr[], void *cl_arg)
     WORK = TAU + chameleon_max( m, n );
     CORE_ztslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
 }
-
-#if defined(CHAMELEON_USE_MAGMA) && 0
-static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W;
-    cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W;
-    int lda1, lda2, ldt;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
-
-    /* Gather pointer to data on device */
-    d_A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    d_A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    d_T  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    d_W  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* 2*ib*m + ib*ib */
-    d_D  = d_W + 2*ib*m;
-
-    /* scratch data on host */
-    /* ib*n + ib*n + max(m,n) + ib*m + ib*ib */
-    h_A2  = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-    h_T   = h_A2  + ib*n;
-    h_TAU = h_T   + ib*n;
-    h_W   = h_TAU + chameleon_max(m,n);
-    h_D   = h_W   + ib*m;
-
-    RUNTIME_getStream(stream);
-    CUDA_ztslqt(
-            m, n, ib,
-            d_A1, lda1, d_A2, lda2,
-            h_A2, ib,
-            d_T, ldt, h_T, ib,
-            d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream);
-
-    cudaThreadSynchronize();
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if (defined(CHAMELEON_USE_MAGMA)) && 0
-CODELETS(ztslqt, 4, cl_ztslqt_cpu_func, cl_ztslqt_cuda_func, 0)
-#else
 CODELETS_CPU(ztslqt, 4, cl_ztslqt_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c
index 4c5d03fe2..969da0611 100644
--- a/runtime/starpu/codelets/codelet_ztsqrt.c
+++ b/runtime/starpu/codelets/codelet_ztsqrt.c
@@ -165,53 +165,9 @@ static void cl_ztsqrt_cpu_func(void *descr[], void *cl_arg)
     WORK = TAU + chameleon_max( m, n );
     CORE_ztsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
 }
-
-#if defined(CHAMELEON_USE_MAGMA)
-static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W;
-    cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W;
-    int lda1, lda2, ldt;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work);
-
-    /* Gather pointer to data on device */
-    d_A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    d_A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    d_T  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    d_W  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* 2*ib*n + ib*ib */
-    d_D  = d_W + 2*ib*n;
-
-    /* scratch data on host */
-    /* m*ib + ib*ib + max(m,n) + ib*n + ib*ib */
-    h_A2  = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work);
-    h_T   = h_A2  + m*ib;
-    h_TAU = h_T   + ib*ib;
-    h_W   = h_TAU + chameleon_max(m,n);
-    h_D   = h_W   + ib*n;
-
-    RUNTIME_getStream(stream);
-    CUDA_ztsqrt(
-            m, n, ib,
-            d_A1, lda1, d_A2, lda2,
-            h_A2, lda2,
-            d_T, ldt, h_T, ib,
-            d_D, h_D, ib, h_TAU,
-            h_W, d_W, stream);
-    cudaThreadSynchronize();
-}
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_MAGMA)
-CODELETS(ztsqrt, 4, cl_ztsqrt_cpu_func, cl_ztsqrt_cuda_func, 0)
-#else
 CODELETS_CPU(ztsqrt, 4, cl_ztsqrt_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c
index eae5108ee..59f742807 100644
--- a/runtime/starpu/codelets/codelet_ztstrf.c
+++ b/runtime/starpu/codelets/codelet_ztstrf.c
@@ -176,102 +176,11 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, &IPIV, &d_work, &ldwork, &check_info, &iinfo);
 
     CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
-
-#if defined(CHAMELEON_USE_MAGMA)
-    /*
-     * L stores the following if enough place:
-     *      L1     L2    L3     ...
-     *      L1^-1  L2^-1 L3^-1  ...
-     */
-    /* Compute L-1 in lower rectangle of L */
-    if ( ldl >= 2*ib )
-    {
-        int i, sb;
-        for (i=0; i<n; i+=ib) {
-            sb = chameleon_min( ib, n-i );
-            CORE_zlacpy(MorseUpperLower, sb, sb, L+(i*ldl), ldl, L+(i*ldl)+ib, ldl );
-
-            CORE_ztrtri( MorseLower, MorseUnit, sb, L+(i*ldl)+ib, ldl, &info );
-            if (info != 0 ) {
-                fprintf(stderr, "ERROR, trtri returned with info = %d\n", info);
-            }
-        }
-    }
-#endif
-}
-
-
-/*
- * Codelet GPU
- */
-/* TODO/WARNING: tstrf is not working on GPU for now */
-#if defined(CHAMELEON_USE_MAGMA) && 0
-static void cl_ztstrf_cuda_func(void *descr[], void *cl_arg)
-{
-    MORSE_starpu_ws_t *d_work;
-    int m;
-    int n;
-    int ib;
-    int nb;
-    cuDoubleComplex *hU, *dU;
-    int ldu;
-    cuDoubleComplex *hA, *dA;
-    int lda;
-    cuDoubleComplex *hL, *dL;
-    int ldl;
-    int *ipiv;
-    cuDoubleComplex *hw2, *hw, *dw;
-    int ldwork;
-    MORSE_bool check_info;
-    int iinfo;
-    int info;
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, &ipiv,
-			       &d_work, &ldwork, &check_info, &iinfo);
-
-    dU = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-    /*
-     *  hwork => 2*nb*(2*ib+2nb)
-     *  dwork => 2*ib*nb
-     */
-    hw2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
-    dw  = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(d_work);
-
-    hU = hw2;
-    hA = hU + ldu * nb;
-    hL = hA + lda * nb;
-    hw = hL + ldl * nb;
-
-    /* Download first panel from A and U */
-    cublasGetMatrix( nb, n,  sizeof(cuDoubleComplex), dU, ldu, hU, ldu );
-    cublasGetMatrix( m,  ib, sizeof(cuDoubleComplex), dA, lda, hA, lda );
-
-    /* Initialize L to 0 */
-    memset(hL, 0, ldl*nb*sizeof(cuDoubleComplex));
-
-    CUDA_ztstrf(
-            MagmaColMajor, m, n, ib, nb,
-            hU, ldu, dU, ldu,
-            hA, lda, dA, lda,
-            hL, ldl, dL, ldl,
-            ipiv,
-            hw, ldwork, dw, lda,
-            &info );
-
-    cudaThreadSynchronize();
 }
-#endif
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if (defined(CHAMELEON_USE_MAGMA) && 0)
-CODELETS(ztstrf, 4, cl_ztstrf_cpu_func, cl_ztstrf_cuda_func, 0)
-#else
 CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func)
-#endif
 
-- 
GitLab


From b878db6323c5e2d563134d8657d9f001619a2a00 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 2 May 2017 18:03:06 +0200
Subject: [PATCH 2/8] Remove MAGMA from StarPU CMakeLists.txt

---
 runtime/starpu/CMakeLists.txt | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index 7b2973667..f2109a8ce 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -29,17 +29,6 @@
 
 cmake_minimum_required(VERSION 2.8)
 
-# check if magma_dgetrf_incpiv_gpu is accessible in libmagma and activate it in chameleon
-if ( CBLAS_FOUND AND LAPACKE_FOUND AND LAPACK_FOUND AND CUDA_FOUND AND CUDA_CUBLAS_LIBRARIES AND MAGMA_FOUND )
-    set(CMAKE_REQUIRED_LIBRARIES "${CBLAS_LIBRARIES};${LAPACKE_LIBRARIES};${LAPACK_SEQ_LIBRARIES};${CUDA_LIBRARIES};${CUDA_CUBLAS_LIBRARIES};${MAGMA_LIBRARIES};${COREBLAS_LIBRARIES}")
-    unset(MAGMA_DGETRF_INCPIV_GPU_FOUND CACHE)
-    check_function_exists(magma_dgetrf_incpiv_gpu MAGMA_DGETRF_INCPIV_GPU_FOUND)
-    if ( MAGMA_DGETRF_INCPIV_GPU_FOUND )
-        message(STATUS "Set HAVE_MAGMA_GETRF_INCPIV_GPU")
-        set(HAVE_MAGMA_GETRF_INCPIV_GPU 1)
-    endif()
-endif()
-
 # Generate headers for all possible precisions
 # --------------------------------------------
 set(RUNTIME_HDRS_GENERATED "")
-- 
GitLab


From 5f6f225af8ac6d249c0fed92c05a65c5e9122e47 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 2 May 2017 18:05:43 +0200
Subject: [PATCH 3/8] Remove MAGMA references from runtime directory

---
 runtime/parsec/CMakeLists.txt         | 11 -----------
 runtime/quark/CMakeLists.txt          |  6 +++---
 runtime/quark/include/morse_quark.h   |  8 ++------
 runtime/starpu/include/morse_starpu.h |  6 ------
 4 files changed, 5 insertions(+), 26 deletions(-)

diff --git a/runtime/parsec/CMakeLists.txt b/runtime/parsec/CMakeLists.txt
index f57259215..41ee03942 100644
--- a/runtime/parsec/CMakeLists.txt
+++ b/runtime/parsec/CMakeLists.txt
@@ -29,17 +29,6 @@
 
 cmake_minimum_required(VERSION 2.8)
 
-# check if magma_dgetrf_incpiv_gpu is accessible in libmagma and activate it in chameleon
-if ( CBLAS_FOUND AND LAPACKE_FOUND AND LAPACK_FOUND AND CUDA_FOUND AND CUDA_CUBLAS_LIBRARIES AND MAGMA_FOUND )
-    set(CMAKE_REQUIRED_LIBRARIES "${CBLAS_LIBRARIES};${LAPACKE_LIBRARIES};${LAPACK_SEQ_LIBRARIES};${CUDA_LIBRARIES};${CUDA_CUBLAS_LIBRARIES};${MAGMA_LIBRARIES};${COREBLAS_LIBRARIES}")
-    unset(MAGMA_DGETRF_INCPIV_GPU_FOUND CACHE)
-    check_function_exists(magma_dgetrf_incpiv_gpu MAGMA_DGETRF_INCPIV_GPU_FOUND)
-    if ( MAGMA_DGETRF_INCPIV_GPU_FOUND )
-        message(STATUS "Set HAVE_MAGMA_GETRF_INCPIV_GPU")
-        set(HAVE_MAGMA_GETRF_INCPIV_GPU 1)
-    endif()
-endif()
-
 # Generate headers for all possible precisions
 # --------------------------------------------
 set(RUNTIME_HDRS_GENERATED "")
diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt
index fa7952a15..e1cff5885 100644
--- a/runtime/quark/CMakeLists.txt
+++ b/runtime/quark/CMakeLists.txt
@@ -29,7 +29,7 @@
 
 cmake_minimum_required(VERSION 2.8)
 
-# Generate the magma headers for all possible precisions
+# Generate the quark headers for all possible precisions
 # ------------------------------------------------------
 set(RUNTIME_HDRS_GENERATED "")
 set(ZHDR
@@ -105,9 +105,9 @@ set_property(TARGET chameleon_quark PROPERTY LINKER_LANGUAGE Fortran)
 set_property(TARGET chameleon_quark PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
 
 target_link_libraries(chameleon_quark coreblas ${QUARK_LIBRARIES_DEP})
-if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA)
+if(CHAMELEON_USE_CUDA)
   target_link_libraries(chameleon_quark cudablas)
-endif(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA)
+endif(CHAMELEON_USE_CUDA)
 
 add_dependencies(chameleon_quark
   chameleon_include
diff --git a/runtime/quark/include/morse_quark.h b/runtime/quark/include/morse_quark.h
index e841a251e..2c4dc590b 100644
--- a/runtime/quark/include/morse_quark.h
+++ b/runtime/quark/include/morse_quark.h
@@ -12,8 +12,8 @@
  *
  * @file morse_quark.h
  *
- *  MAGMA codelets kernel
- *  MAGMA is a software package provided by Univ. of Tennessee,
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
  *  Univ. of California Berkeley and Univ. of Colorado Denver,
  *  and INRIA Bordeaux Sud-Ouest
  *
@@ -23,10 +23,6 @@
  * @date 2011-06-01
  *
  **/
-
-/*******************************************************************************
- *  MAGMA facilities of interest to both src and magmablas directories
- **/
 #ifndef _MORSE_QUARK_H_
 #define _MORSE_QUARK_H_
 
diff --git a/runtime/starpu/include/morse_starpu.h b/runtime/starpu/include/morse_starpu.h
index f0649aeaf..095b6886b 100644
--- a/runtime/starpu/include/morse_starpu.h
+++ b/runtime/starpu/include/morse_starpu.h
@@ -23,12 +23,6 @@
  * @date 2011-06-01
  *
  **/
-
-/******************************************************************************/
-
-/*
- *  MORSE facilities of interest to both src and magmablas directories
- **/
 #ifndef _MORSE_STARPU_H_
 #define _MORSE_STARPU_H_
 
-- 
GitLab


From 47a66a2f5acb0f54718ea8bfd9c69e412cf66938 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 2 May 2017 18:08:31 +0200
Subject: [PATCH 4/8] Remove MAGMA from compute directory

---
 compute/pzgelqf.c        | 17 -----------------
 compute/pzgelqfrh.c      | 17 -----------------
 compute/pzgeqrf.c        | 17 -----------------
 compute/pzgeqrfrh.c      | 17 -----------------
 compute/pzgetrf_incpiv.c |  6 +-----
 compute/pzhetrd_he2hb.c  | 17 -----------------
 compute/pzpotrf.c        | 10 ----------
 compute/pzpotrimm.c      | 14 --------------
 compute/pztpqrt.c        | 15 ---------------
 9 files changed, 1 insertion(+), 129 deletions(-)

diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c
index 712ded8af..36cf19e1b 100644
--- a/compute/pzgelqf.c
+++ b/compute/pzgelqf.c
@@ -86,23 +86,6 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
     ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
 #endif
 
-#if defined(CHAMELEON_USE_MAGMA)
-    /* Worker space
-     *
-     * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     */
-    ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) );
-
-    /* Host space
-     *
-     * zgelqt =     ib * A->nb + 3 * ib * ib + A->nb
-     * ztslqt = 3 * ib * A->nb +     ib * ib + A->nb
-     */
-    ws_host = chameleon_max( ws_host,     ib * A->nb + 3 * ib * ib + A->nb );
-    ws_host = chameleon_max( ws_host, 3 * ib * A->nb +     ib * ib + A->nb );
-#endif
-
     ws_worker *= sizeof(MORSE_Complex64_t);
     ws_host   *= sizeof(MORSE_Complex64_t);
 
diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c
index b24b6db89..57b1d613c 100644
--- a/compute/pzgelqfrh.c
+++ b/compute/pzgelqfrh.c
@@ -87,23 +87,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
 #endif
 
-#if defined(CHAMELEON_USE_MAGMA)
-    /* Worker space
-     *
-     * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     */
-    ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) );
-
-    /* Host space
-     *
-     * zgelqt =     ib * A->nb + 3 * ib * ib + A->nb
-     * ztslqt = 3 * ib * A->nb +     ib * ib + A->nb
-     */
-    ws_host = chameleon_max( ws_host,     ib * A->nb + 3 * ib * ib + A->nb );
-    ws_host = chameleon_max( ws_host, 3 * ib * A->nb +     ib * ib + A->nb );
-#endif
-
     ws_worker *= sizeof(MORSE_Complex64_t);
     ws_host   *= sizeof(MORSE_Complex64_t);
 
diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c
index a54aa8853..6e061f0ed 100644
--- a/compute/pzgeqrf.c
+++ b/compute/pzgeqrf.c
@@ -81,23 +81,6 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
     ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
 #endif
 
-#if defined(CHAMELEON_USE_MAGMA)
-    /* Worker space
-     *
-     * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     */
-    ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) );
-
-    /* Host space
-     *
-     * zgeqrt = ib * (A->mb+3*ib) + A->mb )
-     * ztsqrt = 2 * ib * (A->nb+ib) + A->nb
-     */
-    ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb );
-    ws_host = chameleon_max( ws_host,  2 * ib * (A->nb + ib) + A->nb );
-#endif
-
     ws_worker *= sizeof(MORSE_Complex64_t);
     ws_host   *= sizeof(MORSE_Complex64_t);
 
diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c
index a5e828c8b..c5b026358 100644
--- a/compute/pzgeqrfrh.c
+++ b/compute/pzgeqrfrh.c
@@ -85,23 +85,6 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
 #endif
 
-#if defined(CHAMELEON_USE_MAGMA)
-    /* Worker space
-     *
-     * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     */
-    ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) );
-
-    /* Host space
-     *
-     * zgeqrt = ib * (A->nb+3*ib) + A->nb )
-     * ztsqrt = 2 * ib * (A->nb+ib) + A->nb
-     */
-    ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb );
-    ws_host = chameleon_max( ws_host,  2 * ib * (A->nb + ib) + A->nb );
-#endif
-
     ws_worker *= sizeof(MORSE_Complex64_t);
     ws_host   *= sizeof(MORSE_Complex64_t);
 
diff --git a/compute/pzgetrf_incpiv.c b/compute/pzgetrf_incpiv.c
index f960873e6..5da79407c 100644
--- a/compute/pzgetrf_incpiv.c
+++ b/compute/pzgetrf_incpiv.c
@@ -64,13 +64,9 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
     RUNTIME_options_init(&options, morse, sequence, request);
 
     ib = MORSE_IB;
-#if defined(CHAMELEON_USE_MAGMA)
-    h_work_size  = sizeof(MORSE_Complex64_t)*( 2*ib + 2*L->nb )*2*A->mb;
-    d_work_size  = sizeof(MORSE_Complex64_t)*(   ib           )*2*A->mb;
-#else
     h_work_size  = sizeof(MORSE_Complex64_t)*( ib*L->nb );
     d_work_size  = 0;
-#endif
+
     RUNTIME_options_ws_alloc( &options, h_work_size, d_work_size );
 
     /* necessary to avoid dependencies between tasks regarding the diag tile */
diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c
index 497b574d4..2173faa09 100644
--- a/compute/pzhetrd_he2hb.c
+++ b/compute/pzhetrd_he2hb.c
@@ -85,23 +85,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
     ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
 #endif
 
-#if defined(CHAMELEON_USE_MAGMA)
-    /* Worker space
-     *
-     * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     */
-    ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) );
-
-    /* Host space
-     *
-     * zgeqrt = ib * (A->mb+3*ib) + A->mb )
-     * ztsqrt = 2 * ib * (A->nb+ib) + A->nb
-     */
-    ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb );
-    ws_host = chameleon_max( ws_host,  2 * ib * (A->nb + ib) + A->nb );
-#endif
-
     ws_worker *= sizeof(MORSE_Complex64_t);
     ws_host   *= sizeof(MORSE_Complex64_t);
 
diff --git a/compute/pzpotrf.c b/compute/pzpotrf.c
index 44b7d25f5..bc648c74c 100644
--- a/compute/pzpotrf.c
+++ b/compute/pzpotrf.c
@@ -54,16 +54,6 @@ void morse_pzpotrf(MORSE_enum uplo, MORSE_desc_t *A,
         return;
     RUNTIME_options_init(&options, morse, sequence, request);
 
-#ifdef CHAMELEON_USE_MAGMA
-    if (0) /* Disable the workspace as long as it is is not used (See StarPU codelet) */
-    {
-        int nb = MORSE_IB; /* Approximate nb for simulation */
-#if !defined(CHAMELEON_SIMULATION)
-        nb = magma_get_zpotrf_nb(A->nb);
-#endif
-        ws_host = sizeof(MORSE_Complex64_t)*nb*nb;
-    }
-#endif
     RUNTIME_options_ws_alloc( &options, 0, ws_host );
 
     /*
diff --git a/compute/pzpotrimm.c b/compute/pzpotrimm.c
index d1b7323c1..a9d779b30 100644
--- a/compute/pzpotrimm.c
+++ b/compute/pzpotrimm.c
@@ -57,17 +57,6 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de
         return;
     RUNTIME_options_init(&options, morse, sequence, request);
 
-#ifdef CHAMELEON_USE_MAGMA
-    {
-#if !defined(CHAMELEON_SIMULATION)
-        int nb = magma_get_zpotrf_nb(A->nb);
-#else
-        int nb = A->nb;
-#endif
-        RUNTIME_options_ws_alloc( &options, nb*nb, 0 );
-    }
-#endif
-
     /*
      *  MorseLower
      */
@@ -489,8 +478,5 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de
         }
     }
 
-#ifdef CHAMELEON_USE_MAGMA
-    RUNTIME_options_ws_free(&options);
-#endif
     RUNTIME_options_finalize(&options, morse);
 }
diff --git a/compute/pztpqrt.c b/compute/pztpqrt.c
index 1807857a2..d20512803 100644
--- a/compute/pztpqrt.c
+++ b/compute/pztpqrt.c
@@ -70,21 +70,6 @@ void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T,
     ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
 #endif
 
-#if defined(CHAMELEON_USE_MAGMA)
-    /* Worker space
-     *
-     * ztpqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     */
-    ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) );
-
-    /* Host space
-     *
-     * ztpqrt = 2 * ib * (A->nb+ib) + A->nb
-     */
-    ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb );
-    ws_host = chameleon_max( ws_host,  2 * ib * (A->nb + ib) + A->nb );
-#endif
-
     ws_worker *= sizeof(MORSE_Complex64_t);
     ws_host   *= sizeof(MORSE_Complex64_t);
 
-- 
GitLab


From b0d52a35de628c43180a46757d3662a1e8c3468c Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 2 May 2017 18:16:44 +0200
Subject: [PATCH 5/8] Remove MAGMA from cudablas directory

---
 cudablas/compute/CMakeLists.txt | 36 ++++++++++++++++-----------------
 cudablas/include/cudablas.h     |  4 ----
 cudablas/include/cudablas_z.h   | 15 --------------
 3 files changed, 17 insertions(+), 38 deletions(-)

diff --git a/cudablas/compute/CMakeLists.txt b/cudablas/compute/CMakeLists.txt
index 20e012939..5ab3c65c4 100644
--- a/cudablas/compute/CMakeLists.txt
+++ b/cudablas/compute/CMakeLists.txt
@@ -55,22 +55,23 @@ if( CHAMELEON_USE_CUBLAS_V2 )
     )
 endif( CHAMELEON_USE_CUBLAS_V2 )
 
-if( CHAMELEON_USE_MAGMA )
-  set(ZSRC
-    ${ZSRC}
-    cuda_zgelqt.c
-    cuda_zgeqrt.c
-    cuda_zgessm.c
-    cuda_zgetrf.c
-    cuda_zlauum.c
-    cuda_zpotrf.c
-    cuda_zssssm.c
-    cuda_ztrtri.c
-    cuda_ztslqt.c
-    cuda_ztsqrt.c
-    cuda_ztstrf.c
-    )
-endif()
+# Former MAGMA files that are no longer supported
+# if( CHAMELEON_USE_MAGMA )
+#   set(ZSRC
+#     ${ZSRC}
+#     cuda_zgelqt.c
+#     cuda_zgeqrt.c
+#     cuda_zgessm.c
+#     cuda_zgetrf.c
+#     cuda_zlauum.c
+#     cuda_zpotrf.c
+#     cuda_zssssm.c
+#     cuda_ztrtri.c
+#     cuda_ztslqt.c
+#     cuda_ztsqrt.c
+#     cuda_ztstrf.c
+#     )
+# endif()
 
 precisions_rules_py(
   CUDABLAS_SRCS_GENERATED "${ZSRC}"
@@ -94,9 +95,6 @@ add_dependencies(cudablas cudablas_include)
 set_property(TARGET cudablas PROPERTY LINKER_LANGUAGE Fortran)
 
 target_link_libraries(cudablas coreblas ${CUDA_LIBRARIES})
-if(CHAMELEON_USE_MAGMA)
-  target_link_libraries(cudablas ${MAGMA_LIBRARIES})
-endif(CHAMELEON_USE_MAGMA)
 
 # installation
 # ------------
diff --git a/cudablas/include/cudablas.h b/cudablas/include/cudablas.h
index b181fc833..6732a0b5b 100644
--- a/cudablas/include/cudablas.h
+++ b/cudablas/include/cudablas.h
@@ -65,10 +65,6 @@
 
 #endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
 
-#if defined(CHAMELEON_USE_MAGMA)
-#include <magma.h>
-#endif
-
 /** ****************************************************************************
  * MORSE types and constants
  **/
diff --git a/cudablas/include/cudablas_z.h b/cudablas/include/cudablas_z.h
index 7d9ccbbf7..0413fd541 100644
--- a/cudablas/include/cudablas_z.h
+++ b/cudablas/include/cudablas_z.h
@@ -56,21 +56,6 @@ int CUDA_zttmqr( MORSE_enum side, MORSE_enum trans, int M1, int N1, int M2, int
 int CUDA_zunmlqt(MORSE_enum side, MORSE_enum trans, int M, int N, int K, int IB, const cuDoubleComplex *A,    int LDA, const cuDoubleComplex *T,    int LDT, cuDoubleComplex *C,    int LDC, cuDoubleComplex *WORK, int LDWORK, CUBLAS_STREAM_PARAM );
 int CUDA_zunmqrt(MORSE_enum side, MORSE_enum trans, int M, int N, int K, int IB, const cuDoubleComplex *A,    int LDA, const cuDoubleComplex *T,    int LDT, cuDoubleComplex *C,    int LDC, cuDoubleComplex *WORK, int LDWORK, CUBLAS_STREAM_PARAM );
 
-#if defined(CHAMELEON_USE_MAGMA)
-int CUDA_zgelqt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *v, magma_int_t ldv, magmaDoubleComplex *dt, magma_int_t lddt, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *dd, magmaDoubleComplex *d, magma_int_t ldd, magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, CUBLAS_STREAM_PARAM );
-int CUDA_zgeqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *v, magma_int_t ldv, magmaDoubleComplex *dt, magma_int_t lddt, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *dd, magmaDoubleComplex *d, magma_int_t ldd, magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, CUBLAS_STREAM_PARAM );
-int CUDA_zgessm( char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, cuDoubleComplex *dL1, magma_int_t lddl1, cuDoubleComplex *dL, magma_int_t lddl, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info );
-int CUDA_zgetrf_incpiv( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info );
-int CUDA_zgetrf_nopiv( magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info );
-int CUDA_zlauum( char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info );
-int CUDA_zpotrf( magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info );
-int CUDA_zssssm( magma_storev_t storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, magmaDoubleComplex *dA1, magma_int_t ldda1, magmaDoubleComplex *dA2, magma_int_t ldda2, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info );
-int CUDA_ztrtri( magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info );
-int CUDA_ztslqt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da1, magma_int_t ldda1, magmaDoubleComplex *da2, magma_int_t ldda2, magmaDoubleComplex *a2, magma_int_t lda2, magmaDoubleComplex *dt, magma_int_t lddt, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *dd, magmaDoubleComplex *d, magma_int_t ldd, magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, CUBLAS_STREAM_PARAM );
-int CUDA_ztsqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da1, magma_int_t ldda1, magmaDoubleComplex *da2, magma_int_t ldda2, magmaDoubleComplex *a2, magma_int_t lda2, magmaDoubleComplex *dt, magma_int_t lddt, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *dd, magmaDoubleComplex *d, magma_int_t ldd, magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, CUBLAS_STREAM_PARAM );
-int CUDA_ztstrf( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, cuDoubleComplex *hU, magma_int_t ldhu, cuDoubleComplex *dU, magma_int_t lddu, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *hwork, magma_int_t ldhwork, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info );
-#endif
-
 #ifdef __cplusplus
 }
 #endif
-- 
GitLab


From 73b87ca5d4e394ce489b364b87055932696bd9d7 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 2 May 2017 18:16:54 +0200
Subject: [PATCH 6/8] Remove magma from configuration

---
 CMakeLists.txt    | 64 ++---------------------------------------------
 CTestConfig.cmake |  5 ++--
 2 files changed, 5 insertions(+), 64 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0802349b5..6105696a8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -190,14 +190,6 @@ if (CHAMELEON_ENABLE_CUDA AND NOT CHAMELEON_USE_CUDA)
     message("-- ${BoldGreen}CHAMELEON_USE_CUDA is set to OFF, turn it ON to use CUDA (unsupported by Quark)${ColourReset}")
 endif()
 
-# Enable MAGMA advanced kernels if CUDA is enabled
-cmake_dependent_option(CHAMELEON_USE_MAGMA
-                    "Enable MAGMA Cuda kernels" OFF
-                    "CHAMELEON_USE_CUDA" OFF)
-if (CHAMELEON_USE_CUDA AND NOT CHAMELEON_USE_MAGMA)
-    message("-- ${BoldGreen}CHAMELEON_USE_MAGMA is set to OFF, turn it ON to use MAGMA (only with StarPU)${ColourReset}")
-endif()
-
 # Enable FXT if StarPU
 option(CHAMELEON_ENABLE_TRACING "Enable tracing support" OFF)
 if (NOT CHAMELEON_ENABLE_TRACING)
@@ -469,43 +461,6 @@ if(NOT CHAMELEON_SIMULATION)
             #endif()
         endif (CUDA_FOUND)
 
-        # CHAMELEON depends on MAGMA gpu kernels
-        # call our cmake module to test (in cmake_modules)
-        # change this call position if not appropriated
-        #----------------------------------------------
-        if ( CUDA_FOUND AND CHAMELEON_USE_MAGMA )
-            set(CHAMELEON_MAGMA_VERSION "1.4" CACHE STRING "oldest MAGMA version desired")
-            find_package(MAGMA ${CHAMELEON_MAGMA_VERSION})
-            if ( MAGMA_FOUND )
-                message("-- ${Blue}Add definition CHAMELEON_USE_MAGMA"
-                " - Use GPU kernels from MAGMA${ColourReset}")
-                set(CHAMELEON_USE_MAGMA 1)
-                if(MAGMA_INCLUDE_DIRS)
-                    include_directories(${MAGMA_INCLUDE_DIRS})
-                endif()
-                if(MAGMA_LIBRARY_DIRS)
-                    # the RPATH to be used when installing
-                    list(APPEND CMAKE_INSTALL_RPATH "${MAGMA_LIBRARY_DIRS}")
-                endif()
-                if (MAGMA_LIBRARIES)
-                    list(INSERT EXTRA_LIBRARIES_CUDA 0 ${MAGMA_LIBRARIES})
-                endif()
-            else( MAGMA_FOUND )
-                if(MORSE_VERBOSE_FIND_PACKAGE)
-                    if (NOT MAGMA_magma.h_DIRS)
-                        Print_Find_Header_Status(magma magma.h)
-                    endif ()
-                    if (NOT MAGMA_magma_LIBRARY)
-                        Print_Find_Library_Status(magma libmagma)
-                    endif ()
-                else()
-                    message(WARNING "MAGMA library has not been found and MORSE_VERBOSE_FIND_PACKAGE is set to OFF."
-                    " Try to activate MORSE_VERBOSE_FIND_PACKAGE option (-DMORSE_VERBOSE_FIND_PACKAGE=ON) to get some hints for the detection")
-                endif()
-                message(FATAL_ERROR "MAGMA library is required but has not been found")
-            endif( MAGMA_FOUND )
-        endif()
-
         list(APPEND EXTRA_LIBRARIES ${EXTRA_LIBRARIES_CUDA})
 
     endif(CHAMELEON_USE_CUDA)
@@ -559,29 +514,17 @@ else (NOT CHAMELEON_SIMULATION)
     # Guard against mis-used simulation mode
     if(NOT DEFINED CHAMELEON_USE_CUDA)
         message(FATAL_ERROR "${BoldBlue}"
-        "In simulation mode CHAMELEON_USE_CUDA and CHAMELEON_USE_MAGMA should be set to"
+        "In simulation mode CHAMELEON_USE_CUDA should be set to"
         "   ensure that the user is aware of the version to be used. If Chameleon's"
         "   kernels are available for NVIDIA CUDA GPUs and if the according"
         "   perfmodels are available in STARPU_HOME then use CHAMELEON_USE_CUDA=ON"
-        "   else set CHAMELEON_USE_CUDA=OFF. The same idea is applicable with MAGMA."
+        "   else set CHAMELEON_USE_CUDA=OFF."
         "${ColourReset}")
     endif()
-    if(NOT DEFINED CHAMELEON_USE_MAGMA)
-        message(WARNING "${BoldBlue}"
-        "In simulation mode CHAMELEON_USE_CUDA and CHAMELEON_USE_MAGMA should be set to"
-        "   ensure that the user is aware of the version to be used. If Chameleon's"
-        "   MAGMA kernels are available for NVIDIA CUDA GPUs and if the according"
-        "   perfmodels are available in STARPU_HOME then use CHAMELEON_USE_MAGMA=ON"
-        "   else set CHAMELEON_USE_MAGMA=OFF.${ColourReset}")
-    endif()
 
     # Add CUDA definition if required
     if (CHAMELEON_USE_CUDA)
         set(CHAMELEON_USE_CUDA 1)
-        # Add MAGMA definition if required
-        if (CHAMELEON_USE_MAGMA)
-            set(CHAMELEON_USE_MAGMA 1)
-        endif()
     endif()
 
     if (NOT CHAMELEON_SCHED_STARPU)
@@ -1034,9 +977,6 @@ if(NOT CHAMELEON_SIMULATION)
     if(CHAMELEON_USE_CUDA)
         link_directories(${CUDA_LIBRARY_DIRS})
     endif()
-    if(CHAMELEON_USE_MAGMA)
-        link_directories(${MAGMA_LIBRARY_DIRS})
-    endif()
 endif()
 
 # Save extra dependencies (all required links)
diff --git a/CTestConfig.cmake b/CTestConfig.cmake
index bb95c68b2..c2999e9d7 100644
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -4,17 +4,18 @@
 ## # The following are required to uses Dart and the Cdash dashboard
 ##   ENABLE_TESTING()
 ##   INCLUDE(CTest)
-set(CTEST_PROJECT_NAME "Morse-Magma")
+set(CTEST_PROJECT_NAME "Chameleon")
 set(CTEST_NIGHTLY_START_TIME "00:00:00 GMT")
 
 set(CTEST_DROP_METHOD "http")
 set(CTEST_DROP_SITE "cdash.inria.fr")
+# Shouldn't we change that to Chameleon ?
 set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Morse-Magma")
 set(CTEST_DROP_SITE_CDASH TRUE)
 
 #--------------------------------------------------------------------
 # BUILDNAME variable construction
-# This variable will be used to set the build name which will appear 
+# This variable will be used to set the build name which will appear
 # on the Morse-Magma dashboard http://cdash.inria.fr/CDash/
 #--------------------------------------------------------------------
 # Start with the short system name, e.g. "Linux", "FreeBSD" or "Windows"
-- 
GitLab


From 1f7476fe70d57b916e3d13e42b98187a1c4240bc Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 2 May 2017 18:20:34 +0200
Subject: [PATCH 7/8] Remove magma from control directory

---
 control/common.h    | 12 ++----------
 control/config.h.in |  3 ---
 control/control.c   |  6 ------
 control/workspace.c |  4 ----
 4 files changed, 2 insertions(+), 23 deletions(-)

diff --git a/control/common.h b/control/common.h
index ab1a98867..194359d3b 100644
--- a/control/common.h
+++ b/control/common.h
@@ -69,17 +69,9 @@
 #include <mpi.h>
 #endif
 
-
-/** ****************************************************************************
- * Linear Algebra headers
- **/
-#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION)
-#include <magma.h>
-#endif
-
 /** ****************************************************************************
- *  Line to avoid conflict with magma, because, we don't know why
- *  but lapacke provide a wrong interface of lapack in fortran
+ *  Line to avoid conflict with other linear algebra libraries, because, we
+ *  don't know why, but lapacke provides a wrong interface of lapack in fortran
  **/
 #ifndef LAPACK_NAME
 #define LAPACK_NAME(a, b) lapackef77_##a
diff --git a/control/config.h.in b/control/config.h.in
index 6691c4c16..48c3ce5aa 100644
--- a/control/config.h.in
+++ b/control/config.h.in
@@ -42,7 +42,4 @@
 #cmakedefine HAVE_STARPU_MPI_COMM_RANK
 #cmakedefine HAVE_STARPU_MPI_CACHED_RECEIVE
 
-/* MAGMA functions */
-#cmakedefine HAVE_MAGMA_GETRF_INCPIV_GPU
-
 #endif  /* CONFIG_H_HAS_BEEN_INCLUDED */
diff --git a/control/control.c b/control/control.c
index 9fa06b781..7bb54781d 100644
--- a/control/control.c
+++ b/control/control.c
@@ -110,9 +110,6 @@ int MORSE_InitPar(int ncpus, int ncudas, int nthreads_per_worker)
       }
     }
 #  endif
-#endif
-#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION)
-    magma_init();
 #endif
     RUNTIME_init_scheduler( morse, ncpus, ncudas, nthreads_per_worker );
     return MORSE_SUCCESS;
@@ -142,9 +139,6 @@ int MORSE_Finalize(void)
     RUNTIME_barrier(morse);
 #  endif
     RUNTIME_finalize_scheduler( morse );
-#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION)
-    magma_finalize();
-#endif
 
 #if defined(CHAMELEON_USE_MPI)
     if (!morse->mpi_outer_init)
diff --git a/control/workspace.c b/control/workspace.c
index 097a26711..efcb49f8c 100644
--- a/control/workspace.c
+++ b/control/workspace.c
@@ -133,10 +133,6 @@ int morse_alloc_ipiv(int M, int N, MORSE_enum func, int type, MORSE_desc_t **des
     NB = MORSE_NB;
     IB = MORSE_IB;
 
-#if defined(CHAMELEON_USE_MAGMA)
-/*     IB *= 2; */
-#endif
-
     NT = (N%NB==0) ? (N/NB) : ((N/NB)+1);
     MT = (M%NB==0) ? (M/NB) : ((M/NB)+1);
 
-- 
GitLab


From 8a4cf35363b5841aec393b24bd6ddc4e634b88b4 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 2 May 2017 18:45:24 +0200
Subject: [PATCH 8/8] Remove all remaining references to MAGMA

---
 .gitlab-ci.yml                           |  2 +-
 CTestConfig.cmake                        |  4 +-
 ChangeLog                                |  3 +-
 INSTALL.txt                              | 65 +++++++++++-------------
 cmake_modules/GenPkgConfig.cmake         | 16 ++----
 cmake_modules/PrintOpts.cmake            |  3 +-
 cmake_modules/morse_cmake                |  2 +-
 docs/texinfo/chapters/configuration.texi | 10 +---
 docs/texinfo/chapters/installing.texi    | 23 +--------
 docs/texinfo/chapters/introduction.texi  | 10 ++--
 example/basic_zposv/CMakeLists.txt       |  5 +-
 example/lapack_to_morse/CMakeLists.txt   | 11 ++--
 example/lapack_to_morse/step1.c          |  2 +-
 example/link_chameleon/CMakeLists.txt    |  2 +-
 example/link_chameleon/link_chameleon.c  |  4 +-
 example/out_of_core/CMakeLists.txt       |  5 +-
 include/chameleon_config.h.in            |  1 -
 include/morse_fortran.h                  | 37 +++++++-------
 include/morse_kernels.h                  |  4 +-
 include/morse_struct.h                   |  2 +-
 testing/CMakeLists.txt                   |  8 +--
 testing/lin/CMakeLists.txt               |  2 +-
 timing/CMakeLists.txt                    |  8 +--
 timing/time_zgetrf_incpiv_tile.c         |  7 ---
 timing/time_zgetrs_incpiv_tile.c         |  7 ---
 25 files changed, 86 insertions(+), 157 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a0a031858..6cd75ba0e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -19,7 +19,7 @@ starpu-simgrid:
     - cd build
     - . $HOME/spack/share/spack/setup-env.sh
     - spack load hwloc && spack load fxt && spack load simgrid && spack load starpu
-    - cmake .. -DCHAMELEON_SIMULATION=ON -DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MAGMA=OFF -DCHAMELEON_USE_MPI=OFF
+    - cmake .. -DCHAMELEON_SIMULATION=ON -DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=OFF
     - make -j2
     - ctest -V
 
diff --git a/CTestConfig.cmake b/CTestConfig.cmake
index c2999e9d7..ddb6f3e0c 100644
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -16,7 +16,7 @@ set(CTEST_DROP_SITE_CDASH TRUE)
 #--------------------------------------------------------------------
 # BUILDNAME variable construction
 # This variable will be used to set the build name which will appear
-# on the Morse-Magma dashboard http://cdash.inria.fr/CDash/
+# on the Chameleon dashboard http://cdash.inria.fr/CDash/
 #--------------------------------------------------------------------
 # Start with the short system name, e.g. "Linux", "FreeBSD" or "Windows"
 if(NOT BUILDNAME)
@@ -39,7 +39,7 @@ if(NOT BUILDNAME)
         set(BUILDNAME "${BUILDNAME}-${CMAKE_BUILD_TYPE}")
     endif(CMAKE_BUILD_TYPE)
 
-    # Specific options of Magma-Morse
+    # Specific options of Chameleon
     if(CHAMELEON_SCHED_QUARK)
         set(BUILDNAME "${BUILDNAME}-Quark")
     endif(CHAMELEON_SCHED_QUARK)
diff --git a/ChangeLog b/ChangeLog
index dcdeab285..f58b24a5b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
-chameleon-???
+chameleon-1.0.0
 ------------------------------------------------------------------------
 
+- MAGMA kernels are no longer supported in Chameleon
 - Add SVD/EVD drivers based on parallel first stage, and sequential LAPACK second stage and solve
 - Add First stage algorithm fo r the SVD/EVD solvers
 - add timing drivers time_zpotrs_tile and time_zgeqrs_tile
diff --git a/INSTALL.txt b/INSTALL.txt
index 9470afb3c..a35db9bbd 100644
--- a/INSTALL.txt
+++ b/INSTALL.txt
@@ -5,18 +5,18 @@
 This is a brief discussion about CHAMELEON usage.
 For more information, please read the document users_guide.
 
-Compilation of CHAMELEON libraries and executables are done with CMake 
+Compilation of CHAMELEON libraries and executables are done with CMake
 (http://www.cmake.org/). This version has been tested with CMake 2.8.8.
 
-Usage: three steps are required to compile and install CHAMELEON 
+Usage: three steps are required to compile and install CHAMELEON
 
-1) configure : 
-> cmake path/to/chameleon -DOPTION1= -DOPTION2= ... 
+1) configure :
+> cmake path/to/chameleon -DOPTION1= -DOPTION2= ...
 see the "Options" section to get list of options
 see the "Dependencies detection" for details about libraries detection
 
-2) build : 
-> make 
+2) build :
+> make
 do not hesitate to use -j[ncores] option to speedup the compilation
 
 3) install (optional) :
@@ -37,7 +37,7 @@ lapack  : netlib, openblas, eigen or intel mkl
 lapacke : netlib, openblas or intel mkl
 tmg     : netlib, openblas or intel mkl
 
-runtime : quark (http://icl.cs.utk.edu/quark/) or 
+runtime : quark (http://icl.cs.utk.edu/quark/) or
           starpu (http://runtime.bordeaux.inria.fr/StarPU/)
 hwloc   : (http://www.open-mpi.org/projects/hwloc/)
 
@@ -46,7 +46,6 @@ optional libraries
 
 cuda   : (https://developer.nvidia.com/cuda-downloads)
 cublas : comes with cuda (http://docs.nvidia.com/cuda/cublas/)
-magma  : (http://icl.cs.utk.edu/magma/) (version 1.6.2 recommended)
 mpi    : openmpi (http://www.open-mpi.org/)
 FxT    : linux package (libfxt) or releases here http://download.savannah.gnu.org/releases/fkt/
 
@@ -59,19 +58,19 @@ Please look at the distrib/ directory which gives some hints for the installatio
 * Dependencies detection *
 -------------------------------------------
 
-You have different choices to detect dependencies on your system, either by 
-setting some environment variables containing paths to the libs and headers or 
+You have different choices to detect dependencies on your system, either by
+setting some environment variables containing paths to the libs and headers or
 by specifying them directly at cmake configure. Different cases :
 
-1) detection of dependencies through environment variables: 
+1) detection of dependencies through environment variables:
     - LD_LIBRARY_PATH should contain the list of paths where to find the libraries:
         * export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:install/path/to/your/lib
     - INCLUDE should contain the list of paths where to find the header files of libraries
         * export INCLUDE=$INCLUDE:install/path/to/your/headers
 
 2) detection with user's given paths:
-    - you can specify the path at cmake configure by invoking 
-      cmake path/to/your/CMakeLists.txt -DLIB_DIR=path/to/your/lib 
+    - you can specify the path at cmake configure by invoking
+      cmake path/to/your/CMakeLists.txt -DLIB_DIR=path/to/your/lib
       where LIB stands for the name of the lib to look for
         * example: cmake path/to/your/CMakeLists.txt -DSTARPU_DIR=path/to/starpudir \
                                                      -DCBLAS_DIR= ...
@@ -86,16 +85,15 @@ by specifying them directly at cmake configure. Different cases :
 * Options *
 -------------------------------------------
 
-You can optionally activate some options at cmake configure (like CUDA, MPI, ...) 
+You can optionally activate some options at cmake configure (like CUDA, MPI, ...)
 invoking cmake path/to/your/CMakeLists.txt -DOPTION1= -DOPTION2= ...
 example: cmake /home/toto/chameleon/ -DCMAKE_BUILD_TYPE=Debug \
                                       -DCMAKE_INSTALL_PREFIX=/home/toto/install/ \
-                                      -DCHAMELEON_USE_CUDA=ON \ 
-                                      -DCHAMELEON_USE_MAGMA=ON \
+                                      -DCHAMELEON_USE_CUDA=ON \
                                       -DCHAMELEON_USE_MPI=ON \
-                                      -DBLA_VENDOR=Intel10_64lp \ 
+                                      -DBLA_VENDOR=Intel10_64lp \
                                       -DSTARPU_DIR=/home/toto/install/starpu-1.1/build/include/starpu/1.1/ \
-                                      -DCHAMELEON_ENABLE_TRACING=ON 
+                                      -DCHAMELEON_ENABLE_TRACING=ON
 
 You can get the full list of options with -L[A][H] options of cmake command
 example: cmake -LH /home/toto/chameleon/
@@ -109,7 +107,7 @@ Some options (non-exhaustive list) :
 Basic CMake:
 ------------
 CMAKE_BUILD_TYPE=Debug|Release
-CMAKE_INSTALL_PREFIX=path/to/your/install/dir (where headers and libraries will be copied 
+CMAKE_INSTALL_PREFIX=path/to/your/install/dir (where headers and libraries will be copied
                                                when invoking make install)
 
 Related to specific modules (find_package):
@@ -120,44 +118,43 @@ STARPU_DIR=...
 STARPU_INCDIR=...
 STARPU_LIBDIR=...
 # same idea can be used for some packages, replace STARPU by one of these:
-BLAS - CBLAS - FXT - HWLOC - LAPACK - LAPACKE - MAGMA - QUARK - TMG
+BLAS - CBLAS - FXT - HWLOC - LAPACK - LAPACKE - QUARK - TMG
 
 CHAMELEON specific:
 --------------------
 CHAMELEON_USE_MPI=ON|OFF (default OFF)
-CHAMELEON_USE_CUDA=ON|OFF (default OFF) 
-CHAMELEON_USE_MAGMA=ON|OFF (default OFF)
+CHAMELEON_USE_CUDA=ON|OFF (default OFF)
 CHAMELEON_ENABLE_TRACING=ON|OFF (default OFF)
 CHAMELEON_SCHED_STARPU=ON|OFF (default ON)
 CHAMELEON_SCHED_QUARK=ON|OFF (default OFF)
 CHAMELEON_SIMULATION=ON|OFF (default OFF)
 
 Libraries detected with an official cmake module (see module files in CMAKE_ROOT/Modules/):
-CUDA - MPI - Threads 
+CUDA - MPI - Threads
 
 Libraries detected with our cmake modules (see module files in cmake_modules/morse/find/ directory of CHAMELEON sources):
-BLAS - CBLAS - FXT - HWLOC - LAPACK - LAPACKE - MAGMA - QUARK - STARPU - TMG 
+BLAS - CBLAS - FXT - HWLOC - LAPACK - LAPACKE - QUARK - STARPU - TMG
 
 
 -------------------------------------------
 * Use FxT profiling through StarPU *
 -------------------------------------------
 
-StarPU can generate its own trace log files by compiling it with the --with-fxt 
-option at the configure step (you can have to specify the directory where you 
-installed FxT by giving --with-fxt=... instead of --with-fxt alone). 
-By doing so, traces are generated after each execution of a program which uses 
+StarPU can generate its own trace log files by compiling it with the --with-fxt
+option at the configure step (you can have to specify the directory where you
+installed FxT by giving --with-fxt=... instead of --with-fxt alone).
+By doing so, traces are generated after each execution of a program which uses
 StarPU in the directory pointed by the STARPU_FXT_PREFIX environment variable.
 example: export STARPU_FXT_PREFIX=/home/toto/fxt_files/
 
-When executing a ./timing/... CHAMELEON program, if it has been enabled 
-(StarPU compiled with FxT and -DCHAMELEON_ENABLE_TRACING=ON), you can give the option --trace 
+When executing a ./timing/... CHAMELEON program, if it has been enabled
+(StarPU compiled with FxT and -DCHAMELEON_ENABLE_TRACING=ON), you can give the option --trace
 to tell the program to generate trace log files.
 
-Finally, to generate the trace file which can be opened with Vite program 
-(http://vite.gforge.inria.fr/), you have to use the starpu_fxt_tool tool of StarPU. 
-This tool should be in $STARPU_INSTALL_REPOSITORY/bin. 
-You can use it to generate the trace file like this: 
+Finally, to generate the trace file which can be opened with Vite program
+(http://vite.gforge.inria.fr/), you have to use the starpu_fxt_tool tool of StarPU.
+This tool should be in $STARPU_INSTALL_REPOSITORY/bin.
+You can use it to generate the trace file like this:
 > path/to/your/install/starpu/bin/starpu_fxt_tool -i prof_filename
 There is one file per mpi processus (prof_filename_0, prof_filename_1 ...).
 To generate a trace of mpi programs you can call it like this:
diff --git a/cmake_modules/GenPkgConfig.cmake b/cmake_modules/GenPkgConfig.cmake
index 16749d220..44b431e14 100644
--- a/cmake_modules/GenPkgConfig.cmake
+++ b/cmake_modules/GenPkgConfig.cmake
@@ -16,7 +16,7 @@
 #     Univ. of Tennessee,
 #     King Abdullah Univesity of Science and Technology
 #     Univ. of California Berkeley,
-#     Univ. of Colorado Denver. 
+#     Univ. of Colorado Denver.
 #
 #  @version 0.9.1
 #  @author Cedric Castagnede
@@ -57,7 +57,7 @@ ENDMACRO(CONVERT_LIBSTYLE_TO_PKGCONFIG)
 ###
 #
 # CLEAN_LIB_LIST: clean libraries lists to follow the pkg-config style
-#                 used in GENERATE_PKGCONFIG_FILE 
+#                 used in GENERATE_PKGCONFIG_FILE
 #
 ###
 MACRO(CLEAN_LIB_LIST _package)
@@ -96,7 +96,7 @@ MACRO(GENERATE_PKGCONFIG_FILE)
     set(CHAMELEON_PKGCONFIG_REQUIRED "")
     set(COREBLAS_PKGCONFIG_REQUIRED  "")
     set(CUDABLAS_PKGCONFIG_REQUIRED  "")
-    
+
     # A list of private packages required by this package but not exposed to
     # applications
     set(CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE "")
@@ -127,12 +127,6 @@ MACRO(GENERATE_PKGCONFIG_FILE)
         list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "coreblas")
 
         if(CHAMELEON_USE_CUDA)
-            if(CHAMELEON_USE_MAGMA)
-                list(APPEND CUDABLAS_PKGCONFIG_REQUIRED_PRIVATE  magma)
-                list(APPEND CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE magma)
-            else()
-                
-            endif()
             list(APPEND CUDABLAS_PKGCONFIG_LIBS_PRIVATE ${CUDA_LIBRARIES})
             list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "cudablas")
         endif()
@@ -142,7 +136,7 @@ MACRO(GENERATE_PKGCONFIG_FILE)
         if(CHAMELEON_USE_CUDA)
             list(APPEND CHAMELEON_PKGCONFIG_LIBS -lcudablas)
         endif()
-        list(APPEND CHAMELEON_PKGCONFIG_LIBS 
+        list(APPEND CHAMELEON_PKGCONFIG_LIBS
         -lcoreblas
         ${EXTRA_LIBRARIES}
         )
@@ -158,7 +152,7 @@ MACRO(GENERATE_PKGCONFIG_FILE)
     if(CHAMELEON_USE_CUDA)
         CLEAN_LIB_LIST(CUDABLAS)
     endif()
-    
+
     # Create .pc file
     # ---------------
     SET(_output_chameleon_file "${CMAKE_BINARY_DIR}/chameleon.pc")
diff --git a/cmake_modules/PrintOpts.cmake b/cmake_modules/PrintOpts.cmake
index 1e7c9208c..f21bce9af 100644
--- a/cmake_modules/PrintOpts.cmake
+++ b/cmake_modules/PrintOpts.cmake
@@ -16,7 +16,7 @@
 #     Univ. of Tennessee,
 #     King Abdullah Univesity of Science and Technology
 #     Univ. of California Berkeley,
-#     Univ. of Colorado Denver. 
+#     Univ. of Colorado Denver.
 #
 #  @version 0.9.0
 #  @author Florent Pruvost
@@ -53,7 +53,6 @@ set(dep_message "${dep_message}"
 "       Kernels specific\n"
 "       BLAS ................: ${BLAS_VENDOR_FOUND}\n"
 "       LAPACK...............: ${LAPACK_VENDOR_FOUND}\n"
-"       MAGMA ...............: ${CHAMELEON_USE_MAGMA}\n"
 "\n"
 "       Trace ...............: ${CHAMELEON_ENABLE_TRACING}\n"
 "       Simulation mode .....: ${CHAMELEON_SIMULATION}\n"
diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake
index 00aae15be..0a974775b 160000
--- a/cmake_modules/morse_cmake
+++ b/cmake_modules/morse_cmake
@@ -1 +1 @@
-Subproject commit 00aae15be5fd9d9662c9d4bf35453cafe9d66f81
+Subproject commit 0a974775b7192227b887dcba77515305083c1f13
diff --git a/docs/texinfo/chapters/configuration.texi b/docs/texinfo/chapters/configuration.texi
index 883e82f38..92aec0af6 100644
--- a/docs/texinfo/chapters/configuration.texi
+++ b/docs/texinfo/chapters/configuration.texi
@@ -57,7 +57,6 @@ Example of configuration using the command line
 cmake ~/chameleon/ -DCMAKE_BUILD_TYPE=Debug          \
                    -DCMAKE_INSTALL_PREFIX=~/install  \
                    -DCHAMELEON_USE_CUDA=ON           \
-                   -DCHAMELEON_USE_MAGMA=ON          \
                    -DCHAMELEON_USE_MPI=ON            \
                    -DBLA_VENDOR=Intel10_64lp         \
                    -DSTARPU_DIR=~/install/starpu-1.1 \
@@ -120,9 +119,6 @@ to link with QUARK library (runtime system)
 to link with CUDA runtime (implementation paradigm for accelerated codes on
 GPUs) and cuBLAS library (optimized BLAS kernels on GPUs), can only be used with
 StarPU
-@item @option{-DCHAMELEON_USE_MAGMA}=@option{trigger} (default: @code{OFF})
-to link with MAGMA library (kernels on GPUs, higher level than cuBLAS), can only
-be used with StarPU
 
 @item @option{-DCHAMELEON_USE_MPI}=@option{trigger} (default: @code{OFF})
 to link with MPI library (message passing implementation for use of multiple
@@ -200,7 +196,7 @@ directory of the LIBNAME library headers installation
 directory of the LIBNAME libraries (.so, .a, .dylib, etc) installation
 @end table
 LIBNAME can be one of the following: BLAS - CBLAS - FXT - HWLOC -
-LAPACK - LAPACKE - MAGMA - QUARK - STARPU - TMG.
+LAPACK - LAPACKE - QUARK - STARPU - TMG.
 See paragraph about @ref{Dependencies detection} for details.
 
 Libraries detected with an official CMake module (see module files in
@@ -220,7 +216,6 @@ Libraries detected with CHAMELEON cmake modules (see module files in
 @item HWLOC
 @item LAPACK
 @item LAPACKE
-@item MAGMA
 @item QUARK
 @item STARPU
 @item TMG
@@ -343,9 +338,6 @@ precision) on mirage machine are available for now.
 Database of models is subject to change, it should be enrich in a near future.
 @end itemize
 
-One can additionally decide to enable the magma kernels by setting the cmake
-option @option{-DCHAMELEON_SIMULATION_MAGMA=ON} .
-
 @node Use out of core support with StarPU
 @section Use out of core support with StarPU
 
diff --git a/docs/texinfo/chapters/installing.texi b/docs/texinfo/chapters/installing.texi
index fec50baab..edde68e8d 100644
--- a/docs/texinfo/chapters/installing.texi
+++ b/docs/texinfo/chapters/installing.texi
@@ -194,7 +194,6 @@ It is a standard component of any such system.
 @menu
 * OpenMPI::
 * Nvidia CUDA Toolkit::
-* MAGMA::
 * FxT::
 @end menu
 
@@ -231,27 +230,7 @@ enabled.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
 CUDA releases from versions 4 to 7.5.
-Your compiler and MAGMA library must be compatible with CUDA.
-
-@node MAGMA
-@subsubsection MAGMA
-
-@uref{http://icl.cs.utk.edu/magma/, MAGMA} project aims to develop a dense
-linear algebra library similar to LAPACK but for heterogeneous/hybrid
-architectures, starting with current "Multicore+GPU" systems.
-CHAMELEON can use a set of high level MAGMA routines to accelerate
-computations on GPUs.
-To fully benefit from GPUs, the user should enable MAGMA in addition to
-CUDA/cuBLAS.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-MAGMA releases from versions 1.4 to 1.6.
-MAGMA library must be compatible with CUDA.
-MAGMA library should be built with sequential versions of BLAS/LAPACK.
-We should not get some MAGMA link flags embarking multithreaded
-BLAS/LAPACK because it could affect performances (take care about the
-MAGMA link flag @option{-lmkl_intel_thread} for example that we could inheritate
-from the pkg-config file @file{magma.pc}).
+Your compiler must be compatible with CUDA.
 
 @node FxT
 @subsubsection FxT
diff --git a/docs/texinfo/chapters/introduction.texi b/docs/texinfo/chapters/introduction.texi
index 695d252cd..b94921f37 100644
--- a/docs/texinfo/chapters/introduction.texi
+++ b/docs/texinfo/chapters/introduction.texi
@@ -181,11 +181,11 @@ task-based algorithms behave regarding different runtime systems
 implementations.
 Using CHAMELEON with @uref{http://runtime.bordeaux.inria.fr/StarPU/,
 StarPU} runtime system allows to exploit GPUs through
-kernels provided by @uref{https://developer.nvidia.com/cublas, cuBLAS} and
-@uref{http://icl.cs.utk.edu/magma/, MAGMA} and clusters of interconnected
-nodes with distributed memory (using @uref{http://www.open-mpi.org/, MPI}).
-Computation of very large systems with dense matrices on a cluster of nodes is
-still being experimented and stabilized.
+kernels provided by @uref{https://developer.nvidia.com/cublas, cuBLAS}
+and clusters of interconnected nodes with distributed memory (using
+@uref{http://www.open-mpi.org/, MPI}).  Computation of very large
+systems with dense matrices on a cluster of nodes is still
+experimental and being stabilized.
 It is not expected to get stable performances with the current version using
 MPI.
 
diff --git a/example/basic_zposv/CMakeLists.txt b/example/basic_zposv/CMakeLists.txt
index 89208ee18..555e7f72e 100644
--- a/example/basic_zposv/CMakeLists.txt
+++ b/example/basic_zposv/CMakeLists.txt
@@ -68,16 +68,13 @@ endif()
 
 if(NOT CHAMELEON_SIMULATION)
 
-    if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA)
+    if(CHAMELEON_USE_CUDA)
         list(APPEND libs_for_examples
         cudablas)
     endif()
     if(CHAMELEON_USE_CUDA)
         link_directories(${CUDA_LIBRARY_DIRS})
     endif()
-    if(CHAMELEON_USE_MAGMA)
-        link_directories(${MAGMA_LIBRARY_DIRS})
-    endif()
 
     list(APPEND libs_for_examples
          coreblas
diff --git a/example/lapack_to_morse/CMakeLists.txt b/example/lapack_to_morse/CMakeLists.txt
index 00ad92183..c627c5eea 100644
--- a/example/lapack_to_morse/CMakeLists.txt
+++ b/example/lapack_to_morse/CMakeLists.txt
@@ -25,7 +25,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 
 # compilation for other sources step1 and >
-set(LTM_SOURCES 
+set(LTM_SOURCES
     step1.c
     step2.c
     step3.c
@@ -63,22 +63,19 @@ elseif(CHAMELEON_SCHED_QUARK)
 endif()
 
 
-# specific compilation for step0 because we potentially want to use 
+# specific compilation for step0 because we potentially want to use
 # multithreaded BLAS and LAPACK libraries for this step
 unset(libs_for_step0)
 
 if(NOT CHAMELEON_SIMULATION)
 
-    if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA)
+    if(CHAMELEON_USE_CUDA)
         list(APPEND libs_for_ltm
         cudablas)
     endif()
     if(CHAMELEON_USE_CUDA)
         link_directories(${CUDA_LIBRARY_DIRS})
     endif()
-    if(CHAMELEON_USE_MAGMA)
-        link_directories(${MAGMA_LIBRARY_DIRS})
-    endif()
 
     list(APPEND libs_for_step0 ${libs_for_ltm})
 
@@ -104,7 +101,7 @@ if(NOT CHAMELEON_SIMULATION)
         list(APPEND libs_for_step0
              coreblas
              ${LAPACKE_LIBRARIES}
-             ${CBLAS_LIBRARIES} 
+             ${CBLAS_LIBRARIES}
              ${LAPACK_PAR_LIBRARIES}
              ${BLAS_PAR_LIBRARIES}
              ${HWLOC_LIBRARIES}
diff --git a/example/lapack_to_morse/step1.c b/example/lapack_to_morse/step1.c
index cb87ad8ea..7af034728 100644
--- a/example/lapack_to_morse/step1.c
+++ b/example/lapack_to_morse/step1.c
@@ -40,7 +40,7 @@
  * MORSE_Set to give some specific parameters.
  * This code allows you to expoit parallelism coming from all the cores of your
  * computer and from gpus if you have properly linked with pthread and CUDA
- * ( + CUBLAS and MAGMA optionnaly ).
+ * ( + CUBLAS optionally ).
  * The precision is: double
  */
 int main(int argc, char *argv[]) {
diff --git a/example/link_chameleon/CMakeLists.txt b/example/link_chameleon/CMakeLists.txt
index fbaab5845..e13eab1a9 100644
--- a/example/link_chameleon/CMakeLists.txt
+++ b/example/link_chameleon/CMakeLists.txt
@@ -27,7 +27,7 @@ if (MORSE_DISTRIB_DIR)
     if (MORSE_CHAMELEON_USE_QUARK)
         find_package(CHAMELEON COMPONENTS QUARK)
     else()
-        find_package(CHAMELEON COMPONENTS STARPU MPI CUDA MAGMA FXT)
+        find_package(CHAMELEON COMPONENTS STARPU MPI CUDA FXT)
     endif()
     if (CHAMELEON_FOUND)
         link_directories(${CHAMELEON_LIBRARY_DIRS_DEP})
diff --git a/example/link_chameleon/link_chameleon.c b/example/link_chameleon/link_chameleon.c
index b38b12d50..b63b45e32 100644
--- a/example/link_chameleon/link_chameleon.c
+++ b/example/link_chameleon/link_chameleon.c
@@ -164,7 +164,7 @@ static void read_args(int argc, char *argv[], int *iparam){
  * Print a header message to summarize main parameters
  */
 static void print_header(char *prog_name, int * iparam) {
-#if defined(MAGMAMORSE_SIMULATION)
+#if defined(CHAMELEON_SIMULATION)
     double    eps = 0.;
 #else
     double    eps = LAPACKE_dlamch_work( 'e' );
@@ -208,7 +208,7 @@ static void print_header(char *prog_name, int * iparam) {
     }
 
 /*
- * test external application link with magmamorse
+ * test external application link with chameleon
  */
 int main(int argc, char *argv[]) {
 
diff --git a/example/out_of_core/CMakeLists.txt b/example/out_of_core/CMakeLists.txt
index 14aec8847..01bb27041 100644
--- a/example/out_of_core/CMakeLists.txt
+++ b/example/out_of_core/CMakeLists.txt
@@ -42,16 +42,13 @@ link_directories(${STARPU_LIBRARY_DIRS})
 
 if(NOT CHAMELEON_SIMULATION)
 
-    if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA)
+    if(CHAMELEON_USE_CUDA)
         list(APPEND libs_for_ooc
         cudablas)
     endif()
     if(CHAMELEON_USE_CUDA)
         link_directories(${CUDA_LIBRARY_DIRS})
     endif()
-    if(CHAMELEON_USE_MAGMA)
-        link_directories(${MAGMA_LIBRARY_DIRS})
-    endif()
 
     list(APPEND libs_for_ooc
          coreblas
diff --git a/include/chameleon_config.h.in b/include/chameleon_config.h.in
index 065558af0..83350ba3a 100644
--- a/include/chameleon_config.h.in
+++ b/include/chameleon_config.h.in
@@ -41,7 +41,6 @@
 #cmakedefine CHAMELEON_USE_CUDA
 #cmakedefine CHAMELEON_USE_CUBLAS
 #cmakedefine CHAMELEON_USE_CUBLAS_V2
-#cmakedefine CHAMELEON_USE_MAGMA
 
 /* Simulating */
 #cmakedefine CHAMELEON_SIMULATION
diff --git a/include/morse_fortran.h b/include/morse_fortran.h
index 5cf58ae11..813a31b01 100644
--- a/include/morse_fortran.h
+++ b/include/morse_fortran.h
@@ -6,19 +6,19 @@
 ! This software is a computer program whose purpose is to process
 ! Matrices Over Runtime Systems @ Exascale (MORSE). More information
 ! can be found on the following website: http://www.inria.fr/en/teams/morse.
-! 
+!
 ! This software is governed by the CeCILL-B license under French law and
-! abiding by the rules of distribution of free software.  You can  use, 
+! abiding by the rules of distribution of free software.  You can  use,
 ! modify and/ or redistribute the software under the terms of the CeCILL-B
 ! license as circulated by CEA, CNRS and INRIA at the following URL
-! "http://www.cecill.info". 
-! 
+! "http://www.cecill.info".
+!
 ! As a counterpart to the access to the source code and  rights to copy,
 ! modify and redistribute granted by the license, users are provided only
 ! with a limited warranty  and the software's author,  the holder of the
 ! economic rights,  and the successive licensors  have only  limited
-! liability. 
-! 
+! liability.
+!
 ! In this respect, the user's attention is drawn to the risks associated
 ! with loading,  using,  modifying and/or developing or reproducing the
 ! software by the user in light of its specific status of free software,
@@ -26,10 +26,10 @@
 ! therefore means  that it is reserved for developers  and  experienced
 ! professionals having in-depth computer knowledge. Users are therefore
 ! encouraged to load and test the software's suitability as regards their
-! requirements in conditions enabling the security of their systems and/or 
-! data to be ensured and,  more generally, to use and operate it in the 
-! same conditions as regards security. 
-! 
+! requirements in conditions enabling the security of their systems and/or
+! data to be ensured and,  more generally, to use and operate it in the
+! same conditions as regards security.
+!
 ! The fact that you are presently reading this means that you have had
 ! knowledge of the CeCILL-B license and that you accept its terms.
 !
@@ -123,14 +123,14 @@
       parameter ( MorseNonsymPosv  = 243 )
       parameter ( MorseSymPosv     = 244 )
 
-      integer MorseNoPacking     
-      integer MorsePackSubdiag   
-      integer MorsePackSupdiag   
-      integer MorsePackColumn    
-      integer MorsePackLowerBand 
-      integer MorsePackRow       
-      integer MorsePackUpeprBand 
-      integer MorsePackAll       
+      integer MorseNoPacking
+      integer MorsePackSubdiag
+      integer MorsePackSupdiag
+      integer MorsePackColumn
+      integer MorsePackLowerBand
+      integer MorsePackRow
+      integer MorsePackUpeprBand
+      integer MorsePackAll
       parameter ( MorseNoPacking     = 291 )
       parameter ( MorsePackSubdiag   = 292 )
       parameter ( MorsePackSupdiag   = 293 )
@@ -239,4 +239,3 @@
       parameter ( PRIORITY = 16 )
       parameter ( CALLBACK = 17 )
       parameter ( REDUX    = 18 )
-
diff --git a/include/morse_kernels.h b/include/morse_kernels.h
index 2d2923399..d043eba37 100644
--- a/include/morse_kernels.h
+++ b/include/morse_kernels.h
@@ -12,8 +12,8 @@
  *
  *  @file morse_kernels.h
  *
- *  MAGMA codelets kernel
- *  MAGMA is a software package provided by Univ. of Tennessee,
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
  *  Univ. of California Berkeley and Univ. of Colorado Denver,
  *  and INRIA Bordeaux Sud-Ouest
  *
diff --git a/include/morse_struct.h b/include/morse_struct.h
index 2aacefcdf..a87b8ca47 100644
--- a/include/morse_struct.h
+++ b/include/morse_struct.h
@@ -161,7 +161,7 @@ typedef struct morse_request_s {
  *  sharing common exception handling.
  **/
 typedef struct morse_sequence_s {
-    MORSE_bool       status;    /* MAGMA_SUCCESS or appropriate error code */
+    MORSE_bool       status;    /* MORSE_SUCCESS or appropriate error code */
     MORSE_request_t *request;   /* failed request                          */
     void            *schedopt;
 } MORSE_sequence_t;
diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt
index a095b33b3..0bd42f7ce 100644
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -124,17 +124,13 @@ endif()
 
 if(NOT CHAMELEON_SIMULATION)
 
-    if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA)
+    if(CHAMELEON_USE_CUDA)
         list(APPEND libs_for_tests
-        cudablas
-        )
+        cudablas)
     endif()
     if(CHAMELEON_USE_CUDA)
         link_directories(${CUDA_LIBRARY_DIRS})
     endif()
-    if(CHAMELEON_USE_MAGMA)
-        link_directories(${MAGMA_LIBRARY_DIRS})
-    endif()
 
     list(APPEND libs_for_tests
     coreblas
diff --git a/testing/lin/CMakeLists.txt b/testing/lin/CMakeLists.txt
index da3f049c4..290647bb4 100644
--- a/testing/lin/CMakeLists.txt
+++ b/testing/lin/CMakeLists.txt
@@ -153,7 +153,7 @@ elseif(MORSE_SCHED_QUARK)
     list(APPEND libs_for_tests coreblas)
 endif()
 
-foreach(_dep MAGMA LAPACKE LAPACK CBLAS BLAS CUDA HWLOC MPI DL)
+foreach(_dep LAPACKE LAPACK CBLAS BLAS CUDA HWLOC MPI DL)
     if(HAVE_${_dep})
         list(APPEND libs_for_tests ${${_dep}_LIBRARY})
     endif()
diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt
index d30584ac4..61334fa42 100644
--- a/timing/CMakeLists.txt
+++ b/timing/CMakeLists.txt
@@ -222,17 +222,13 @@ endif()
 
 if(NOT CHAMELEON_SIMULATION)
 
-    if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA)
+    if(CHAMELEON_USE_CUDA)
         list(APPEND libs_for_timings
-        cudablas
-        )
+        cudablas)
     endif()
     if(CHAMELEON_USE_CUDA)
         link_directories(${CUDA_LIBRARY_DIRS})
     endif()
-    if(CHAMELEON_USE_MAGMA)
-        link_directories(${MAGMA_LIBRARY_DIRS})
-    endif()
 
     list(APPEND libs_for_timings
     coreblas
diff --git a/timing/time_zgetrf_incpiv_tile.c b/timing/time_zgetrf_incpiv_tile.c
index 907f53ac3..a0a20509a 100644
--- a/timing/time_zgetrf_incpiv_tile.c
+++ b/timing/time_zgetrf_incpiv_tile.c
@@ -36,13 +36,6 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
         check = 0;
     }
 
-#if defined(CHAMELEON_USE_MAGMA)
-    if ( iparam[IPARAM_NB]%iparam[IPARAM_IB] != 0 ) {
-        fprintf(stderr, "NB must be a multiple of IB for LU on GPU\n");
-        exit(-1);
-    }
-#endif
-
     /* Allocate Data */
     PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, N );
     PASTE_CODE_ALLOCATE_MATRIX_TILE( descX,  check, MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS );
diff --git a/timing/time_zgetrs_incpiv_tile.c b/timing/time_zgetrs_incpiv_tile.c
index 3ba081e9c..29b9c41b2 100644
--- a/timing/time_zgetrs_incpiv_tile.c
+++ b/timing/time_zgetrs_incpiv_tile.c
@@ -37,13 +37,6 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
         check = 0;
     }
 
-#if defined(CHAMELEON_USE_MAGMA)
-    if ( iparam[IPARAM_NB]%iparam[IPARAM_IB] != 0 ) {
-        fprintf(stderr, "NB must be a multiple of IB for LU on GPU\n");
-        exit(-1);
-    }
-#endif
-
     /* Allocate Data */
     PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, N );
     PASTE_CODE_ALLOCATE_MATRIX_TILE( descX,  check, MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS );
-- 
GitLab