From 22869cafe224b2ea75a63cc543f36d292bc388e9 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Fri, 16 Dec 2016 00:06:47 +0100
Subject: [PATCH] Add codelets in all three runtimes

---
 coreblas/compute/core_ztpmqrt.c               |   2 +-
 runtime/CMakeLists.txt                        |  78 +++++++++
 runtime/parsec/CMakeLists.txt                 |  72 +-------
 runtime/parsec/codelets/codelet_ztpmqrt.c     | 102 +++++++++++
 runtime/parsec/codelets/codelet_ztpqrt.c      |  85 ++++++++++
 runtime/quark/CMakeLists.txt                  |  72 +-------
 runtime/quark/codelets/codelet_ztpmqrt.c      |  84 +++++++++
 runtime/quark/codelets/codelet_ztpqrt.c       |  72 ++++++++
 runtime/starpu/CMakeLists.txt                 |  72 +-------
 runtime/starpu/codelets/codelet_zcallback.c   |   4 +-
 runtime/starpu/codelets/codelet_ztpmqrt.c     | 159 ++++++++++++++++++
 runtime/starpu/codelets/codelet_ztpqrt.c      |  99 +++++++++++
 .../starpu/include/runtime_codelet_profile.h  |   2 +-
 runtime/starpu/include/runtime_codelet_z.h    |   2 +
 runtime/starpu/include/runtime_codelets.h     |   2 +-
 runtime/starpu/include/runtime_workspace.h    |   8 +-
 16 files changed, 694 insertions(+), 221 deletions(-)
 create mode 100644 runtime/parsec/codelets/codelet_ztpmqrt.c
 create mode 100644 runtime/parsec/codelets/codelet_ztpqrt.c
 create mode 100644 runtime/quark/codelets/codelet_ztpmqrt.c
 create mode 100644 runtime/quark/codelets/codelet_ztpqrt.c
 create mode 100644 runtime/starpu/codelets/codelet_ztpmqrt.c
 create mode 100644 runtime/starpu/codelets/codelet_ztpqrt.c

diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c
index ee44b8cf8..2241b5d39 100644
--- a/coreblas/compute/core_ztpmqrt.c
+++ b/coreblas/compute/core_ztpmqrt.c
@@ -98,7 +98,7 @@
  *
  * @param[out] WORK
  *         Workspace array of size LDWORK-by-NB.
- *         LDWORK = N if side =MorseLeft, or  M if side = MorseRight.
+ *         LDWORK = N if side = MorseLeft, or  M if side = MorseRight.
  *
  *******************************************************************************
  *
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index b0d76eeba..0224e3644 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -26,6 +26,84 @@
 #
 ###
 
+# List of codelets required by all runtimes
+# -----------------------------------------
+set(CODELETS_ZSRC
+    codelets/codelet_ztile_zero.c
+    codelets/codelet_zasum.c
+    ##################
+    # BLAS 1
+    ##################
+    codelets/codelet_zaxpy.c
+    ##################
+    # BLAS 3
+    ##################
+    codelets/codelet_zgemm.c
+    codelets/codelet_zhemm.c
+    codelets/codelet_zher2k.c
+    codelets/codelet_zherk.c
+    codelets/codelet_zsymm.c
+    codelets/codelet_zsyr2k.c
+    codelets/codelet_zsyrk.c
+    codelets/codelet_ztrmm.c
+    codelets/codelet_ztrsm.c
+    ##################
+    # LAPACK
+    ##################
+    codelets/codelet_zgeadd.c
+    codelets/codelet_zlascal.c
+    codelets/codelet_zgelqt.c
+    codelets/codelet_zgeqrt.c
+    codelets/codelet_zgessm.c
+    codelets/codelet_zgessq.c
+    codelets/codelet_zgetrf.c
+    codelets/codelet_zgetrf_incpiv.c
+    codelets/codelet_zgetrf_nopiv.c
+    codelets/codelet_zhe2ge.c
+    codelets/codelet_zherfb.c
+    codelets/codelet_zhessq.c
+    codelets/codelet_zlacpy.c
+    codelets/codelet_zlange.c
+    codelets/codelet_zlanhe.c
+    codelets/codelet_zlansy.c
+    codelets/codelet_zlantr.c
+    codelets/codelet_zlaset2.c
+    codelets/codelet_zlaset.c
+    codelets/codelet_zlatro.c
+    codelets/codelet_zlauum.c
+    codelets/codelet_zplghe.c
+    codelets/codelet_zplgsy.c
+    codelets/codelet_zplrnt.c
+    codelets/codelet_zplssq.c
+    codelets/codelet_zpotrf.c
+    codelets/codelet_zssssm.c
+    codelets/codelet_zsyssq.c
+    codelets/codelet_zsytrf_nopiv.c
+    codelets/codelet_ztpqrt.c
+    codelets/codelet_ztpmqrt.c
+    codelets/codelet_ztradd.c
+    codelets/codelet_ztrasm.c
+    codelets/codelet_ztrssq.c
+    codelets/codelet_ztrtri.c
+    codelets/codelet_ztslqt.c
+    codelets/codelet_ztsmlq.c
+    codelets/codelet_ztsmqr.c
+    codelets/codelet_ztsmlq_hetra1.c
+    codelets/codelet_ztsmqr_hetra1.c
+    codelets/codelet_ztsqrt.c
+    codelets/codelet_ztstrf.c
+    codelets/codelet_zttlqt.c
+    codelets/codelet_zttmlq.c
+    codelets/codelet_zttmqr.c
+    codelets/codelet_zttqrt.c
+    codelets/codelet_zunmlq.c
+    codelets/codelet_zunmqr.c
+    ##################
+    # BUILD
+    ##################
+    codelets/codelet_zbuild.c
+    )
+
 # Check for the subdirectories
 # ----------------------------
 if( CHAMELEON_SCHED_QUARK )
diff --git a/runtime/parsec/CMakeLists.txt b/runtime/parsec/CMakeLists.txt
index a19890afe..872c19c48 100644
--- a/runtime/parsec/CMakeLists.txt
+++ b/runtime/parsec/CMakeLists.txt
@@ -88,77 +88,7 @@ set(RUNTIME_COMMON
 # ------------------------------------------------------
 set(RUNTIME_SRCS_GENERATED "")
 set(ZSRC
-    codelets/codelet_ztile_zero.c
-    codelets/codelet_zasum.c
-    ##################
-    # BLAS 1
-    ##################
-    codelets/codelet_zaxpy.c
-    ##################
-    # BLAS 3
-    ##################
-    codelets/codelet_zgemm.c
-    codelets/codelet_zhemm.c
-    codelets/codelet_zher2k.c
-    codelets/codelet_zherk.c
-    codelets/codelet_zsymm.c
-    codelets/codelet_zsyr2k.c
-    codelets/codelet_zsyrk.c
-    codelets/codelet_ztrmm.c
-    codelets/codelet_ztrsm.c
-    ##################
-    # LAPACK
-    ##################
-    codelets/codelet_zgeadd.c
-    codelets/codelet_zlascal.c
-    codelets/codelet_zgelqt.c
-    codelets/codelet_zgeqrt.c
-    codelets/codelet_zgessm.c
-    codelets/codelet_zgessq.c
-    codelets/codelet_zgetrf.c
-    codelets/codelet_zgetrf_incpiv.c
-    codelets/codelet_zgetrf_nopiv.c
-    codelets/codelet_zhe2ge.c
-    codelets/codelet_zherfb.c
-    codelets/codelet_zhessq.c
-    codelets/codelet_zlacpy.c
-    codelets/codelet_zlange.c
-    codelets/codelet_zlanhe.c
-    codelets/codelet_zlansy.c
-    codelets/codelet_zlantr.c
-    codelets/codelet_zlaset2.c
-    codelets/codelet_zlaset.c
-    codelets/codelet_zlatro.c
-    codelets/codelet_zlauum.c
-    codelets/codelet_zplghe.c
-    codelets/codelet_zplgsy.c
-    codelets/codelet_zplrnt.c
-    codelets/codelet_zplssq.c
-    codelets/codelet_zpotrf.c
-    codelets/codelet_zssssm.c
-    codelets/codelet_zsyssq.c
-    codelets/codelet_zsytrf_nopiv.c
-    codelets/codelet_ztradd.c
-    codelets/codelet_ztrasm.c
-    codelets/codelet_ztrssq.c
-    codelets/codelet_ztrtri.c
-    codelets/codelet_ztslqt.c
-    codelets/codelet_ztsmlq.c
-    codelets/codelet_ztsmqr.c
-    codelets/codelet_ztsmlq_hetra1.c
-    codelets/codelet_ztsmqr_hetra1.c
-    codelets/codelet_ztsqrt.c
-    codelets/codelet_ztstrf.c
-    codelets/codelet_zttlqt.c
-    codelets/codelet_zttmlq.c
-    codelets/codelet_zttmqr.c
-    codelets/codelet_zttqrt.c
-    codelets/codelet_zunmlq.c
-    codelets/codelet_zunmqr.c
-    ##################
-    # BUILD
-    ##################
-    codelets/codelet_zbuild.c
+    ${CODELETS_ZSRC}
     )
 
 precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
diff --git a/runtime/parsec/codelets/codelet_ztpmqrt.c b/runtime/parsec/codelets/codelet_ztpmqrt.c
new file mode 100644
index 000000000..612e9d54a
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_ztpmqrt.c
@@ -0,0 +1,102 @@
+/**
+ *
+ * @copyright (c) 2009-2016 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                          Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztpqrt.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 0.9.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "runtime/parsec/include/morse_parsec.h"
+
+static int
+CORE_ztpmqrt_parsec(dague_execution_unit_t    *context,
+                    dague_execution_context_t *this_task)
+{
+    MORSE_enum *side;
+    MORSE_enum *trans;
+    int *M;
+    int *N;
+    int *K;
+    int *L;
+    int *ib;
+    const MORSE_Complex64_t *V;
+    int *ldv;
+    const MORSE_Complex64_t *T;
+    int *ldt;
+    MORSE_Complex64_t *A;
+    int *lda;
+    MORSE_Complex64_t *B;
+    int *ldb;
+    MORSE_Complex64_t *WORK;
+
+    dague_dtd_unpack_args(
+        this_task,
+        UNPACK_VALUE,   &side,
+        UNPACK_VALUE,   &trans,
+        UNPACK_VALUE,   &M,
+        UNPACK_VALUE,   &N,
+        UNPACK_VALUE,   &K,
+        UNPACK_VALUE,   &L,
+        UNPACK_VALUE,   &ib,
+        UNPACK_DATA,    &V,
+        UNPACK_VALUE,   &ldv,
+        UNPACK_DATA,    &T,
+        UNPACK_VALUE,   &ldt,
+        UNPACK_DATA,    &A,
+        UNPACK_VALUE,   &lda,
+        UNPACK_DATA,    &B,
+        UNPACK_VALUE,   &ldb,
+        UNPACK_SCRATCH, &WORK );
+
+    CORE_ztpmqrt( *side, *trans, *M, *N, *K, *L, *ib,
+                  V, *ldv, T, *ldt, A, *lda, B, *ldb, WORK );
+
+    return 0;
+}
+
+void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
+                         MORSE_enum side, MORSE_enum trans,
+                         int M, int N, int K, int L, int ib, int nb,
+                         const MORSE_desc_t *V, int Vm, int Vn, int ldv,
+                         const MORSE_desc_t *T, int Tm, int Tn, int ldt,
+                         const MORSE_desc_t *A, int Am, int An, int lda,
+                         const MORSE_desc_t *B, int Bm, int Bn, int ldb )
+{
+    dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt);
+
+    dague_insert_task(
+        DAGUE_dtd_handle, CORE_ztpmqrt_parsec, "tpmqrt",
+        sizeof(MORSE_enum), &side,  VALUE,
+        sizeof(MORSE_enum), &trans, VALUE,
+        sizeof(int),        &M,     VALUE,
+        sizeof(int),        &N,     VALUE,
+        sizeof(int),        &K,     VALUE,
+        sizeof(int),        &L,     VALUE,
+        sizeof(int),        &ib,    VALUE,
+        PASSED_BY_REF,       RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT | REGION_FULL,
+        sizeof(int),        &ldv,   VALUE,
+        PASSED_BY_REF,       RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INPUT | REGION_FULL,
+        sizeof(int),        &ldt,   VALUE,
+        PASSED_BY_REF,       RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | REGION_FULL,
+        sizeof(int),        &lda,   VALUE,
+        PASSED_BY_REF,       RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | REGION_FULL,
+        sizeof(int),        &ldb,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH,
+        0);
+}
diff --git a/runtime/parsec/codelets/codelet_ztpqrt.c b/runtime/parsec/codelets/codelet_ztpqrt.c
new file mode 100644
index 000000000..a0b3f6e06
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_ztpqrt.c
@@ -0,0 +1,85 @@
+/**
+ *
+ * @copyright (c) 2009-2016 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                          Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztpqrt.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 0.9.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "runtime/parsec/include/morse_parsec.h"
+
+static int
+CORE_ztpqrt_parsec(dague_execution_unit_t    *context,
+                   dague_execution_context_t *this_task)
+{
+    int *M;
+    int *N;
+    int *L;
+    int *ib;
+    MORSE_Complex64_t *A;
+    int *lda;
+    MORSE_Complex64_t *B;
+    int *ldb;
+    MORSE_Complex64_t *T;
+    int *ldt;
+    MORSE_Complex64_t *WORK;
+
+    dague_dtd_unpack_args(
+        this_task,
+        UNPACK_VALUE,   &M,
+        UNPACK_VALUE,   &N,
+        UNPACK_VALUE,   &L,
+        UNPACK_VALUE,   &ib,
+        UNPACK_DATA,    &A,
+        UNPACK_VALUE,   &lda,
+        UNPACK_DATA,    &B,
+        UNPACK_VALUE,   &ldb,
+        UNPACK_DATA,    &T,
+        UNPACK_VALUE,   &ldt,
+        UNPACK_SCRATCH, &WORK );
+
+    CORE_ztpqrt( *M, *N, *L, *ib,
+                 A, *lda, B, *ldb, T, *ldt, WORK );
+
+    return 0;
+}
+
+void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const MORSE_desc_t *A, int Am, int An, int lda,
+                         const MORSE_desc_t *B, int Bm, int Bn, int ldb,
+                         const MORSE_desc_t *T, int Tm, int Tn, int ldt )
+{
+    dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt);
+
+    dague_insert_task(
+        DAGUE_dtd_handle, CORE_ztpqrt_parsec, "tpqrt",
+        sizeof(int),   &M,   VALUE,
+        sizeof(int),   &N,   VALUE,
+        sizeof(int),   &L,   VALUE,
+        sizeof(int),   &ib,  VALUE,
+        PASSED_BY_REF,  RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | REGION_U | REGION_D,
+        sizeof(int),   &lda, VALUE,
+        PASSED_BY_REF,  RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | REGION_FULL,
+        sizeof(int),   &ldb, VALUE,
+        PASSED_BY_REF,  RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INOUT | REGION_FULL,
+        sizeof(int),   &ldt, VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH,
+        0);
+}
diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt
index 9366a00d7..fa7952a15 100644
--- a/runtime/quark/CMakeLists.txt
+++ b/runtime/quark/CMakeLists.txt
@@ -86,77 +86,7 @@ set(RUNTIME_COMMON
 # ------------------------------------------------------
 set(RUNTIME_SRCS_GENERATED "")
 set(ZSRC
-    codelets/codelet_ztile_zero.c
-    codelets/codelet_zasum.c
-    ##################
-    # BLAS 1
-    ##################
-    codelets/codelet_zaxpy.c
-    ##################
-    # BLAS 3
-    ##################
-    codelets/codelet_zgemm.c
-    codelets/codelet_zhemm.c
-    codelets/codelet_zher2k.c
-    codelets/codelet_zherk.c
-    codelets/codelet_zsymm.c
-    codelets/codelet_zsyr2k.c
-    codelets/codelet_zsyrk.c
-    codelets/codelet_ztrmm.c
-    codelets/codelet_ztrsm.c
-    ##################
-    # LAPACK
-    ##################
-    codelets/codelet_zgeadd.c
-    codelets/codelet_zlascal.c
-    codelets/codelet_zgelqt.c
-    codelets/codelet_zgeqrt.c
-    codelets/codelet_zgessm.c
-    codelets/codelet_zgessq.c
-    codelets/codelet_zgetrf.c
-    codelets/codelet_zgetrf_incpiv.c
-    codelets/codelet_zgetrf_nopiv.c
-    codelets/codelet_zhe2ge.c
-    codelets/codelet_zherfb.c
-    codelets/codelet_zhessq.c
-    codelets/codelet_zlacpy.c
-    codelets/codelet_zlange.c
-    codelets/codelet_zlanhe.c
-    codelets/codelet_zlansy.c
-    codelets/codelet_zlantr.c
-    codelets/codelet_zlaset2.c
-    codelets/codelet_zlaset.c
-    codelets/codelet_zlatro.c
-    codelets/codelet_zlauum.c
-    codelets/codelet_zplghe.c
-    codelets/codelet_zplgsy.c
-    codelets/codelet_zplrnt.c
-    codelets/codelet_zplssq.c
-    codelets/codelet_zpotrf.c
-    codelets/codelet_zssssm.c
-    codelets/codelet_zsyssq.c
-    codelets/codelet_zsytrf_nopiv.c
-    codelets/codelet_ztradd.c
-    codelets/codelet_ztrasm.c
-    codelets/codelet_ztrssq.c
-    codelets/codelet_ztrtri.c
-    codelets/codelet_ztslqt.c
-    codelets/codelet_ztsmlq.c
-    codelets/codelet_ztsmqr.c
-    codelets/codelet_ztsmlq_hetra1.c
-    codelets/codelet_ztsmqr_hetra1.c
-    codelets/codelet_ztsqrt.c
-    codelets/codelet_ztstrf.c
-    codelets/codelet_zttlqt.c
-    codelets/codelet_zttmlq.c
-    codelets/codelet_zttmqr.c
-    codelets/codelet_zttqrt.c
-    codelets/codelet_zunmlq.c
-    codelets/codelet_zunmqr.c
-    ##################
-    # BUILD
-    ##################
-    codelets/codelet_zbuild.c
+    ${CODELETS_ZSRC}
     )
 
 precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
diff --git a/runtime/quark/codelets/codelet_ztpmqrt.c b/runtime/quark/codelets/codelet_ztpmqrt.c
new file mode 100644
index 000000000..25bd5ac83
--- /dev/null
+++ b/runtime/quark/codelets/codelet_ztpmqrt.c
@@ -0,0 +1,84 @@
+/**
+ *
+ * @copyright (c) 2009-2016 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                          Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztpqrt.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 0.9.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "runtime/quark/include/morse_quark.h"
+
+static void
+CORE_ztpmqrt_quark( Quark *quark )
+{
+    MORSE_enum side;
+    MORSE_enum trans;
+    int M;
+    int N;
+    int K;
+    int L;
+    int ib;
+    const MORSE_Complex64_t *V;
+    int ldv;
+    const MORSE_Complex64_t *T;
+    int ldt;
+    MORSE_Complex64_t *A;
+    int lda;
+    MORSE_Complex64_t *B;
+    int ldb;
+    MORSE_Complex64_t *WORK;
+
+    quark_unpack_args_16( quark, side, trans, M, N, K, L, ib,
+                          V, ldv, T, ldt, A, lda, B, ldb, WORK );
+
+    CORE_ztpmqrt( side, trans, M, N, K, L, ib,
+                  V, ldv, T, ldt, A, lda, B, ldb, WORK );
+}
+
+void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
+                         MORSE_enum side, MORSE_enum trans,
+                         int M, int N, int K, int L, int ib, int nb,
+                         const MORSE_desc_t *V, int Vm, int Vn, int ldv,
+                         const MORSE_desc_t *T, int Tm, int Tn, int ldt,
+                         const MORSE_desc_t *A, int Am, int An, int lda,
+                         const MORSE_desc_t *B, int Bm, int Bn, int ldb )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_TSMQR;
+
+    QUARK_Insert_Task(
+        opt->quark, CORE_ztpmqrt_quark, (Quark_Task_Flags*)opt,
+        sizeof(MORSE_enum),              &side,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &M,     VALUE,
+        sizeof(int),                     &N,     VALUE,
+        sizeof(int),                     &K,     VALUE,
+        sizeof(int),                     &L,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT,
+        sizeof(int),                     &ldv,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT,
+        sizeof(int),                     &lda,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT,
+        sizeof(int),                     &ldb,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  NULL, SCRATCH,
+        0);
+}
diff --git a/runtime/quark/codelets/codelet_ztpqrt.c b/runtime/quark/codelets/codelet_ztpqrt.c
new file mode 100644
index 000000000..9b7e09876
--- /dev/null
+++ b/runtime/quark/codelets/codelet_ztpqrt.c
@@ -0,0 +1,72 @@
+/**
+ *
+ * @copyright (c) 2009-2016 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                          Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztpqrt.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 0.9.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "runtime/quark/include/morse_quark.h"
+
+static void
+CORE_ztpqrt_quark( Quark *quark )
+{
+    int M;
+    int N;
+    int L;
+    int ib;
+    MORSE_Complex64_t *A;
+    int lda;
+    MORSE_Complex64_t *B;
+    int ldb;
+    MORSE_Complex64_t *T;
+    int ldt;
+    MORSE_Complex64_t *WORK;
+
+    quark_unpack_args_11( quark, M, N, L, ib,
+                          A, lda, B, ldb, T, ldt, WORK );
+
+    CORE_ztpqrt( M, N, L, ib,
+                 A, lda, B, ldb, T, ldt, WORK );
+}
+
+void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const MORSE_desc_t *A, int Am, int An, int lda,
+                         const MORSE_desc_t *B, int Bm, int Bn, int ldb,
+                         const MORSE_desc_t *T, int Tm, int Tn, int ldt )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_TSQRT;
+
+    QUARK_Insert_Task(
+        opt->quark, CORE_ztpqrt_quark, (Quark_Task_Flags*)opt,
+        sizeof(int),                         &M,   VALUE,
+        sizeof(int),                         &N,   VALUE,
+        sizeof(int),                         &L,   VALUE,
+        sizeof(int),                         &ib,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,      RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | QUARK_REGION_U | QUARK_REGION_D,
+        sizeof(int),                         &lda, VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,      RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT,
+        sizeof(int),                         &ldb, VALUE,
+        sizeof(MORSE_Complex64_t)*nb*ib,      RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), OUTPUT,
+        sizeof(int),                         &ldt, VALUE,
+        sizeof(MORSE_Complex64_t)*(ib+1)*nb,  NULL, SCRATCH,
+        0);
+}
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index 08956acaf..b2748379d 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -106,77 +106,7 @@ set_source_files_properties(control/runtime_profiling.c PROPERTIES COMPILE_FLAGS
 set(RUNTIME_SRCS_GENERATED "")
 set(ZSRC
     codelets/codelet_zcallback.c
-    codelets/codelet_ztile_zero.c
-    codelets/codelet_zasum.c
-    ##################
-    # BLAS 1
-    ##################
-    codelets/codelet_zaxpy.c
-    ##################
-    # BLAS 3
-    ##################
-    codelets/codelet_zgemm.c
-    codelets/codelet_zhemm.c
-    codelets/codelet_zher2k.c
-    codelets/codelet_zherk.c
-    codelets/codelet_zsymm.c
-    codelets/codelet_zsyr2k.c
-    codelets/codelet_zsyrk.c
-    codelets/codelet_ztrmm.c
-    codelets/codelet_ztrsm.c
-    ##################
-    # LAPACK
-    ##################
-    codelets/codelet_zgeadd.c
-    codelets/codelet_zlascal.c
-    codelets/codelet_zgelqt.c
-    codelets/codelet_zgeqrt.c
-    codelets/codelet_zgessm.c
-    codelets/codelet_zgessq.c
-    codelets/codelet_zgetrf.c
-    codelets/codelet_zgetrf_incpiv.c
-    codelets/codelet_zgetrf_nopiv.c
-    codelets/codelet_zhe2ge.c
-    codelets/codelet_zherfb.c
-    codelets/codelet_zhessq.c
-    codelets/codelet_zlacpy.c
-    codelets/codelet_zlange.c
-    codelets/codelet_zlanhe.c
-    codelets/codelet_zlansy.c
-    codelets/codelet_zlantr.c
-    codelets/codelet_zlaset2.c
-    codelets/codelet_zlaset.c
-    codelets/codelet_zlatro.c
-    codelets/codelet_zlauum.c
-    codelets/codelet_zplghe.c
-    codelets/codelet_zplgsy.c
-    codelets/codelet_zplrnt.c
-    codelets/codelet_zplssq.c
-    codelets/codelet_zpotrf.c
-    codelets/codelet_zssssm.c
-    codelets/codelet_zsyssq.c
-    codelets/codelet_zsytrf_nopiv.c
-    codelets/codelet_ztradd.c
-    codelets/codelet_ztrasm.c
-    codelets/codelet_ztrssq.c
-    codelets/codelet_ztrtri.c
-    codelets/codelet_ztslqt.c
-    codelets/codelet_ztsmlq.c
-    codelets/codelet_ztsmqr.c
-    codelets/codelet_ztsmlq_hetra1.c
-    codelets/codelet_ztsmqr_hetra1.c
-    codelets/codelet_ztsqrt.c
-    codelets/codelet_ztstrf.c
-    codelets/codelet_zttlqt.c
-    codelets/codelet_zttmlq.c
-    codelets/codelet_zttmqr.c
-    codelets/codelet_zttqrt.c
-    codelets/codelet_zunmlq.c
-    codelets/codelet_zunmqr.c
-    ##################
-    # BUILD
-    ##################
-    codelets/codelet_zbuild.c
+    ${CODELETS_ZSRC}
     )
 
 precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index 8af4ec546..bb26aa301 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -67,7 +67,9 @@ CHAMELEON_CL_CB(zssssm,        starpu_matrix_get_nx(task->handles[0]), starpu_ma
 CHAMELEON_CL_CB(zsymm,         starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0,                                           2.*M*M *N);
 CHAMELEON_CL_CB(zsyr2k,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      ( 1.+2.*M*N)*M);
 CHAMELEON_CL_CB(zsyrk,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      ( 1.+   M)*M*N);
-CHAMELEON_CL_CB(ztrasm,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                               0.5*M*(M+1));
+CHAMELEON_CL_CB(ztpqrt,        starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]),       2.*M*N*K);
+CHAMELEON_CL_CB(ztpmqrt,       starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]),       4.*M*N*K);
+CHAMELEON_CL_CB(ztrasm,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                         0.5*M*(M+1));
 CHAMELEON_CL_CB(ztrmm,         starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0,                                               M*M*N);
 CHAMELEON_CL_CB(ztrsm,         starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0,                                               M*M*N);
 CHAMELEON_CL_CB(ztrtri,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M *M*M);
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
new file mode 100644
index 000000000..98188588e
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -0,0 +1,159 @@
+/**
+ *
+ * @copyright (c) 2009-2016 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                          Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztpmqrt.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 0.9.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "runtime/starpu/include/morse_starpu.h"
+#include "runtime/starpu/include/runtime_codelet_z.h"
+
+void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
+                         MORSE_enum side, MORSE_enum trans,
+                         int M, int N, int K, int L, int ib, int nb,
+                         const MORSE_desc_t *V, int Vm, int Vn, int ldv,
+                         const MORSE_desc_t *T, int Tm, int Tn, int ldt,
+                         const MORSE_desc_t *A, int Am, int An, int lda,
+                         const MORSE_desc_t *B, int Bm, int Bn, int ldb )
+{
+    struct starpu_codelet *codelet = &cl_ztpmqrt;
+    void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;
+
+    if ( morse_desc_islocal( A, Am, An ) ||
+         morse_desc_islocal( B, Bm, Bn ) ||
+         morse_desc_islocal( V, Vm, Vn ) ||
+         morse_desc_islocal( T, Tm, Tn ) )
+    {
+        starpu_insert_task(
+            codelet,
+            STARPU_VALUE, &side,  sizeof(MORSE_enum),
+            STARPU_VALUE, &trans, sizeof(MORSE_enum),
+            STARPU_VALUE, &M,     sizeof(int),
+            STARPU_VALUE, &N,     sizeof(int),
+            STARPU_VALUE, &K,     sizeof(int),
+            STARPU_VALUE, &L,     sizeof(int),
+            STARPU_R,      RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
+            STARPU_VALUE, &ldv,   sizeof(int),
+            STARPU_R,      RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
+            STARPU_VALUE, &ldt,   sizeof(int),
+            STARPU_RW,     RTBLKADDR(A, MORSE_Complex64_t, Am, An),
+            STARPU_VALUE, &lda,   sizeof(int),
+            STARPU_RW,     RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
+            STARPU_VALUE, &ldb,   sizeof(int),
+            /* Other options */
+            STARPU_SCRATCH,   options->ws_worker,
+            STARPU_PRIORITY,  options->priority,
+            STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_USE_MPI)
+            STARPU_EXECUTE_ON_NODE, execution_rank,
+#endif
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+            STARPU_NAME, "ztpmqrt",
+#endif
+            0);
+    }
+}
+
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg)
+{
+    MORSE_enum side;
+    MORSE_enum trans;
+    int M;
+    int N;
+    int K;
+    int L;
+    int ib;
+    const MORSE_Complex64_t *V;
+    int ldv;
+    const MORSE_Complex64_t *T;
+    int ldt;
+    MORSE_Complex64_t *A;
+    int lda;
+    MORSE_Complex64_t *B;
+    int ldb;
+    MORSE_Complex64_t *WORK;
+
+    V    = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    B    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
+
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
+                                &ldv, &ldt, &lda, &ldb );
+
+    CORE_ztpmqrt( side, trans, M, N, K, L, ib,
+                  V, ldv, T, ldt, A, lda, B, ldb, WORK );
+}
+
+
+#if defined(CHAMELEON_USE_CUDA)
+static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
+{
+    MORSE_enum side;
+    MORSE_enum trans;
+    int M;
+    int N;
+    int K;
+    int L;
+    int k;
+    int ib;
+    const cuDoubleComplex *V;
+    int ldv;
+    const cuDoubleComplex *T;
+    int ldt;
+    cuDoubleComplex *A;
+    int lda;
+    cuDoubleComplex *B;
+    int ldb;
+    cuDoubleComplex *W;
+    CUstream stream;
+
+    V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
+    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
+    W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
+
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
+                                &ldv, &ldt, &lda, &ldb );
+
+    stream = starpu_cuda_get_local_stream();
+    cublasSetKernelStream( stream );
+
+    CUDA_ztpmqrt(
+            side, trans, M, N, K, L, ib,
+            A, lda, B, ldb, V, ldv, T, ldt,
+            W, stream );
+
+#ifndef STARPU_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+
+/*
+ * Codelet definition
+ */
+CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c
new file mode 100644
index 000000000..b6da13320
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_ztpqrt.c
@@ -0,0 +1,99 @@
+/**
+ *
+ * @copyright (c) 2009-2016 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                          Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztpqrt.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 0.9.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "runtime/starpu/include/morse_starpu.h"
+#include "runtime/starpu/include/runtime_codelet_z.h"
+
+void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
+                        int M, int N, int L, int ib, int nb,
+                        const MORSE_desc_t *A, int Am, int An, int lda,
+                        const MORSE_desc_t *B, int Bm, int Bn, int ldb,
+                        const MORSE_desc_t *T, int Tm, int Tn, int ldt )
+{
+    struct starpu_codelet *codelet = &cl_ztpqrt;
+    void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL;
+
+    if ( morse_desc_islocal( A, Am, An ) ||
+         morse_desc_islocal( B, Bm, Bn ) ||
+         morse_desc_islocal( T, Tm, Tn ) )
+    {
+        starpu_insert_task(
+            codelet,
+            STARPU_VALUE, &M,     sizeof(int),
+            STARPU_VALUE, &N,     sizeof(int),
+            STARPU_VALUE, &L,     sizeof(int),
+            STARPU_RW,     RTBLKADDR(A, MORSE_Complex64_t, Am, An),
+            STARPU_VALUE, &lda,   sizeof(int),
+            STARPU_RW,     RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
+            STARPU_VALUE, &ldb,   sizeof(int),
+            STARPU_RW,     RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
+            STARPU_VALUE, &ldt,   sizeof(int),
+            /* Other options */
+            STARPU_SCRATCH,   options->ws_worker,
+            STARPU_PRIORITY,  options->priority,
+            STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_USE_MPI)
+            STARPU_EXECUTE_ON_NODE, execution_rank,
+#endif
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+            STARPU_NAME, "ztpqrt",
+#endif
+            0);
+    }
+}
+
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
+{
+    int M;
+    int N;
+    int L;
+    int ib;
+    MORSE_Complex64_t *A;
+    int lda;
+    MORSE_Complex64_t *B;
+    int ldb;
+    MORSE_Complex64_t *T;
+    int ldt;
+    MORSE_Complex64_t *WORK;
+
+    A    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    T    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
+                                &lda, &ldb, &ldt );
+
+    CORE_ztpqrt( M, N, L, ib,
+                 A, lda, B, ldb, T, ldt, WORK );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
diff --git a/runtime/starpu/include/runtime_codelet_profile.h b/runtime/starpu/include/runtime_codelet_profile.h
index 99303041a..67942fc01 100644
--- a/runtime/starpu/include/runtime_codelet_profile.h
+++ b/runtime/starpu/include/runtime_codelet_profile.h
@@ -119,6 +119,6 @@
     extern struct starpu_perfmodel cl_##name##_fake;    \
     void cl_##name##_callback();                        \
     void profiling_display_##name##_info(void);         \
-    void estimate_##name##_sustained_peak(double *res);
+    void estimate_##name##_sustained_peak(double *res)
 
 #endif /* __CODELET_PROFILE_H__ */
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 0da29addb..16de7ea01 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -73,6 +73,8 @@ ZCODELETS_HEADER(syssq)
 ZCODELETS_HEADER(trasm)
 ZCODELETS_HEADER(trssq)
 ZCODELETS_HEADER(trtri)
+ZCODELETS_HEADER(tpqrt)
+ZCODELETS_HEADER(tpmqrt)
 ZCODELETS_HEADER(tslqt)
 ZCODELETS_HEADER(tsmlq)
 ZCODELETS_HEADER(tsmqr)
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index cf0a3bb31..14b1c8e56 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -87,7 +87,7 @@
 
 
 #define CODELETS_ALL_HEADER(name)                                             \
-     CHAMELEON_CL_CB_HEADER(name)                                             \
+     CHAMELEON_CL_CB_HEADER(name);                                            \
      void cl_##name##_load_fake_model(void);                                  \
      void cl_##name##_restore_model(void);                                    \
      extern struct starpu_codelet cl_##name;                                  \
diff --git a/runtime/starpu/include/runtime_workspace.h b/runtime/starpu/include/runtime_workspace.h
index e1bd1859d..a7d25d38e 100644
--- a/runtime/starpu/include/runtime_workspace.h
+++ b/runtime/starpu/include/runtime_workspace.h
@@ -26,10 +26,10 @@
 #ifndef _MORSE_STARPU_WORKSPACE_H_
 #define _MORSE_STARPU_WORKSPACE_H_
 
-/* 
- * Allocate workspace in host memory: CPU for any worker 
+/*
+ * Allocate workspace in host memory: CPU for any worker
  * or allocate workspace in worker's memory: main memory for cpu workers,
- * and embedded memory for CUDA devices. 
+ * and embedded memory for CUDA devices.
  */
 #define MORSE_HOST_MEM    0
 #define MORSE_WORKER_MEM  1
@@ -48,7 +48,7 @@ typedef struct morse_starpu_ws_s MORSE_starpu_ws_t;
  * (eg. MORSE_CUDA|MORSE_CPU for all CPU and GPU workers).  The
  * memory_location argument indicates whether this should be a buffer in host
  * memory or in worker's memory (MORSE_HOST_MEM or MORSE_WORKER_MEM). This function
- * returns 0 upon successful completion. 
+ * returns 0 upon successful completion.
  */
 int   RUNTIME_starpu_ws_alloc   ( MORSE_starpu_ws_t **workspace, size_t size, int which_workers, int memory_location);
 int   RUNTIME_starpu_ws_free    ( MORSE_starpu_ws_t  *workspace);
-- 
GitLab