From 21bc5a7f9379f56efb49ffd38adedb5f93052649 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 31 Jan 2018 20:20:16 +0100
Subject: [PATCH] Update quark codelets

---
 runtime/quark/codelets/codelet_zgelqt.c  | 18 ++---
 runtime/quark/codelets/codelet_zgeqrt.c  | 18 ++---
 runtime/quark/codelets/codelet_ztplqt.c  | 76 ++++++++++++++++++++
 runtime/quark/codelets/codelet_ztpmlqt.c | 88 ++++++++++++++++++++++++
 runtime/quark/codelets/codelet_ztpmqrt.c |  6 +-
 runtime/quark/codelets/codelet_ztpqrt.c  |  4 +-
 runtime/quark/codelets/codelet_ztslqt.c  |  4 +-
 runtime/quark/codelets/codelet_ztsmlq.c  | 36 +++++-----
 runtime/quark/codelets/codelet_ztsmqr.c  | 36 +++++-----
 runtime/quark/codelets/codelet_ztsqrt.c  |  4 +-
 runtime/quark/codelets/codelet_zttlqt.c  | 22 +++---
 runtime/quark/codelets/codelet_zttmlq.c  | 36 +++++-----
 runtime/quark/codelets/codelet_zttmqr.c  | 39 ++++++-----
 runtime/quark/codelets/codelet_zttqrt.c  | 22 +++---
 14 files changed, 289 insertions(+), 120 deletions(-)
 create mode 100644 runtime/quark/codelets/codelet_ztplqt.c
 create mode 100644 runtime/quark/codelets/codelet_ztpmlqt.c

diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c
index 16919df2d..32983f643 100644
--- a/runtime/quark/codelets/codelet_zgelqt.c
+++ b/runtime/quark/codelets/codelet_zgelqt.c
@@ -118,14 +118,14 @@ void MORSE_TASK_zgelqt(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GELQT;
     QUARK_Insert_Task(opt->quark, CORE_zgelqt_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),                        &m,     VALUE,
-        sizeof(int),                        &n,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A, MORSE_Complex64_t, Am, An),             INOUT,
-        sizeof(int),                        &lda,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             OUTPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*nb,       NULL,          SCRATCH,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
+        sizeof(int),                     &m,     VALUE,
+        sizeof(int),                     &n,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INOUT,
+        sizeof(int),                     &lda,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), OUTPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb,    NULL,          SCRATCH,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,          SCRATCH,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c
index c2575a706..f18033e27 100644
--- a/runtime/quark/codelets/codelet_zgeqrt.c
+++ b/runtime/quark/codelets/codelet_zgeqrt.c
@@ -118,14 +118,14 @@ void MORSE_TASK_zgeqrt(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEQRT;
     QUARK_Insert_Task(opt->quark, CORE_zgeqrt_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),                        &m,     VALUE,
-        sizeof(int),                        &n,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A, MORSE_Complex64_t, Am, An),             INOUT,
-        sizeof(int),                        &lda,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             OUTPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*nb,       NULL,          SCRATCH,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
+        sizeof(int),                     &m,     VALUE,
+        sizeof(int),                     &n,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A, MORSE_Complex64_t, Am, An), INOUT,
+        sizeof(int),                     &lda,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), OUTPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb,    NULL,   SCRATCH,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,   SCRATCH,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztplqt.c b/runtime/quark/codelets/codelet_ztplqt.c
new file mode 100644
index 000000000..b1723f8fd
--- /dev/null
+++ b/runtime/quark/codelets/codelet_ztplqt.c
@@ -0,0 +1,76 @@
+/**
+ *
+ * @copyright (c) 2009-2016 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                          Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztplqt.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 0.9.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "chameleon_quark.h"
+#include "chameleon/morse_tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/* QUARK worker for ztplqt: recovers the 11 arguments queued by
+ * MORSE_TASK_ztplqt, in the same order, then runs CORE_ztplqt on them. */
+static void
+CORE_ztplqt_quark( Quark *quark )
+{
+    /* Integer arguments, inserted with the VALUE flag. */
+    int M, N, L, ib;
+    int lda, ldb, ldt;
+    /* Tile addresses for A, B and T. */
+    MORSE_Complex64_t *A, *B, *T;
+    /* Buffer inserted with the SCRATCH flag. */
+    MORSE_Complex64_t *WORK;
+
+    quark_unpack_args_11(
+        quark, M, N, L, ib,
+        A, lda, B, ldb, T, ldt, WORK );
+
+    CORE_ztplqt(
+        M, N, L, ib,
+        A, lda, B, ldb, T, ldt, WORK );
+}
+
+/* Queue one CORE_ztplqt_quark task on tiles A(Am,An), B(Bm,Bn), T(Tm,Tn). */
+void MORSE_TASK_ztplqt( const MORSE_option_t *options,
+                        int M, int N, int L, int ib, int nb,
+                        const MORSE_desc_t *A, int Am, int An, int lda,
+                        const MORSE_desc_t *B, int Bm, int Bn, int ldb,
+                        const MORSE_desc_t *T, int Tm, int Tn, int ldt )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_TSLQT;
+
+    /* B gets the QUARK_REGION_L|QUARK_REGION_D tags only when L is non-zero. */
+    int regionB = ( L != 0 ) ? (QUARK_REGION_L | QUARK_REGION_D) : 0;
+    QUARK_Insert_Task( opt->quark, CORE_ztplqt_quark, (Quark_Task_Flags*)opt,
+        sizeof(int),                        &M,   VALUE,
+        sizeof(int),                        &N,   VALUE,
+        sizeof(int),                        &L,   VALUE,
+        sizeof(int),                        &ib,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | QUARK_REGION_L | QUARK_REGION_D,
+        sizeof(int),                        &lda, VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | regionB | LOCALITY,
+        sizeof(int),                        &ldb, VALUE,
+        sizeof(MORSE_Complex64_t)*nb*ib,    RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), OUTPUT,
+        sizeof(int),                        &ldt, VALUE,
+        sizeof(MORSE_Complex64_t)*(ib+1)*nb, NULL, SCRATCH,
+        0);
+}
diff --git a/runtime/quark/codelets/codelet_ztpmlqt.c b/runtime/quark/codelets/codelet_ztpmlqt.c
new file mode 100644
index 000000000..0d6f2e6f6
--- /dev/null
+++ b/runtime/quark/codelets/codelet_ztpmlqt.c
@@ -0,0 +1,88 @@
+/**
+ *
+ * @copyright (c) 2009-2016 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                          Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztpmlqt.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 0.9.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "chameleon_quark.h"
+#include "chameleon/morse_tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/* QUARK worker for ztpmlqt: recovers the 16 arguments queued by
+ * MORSE_TASK_ztpmlqt, in the same order, then runs CORE_ztpmlqt on them. */
+static void
+CORE_ztpmlqt_quark( Quark *quark )
+{
+    /* Enum arguments, inserted with the VALUE flag. */
+    MORSE_enum side, trans;
+    /* Integer arguments, inserted with the VALUE flag. */
+    int M, N, K, L, ib;
+    /* Tiles inserted as INPUT, hence const-qualified here. */
+    const MORSE_Complex64_t *V, *T;
+    int ldv, ldt;
+    /* Tiles inserted as INOUT. */
+    MORSE_Complex64_t *A, *B;
+    int lda, ldb;
+    /* Buffer inserted with the SCRATCH flag. */
+    MORSE_Complex64_t *WORK;
+
+    quark_unpack_args_16(
+        quark, side, trans, M, N, K, L, ib,
+        V, ldv, T, ldt, A, lda, B, ldb, WORK );
+
+    CORE_ztpmlqt(
+        side, trans, M, N, K, L, ib,
+        V, ldv, T, ldt, A, lda, B, ldb, WORK );
+}
+
+/* Queue one CORE_ztpmlqt_quark task: V and T are INPUT, A and B are INOUT. */
+void MORSE_TASK_ztpmlqt( const MORSE_option_t *options,
+                         MORSE_enum side, MORSE_enum trans,
+                         int M, int N, int K, int L, int ib, int nb,
+                         const MORSE_desc_t *V, int Vm, int Vn, int ldv,
+                         const MORSE_desc_t *T, int Tm, int Tn, int ldt,
+                         const MORSE_desc_t *A, int Am, int An, int lda,
+                         const MORSE_desc_t *B, int Bm, int Bn, int ldb )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_TSMLQ; /* LQ-family tag, as in MORSE_TASK_ztsmlq (was the QR one). */
+
+    /* V gets the QUARK_REGION_L|QUARK_REGION_D tags only when L is non-zero. */
+    int shapeV = ( L == 0 ) ? 0 : (QUARK_REGION_L | QUARK_REGION_D);
+    QUARK_Insert_Task( opt->quark, CORE_ztpmlqt_quark, (Quark_Task_Flags*)opt,
+        sizeof(MORSE_enum),              &side,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &M,     VALUE,
+        sizeof(int),                     &N,     VALUE,
+        sizeof(int),                     &K,     VALUE,
+        sizeof(int),                     &L,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT | shapeV,
+        sizeof(int),                     &ldv,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT,
+        sizeof(int),                     &lda,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | LOCALITY,
+        sizeof(int),                     &ldb,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,   SCRATCH,
+        0);
+}
diff --git a/runtime/quark/codelets/codelet_ztpmqrt.c b/runtime/quark/codelets/codelet_ztpmqrt.c
index d0efd49bd..16b1bbff6 100644
--- a/runtime/quark/codelets/codelet_ztpmqrt.c
+++ b/runtime/quark/codelets/codelet_ztpmqrt.c
@@ -64,6 +64,8 @@ void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TSMQR;
 
+    int shapeV = ( L == 0 ) ? 0 : (QUARK_REGION_U | QUARK_REGION_D);
+
     QUARK_Insert_Task(
         opt->quark, CORE_ztpmqrt_quark, (Quark_Task_Flags*)opt,
         sizeof(MORSE_enum),              &side,  VALUE,
@@ -73,13 +75,13 @@ void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
         sizeof(int),                     &K,     VALUE,
         sizeof(int),                     &L,     VALUE,
         sizeof(int),                     &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT | shapeV,
         sizeof(int),                     &ldv,   VALUE,
         sizeof(MORSE_Complex64_t)*ib*nb,  RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INPUT,
         sizeof(int),                     &ldt,   VALUE,
         sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT,
         sizeof(int),                     &lda,   VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | LOCALITY,
         sizeof(int),                     &ldb,   VALUE,
         sizeof(MORSE_Complex64_t)*ib*nb,  NULL, SCRATCH,
         0);
diff --git a/runtime/quark/codelets/codelet_ztpqrt.c b/runtime/quark/codelets/codelet_ztpqrt.c
index 72c763a66..a7f1067fd 100644
--- a/runtime/quark/codelets/codelet_ztpqrt.c
+++ b/runtime/quark/codelets/codelet_ztpqrt.c
@@ -57,6 +57,8 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TSQRT;
 
+    int shapeB = ( L == 0 ) ? 0 : (QUARK_REGION_U | QUARK_REGION_D);
+
     QUARK_Insert_Task(
         opt->quark, CORE_ztpqrt_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                         &M,   VALUE,
@@ -65,7 +67,7 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
         sizeof(int),                         &ib,  VALUE,
         sizeof(MORSE_Complex64_t)*nb*nb,      RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | QUARK_REGION_U | QUARK_REGION_D,
         sizeof(int),                         &lda, VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,      RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT,
+        sizeof(MORSE_Complex64_t)*nb*nb,      RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | shapeB | LOCALITY,
         sizeof(int),                         &ldb, VALUE,
         sizeof(MORSE_Complex64_t)*nb*ib,      RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), OUTPUT,
         sizeof(int),                         &ldt, VALUE,
diff --git a/runtime/quark/codelets/codelet_ztslqt.c b/runtime/quark/codelets/codelet_ztslqt.c
index 766d3ff87..ed30b43b2 100644
--- a/runtime/quark/codelets/codelet_ztslqt.c
+++ b/runtime/quark/codelets/codelet_ztslqt.c
@@ -136,9 +136,9 @@ void MORSE_TASK_ztslqt(const MORSE_option_t *options,
         sizeof(int),                        &m,     VALUE,
         sizeof(int),                        &n,     VALUE,
         sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_D | QUARK_REGION_L | LOCALITY,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_L | QUARK_REGION_D,
         sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY,
         sizeof(int),                        &lda2,  VALUE,
         sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    OUTPUT,
         sizeof(int),                        &ldt,   VALUE,
diff --git a/runtime/quark/codelets/codelet_ztsmlq.c b/runtime/quark/codelets/codelet_ztsmlq.c
index a59d5b4e0..9020a57f5 100644
--- a/runtime/quark/codelets/codelet_ztsmlq.c
+++ b/runtime/quark/codelets/codelet_ztsmlq.c
@@ -173,23 +173,23 @@ void MORSE_TASK_ztsmlq(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TSMLQ;
     QUARK_Insert_Task(opt->quark, CORE_ztsmlq_quark, (Quark_Task_Flags*)opt,
-        sizeof(MORSE_enum),                &side,  VALUE,
-        sizeof(MORSE_enum),                &trans, VALUE,
-        sizeof(int),                        &m1,    VALUE,
-        sizeof(int),                        &n1,    VALUE,
-        sizeof(int),                        &m2,    VALUE,
-        sizeof(int),                        &n2,    VALUE,
-        sizeof(int),                        &k,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT,
-        sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
-        sizeof(int),                        &lda2,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),             INPUT,
-        sizeof(int),                        &ldv,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             INPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
-        sizeof(int),                        &ldwork, VALUE,
+        sizeof(MORSE_enum),              &side,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &m1,    VALUE,
+        sizeof(int),                     &n1,    VALUE,
+        sizeof(int),                     &m2,    VALUE,
+        sizeof(int),                     &n2,    VALUE,
+        sizeof(int),                     &k,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT,
+        sizeof(int),                     &lda1,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY,
+        sizeof(int),                     &lda2,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),    INPUT,
+        sizeof(int),                     &ldv,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    INPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,          SCRATCH,
+        sizeof(int),                     &ldwork, VALUE,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztsmqr.c b/runtime/quark/codelets/codelet_ztsmqr.c
index de57a8aa8..ed68791bf 100644
--- a/runtime/quark/codelets/codelet_ztsmqr.c
+++ b/runtime/quark/codelets/codelet_ztsmqr.c
@@ -173,23 +173,23 @@ void MORSE_TASK_ztsmqr(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TSMQR;
     QUARK_Insert_Task(opt->quark, CORE_ztsmqr_quark, (Quark_Task_Flags*)opt,
-        sizeof(MORSE_enum),                &side,  VALUE,
-        sizeof(MORSE_enum),                &trans, VALUE,
-        sizeof(int),                        &m1,    VALUE,
-        sizeof(int),                        &n1,    VALUE,
-        sizeof(int),                        &m2,    VALUE,
-        sizeof(int),                        &n2,    VALUE,
-        sizeof(int),                        &k,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT,
-        sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
-        sizeof(int),                        &lda2,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),             INPUT,
-        sizeof(int),                        &ldv,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             INPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
-        sizeof(int),                        &ldwork, VALUE,
+        sizeof(MORSE_enum),              &side,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &m1,    VALUE,
+        sizeof(int),                     &n1,    VALUE,
+        sizeof(int),                     &m2,    VALUE,
+        sizeof(int),                     &n2,    VALUE,
+        sizeof(int),                     &k,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT,
+        sizeof(int),                     &lda1,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY,
+        sizeof(int),                     &lda2,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),    INPUT,
+        sizeof(int),                     &ldv,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    INPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,          SCRATCH,
+        sizeof(int),                     &ldwork, VALUE,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztsqrt.c b/runtime/quark/codelets/codelet_ztsqrt.c
index f3f09a3ea..eb960b0d5 100644
--- a/runtime/quark/codelets/codelet_ztsqrt.c
+++ b/runtime/quark/codelets/codelet_ztsqrt.c
@@ -124,9 +124,9 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options,
         sizeof(int),                        &m,     VALUE,
         sizeof(int),                        &n,     VALUE,
         sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_D | QUARK_REGION_U | LOCALITY,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_U | QUARK_REGION_D,
         sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY,
         sizeof(int),                        &lda2,  VALUE,
         sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    OUTPUT,
         sizeof(int),                        &ldt,   VALUE,
diff --git a/runtime/quark/codelets/codelet_zttlqt.c b/runtime/quark/codelets/codelet_zttlqt.c
index 9ec251332..89020dc0a 100644
--- a/runtime/quark/codelets/codelet_zttlqt.c
+++ b/runtime/quark/codelets/codelet_zttlqt.c
@@ -133,16 +133,16 @@ void MORSE_TASK_zttlqt(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TTLQT;
     QUARK_Insert_Task(opt->quark, CORE_zttlqt_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),                        &m,     VALUE,
-        sizeof(int),                        &n,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT,
-        sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
-        sizeof(int),                        &lda2,  VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             OUTPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*nb,       NULL,          SCRATCH,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
+        sizeof(int),                     &m,     VALUE,
+        sizeof(int),                     &n,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_L | QUARK_REGION_D,
+        sizeof(int),                     &lda1,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | QUARK_REGION_L | QUARK_REGION_D | LOCALITY,
+        sizeof(int),                     &lda2,  VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    OUTPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb,    NULL,          SCRATCH,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,          SCRATCH,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zttmlq.c b/runtime/quark/codelets/codelet_zttmlq.c
index e2de20109..f7a258e6b 100644
--- a/runtime/quark/codelets/codelet_zttmlq.c
+++ b/runtime/quark/codelets/codelet_zttmlq.c
@@ -165,23 +165,23 @@ void MORSE_TASK_zttmlq(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TTMLQ;
     QUARK_Insert_Task(opt->quark, CORE_zttmlq_quark, (Quark_Task_Flags*)opt,
-        sizeof(MORSE_enum),                &side,  VALUE,
-        sizeof(MORSE_enum),                &trans, VALUE,
-        sizeof(int),                        &m1,    VALUE,
-        sizeof(int),                        &n1,    VALUE,
-        sizeof(int),                        &m2,    VALUE,
-        sizeof(int),                        &n2,    VALUE,
-        sizeof(int),                        &k,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT,
-        sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT,
-        sizeof(int),                        &lda2,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),             INPUT|QUARK_REGION_D|QUARK_REGION_L,
-        sizeof(int),                        &ldv,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             INPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
-        sizeof(int),                        &ldwork,    VALUE,
+        sizeof(MORSE_enum),              &side,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &m1,    VALUE,
+        sizeof(int),                     &n1,    VALUE,
+        sizeof(int),                     &m2,    VALUE,
+        sizeof(int),                     &n2,    VALUE,
+        sizeof(int),                     &k,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT,
+        sizeof(int),                     &lda1,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY,
+        sizeof(int),                     &lda2,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),    INPUT | QUARK_REGION_L | QUARK_REGION_D,
+        sizeof(int),                     &ldv,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    INPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,          SCRATCH,
+        sizeof(int),                     &ldwork,    VALUE,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zttmqr.c b/runtime/quark/codelets/codelet_zttmqr.c
index ed4311d99..89402c127 100644
--- a/runtime/quark/codelets/codelet_zttmqr.c
+++ b/runtime/quark/codelets/codelet_zttmqr.c
@@ -32,7 +32,8 @@
 #include "chameleon/morse_tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-void CORE_zttmqr_quark(Quark *quark)
+static void
+CORE_zttmqr_quark( Quark *quark )
 {
     MORSE_enum side;
     MORSE_enum trans;
@@ -165,23 +166,23 @@ void MORSE_TASK_zttmqr(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TTMQR;
     QUARK_Insert_Task(opt->quark, CORE_zttmqr_quark, (Quark_Task_Flags*)opt,
-        sizeof(MORSE_enum),                &side,  VALUE,
-        sizeof(MORSE_enum),                &trans, VALUE,
-        sizeof(int),                        &m1,    VALUE,
-        sizeof(int),                        &n1,    VALUE,
-        sizeof(int),                        &m2,    VALUE,
-        sizeof(int),                        &n2,    VALUE,
-        sizeof(int),                        &k,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT,
-        sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT,
-        sizeof(int),                        &lda2,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),             INPUT|QUARK_REGION_D|QUARK_REGION_U,
-        sizeof(int),                        &ldv,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             INPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
-        sizeof(int),                        &ldwork,    VALUE,
+        sizeof(MORSE_enum),              &side,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &m1,    VALUE,
+        sizeof(int),                     &n1,    VALUE,
+        sizeof(int),                     &m2,    VALUE,
+        sizeof(int),                     &n2,    VALUE,
+        sizeof(int),                     &k,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT,
+        sizeof(int),                     &lda1,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | LOCALITY,
+        sizeof(int),                     &lda2,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),    INPUT | QUARK_REGION_U | QUARK_REGION_D,
+        sizeof(int),                     &ldv,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    INPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,          SCRATCH,
+        sizeof(int),                     &ldwork, VALUE,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zttqrt.c b/runtime/quark/codelets/codelet_zttqrt.c
index c7397d924..45284f08c 100644
--- a/runtime/quark/codelets/codelet_zttqrt.c
+++ b/runtime/quark/codelets/codelet_zttqrt.c
@@ -133,16 +133,16 @@ void MORSE_TASK_zttqrt(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TTQRT;
     QUARK_Insert_Task(opt->quark, CORE_zttqrt_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),                        &m,     VALUE,
-        sizeof(int),                        &n,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT,
-        sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
-        sizeof(int),                        &lda2,  VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             OUTPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*nb,       NULL,          SCRATCH,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
+        sizeof(int),                     &m,     VALUE,
+        sizeof(int),                     &n,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_U | QUARK_REGION_D,
+        sizeof(int),                     &lda1,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT | QUARK_REGION_U | QUARK_REGION_D | LOCALITY,
+        sizeof(int),                     &lda2,  VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    OUTPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb,    NULL,          SCRATCH,
+        sizeof(MORSE_Complex64_t)*ib*nb, NULL,          SCRATCH,
         0);
 }
-- 
GitLab