From fd2fcb03ada8abca7e588871b321fa5c7fa7b4d5 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 31 Jan 2018 17:20:46 +0100
Subject: [PATCH] Add migration and switch to TP kernels in unmlq algorithms

---
 compute/pzunmlq.c       | 117 +++++++++++------
 compute/pzunmlq_param.c | 283 +++++++++++++++++++++-------------------
 compute/pzunmlqrh.c     | 162 +++++++++++++++--------
 3 files changed, 334 insertions(+), 228 deletions(-)

diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c
index 9c9cfd679..28def3d11 100644
--- a/compute/pzunmlq.c
+++ b/compute/pzunmlq.c
@@ -3,8 +3,7 @@
  * @copyright (c) 2009-2014 The University of Tennessee and The University
  *                          of Tennessee Research Foundation.
  *                          All rights reserved.
- * @copyright (c) 2012-2016 Inria. All rights reserved.
- * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
  *
  **/
 
@@ -35,12 +34,12 @@
 #define B(m,n) B,  m,  n
 #define T(m,n) T,  m,  n
 #if defined(CHAMELEON_COPY_DIAG)
-#define D(k) D, k, 0
+#define D(k)   D,  k,  0
 #else
-#define D(k) A, k, k
+#define D(k)   D,  k,  k
 #endif
 
-/*******************************************************************************
+/**
  *  Parallel application of Q using tile V - LQ factorization - dynamic scheduling
  **/
 void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
@@ -72,6 +71,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
         minMT = A->mt;
     }
 
+    if (D == NULL) {
+        D = A;
+    }
+
     /*
      * zunmlq = A->mb * ib
      * ztsmlq = A->mb * ib
@@ -133,24 +136,34 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                     ldbm = BLKLDD(B, m);
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                        MORSE_TASK_ztsmlq(
+
+                        RUNTIME_data_migrate( sequence, B(k, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmlqt(
                             &options,
                             side, trans,
-                            B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb,
-                            B(k, n), ldbk,
-                            B(m, n), ldbm,
+                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(k, m), ldak,
-                            T(k, m), T->mb);
+                            T(k, m), T->mb,
+                            B(k, n), ldbk,
+                            B(m, n), ldbm);
                     }
                 }
 
+                /* Restore the original location of the tiles */
+                for (n = 0; n < B->nt; n++) {
+                    RUNTIME_data_migrate( sequence, B(k, n),
+                                          B->get_rankof( B, k, n ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
+        /*
+         *  MorseLeft / MorseConjTrans
+         */
         else {
-            /*
-             *  MorseLeft / MorseConjTrans
-             */
             for (k = minMT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(morse, k);
 
@@ -162,15 +175,19 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                     ldbm = BLKLDD(B, m);
                     for (n = 0; n < B->nt; n++) {
-                        tempnn   = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                        MORSE_TASK_ztsmlq(
+                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                        RUNTIME_data_migrate( sequence, B(k, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmlqt(
                             &options,
                             side, trans,
-                            B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb,
-                            B(k, n), ldbk,
-                            B(m, n), ldbm,
+                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(k, m), ldak,
-                            T(k, m), T->mb);
+                            T(k, m), T->mb,
+                            B(k, n), ldbk,
+                            B(m, n), ldbm);
                     }
                 }
 #if defined(CHAMELEON_COPY_DIAG)
@@ -189,6 +206,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
 #endif
                 for (n = 0; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                    RUNTIME_data_migrate( sequence, B(k, n),
+                                          B->get_rankof( B, k, n ) );
+
                     MORSE_TASK_zunmlq(
                         &options,
                         side, trans,
@@ -197,35 +218,38 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
-
                 RUNTIME_iteration_pop(morse);
             }
         }
     }
+    /*
+     *  MorseRight / MorseNoTrans
+     */
     else {
         if (trans == MorseNoTrans) {
-            /*
-             *  MorseRight / MorseNoTrans
-             */
             for (k = minMT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(morse, k);
 
-                tempkn   = k == B->nt -1 ? B->n -k*B->nb : B->nb;
-                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
+                tempkn   = k == B->nt - 1 ? B->n - k * B->nb : B->nb;
+                tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb;
                 ldak = BLKLDD(A, k);
                 for (n = B->nt-1; n > k; n--) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                         ldbm = BLKLDD(B, m);
-                        MORSE_TASK_ztsmlq(
+
+                        RUNTIME_data_migrate( sequence, B(m, k),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmlqt(
                             &options,
                             side, trans,
-                            tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb,
-                            B(m, k), ldbm,
-                            B(m, n), ldbm,
+                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(k, n), ldak,
-                            T(k, n), T->mb);
+                            T(k, n), T->mb,
+                            B(m, k), ldbm,
+                            B(m, n), ldbm);
                     }
                 }
 #if defined(CHAMELEON_COPY_DIAG)
@@ -245,6 +269,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                 for (m = 0; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                     ldbm = BLKLDD(B, m);
+
+                    RUNTIME_data_migrate( sequence, B(m, k),
+                                          B->get_rankof( B, m, k ) );
+
                     MORSE_TASK_zunmlq(
                         &options,
                         side, trans,
@@ -257,14 +285,14 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                 RUNTIME_iteration_pop(morse);
             }
         }
+        /*
+         *  MorseRight / MorseConjTrans
+         */
         else {
-            /*
-             *  MorseRight / MorseConjTrans
-             */
             for (k = 0; k < minMT; k++) {
                 RUNTIME_iteration_push(morse, k);
 
-                tempkn   = k == B->nt -1 ? B->n -k*B->nb : B->nb;
+                tempkn   = k == B->nt-1 ? B->n-k*B->nb : B->nb;
                 tempkmin = k == minMT-1 ? minM-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
 #if defined(CHAMELEON_COPY_DIAG)
@@ -297,17 +325,27 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                         ldbm = BLKLDD(B, m);
-                        MORSE_TASK_ztsmlq(
+
+                        RUNTIME_data_migrate( sequence, B(m, k),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmlqt(
                             &options,
                             side, trans,
-                            tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb,
-                            B(m, k), ldbm,
-                            B(m, n), ldbm,
+                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(k, n), ldak,
-                            T(k, n), T->mb);
+                            T(k, n), T->mb,
+                            B(m, k), ldbm,
+                            B(m, n), ldbm);
                     }
                 }
 
+                /* Restore the original location of the tiles */
+                for (m = 0; m < B->mt; m++) {
+                    RUNTIME_data_migrate( sequence, B(m, k),
+                                          B->get_rankof( B, m, k ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -315,5 +353,4 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
 
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
-    (void)D;
 }
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index 5ad102180..8fce43522 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -27,13 +27,8 @@
 
 #define A(m,n) A,  m,  n
 #define B(m,n) B,  m,  n
-#define TS(m,n) TS,  m,  n
-#define TT(m,n) TT,  m,  n
-#if defined(CHAMELEON_COPY_DIAG)
-#define D(m,n)   D,  m,  n
-#else
-#define D(m,n)   A,  m,  n
-#endif
+#define T(m,n) T,  m,  n
+#define D(m,n) D,  m,  n
 
 /**
  *  Parallel application of Q using tile V - LQ factorization - dynamic scheduling
@@ -46,13 +41,14 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
+    MORSE_desc_t *T;
     size_t ws_worker = 0;
     size_t ws_host = 0;
 
     int k, m, n, i, p;
     int ldbm, ldak, ldbp;
     int tempnn, temppn, tempkmin, tempmm, tempkm;
-    int ib, K;
+    int ib, K, L;
     int *tiles;
 
     morse = morse_context_self();
@@ -64,6 +60,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 
     K = chameleon_min(A->mt, A->nt);
 
+    if (D == NULL) {
+        D = A;
+    }
+
     /*
      * zunmlq = A->nb * ib
      * ztsmlq = A->nb * ib
@@ -99,6 +99,7 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
 
+                T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     p = qrtree->getm(qrtree, k, i);
 
@@ -125,10 +126,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                         MORSE_TASK_zunmlq(
                             &options,
                             side, trans,
-                            temppn, tempnn, tempkmin, ib, TS->nb,
-                            D( k, p), ldak,
-                            TS(k, p), TS->mb,
-                            B( p, n), ldbp);
+                            temppn, tempnn, tempkmin, ib, T->nb,
+                            D(k, p), ldak,
+                            T(k, p), T->mb,
+                            B(p, n), ldbp);
                     }
                 }
 
@@ -145,40 +146,45 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 
                     /* TT or TS */
                     if(qrtree->gettype(qrtree, k, m) == 0){
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-
-                            MORSE_TASK_ztsmlq(
-                                &options,
-                                side, trans,
-                                B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( k, m), ldak,
-                                TS(k, m), TS->mb);
-                        }
+                        L = 0;
+                        T = TS;
                     }
                     else {
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-
-                            MORSE_TASK_zttmlq(
-                                &options,
-                                side, trans,
-                                B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( k, m), ldak,
-                                TT(k, m), TS->mb);
-                        }
+                        L = A->nb;
+                        T = TT;
+                    }
+                    for (n = 0; n < B->nt; n++) {
+                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                        RUNTIME_data_migrate( sequence, B(p, n),
+                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmlqt(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkm, chameleon_min( L, tempnn ), ib, T->nb,
+                            A(k, m), ldak,
+                            T(k, m), T->mb,
+                            B(p, n), ldbp,
+                            B(m, n), ldbm);
                     }
                 }
+
+                /* Restore the original location of the tiles */
+                for (n = 0; n < B->nt; n++) {
+                    RUNTIME_data_migrate( sequence, B(k, n),
+                                          B->get_rankof( B, k, n ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
-        } else {
-            /*
-             *  MorseLeft / MorseConjTrans
-             */
+        }
+        /*
+         *  MorseLeft / MorseConjTrans
+         */
+        else {
             for (k = K-1; k >= 0; k--) {
                 RUNTIME_iteration_push(morse, k);
 
@@ -198,32 +204,33 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 
                     /* TT or TS */
                     if(qrtree->gettype(qrtree, k, m) == 0){
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_ztsmlq(
-                                &options,
-                                side, trans,
-                                B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( k, m), ldak,
-                                TS(k, m), TS->mb);
-                        }
+                        L = 0;
+                        T = TS;
                     }
                     else {
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_zttmlq(
-                                &options,
-                                side, trans,
-                                B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( k, m), ldak,
-                                TT(k, m), TT->mb);
-                        }
+                        L = A->nb;
+                        T = TT;
+                    }
+                    for (n = 0; n < B->nt; n++) {
+                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                        RUNTIME_data_migrate( sequence, B(p, n),
+                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmlqt(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkm, chameleon_min(L, tempnn), ib, T->nb,
+                            A(k, m), ldak,
+                            T(k, m), T->mb,
+                            B(p, n), ldbp,
+                            B(m, n), ldbm);
                     }
                 }
+
+                T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     p = qrtree->getm(qrtree, k, i);
 
@@ -247,23 +254,28 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 #endif
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                        RUNTIME_data_migrate( sequence, B(p, n),
+                                              B->get_rankof( B, p, n ) );
+
                         MORSE_TASK_zunmlq(
                             &options,
                             side, trans,
-                            temppn, tempnn, tempkmin, ib, TS->nb,
-                            D( k, p), ldak,
-                            TS(k, p), TS->mb,
-                            B( p, n), ldbp);
+                            temppn, tempnn, tempkmin, ib, T->nb,
+                            D(k, p), ldak,
+                            T(k, p), T->mb,
+                            B(p, n), ldbp);
                     }
                 }
                 RUNTIME_iteration_pop(morse);
             }
         }
-    } else {
+    }
+    /*
+     *  MorseRight / MorseNoTrans
+     */
+    else {
         if (trans == MorseNoTrans) {
-            /*
-             *  MorseRight / MorseNoTrans
-             */
             for (k = K-1; k >= 0; k--) {
                 RUNTIME_iteration_push(morse, k);
 
@@ -280,37 +292,36 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                     ldbp = BLKLDD(B, p);
 
-                    /* TT or TS */
-
+                    /* TS or TT */
                     if(qrtree->gettype(qrtree, k, n) == 0){
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-                            MORSE_TASK_ztsmlq(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb,
-                                B( m, p), ldbm,
-                                B( m, n), ldbm,
-                                A( k, n), ldak,
-                                TS(k, n), TS->mb);
-                        }
+                        L = 0;
+                        T = TS;
                     }
                     else {
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-                            MORSE_TASK_zttmlq(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb,
-                                B( m, p), ldbm,
-                                B( m, n), ldbm,
-                                A( k, n), ldak,
-                                TT(k, n), TT->mb);
-                        }
+                        L = tempnn;
+                        T = TT;
+                    }
+                    for (m = 0; m < B->mt; m++) {
+                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                        ldbm = BLKLDD(B, m);
+
+                        RUNTIME_data_migrate( sequence, B(m, p),
+                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmlqt(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkm, L, ib, T->nb,
+                            A(k, n), ldak,
+                            T(k, n), T->mb,
+                            B(m, p), ldbm,
+                            B(m, n), ldbm);
                     }
                 }
+
+                T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     p = qrtree->getm(qrtree, k, i);
 
@@ -334,26 +345,33 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                     for (m = 0; m < B->mt; m++) {
                         ldbm = BLKLDD(B, m);
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+
+                        RUNTIME_data_migrate( sequence, B(m, p),
+                                              B->get_rankof( B, m, p ) );
+
                         MORSE_TASK_zunmlq(
                             &options,
                             side, trans,
-                            tempmm, temppn, tempkmin, ib, TS->nb,
-                            D( k, p), ldak,
-                            TS(k, p), TS->mb,
-                            B( m, p), ldbm);
+                            tempmm, temppn, tempkmin, ib, T->nb,
+                            D(k, p), ldak,
+                            T(k, p), T->mb,
+                            B(m, p), ldbm);
                     }
                 }
                 RUNTIME_iteration_pop(morse);
             }
-        } else {
-            /*
-             *  MorseRight / MorseConjTrans
-             */
+        }
+        /*
+         *  MorseRight / MorseConjTrans
+         */
+        else {
             for (k = 0; k < K; k++) {
                 RUNTIME_iteration_push(morse, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+
+                T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     p = qrtree->getm(qrtree, k, i);
 
@@ -381,10 +399,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                         MORSE_TASK_zunmlq(
                             &options,
                             side, trans,
-                            tempmm, temppn, tempkmin, ib, TS->nb,
-                            D( k, p), ldak,
-                            TS(k, p), TS->mb,
-                            B( m, p), ldbm);
+                            tempmm, temppn, tempkmin, ib, T->nb,
+                            D(k, p), ldak,
+                            T(k, p), TS->mb,
+                            B(m, p), ldbm);
                     }
                 }
                 /* Setting the order of tiles */
@@ -398,32 +416,31 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                     ldbp = BLKLDD(B, p);
 
                     if(qrtree->gettype(qrtree, k, n) == 0){
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-                            MORSE_TASK_ztsmlq(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( k, n), ldak,
-                                TS(k, n), TS->mb);
-                        }
+                        L = 0;
+                        T = TS;
                     }
                     else {
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-                            MORSE_TASK_zttmlq(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( k, n), ldak,
-                                TT(k, n), TT->mb);
-                        }
+                        L = tempnn;
+                        T = TT;
+                    }
+
+                    for (m = 0; m < B->mt; m++) {
+                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                        ldbm = BLKLDD(B, m);
+
+                        RUNTIME_data_migrate( sequence, B(m, p),
+                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmlqt(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkm, L, ib, T->nb,
+                            A(k, n), ldak,
+                            T(k, n), T->mb,
+                            B(m, p), ldbm,
+                            B(m, n), ldbm);
                     }
                 }
 
@@ -435,6 +452,4 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
     free(tiles);
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
-
-    (void)D;
 }
diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c
index 8444f0bb3..0b4af0cb8 100644
--- a/compute/pzunmlqrh.c
+++ b/compute/pzunmlqrh.c
@@ -34,7 +34,7 @@
 #define A(m,n) A,  (m),  (n)
 #define B(m,n) B,  (m),  (n)
 #define T(m,n) T,  (m),  (n)
-#define T2(m,n) T,  (m),  (n)+A->nt
+#define T2(m,n) T,  (m),  ((n)+A->nt)
 #if defined(CHAMELEON_COPY_DIAG)
 #define D(m,n) D, ((n)/BS), 0
 #else
@@ -133,15 +133,19 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                         ldbm = BLKLDD(B, m);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_ztsmlq(
-                                &options,
-                                side, trans,
-                                B->nb, tempnn, tempmm, tempnn,
-                                tempkm, ib, T->nb,
-                                B(N, n), ldbN,
-                                B(m, n), ldbm,
+
+                            RUNTIME_data_migrate( sequence, B(N, n),
+                                                  B->get_rankof( B, m, n ) );
+                            RUNTIME_data_migrate( sequence, B(m, n),
+                                                  B->get_rankof( B, m, n ) );
+
+                            MORSE_TASK_ztpmlqt(
+                                &options, side, trans,
+                                tempmm, tempnn, tempkm, 0, ib, T->nb,
                                 A(k, m), ldak,
-                                T(k, m), T->mb);
+                                T(k, m), T->mb,
+                                B(N, n), ldbN,
+                                B(m, n), ldbm);
                         }
                     }
                 }
@@ -152,19 +156,30 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                         ldbNRD = BLKLDD(B, N+RD);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_zttmlq(
+
+                            RUNTIME_data_migrate( sequence, B(N, n),
+                                                  B->get_rankof( B, N+RD, n ) );
+                            RUNTIME_data_migrate( sequence, B(N+RD, n),
+                                                  B->get_rankof( B, N+RD, n ) );
+
+                            MORSE_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                B->mb, tempnn, tempNRDn, tempnn,
-                                tempkm, ib, T->nb,
-                                B (N,    n), ldbN,
-                                B (N+RD, n), ldbNRD,
+                                tempNRDn, tempnn, tempkm, tempnn, ib, T->nb,
                                 A (k, N+RD), ldak,
-                                T2(k, N+RD), T->mb);
+                                T2(k, N+RD), T->mb,
+                                B (N,    n), ldbN,
+                                B (N+RD, n), ldbNRD);
                         }
                     }
                 }
 
+                /* Restore the original location of the tiles */
+                for (n = 0; n < B->nt; n++) {
+                    RUNTIME_data_migrate( sequence, B(k, n),
+                                          B->get_rankof( B, k, n ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
         } else {
@@ -186,15 +201,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                         ldbNRD = BLKLDD(B, N+RD);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_zttmlq(
+
+                            RUNTIME_data_migrate( sequence, B(N, n),
+                                                  B->get_rankof( B, N+RD, n ) );
+                            RUNTIME_data_migrate( sequence, B(N+RD, n),
+                                                  B->get_rankof( B, N+RD, n ) );
+
+                            MORSE_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                B->nb, tempnn, tempNRDn, tempnn,
-                                tempkm, ib, T->nb,
-                                B (N,    n), ldbN,
-                                B (N+RD, n), ldbNRD,
+                                tempNRDn, tempnn, tempkm, tempnn, ib, T->nb,
                                 A (k, N+RD), ldak,
-                                T2(k, N+RD), T->mb);
+                                T2(k, N+RD), T->mb,
+                                B (N,    n), ldbN,
+                                B (N+RD, n), ldbNRD);
                         }
                     }
                 }
@@ -207,15 +227,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                         ldbm = BLKLDD(B, m);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_ztsmlq(
+
+                            RUNTIME_data_migrate( sequence, B(N, n),
+                                                  B->get_rankof( B, m, n ) );
+                            RUNTIME_data_migrate( sequence, B(m, n),
+                                                  B->get_rankof( B, m, n ) );
+
+                            MORSE_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                B->mb, tempnn, tempmm, tempnn,
-                                tempkm, ib, T->nb,
-                                B(N, n), ldbN,
-                                B(m, n), ldbm,
+                                tempmm, tempnn, tempkm, 0, ib, T->nb,
                                 A(k, m), ldak,
-                                T(k, m), T->mb);
+                                T(k, m), T->mb,
+                                B(N, n), ldbN,
+                                B(m, n), ldbm);
                         }
                     }
 #if defined(CHAMELEON_COPY_DIAG)
@@ -244,12 +269,11 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                             B(N, n), ldbN);
                     }
                 }
-
                 RUNTIME_iteration_pop(morse);
             }
-
         }
-    } else {
+    }
+    else {
         if (trans == MorseNoTrans) {
             /*
              *  MorseRight / MorseNoTrans
@@ -268,15 +292,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                         for (m = 0; m < B->mt; m++) {
                             ldbm   = BLKLDD(B, m);
                             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            MORSE_TASK_zttmlq(
+
+                            RUNTIME_data_migrate( sequence, B(m, N),
+                                                  B->get_rankof( B, m, N+RD ) );
+                            RUNTIME_data_migrate( sequence, B(m, N+RD),
+                                                  B->get_rankof( B, m, N+RD ) );
+
+                            MORSE_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                tempmm, B->nb, tempmm, tempNRDn,
-                                tempkm, ib, T->nb,
-                                B (m, N   ), ldbm,
-                                B (m, N+RD), ldbm,
+                                tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb,
                                 A (k, N+RD), ldak,
-                                T2(k, N+RD), T->mb);
+                                T2(k, N+RD), T->mb,
+                                B (m, N   ), ldbm,
+                                B (m, N+RD), ldbm);
                         }
                     }
                 }
@@ -288,15 +317,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                         for (m = 0; m < B->mt; m++) {
                             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                             ldbm = BLKLDD(B, m);
-                            MORSE_TASK_ztsmlq(
+
+                            RUNTIME_data_migrate( sequence, B(m, N),
+                                                  B->get_rankof( B, m, n ) );
+                            RUNTIME_data_migrate( sequence, B(m, n),
+                                                  B->get_rankof( B, m, n ) );
+
+                            MORSE_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                tempmm, B->nb, tempmm, tempnn,
-                                tempkm, ib, T->nb,
-                                B(m, N), ldbm,
-                                B(m, n), ldbm,
+                                tempmm, tempnn, tempkm, 0, ib, T->nb,
                                 A(k, n), ldak,
-                                T(k, n), T->mb);
+                                T(k, n), T->mb,
+                                B(m, N), ldbm,
+                                B(m, n), ldbm);
                         }
                     }
 #if defined(CHAMELEON_COPY_DIAG)
@@ -316,6 +350,10 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                         ldbm = BLKLDD(B, m);
+
+                        RUNTIME_data_migrate( sequence, B(m, N),
+                                              B->get_rankof( B, m, N ) );
+
                         MORSE_TASK_zunmlq(
                             &options,
                             side, trans,
@@ -372,15 +410,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                         for (m = 0; m < B->mt; m++) {
                             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                             ldbm = BLKLDD(B, m);
-                            MORSE_TASK_ztsmlq(
+
+                            RUNTIME_data_migrate( sequence, B(m, N),
+                                                  B->get_rankof( B, m, n ) );
+                            RUNTIME_data_migrate( sequence, B(m, n),
+                                                  B->get_rankof( B, m, n ) );
+
+                            MORSE_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                tempmm, tempNn, tempmm, tempnn,
-                                tempkm, ib, T->nb,
-                                B(m, N), ldbm,
-                                B(m, n), ldbm,
+                                tempmm, tempnn, tempkm, 0, ib, T->nb,
                                 A(k, n), ldak,
-                                T(k, n), T->mb);
+                                T(k, n), T->mb,
+                                B(m, N), ldbm,
+                                B(m, n), ldbm);
                         }
                     }
                 }
@@ -390,19 +433,30 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                         for (m = 0; m < B->mt; m++) {
                             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                             ldbm   = BLKLDD(B, m);
-                            MORSE_TASK_zttmlq(
+
+                            RUNTIME_data_migrate( sequence, B(m, N),
+                                                  B->get_rankof( B, m, N+RD ) );
+                            RUNTIME_data_migrate( sequence, B(m, N+RD),
+                                                  B->get_rankof( B, m, N+RD ) );
+
+                            MORSE_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                tempmm, B->nb, tempmm, tempNRDn,
-                                tempkm, ib, T->nb,
-                                B (m, N   ), ldbm,
-                                B (m, N+RD), ldbm,
+                                tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb,
                                 A (k, N+RD), ldak,
-                                T2(k, N+RD), T->mb);
+                                T2(k, N+RD), T->mb,
+                                B (m, N   ), ldbm,
+                                B (m, N+RD), ldbm);
                         }
                     }
                 }
 
+                /* Restore the original location of the tiles */
+                for (m = 0; m < B->mt; m++) {
+                    RUNTIME_data_migrate( sequence, B(m, k),
+                                          B->get_rankof( B, m, k ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
-- 
GitLab