From 7e28d2d5ecc0200745ed1cab5cf9f76bee582ace Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 31 Jan 2018 13:49:50 +0100
Subject: [PATCH] Add migration and tp kernels to unmqr functions

---
 compute/pzunmqr.c       |  92 +++++++++----
 compute/pzunmqr_param.c | 281 +++++++++++++++++++++-------------------
 compute/pzunmqrrh.c     | 186 ++++++++++++++++----------
 3 files changed, 331 insertions(+), 228 deletions(-)

diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c
index 4e81716dd..e9d74dea3 100644
--- a/compute/pzunmqr.c
+++ b/compute/pzunmqr.c
@@ -3,8 +3,7 @@
  * @copyright (c) 2009-2014 The University of Tennessee and The University
  *                          of Tennessee Research Foundation.
  *                          All rights reserved.
- * @copyright (c) 2012-2016 Inria. All rights reserved.
- * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
  *
  **/
 
@@ -35,12 +34,12 @@
 #define B(m,n) B,  m,  n
 #define T(m,n) T,  m,  n
 #if defined(CHAMELEON_COPY_DIAG)
-#define D(k) D, k, 0
+#define D(k)   D,  k,  0
 #else
-#define D(k) A, k, k
+#define D(k)   D,  k,  k
 #endif
 
-/*******************************************************************************
+/**
  *  Parallel application of Q using tile V - QR factorization - dynamic scheduling
  **/
 void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
@@ -72,6 +71,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
         minMT = A->mt;
     }
 
+    if (D == NULL) {
+        D = A;
+    }
+
     /*
      * zunmqr = A->nb * ib
      * ztsmqr = A->nb * ib
@@ -134,17 +137,27 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     ldbm = BLKLDD(B, m);
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                        MORSE_TASK_ztsmqr(
+
+                        RUNTIME_data_migrate( sequence, B(k, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmqrt(
                             &options,
                             side, trans,
-                            B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb,
-                            B(k, n), ldbk,
-                            B(m, n), ldbm,
+                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(m, k), ldam,
-                            T(m, k), T->mb);
+                            T(m, k), T->mb,
+                            B(k, n), ldbk,
+                            B(m, n), ldbm);
                     }
                 }
 
+                /* Restore the original location of the tiles */
+                for (n = 0; n < B->nt; n++) {
+                    RUNTIME_data_migrate( sequence, B(k, n),
+                                          B->get_rankof( B, k, n ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -165,14 +178,18 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     ldbm = BLKLDD(B, m);
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                        MORSE_TASK_ztsmqr(
+
+                        RUNTIME_data_migrate( sequence, B(k, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmqrt(
                             &options,
                             side, trans,
-                            B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb,
-                            B(k, n), ldbk,
-                            B(m, n), ldbm,
+                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(m, k), ldam,
-                            T(m, k), T->mb);
+                            T(m, k), T->mb,
+                            B(k, n), ldbk,
+                            B(m, n), ldbm);
                     }
                 }
 #if defined(CHAMELEON_COPY_DIAG)
@@ -189,8 +206,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     D(k), ldak );
 #endif
 #endif
+
                 for (n = 0; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                    RUNTIME_data_migrate( sequence, B(k, n),
+                                          B->get_rankof( B, k, n ) );
+
                     MORSE_TASK_zunmqr(
                         &options,
                         side, trans,
@@ -199,7 +221,6 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
-
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -222,14 +243,18 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                         ldbm = BLKLDD(B, m);
-                        MORSE_TASK_ztsmqr(
+
+                        RUNTIME_data_migrate( sequence, B(m, k),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmqrt(
                             &options,
                             side, trans,
-                            tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb,
-                            B(m, k), ldbm,
-                            B(m, n), ldbm,
+                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(n, k), ldan,
-                            T(n, k), T->mb);
+                            T(n, k), T->mb,
+                            B(m, k), ldbm,
+                            B(m, n), ldbm);
                     }
                 }
 #if defined(CHAMELEON_COPY_DIAG)
@@ -249,6 +274,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                 for (m = 0; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                     ldbm = BLKLDD(B, m);
+
+                    RUNTIME_data_migrate( sequence, B(m, k),
+                                          B->get_rankof( B, m, k ) );
+
                     MORSE_TASK_zunmqr(
                         &options,
                         side, trans,
@@ -302,17 +331,27 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                         ldbm = BLKLDD(B, m);
-                        MORSE_TASK_ztsmqr(
+
+                        RUNTIME_data_migrate( sequence, B(m, k),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmqrt(
                             &options,
                             side, trans,
-                            tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb,
-                            B(m, k), ldbm,
-                            B(m, n), ldbm,
+                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(n, k), ldan,
-                            T(n, k), T->mb);
+                            T(n, k), T->mb,
+                            B(m, k), ldbm,
+                            B(m, n), ldbm);
                     }
                 }
 
+                /* Restore the original location of the tiles */
+                for (m = 0; m < B->mt; m++) {
+                    RUNTIME_data_migrate( sequence, B(m, k),
+                                          B->get_rankof( B, m, k ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -320,5 +359,4 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
 
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
-    (void)D;
 }
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index ae9b46d5e..2390cb761 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -27,13 +27,8 @@
 
 #define A(m,n) A,  m,  n
 #define B(m,n) B,  m,  n
-#define TS(m,n) TS,  m,  n
-#define TT(m,n) TT,  m,  n
-#if defined(CHAMELEON_COPY_DIAG)
-#define D(m,n)   D,  m,  n
-#else
-#define D(m,n)   A,  m,  n
-#endif
+#define T(m,n) T,  m,  n
+#define D(m,n) D,  m,  n
 
 /**
  *  Parallel application of Q using tile V - QR factorization - dynamic scheduling
@@ -45,13 +40,14 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
+    MORSE_desc_t *T;
     size_t ws_worker = 0;
     size_t ws_host = 0;
 
     int k, m, n, i, p;
     int ldam, ldan, ldbm, ldbp;
     int tempnn, tempkmin, tempmm, tempkn;
-    int ib, K;
+    int ib, K, L;
     int *tiles;
 
     morse = morse_context_self();
@@ -63,6 +59,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 
     K = chameleon_min(A->mt, A->nt);
 
+    if (D == NULL) {
+        D = A;
+    }
+
     /*
      * zunmqr = A->nb * ib
      * ztsmqr = A->nb * ib
@@ -80,7 +80,6 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 #endif
 
     /* Initialisation of tiles */
-
     tiles = (int*)calloc( qrtree->mt, sizeof(int) );
 
     ws_worker *= sizeof(MORSE_Complex64_t);
@@ -98,6 +97,7 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 
                 tempkn   = k == A->nt-1 ? A->n-k*A->nb : A->nb;
 
+                T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     m = qrtree->getm(qrtree, k, i);
 
@@ -125,10 +125,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                         MORSE_TASK_zunmqr(
                             &options,
                             side, trans,
-                            tempmm, tempnn, tempkmin, ib, TS->nb,
-                            D( m, k), ldam,
-                            TS(m, k), TS->mb,
-                            B( m, n), ldbm);
+                            tempmm, tempnn, tempkmin, ib, T->nb,
+                            D(m, k), ldam,
+                            T(m, k), T->mb,
+                            B(m, n), ldbm);
                     }
                 }
                 /* Setting the order of the tiles*/
@@ -143,38 +143,45 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                     ldbm = BLKLDD(B, m);
                     ldbp = BLKLDD(B, p);
                     if(qrtree->gettype(qrtree, k, m) == 0){
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_ztsmqr(
-                                &options,
-                                side, trans,
-                                B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( m, k), ldam,
-                                TS(m, k), TS->mb);
-                        }
+                        L = 0;
+                        T = TS;
                     }
                     else {
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_zttmqr(
-                                &options,
-                                side, trans,
-                                B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( m, k), ldam,
-                                TT(m, k), TT->mb);
-                        }
+                        L = tempmm;
+                        T = TT;
+                    }
+                    for (n = 0; n < B->nt; n++) {
+                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                        RUNTIME_data_migrate( sequence, B(p, n),
+                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmqrt(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkn, L, ib, T->nb,
+                            A(m, k), ldam,
+                            T(m, k), T->mb,
+                            B(p, n), ldbp,
+                            B(m, n), ldbm);
                     }
                 }
+
+                /* Restore the original location of the tiles */
+                for (n = 0; n < B->nt; n++) {
+                    RUNTIME_data_migrate( sequence, B(k, n),
+                                          B->get_rankof( B, k, n ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
-        } else {
-            /*
-             *  MorseLeft / MorseNoTrans
-             */
+        }
+        /*
+         *  MorseLeft / MorseNoTrans
+         */
+        else {
             for (k = K-1; k >= 0; k--) {
                 RUNTIME_iteration_push(morse, k);
 
@@ -193,34 +200,34 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                     ldbp = BLKLDD(B, p);
 
                     /* TT or TS */
-
                     if(qrtree->gettype(qrtree, k, m) == 0){
-                        for (n = k; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_ztsmqr(
-                                &options,
-                                side, trans,
-                                B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( m, k), ldam,
-                                TS(m, k), TS->mb);
-                        }
+                        L = 0;
+                        T = TS;
                     }
                     else {
-                        for (n = k; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_zttmqr(
-                                &options,
-                                side, trans,
-                                B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb,
-                                B( p, n), ldbp,
-                                B( m, n), ldbm,
-                                A( m, k), ldam,
-                                TT(m, k), TT->mb);
-                        }
+                        L = tempmm;
+                        T = TT;
+                    }
+                    for (n = k; n < B->nt; n++) {
+                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                        RUNTIME_data_migrate( sequence, B(p, n),
+                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmqrt(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkn, L, ib, T->nb,
+                            A(m, k), ldam,
+                            T(m, k), T->mb,
+                            B(p, n), ldbp,
+                            B(m, n), ldbm);
                     }
                 }
+
+                T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     m = qrtree->getm(qrtree, k, i);
 
@@ -245,23 +252,28 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 #endif
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
                         MORSE_TASK_zunmqr(
                             &options,
                             side, trans,
-                            tempmm, tempnn, tempkmin, ib, TS->nb,
-                            D( m, k), ldam,
-                            TS(m, k), TS->mb,
-                            B( m, n), ldbm);
+                            tempmm, tempnn, tempkmin, ib, T->nb,
+                            D(m, k), ldam,
+                            T(m, k), T->mb,
+                            B(m, n), ldbm);
                     }
                 }
                 RUNTIME_iteration_pop(morse);
             }
         }
-    } else {
+    }
+    /*
+     *  MorseRight / MorseConjTrans
+     */
+    else {
         if (trans == MorseConjTrans) {
-            /*
-             *  MorseRight / MorseConjTrans
-             */
             for (k = K-1; k >= 0; k--) {
                 RUNTIME_iteration_push(morse, k);
 
@@ -280,34 +292,34 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 
                     /* TS or TT */
                     if(qrtree->gettype(qrtree, k, n) == 0){
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-                            MORSE_TASK_ztsmqr(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb,
-                                B( m, p), ldbm,
-                                B( m, n), ldbm,
-                                A( n, k), ldan,
-                                TS(n, k), TS->mb);
-                        }
+                        L = 0;
+                        T = TS;
                     }
-                    else{
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-                            MORSE_TASK_zttmqr(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb,
-                                B( m, p), ldbm,
-                                B( m, n), ldbm,
-                                A( n, k), ldan,
-                                TT(n, k), TT->mb);
-                        }
+                    else {
+                        L = tempnn; /* TT kernel; tempmm is only assigned inside the m-loop below */
+                        T = TT;
+                    }
+                    for (m = 0; m < B->mt; m++) {
+                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                        ldbm = BLKLDD(B, m);
+
+                        RUNTIME_data_migrate( sequence, B(m, p),
+                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmqrt(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkn, L, ib, T->nb,
+                            A(n, k), ldan,
+                            T(n, k), T->mb,
+                            B(m, p), ldbm,
+                            B(m, n), ldbm);
                     }
                 }
+
+                T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     n = qrtree->getm(qrtree, k, i);
 
@@ -332,27 +344,33 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                     for (m = 0; m < B->mt; m++) {
                         ldbm = BLKLDD(B, m);
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
                         MORSE_TASK_zunmqr(
                             &options,
                             side, trans,
-                            tempmm, tempnn, tempkmin, ib, TS->nb,
-                            D( n, k), ldan,
-                            TS(n, k), TS->mb,
-                            B( m, n), ldbm);
+                            tempmm, tempnn, tempkmin, ib, T->nb,
+                            D(n, k), ldan,
+                            T(n, k), T->mb,
+                            B(m, n), ldbm);
                     }
                 }
 
                 RUNTIME_iteration_pop(morse);
             }
-        } else {
-            /*
-             *  MorseRight / MorseNoTrans
-             */
+        }
+        /*
+         *  MorseRight / MorseNoTrans
+         */
+        else {
             for (k = 0; k < K; k++) {
                 RUNTIME_iteration_push(morse, k);
 
                 tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
 
+                T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     n = qrtree->getm(qrtree, k, i);
 
@@ -380,10 +398,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                         MORSE_TASK_zunmqr(
                             &options,
                             side, trans,
-                            tempmm, tempnn, tempkmin, ib, TS->nb,
-                            D( n, k), ldan,
-                            TS(n, k), TS->mb,
-                            B( m, n), ldbm);
+                            tempmm, tempnn, tempkmin, ib, T->nb,
+                            D(n, k), ldan,
+                            T(n, k), T->mb,
+                            B(m, n), ldbm);
                     }
                 }
                 /* Setting the order of tiles */
@@ -397,32 +415,31 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                     ldan = BLKLDD(A, n);
                     ldbp = BLKLDD(B, p);
                     if(qrtree->gettype(qrtree, k, n) == 0){
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-                            MORSE_TASK_ztsmqr(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb,
-                                B( m, p), ldbm,
-                                B( m, n), ldbm,
-                                A( n, k), ldan,
-                                TS(n, k), TS->mb);
-                        }
+                        L = 0;
+                        T = TS; /* TS kernel: T may still point to TT from a previous tile */
                     }
                     else {
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-                            MORSE_TASK_zttmqr(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb,
-                                B( m, p), ldbm,
-                                B( m, n), ldbm,
-                                A( n, k), ldan,
-                                TT(n, k), TT->mb);
-                        }
+                        L = tempnn; /* TT kernel; tempmm is only refreshed inside the m-loop below */
+                        T = TT;
+                    }
+
+                    for (m = 0; m < B->mt; m++) {
+                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                        ldbm = BLKLDD(B, m);
+
+                        RUNTIME_data_migrate( sequence, B(m, p),
+                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, B(m, n),
+                                              B->get_rankof( B, m, n ) );
+
+                        MORSE_TASK_ztpmqrt(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkn, L, ib, T->nb,
+                            A(n, k), ldan,
+                            T(n, k), T->mb,
+                            B(m, p), ldbm,
+                            B(m, n), ldbm);
                     }
                 }
 
@@ -434,6 +451,4 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
     free(tiles);
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
-
-    (void)D;
 }
diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c
index ca7097ca9..00b4d3ded 100644
--- a/compute/pzunmqrrh.c
+++ b/compute/pzunmqrrh.c
@@ -134,15 +134,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                         ldam = BLKLDD(A, m);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_ztsmqr(
-                                &options,
-                                side, trans,
-                                A->nb, tempnn, tempmm, tempnn,
-                                tempkn, ib, T->nb,
-                                B(M, n), ldbM,
-                                B(m, n), ldbm,
+
+                            RUNTIME_data_migrate( sequence, B(M, n),
+                                                  B->get_rankof( B, m, n ) );
+                            RUNTIME_data_migrate( sequence, B(m, n),
+                                                  B->get_rankof( B, m, n ) );
+
+                            MORSE_TASK_ztpmqrt(
+                                &options, side, trans,
+                                tempmm, tempnn, tempkn, 0, ib, T->nb,
                                 A(m, k), ldam,
-                                T(m, k), T->mb);
+                                T(m, k), T->mb,
+                                B(M, n), ldbM,
+                                B(m, n), ldbm);
                         }
                     }
                 }
@@ -154,19 +158,29 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                         ldaMRD = BLKLDD(A, M+RD);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_zttmqr(
-                                &options,
-                                side, trans,
-                                A->nb, tempnn, tempMRDm, tempnn,
-                                tempkn, ib, T->nb,
-                                B (M,    n), ldbM,
-                                B (M+RD, n), ldbMRD,
+
+                            RUNTIME_data_migrate( sequence, B(M, n),
+                                                  B->get_rankof( B, M+RD, n ) );
+                            RUNTIME_data_migrate( sequence, B(M+RD, n),
+                                                  B->get_rankof( B, M+RD, n ) );
+
+                            MORSE_TASK_ztpmqrt(
+                                &options, side, trans,
+                                tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb,
                                 A (M+RD, k), ldaMRD,
-                                T2(M+RD, k), T->mb);
+                                T2(M+RD, k), T->mb,
+                                B (M,    n), ldbM,
+                                B (M+RD, n), ldbMRD);
                         }
                     }
                 }
 
+                /* Restore the original location of the tiles */
+                for (n = 0; n < B->nt; n++) {
+                    RUNTIME_data_migrate( sequence, B(k, n),
+                                          B->get_rankof( B, k, n ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
         } else {
@@ -188,15 +202,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                         ldaMRD = BLKLDD(A, M+RD);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_zttmqr(
-                                &options,
-                                side, trans,
-                                A->nb, tempnn, tempMRDm, tempnn,
-                                tempkn, ib, T->nb,
-                                B (M,    n), ldbM,
-                                B (M+RD, n), ldbMRD,
+
+                            RUNTIME_data_migrate( sequence, B(M, n),
+                                                  B->get_rankof( B, M+RD, n ) );
+                            RUNTIME_data_migrate( sequence, B(M+RD, n),
+                                                  B->get_rankof( B, M+RD, n ) );
+
+                            MORSE_TASK_ztpmqrt(
+                                &options, side, trans,
+                                tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb,
                                 A (M+RD, k), ldaMRD,
-                                T2(M+RD, k), T->mb);
+                                T2(M+RD, k), T->mb,
+                                B (M,    n), ldbM,
+                                B (M+RD, n), ldbMRD);
                         }
                     }
                 }
@@ -211,15 +229,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                         ldam = BLKLDD(A, m);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            MORSE_TASK_ztsmqr(
-                                &options,
-                                side, trans,
-                                A->nb, tempnn, tempmm, tempnn,
-                                tempkn, ib, T->nb,
-                                B(M, n), ldbM,
-                                B(m, n), ldbm,
+
+                            RUNTIME_data_migrate( sequence, B(M, n),
+                                                  B->get_rankof( B, m, n ) );
+                            RUNTIME_data_migrate( sequence, B(m, n),
+                                                  B->get_rankof( B, m, n ) );
+
+                            MORSE_TASK_ztpmqrt(
+                                &options, side, trans,
+                                tempmm, tempnn, tempkn, 0, ib, T->nb,
                                 A(m, k), ldam,
-                                T(m, k), T->mb);
+                                T(m, k), T->mb,
+                                B(M, n), ldbM,
+                                B(m, n), ldbm);
                         }
                     }
 #if defined(CHAMELEON_COPY_DIAG)
@@ -238,11 +260,13 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
 #endif
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                        RUNTIME_data_migrate( sequence, B(M, n),
+                                              B->get_rankof( B, M, n ) );
+
                         MORSE_TASK_zunmqr(
-                            &options,
-                            side, trans,
-                            tempMm, tempnn,
-                            tempkmin, ib, T->nb,
+                            &options, side, trans,
+                            tempMm, tempnn, tempkmin, ib, T->nb,
                             D(M, k), ldaM,
                             T(M, k), T->mb,
                             B(M, n), ldbM);
@@ -270,15 +294,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                         for (m = 0; m < B->mt; m++) {
                             ldbm   = BLKLDD(B, m);
                             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            MORSE_TASK_zttmqr(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempMRDm,
-                                tempkn, ib, T->nb,
-                                B (m, M), ldbm,
-                                B (m, M+RD), ldbm,
+
+                            RUNTIME_data_migrate( sequence, B(m, M),
+                                                  B->get_rankof( B, m, M+RD ) );
+                            RUNTIME_data_migrate( sequence, B(m, M+RD),
+                                                  B->get_rankof( B, m, M+RD ) );
+
+                            MORSE_TASK_ztpmqrt(
+                                &options, side, trans,
+                                tempmm, tempMRDm, tempkn, tempmm, ib, T->nb,
                                 A (M+RD, k), ldaMRD,
-                                T2(M+RD, k), T->mb);
+                                T2(M+RD, k), T->mb,
+                                B (m, M), ldbm,
+                                B (m, M+RD), ldbm);
                         }
                     }
                 }
@@ -293,15 +321,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                         for (m = 0; m < B->mt; m++) {
                             ldbm = BLKLDD(B, m);
                             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            MORSE_TASK_ztsmqr(
-                                &options,
-                                side, trans,
-                                tempmm, tempMm, tempmm, tempnn,
-                                tempkn, ib, T->nb,
-                                B(m, M), ldbm,
-                                B(m, n), ldbm,
+                            /* Migrate both B tiles passed to the ztpmqrt task below to the rank of B(m, n) */
+                            RUNTIME_data_migrate( sequence, B(m, M),
+                                                  B->get_rankof( B, m, n ) );
+                            RUNTIME_data_migrate( sequence, B(m, n),
+                                                  B->get_rankof( B, m, n ) );
+
+                            MORSE_TASK_ztpmqrt(
+                                &options, side, trans,
+                                tempmm, tempnn, tempkn, 0, ib, T->nb,
                                 A(n, k), ldan,
-                                T(n, k), T->mb);
+                                T(n, k), T->mb,
+                                B(m, M), ldbm,
+                                B(m, n), ldbm);
                         }
                     }
 #if defined(CHAMELEON_COPY_DIAG)
@@ -321,6 +353,10 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                     for (m = 0; m < B->mt; m++) {
                         ldbm = BLKLDD(B, m);
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+
+                        RUNTIME_data_migrate( sequence, B(m, M),
+                                              B->get_rankof( B, m, M ) );
+
                         MORSE_TASK_zunmqr(
                             &options,
                             side, trans,
@@ -376,15 +412,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                         for (m = 0; m < B->mt; m++) {
                             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                             ldbm = BLKLDD(B, m);
-                            MORSE_TASK_ztsmqr(
-                                &options,
-                                side, trans,
-                                tempmm, tempMm, tempmm, tempnn,
-                                tempkn, ib, T->nb,
-                                B(m, M), ldbm,
-                                B(m, n), ldbm,
+
+                            RUNTIME_data_migrate( sequence, B(m, M),
+                                                  B->get_rankof( B, m, n ) );
+                            RUNTIME_data_migrate( sequence, B(m, n),
+                                                  B->get_rankof( B, m, n ) );
+
+                            MORSE_TASK_ztpmqrt(
+                                &options, side, trans,
+                                tempmm, tempnn, tempkn, 0, ib, T->nb,
                                 A(n, k), ldan,
-                                T(n, k), T->mb);
+                                T(n, k), T->mb,
+                                B(m, M), ldbm,
+                                B(m, n), ldbm);
                         }
                     }
                 }
@@ -395,19 +435,29 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans,
                         for (m = 0; m < B->mt; m++) {
                             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                             ldbm   = BLKLDD(B, m);
-                            MORSE_TASK_zttmqr(
-                                &options,
-                                side, trans,
-                                tempmm, B->nb, tempmm, tempMRDm,
-                                tempkn, ib, T->nb,
-                                B (m, M   ), ldbm,
-                                B (m, M+RD), ldbm,
+
+                            RUNTIME_data_migrate( sequence, B(m, M),
+                                                  B->get_rankof( B, m, M+RD ) );
+                            RUNTIME_data_migrate( sequence, B(m, M+RD),
+                                                  B->get_rankof( B, m, M+RD ) );
+
+                            MORSE_TASK_ztpmqrt(
+                                &options, side, trans,
+                                tempmm, tempMRDm, tempkn, tempmm, ib, T->nb,
                                 A (M+RD, k), ldaMRD,
-                                T2(M+RD, k), T->mb);
+                                T2(M+RD, k), T->mb,
+                                B (m, M   ), ldbm,
+                                B (m, M+RD), ldbm);
                         }
                     }
                 }
 
+                /* Restore the original location of the tiles */
+                for (m = 0; m < B->mt; m++) {
+                    RUNTIME_data_migrate( sequence, B(m, k),
+                                          B->get_rankof( B, m, k ) );
+                }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
-- 
GitLab