From b586e208fb1a2a20d2cbc991976ce78ca1b2db84 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 31 Jan 2018 13:13:44 +0100
Subject: [PATCH] Add data migration and TP kernels (ztpmqrt) to ungqr functions

---
 compute/pzungqr.c       | 31 +++++++++++-----
 compute/pzungqr_param.c | 79 ++++++++++++++++++++++-------------------
 compute/pzungqrrh.c     | 40 ++++++++++++++-------
 3 files changed, 92 insertions(+), 58 deletions(-)

diff --git a/compute/pzungqr.c b/compute/pzungqr.c
index 389f79e85..e305f329f 100644
--- a/compute/pzungqr.c
+++ b/compute/pzungqr.c
@@ -34,9 +34,9 @@
 #define Q(m,n) Q,  m,  n
 #define T(m,n) T,  m,  n
 #if defined(CHAMELEON_COPY_DIAG)
-#define D(k) D, k, 0
+#define D(k)   D,  k,  0
 #else
-#define D(k) A, k, k
+#define D(k)   D,  k,  k
 #endif
 
 /*******************************************************************************
@@ -69,6 +69,10 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc
         minMT = A->mt;
     }
 
+    if (D == NULL) {
+        D = A;
+    }
+
     /*
      * zunmqr = A->nb * ib
      * ztsmqr = A->nb * ib
@@ -105,16 +109,21 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc
             ldqm = BLKLDD(Q, m);
             for (n = k; n < Q->nt; n++) {
                 tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
-                MORSE_TASK_ztsmqr(
+
+                RUNTIME_data_migrate( sequence, Q(k, n),
+                                      Q->get_rankof( Q, m, n ) );
+
+                MORSE_TASK_ztpmqrt(
                     &options,
                     MorseLeft, MorseNoTrans,
-                    Q->mb, tempnn, tempmm, tempnn, tempAkn, ib, T->nb,
-                    Q(k, n), ldqk,
-                    Q(m, n), ldqm,
+                    tempmm, tempnn, tempAkn, 0, ib, T->nb,
                     A(m, k), ldam,
-                    T(m, k), T->mb);
+                    T(m, k), T->mb,
+                    Q(k, n), ldqk,
+                    Q(m, n), ldqm);
             }
         }
+
 #if defined(CHAMELEON_COPY_DIAG)
         MORSE_TASK_zlacpy(
             &options,
@@ -131,11 +140,16 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc
 #endif
         for (n = k; n < Q->nt; n++) {
             tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
+
+            /* Restore the original location of the tiles */
+            RUNTIME_data_migrate( sequence, Q(k, n),
+                                  Q->get_rankof( Q, k, n ) );
+
             MORSE_TASK_zunmqr(
                 &options,
                 MorseLeft, MorseNoTrans,
                 tempkm, tempnn, tempkmin, ib, T->nb,
-                D(k), ldak,
+                D(k),    ldak,
                 T(k, k), T->mb,
                 Q(k, n), ldqk);
         }
@@ -144,5 +158,4 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc
 
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
-    (void)D;
 }
diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c
index a0849b634..d78459ae9 100644
--- a/compute/pzungqr_param.c
+++ b/compute/pzungqr_param.c
@@ -25,15 +25,10 @@
 #include "control/common.h"
 #include <stdlib.h>
 
-#define A(m,n) A,  m,  n
-#define Q(m,n) Q,  m,  n
-#define TS(m,n) TS,  m,  n
-#define TT(m,n) TT,  m,  n
-#if defined(CHAMELEON_COPY_DIAG)
-#define D(m,n) D,  m,  n
-#else
-#define D(m,n) A,  m,  n
-#endif
+#define A(m,n) A, m, n
+#define Q(m,n) Q, m, n
+#define T(m,n) T, m, n
+#define D(m,n) D, m, n
 
 /**
  *  Parallel construction of Q using tile V (application to identity) - dynamic scheduling
@@ -45,10 +40,11 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree,
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
+    MORSE_desc_t *T;
     size_t ws_worker = 0;
     size_t ws_host = 0;
 
-    int k, m, n, i, p;
+    int k, m, n, i, p, L;
     int ldam, ldqm, ldqp;
     int tempmm, tempnn, tempkmin, tempkn;
     int ib, minMT;
@@ -67,6 +63,10 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree,
         minMT = A->mt;
     }
 
+    if (D == NULL) {
+        D = A;
+    }
+
     /*
      * zunmqr = A->nb * ib
      * ztsmqr = A->nb * ib
@@ -110,35 +110,35 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree,
             ldqp = BLKLDD(Q, p);
 
             /* TT or TS */
-
-            if(qrtree->gettype(qrtree, k , m) == 0){
-                for (n = k; n < Q->nt; n++) {
-                    tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
-                    MORSE_TASK_ztsmqr(
-                        &options,
-                        MorseLeft, MorseNoTrans,
-                        Q->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb,
-                        Q(p, n), ldqp,
-                        Q(m, n), ldqm,
-                        A(m, k), ldam,
-                        TS(m, k), TS->mb);
-                }
+            if(qrtree->gettype(qrtree, k , m) == 0) {
+                T = TS;
+                L = 0;
             }
             else {
-                for (n = k; n < Q->nt; n++) {
-                    tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
-                    MORSE_TASK_zttmqr(
-                        &options,
-                        MorseLeft, MorseNoTrans,
-                        Q->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb,
-                        Q(p, n), ldqp,
-                        Q(m, n), ldqm,
-                        A(m, k), ldam,
-                        TT(m, k), TT->mb);
-                }
+                T = TT;
+                L = tempmm;
+            }
+
+            for (n = k; n < Q->nt; n++) {
+                tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
+
+                RUNTIME_data_migrate( sequence, Q(p, n),
+                                      Q->get_rankof( Q, m, n ) );
+                RUNTIME_data_migrate( sequence, Q(m, n),
+                                      Q->get_rankof( Q, m, n ) );
+
+                MORSE_TASK_ztpmqrt(
+                    &options,
+                    MorseLeft, MorseNoTrans,
+                    tempmm, tempnn, tempkn, L, ib, T->nb,
+                    A(m, k), ldam,
+                    T(m, k), T->mb,
+                    Q(p, n), ldqp,
+                    Q(m, n), ldqm);
             }
         }
 
+        T = TS;
         for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
             m = qrtree->getm(qrtree, k, i);
 
@@ -161,14 +161,20 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree,
                 D(m, k), ldam );
 #endif
 #endif
+
             for (n = k; n < Q->nt; n++) {
                 tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
+
+                /* Restore the original location of the tiles */
+                RUNTIME_data_migrate( sequence, Q(m, n),
+                                      Q->get_rankof( Q, m, n ) );
+
                 MORSE_TASK_zunmqr(
                     &options,
                     MorseLeft, MorseNoTrans,
-                    tempmm, tempnn, tempkmin, ib, TS->nb,
+                    tempmm, tempnn, tempkmin, ib, T->nb,
                     D(m, k), ldam,
-                    TS(m, k), TS->mb,
+                    T(m, k), T->mb,
                     Q(m, n), ldqm);
             }
         }
@@ -178,5 +184,4 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree,
     free(tiles);
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
-    (void)D;
 }
diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c
index 1d0516298..89853c6cf 100644
--- a/compute/pzungqrrh.c
+++ b/compute/pzungqrrh.c
@@ -105,15 +105,20 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
                 ldaMRD = BLKLDD(A, M+RD);
                 for (n = k; n < Q->nt; n++) {
                     tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
-                    MORSE_TASK_zttmqr(
+
+                    RUNTIME_data_migrate( sequence, Q(M, n),
+                                          Q->get_rankof( Q, M+RD, n ) );
+                    RUNTIME_data_migrate( sequence, Q(M+RD, n),
+                                          Q->get_rankof( Q, M+RD, n ) );
+
+                    MORSE_TASK_ztpmqrt(
                         &options,
                         MorseLeft, MorseNoTrans,
-                        Q->mb, tempnn, tempMRDm, tempnn,
-                        tempkn, ib, T->nb,
-                        Q (M,    n), ldqM,
-                        Q (M+RD, n), ldqMRD,
+                        tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb,
                         A (M+RD, k), ldaMRD,
-                        T2(M+RD, k), T->mb);
+                        T2(M+RD, k), T->mb,
+                        Q (M,    n), ldqM,
+                        Q (M+RD, n), ldqMRD);
                 }
             }
         }
@@ -129,17 +134,23 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
 
                 for (n = k; n < Q->nt; n++) {
                     tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
-                    MORSE_TASK_ztsmqr(
+
+                    RUNTIME_data_migrate( sequence, Q(M, n),
+                                          Q->get_rankof( Q, m, n ) );
+                    RUNTIME_data_migrate( sequence, Q(m, n),
+                                          Q->get_rankof( Q, m, n ) );
+
+                    MORSE_TASK_ztpmqrt(
                         &options,
                         MorseLeft, MorseNoTrans,
-                        Q->mb, tempnn, tempmm, tempnn,
-                        tempkn, ib, T->nb,
-                        Q(M, n), ldqM,
-                        Q(m, n), ldqm,
+                        tempmm, tempnn, tempkn, 0, ib, T->nb,
                         A(m, k), ldam,
-                        T(m, k), T->mb);
+                        T(m, k), T->mb,
+                        Q(M, n), ldqM,
+                        Q(m, n), ldqm);
                 }
             }
+
 #if defined(CHAMELEON_COPY_DIAG)
             MORSE_TASK_zlacpy(
                 &options,
@@ -156,6 +167,11 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
 #endif
             for (n = k; n < Q->nt; n++) {
                 tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
+
+                /* Restore the original location of the tiles */
+                RUNTIME_data_migrate( sequence, Q(M, n),
+                                      Q->get_rankof( Q, M, n ) );
+
                 MORSE_TASK_zunmqr(
                     &options,
                     MorseLeft, MorseNoTrans,
-- 
GitLab