From fc2d01f133d92fbcaddbab53ba0960212fea6377 Mon Sep 17 00:00:00 2001
From: Raphael Boucherie <raphael.boucherie@inria.fr>
Date: Fri, 25 Aug 2017 12:32:10 +0200
Subject: [PATCH] add flush for unmqr and unmlq

---
 compute/pzunmlq.c       | 25 +++++++++++++++++++++++++
 compute/pzunmlq_param.c | 21 +++++++++++++++++++++
 compute/pzunmqr.c       | 25 +++++++++++++++++++++++++
 compute/pzunmqr_param.c | 21 ++++++++++++++++++++-
 4 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c
index df686642b..84475c878 100644
--- a/compute/pzunmlq.c
+++ b/compute/pzunmlq.c
@@ -127,6 +127,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
+
+                MORSE_TASK_dataflush( &options, D(k)    );
+                MORSE_TASK_dataflush( &options, T(k, k) );
+
                 for (m = k+1; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                     ldbm = BLKLDD(B, m);
@@ -146,6 +150,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                             B(k, n), ldbk,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, A(k, m) );
+                    MORSE_TASK_dataflush( &options, T(k, m) );
                 }
 
                 /* Restore the original location of the tiles */
@@ -187,6 +194,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                             B(k, n), ldbk,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, A(k, m) );
+                    MORSE_TASK_dataflush( &options, T(k, m) );
                 }
 #if defined(CHAMELEON_COPY_DIAG)
                 MORSE_TASK_zlacpy(
@@ -216,6 +226,8 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
+                MORSE_TASK_dataflush( &options, D(k)    );
+                MORSE_TASK_dataflush( &options, T(k, k) );
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -250,6 +262,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                             B(m, k), ldbm,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, A(k, n) );
+                    MORSE_TASK_dataflush( &options, T(k, n) );
                 }
 #if defined(CHAMELEON_COPY_DIAG)
                 MORSE_TASK_zlacpy(
@@ -281,6 +296,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                         B(m, k), ldbm);
                 }
 
+                MORSE_TASK_dataflush( &options, D(k)    );
+                MORSE_TASK_dataflush( &options, T(k, k) );
+
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -319,6 +337,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                         T(k, k), T->mb,
                         B(m, k), ldbm);
                 }
+
+                MORSE_TASK_dataflush( &options, D(k)    );
+                MORSE_TASK_dataflush( &options, T(k, k) );
+
                 for (n = k+1; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                     for (m = 0; m < B->mt; m++) {
@@ -338,6 +360,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                             B(m, k), ldbm,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, A(k, n) );
+                    MORSE_TASK_dataflush( &options, T(k, n) );
                 }
 
                 /* Restore the original location of the tiles */
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index c37ea1b59..1bee53e27 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -129,6 +129,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                     }
+                    MORSE_TASK_dataflush( &options, D(k, p) );
+                    MORSE_TASK_dataflush( &options, T(k, p) );
                 }
 
                 /* Setting the order of the tiles*/
                 libhqr_walk_stepk(qrtree, k, tiles + (k+1));
 
@@ -167,6 +169,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                             B(p, n), ldbp,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, A(k, m) );
+                    MORSE_TASK_dataflush( &options, T(k, m) );
                 }
 
                 /* Restore the original location of the tiles */
@@ -226,6 +230,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                             B(p, n), ldbp,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, A(k, m) );
+                    MORSE_TASK_dataflush( &options, T(k, m) );
                 }
 
                 T = TS;
@@ -265,6 +271,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                             B(p, n), ldbp);
                     }
+
+                    MORSE_TASK_dataflush( &options, D(k, p) );
+                    MORSE_TASK_dataflush( &options, T(k, p) );
                 }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -318,6 +328,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                             B(m, p), ldbm,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, A(k, n) );
+                    MORSE_TASK_dataflush( &options, T(k, n) );
                 }
 
                 T = TS;
@@ -357,6 +369,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                             B(m, p), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, D(k, p) );
+                    MORSE_TASK_dataflush( &options, T(k, p) );
                 }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -404,6 +420,9 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                             B(m, p), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, D(k, p) );
+                    MORSE_TASK_dataflush( &options, T(k, p) );
                 }
+
                 /* Setting the order of tiles */
                 libhqr_walk_stepk(qrtree, k, tiles + (k+1));
 
@@ -443,6 +462,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                             B(m, p), ldbm,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, A( k, n) );
+                    MORSE_TASK_dataflush( &options, TT(k, n) );
                 }
 
                 RUNTIME_iteration_pop(morse);
diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c
index 3a0f3c18d..72b7b62d4 100644
--- a/compute/pzunmqr.c
+++ b/compute/pzunmqr.c
@@ -127,6 +127,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
+
+                MORSE_TASK_dataflush( &options, D(k)    );
+                MORSE_TASK_dataflush( &options, T(k, k) );
+
                 for (m = k+1; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                     ldam = BLKLDD(A, m);
@@ -147,6 +151,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                             B(k, n), ldbk,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, A(m, k) );
+                    MORSE_TASK_dataflush( &options, T(m, k) );
                 }
 
                 /* Restore the original location of the tiles */
@@ -189,6 +196,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                             B(k, n), ldbk,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, A(m, k) );
+                    MORSE_TASK_dataflush( &options, T(m, k) );
                 }
 #if defined(CHAMELEON_COPY_DIAG)
                 MORSE_TASK_zlacpy(
@@ -218,6 +228,8 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
+                MORSE_TASK_dataflush( &options, D(k)    );
+                MORSE_TASK_dataflush( &options, T(k, k) );
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -254,6 +266,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                             B(m, k), ldbm,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, A(n, k) );
+                    MORSE_TASK_dataflush( &options, T(n, k) );
                 }
 #if defined(CHAMELEON_COPY_DIAG)
                 MORSE_TASK_zlacpy(
@@ -285,6 +300,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                         B(m, k), ldbm);
                 }
 
+                MORSE_TASK_dataflush( &options, D(k)    );
+                MORSE_TASK_dataflush( &options, T(k, k) );
+
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -323,6 +341,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                         T(k, k), T->mb,
                         B(m, k), ldbm);
                 }
+
+                MORSE_TASK_dataflush( &options, D(k)    );
+                MORSE_TASK_dataflush( &options, T(k, k) );
+
                 for (n = k+1; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                     ldan = BLKLDD(A, n);
@@ -343,6 +365,9 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                             B(m, k), ldbm,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, A(n, k) );
+                    MORSE_TASK_dataflush( &options, T(n, k) );
                 }
 
                 /* Restore the original location of the tiles */
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index 0ad3b1f87..4b415ff3d 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -127,6 +127,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                             T(m, k), T->mb,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, D(m, k) );
+                    MORSE_TASK_dataflush( &options, T(m, k) );
                 }
 
                 /* Setting the order of the tiles*/
@@ -168,7 +170,9 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                             B(p, n), ldbp,
                             B(m, n), ldbm);
                     }
-                }
+                    MORSE_TASK_dataflush( &options, A(m, k) );
+                    MORSE_TASK_dataflush( &options, T(m, k) );
+                }
 
                 /* Restore the original location of the tiles */
                 for (n = 0; n < B->nt; n++) {
@@ -227,6 +231,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                             B(p, n), ldbp,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, A(m, k) );
+                    MORSE_TASK_dataflush( &options, T(m, k) );
                 }
 
                 T = TS;
@@ -267,6 +273,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                             B(m, n), ldbm);
                     }
+
+                    MORSE_TASK_dataflush( &options, D(m, k) );
+                    MORSE_TASK_dataflush( &options, T(m, k) );
                 }
+
                 RUNTIME_iteration_pop(morse);
             }
         }
@@ -320,6 +330,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                             B(m, p), ldbm,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, A(n, k) );
+                    MORSE_TASK_dataflush( &options, T(n, k) );
                 }
 
                 T = TS;
@@ -359,6 +371,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                             T(n, k), T->mb,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, D(n, k) );
+                    MORSE_TASK_dataflush( &options, T(n, k) );
                 }
                 RUNTIME_iteration_pop(morse);
             }
@@ -405,7 +419,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                             T(n, k), T->mb,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, D(n, k) );
+                    MORSE_TASK_dataflush( &options, T(n, k) );
                 }
+
                 /* Setting the order of tiles */
                 libhqr_walk_stepk(qrtree, k, tiles + (k+1));
 
@@ -446,6 +463,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                             B(m, p), ldbm,
                             B(m, n), ldbm);
                     }
+                    MORSE_TASK_dataflush( &options, A(n, k) );
+                    MORSE_TASK_dataflush( &options, T(n, k) );
                 }
 
                 RUNTIME_iteration_pop(morse);
-- 
GitLab