From 624de04738c4f63ca7f8f50ef423f053b36fc5eb Mon Sep 17 00:00:00 2001
From: Raphael Boucherie <raphael.boucherie@inria.fr>
Date: Wed, 14 Jun 2017 13:52:38 +0200
Subject: [PATCH] 1 of 3 tests works

---
 compute/pzgelqf_param.c          | 18 ++-----------
 compute/pzunglq_param.c          | 17 ++----------
 compute/pzunmlq_param.c          | 16 ++---------
 compute/zgelqf_param.c           | 11 ++++++--
 compute/zgelqs_param.c           | 13 ++++++---
 compute/zgels_param.c            | 46 +++++++++++++++++++-------------
 compute/zunglq_param.c           | 11 +++++++-
 compute/zunmlq_param.c           | 10 ++++++-
 testing/testing_zgels_systolic.c | 16 +++++------
 9 files changed, 79 insertions(+), 79 deletions(-)

diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c
index d85123145..2bebdfea4 100644
--- a/compute/pzgelqf_param.c
+++ b/compute/pzgelqf_param.c
@@ -37,14 +37,14 @@
 /**
  *  Parallel tile LQ factorization (reduction Householder) - dynamic scheduling
  */
-void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT,
+void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A,
+                          MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                           MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-    MORSE_desc_t *D = NULL;
 
     int k, m, n, i, p;
     int K;
@@ -90,14 +90,6 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-#if defined(CHAMELEON_COPY_DIAG)
-    {
-        /* necessary to avoid dependencies between tasks regarding the diag tile */
-        D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-        morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, );
-    }
-#endif
-
     K = chameleon_min(A->mt, A->nt);
 
     /* The number of the factorization */
@@ -208,11 +200,5 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
     MORSE_TASK_dataflush_all();
-
-#if defined(CHAMELEON_COPY_DIAG)
-    MORSE_Sequence_Wait(sequence);
-    morse_desc_mat_free(D);
-    free(D);
-#endif
     (void)D;
 }
diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c
index 4a2e3da00..2dbe8f83a 100644
--- a/compute/pzunglq_param.c
+++ b/compute/pzunglq_param.c
@@ -38,14 +38,13 @@
  *  Parallel construction of Q using tile V - dynamic scheduling
  */
 void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q,
-                        MORSE_desc_t *TS, MORSE_desc_t *TT,
-                        MORSE_sequence_t *sequence, MORSE_request_t *request)
+                         MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
+                         MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-    MORSE_desc_t *D = NULL;
 
     int k, m, n, i, p;
     int K;
@@ -89,12 +88,6 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des
 
     K = chameleon_min(A->mt, A->nt);
 
-        /* necessary to avoid dependencies between tasks regarding the diag tile */
-#if defined(CHAMELEON_COPY_DIAG)
-    D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->mb, A->nb, 0, 0, K*A->mb, A->nb, A->p, A->q);
-#endif
-
     for (k = K-1; k >= 0; k--) {
         RUNTIME_iteration_push(morse, k);
 
@@ -178,11 +171,5 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
     MORSE_TASK_dataflush_all();
-
-#if defined(CHAMELEON_COPY_DIAG)
-    MORSE_Sequence_Wait(sequence);
-    morse_desc_mat_free(D);
-    free(D);
-#endif
     (void)D;
 }
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index 3bcec90f3..92a900324 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -39,14 +39,14 @@
  */
 void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                          MORSE_enum side, MORSE_enum trans,
-                         MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT,
+                         MORSE_desc_t *A, MORSE_desc_t *B,
+                         MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                          MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-    MORSE_desc_t *D = NULL;
 
     int k, m, n, i, p;
     int ldbm, ldak, ldbp;
@@ -88,12 +88,6 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-    /* necessary to avoid dependencies between tasks regarding the diag tile */
-#if defined(CHAMELEON_COPY_DIAG)
-    D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->mb, A->nb, 0, 0, K*A->mb, A->nb, A->p, A->q);
-#endif
-
     if (side == MorseLeft ) {
         if (trans == MorseNoTrans) {
             /*
@@ -440,11 +434,5 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
     MORSE_TASK_dataflush_all();
-
-#if defined(CHAMELEON_COPY_DIAG)
-    MORSE_Sequence_Wait(sequence);
-    morse_desc_mat_free(D);
-    free(D);
-#endif
     (void)D;
 }
diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c
index 4b893047a..5d07b6eed 100644
--- a/compute/zgelqf_param.c
+++ b/compute/zgelqf_param.c
@@ -238,6 +238,7 @@ int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
                                   MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
+    MORSE_desc_t D;
 
     morse = morse_context_self();
     if (morse == NULL) {
@@ -281,7 +282,13 @@ int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
     if (chameleon_min(M, N) == 0)
         return MORSE_SUCCESS;
 */
-    morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request);
-
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+    morse_pzgelqf_param(qrtree, A, TS, TT, &D, sequence, request);
+    morse_desc_mat_free(&D);
+#else
+    morse_pzgelqf_param(qrtree, A, TS, TT, NULL, sequence, request);
+#endif
+    (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c
index 71550a71b..581d74139 100644
--- a/compute/zgelqs_param.c
+++ b/compute/zgelqs_param.c
@@ -270,6 +270,7 @@ int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
     MORSE_desc_t *subB;
     MORSE_desc_t *subA;
     MORSE_context_t *morse;
+    MORSE_desc_t D;
 
     morse = morse_context_self();
     if (morse == NULL) {
@@ -324,12 +325,18 @@ int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
 
     subB = morse_desc_submatrix(B, 0, 0, A->m, B->n);
     subA = morse_desc_submatrix(A, 0, 0, A->m, A->m);
-    morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
+    morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
     free(subA);
     free(subB);
 
-    morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
-
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+    morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request);
+    morse_desc_mat_free(&D);
+#else
+    morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request);
+#endif
 
+    (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/compute/zgels_param.c b/compute/zgels_param.c
index de1a1b80f..92f63d817 100644
--- a/compute/zgels_param.c
+++ b/compute/zgels_param.c
@@ -393,37 +393,45 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
     if (A->m >= A->n) {
 
 #if defined(CHAMELEON_COPY_DIAG)
-    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
-    morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request);
-    morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request);
-    morse_desc_mat_free(&D);
+        morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+        morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request);
+        morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request);
+        morse_desc_mat_free(&D);
 #else
-    morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request);
-    morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request);
+        morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request);
+        morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request);
 #endif
 
-    subB = morse_desc_submatrix(B, 0, 0, A->n, B->n);
-    subA = morse_desc_submatrix(A, 0, 0, A->n, A->n);
-    morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
-    free(subA);
-    free(subB);
+        subB = morse_desc_submatrix(B, 0, 0, A->n, B->n);
+        subA = morse_desc_submatrix(A, 0, 0, A->n, A->n);
+        morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
+        free(subA);
+        free(subB);
     }
     else {
-        /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n);
-        morse_pztile_zero(subB, sequence, request);
-        free(subB); */
-
-        morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request);
 
+#if defined(CHAMELEON_COPY_DIAG)
+        morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+        morse_pzgelqf_param(qrtree, A, TS, TT, &D, sequence, request);
         subB = morse_desc_submatrix(B, 0, 0, A->m, B->n);
         subA = morse_desc_submatrix(A, 0, 0, A->m, A->m);
-        morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
+        morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
         free(subA);
         free(subB);
+        morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request);
+        morse_desc_mat_free(&D);
+#else
+        morse_pzgelqf_param(qrtree, A, TS, TT, NULL, sequence, request);
+        subB = morse_desc_submatrix(B, 0, 0, A->m, B->n);
+        subA = morse_desc_submatrix(A, 0, 0, A->m, A->m);
+        morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
+        free(subA);
+        free(subB);
+        morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request);
+#endif
 
-        morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
-        //morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request);
     }
+
     (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c
index 24294d56b..c0e13b6df 100644
--- a/compute/zunglq_param.c
+++ b/compute/zunglq_param.c
@@ -254,6 +254,7 @@ int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
                                   MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
+    MORSE_desc_t D;
 
     morse = morse_context_self();
     if (morse == NULL) {
@@ -302,7 +303,15 @@ int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
     if (chameleon_min(M, N) == 0)
         return MORSE_SUCCESS;
 */
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
     morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request);
-    morse_pzunglq_param(qrtree, A, Q, TS, TT, sequence, request);
+    morse_pzunglq_param(qrtree, A, Q, TS, TT, &D, sequence, request);
+    morse_desc_mat_free(&D);
+#else
+    morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request);
+    morse_pzunglq_param(qrtree, A, Q, TS, TT, NULL, sequence, request);
+#endif
+    (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c
index d00961827..4aac83e45 100644
--- a/compute/zunmlq_param.c
+++ b/compute/zunmlq_param.c
@@ -310,6 +310,7 @@ int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side,
                                   MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
+    MORSE_desc_t D;
 
     morse = morse_context_self();
     if (morse == NULL) {
@@ -364,7 +365,14 @@ int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side,
     if (chameleon_min(M, chameleon_min(N, K)) == 0)
         return MORSE_SUCCESS;
 */
-    morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, sequence, request);
 
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+    morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, &D, sequence, request);
+    morse_desc_mat_free(&D);
+#else
+    morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, NULL, sequence, request);
+#endif
+    (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/testing/testing_zgels_systolic.c b/testing/testing_zgels_systolic.c
index ef9db84d5..fce16bfc8 100644
--- a/testing/testing_zgels_systolic.c
+++ b/testing/testing_zgels_systolic.c
@@ -218,10 +218,10 @@ int testing_zgels_systolic(int argc, char **argv)
 
         /* Morse routines */
         MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT);
-        MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA);
-        // MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA);
-        //MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB);
-        MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB);
+        //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA);
+        MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA);
+        MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB);
+        //MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB);
 
         /* Check the orthogonality, factorization and the solution */
         info_ortho = check_orthogonality(M, N, LDA, Q, eps);
@@ -284,10 +284,10 @@ int testing_zgels_systolic(int argc, char **argv)
 
         MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT);
         MORSE_ztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, M, NRHS, 1.0, A2, LDA, B2, LDB);
-        //MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA);
-        MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA);
-        //MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB);
-        MORSE_zunmlq(MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, B2, LDB);
+        MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA);
+        //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA);
+        MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB);
+        //MORSE_zunmlq(MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, B2, LDB);
     }
 
     /* Check the orthogonality, factorization and the solution */
-- 
GitLab