From 1521ba0d36b7c44bc2e32a78ad11cbe8e62c2ce7 Mon Sep 17 00:00:00 2001
From: Raphael Boucherie <raphael.boucherie@inria.fr>
Date: Wed, 17 May 2017 14:56:41 +0200
Subject: [PATCH] test for pzgelqf_param works

---
 compute/pzgelqf_param.c |  11 +-
 compute/pzunmlq_param.c | 456 ++++++++++++++++++++++++++++++++++++++++
 compute/pzunmqr_param.c |  11 +-
 3 files changed, 464 insertions(+), 14 deletions(-)
 create mode 100644 compute/pzunmlq_param.c

diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c
index ef16bf523..a43115496 100644
--- a/compute/pzgelqf_param.c
+++ b/compute/pzgelqf_param.c
@@ -53,7 +53,7 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
 
     int k, m, n, i, p;
     int K;
-    int ldak, ldam;
+    int ldak, ldam, ldap;
     int tempkmin, tempkm, tempnn, tempmm;
     int ib;
     int *tiles;
@@ -87,8 +87,8 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
 
     /* Initialisation of tiles */
 
-    tiles = (int*)malloc((qrtree->mt)*sizeof(int));
-    memset( tiles, 0, (qrtree->mt)*sizeof(int) );
+    tiles = (int*)malloc((qrtree->nt)*sizeof(int));
+    memset( tiles, 0, (qrtree->nt)*sizeof(int) );
 
     ws_worker *= sizeof(MORSE_Complex64_t);
     ws_host   *= sizeof(MORSE_Complex64_t);
@@ -138,8 +138,9 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
 #endif
 #endif
             }
-            for (m = k+1; n < A->mt; m++) {
+            for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+                ldam = BLKLDD(A, m);
                 MORSE_TASK_zunmlq(
                     &options,
                     MorseRight, MorseConjTrans,
@@ -158,7 +159,7 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
             p = qrtree->currpiv(qrtree, k, n);
 
             tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-
+            ldap = BLKLDD(A, p);
             /* Tiles killed is a TS */
             if(qrtree->gettype(qrtree, k, n) == 0){
                 MORSE_TASK_ztslqt(
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
new file mode 100644
index 000000000..180e19c6d
--- /dev/null
+++ b/compute/pzunmlq_param.c
@@ -0,0 +1,456 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2016 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file pzunmlq_param.c
+ *
+ *  MORSE auxiliary routines
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.5.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for MORSE 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Azzam Haidar
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Raphael Boucherie
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "control/common.h"
+
+#define A(m,n) A,  m,  n
+#define B(m,n) B,  m,  n
+#define TS(m,n) TS,  m,  n
+#define TT(m,n) TT,  m,  n
+#if defined(CHAMELEON_COPY_DIAG)
+#define D(m,n)   D,  m,  n
+#else
+#define D(m,n)   A,  m,  n
+#endif
+
+/**
+ *  Parallel application of Q using tile V - LQ factorization - dynamic scheduling
+ *
+ *  Submits zunmlq, ztsmlq and zttmlq tasks for each of the four side/trans
+ *  combinations; TS/TT tiles are visited in the order set by libhqr_treewalk().
+ *  NOTE(review): tile indices and leading dimensions differ between branches
+ *  (A(k,n) vs A(n,k); ldbp vs ldbm for B(m,p)) - confirm before relying on them.
+ *
+ *  @param qrtree   Reduction tree, queried via getnbgeqrf/getm/currpiv/gettype.
+ *  @param side     MorseLeft selects the left branches, any other value the right ones.
+ *  @param trans    MorseConjTrans selects the conjugate-transpose branches.
+ *  @param A, B     Tile descriptors handed to the kernels (A through the D() alias for zunmlq).
+ *  @param TS, TT   Descriptors passed with A to the zunmlq/ztsmlq and zttmlq tasks.
+ *  @param sequence, request  Given to RUNTIME_options_init(); no-op unless sequence->status is MORSE_SUCCESS.
+ */
+void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
+                         MORSE_enum side, MORSE_enum trans,
+                         MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT,
+                         MORSE_sequence_t *sequence, MORSE_request_t *request)
+{
+    MORSE_context_t *morse;
+    MORSE_option_t options;
+    size_t ws_worker = 0;
+    size_t ws_host = 0;
+    MORSE_desc_t *D = NULL;
+    int k, m, n, i, p;
+    int ldan, ldbm, ldbp, ldak;
+    int tempnn, tempkmin, tempmm, tempkm, tempkn;
+    int ib, K;
+    int *tiles;
+
+    morse = morse_context_self();
+    if (sequence->status != MORSE_SUCCESS)
+        return;
+    RUNTIME_options_init(&options, morse, sequence, request);
+
+    ib = MORSE_IB;
+    K = chameleon_min(A->mt, A->nt);
+    /* Worker space: zunmlq, ztsmlq and zttmlq each need A->nb * ib */
+    ws_worker = A->nb * ib;
+
+#if defined(CHAMELEON_USE_CUDA)
+    /* With CUDA, ztsmlq needs 2 * A->nb * ib */
+    ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
+#endif
+
+    /* Tile ordering buffer, refilled by libhqr_treewalk() at every step k */
+    tiles = (int*)malloc((qrtree->nt)*sizeof(int));
+    memset( tiles, 0, (qrtree->nt)*sizeof(int) );
+
+    ws_worker *= sizeof(MORSE_Complex64_t);
+    ws_host   *= sizeof(MORSE_Complex64_t);
+
+    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
+
+    /* necessary to avoid dependencies between tasks regarding the diag tile */
+#if defined(CHAMELEON_COPY_DIAG)
+    D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+    morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->nb, A->nb, 0, 0, K*A->nb, A->nb, A->p, A->q);
+#endif
+
+    if (side == MorseLeft ) {
+        if (trans == MorseConjTrans) {
+            /*
+             *  MorseLeft / MorseConjTrans
+             */
+            for (k = 0; k < K; k++) {
+                RUNTIME_iteration_push(morse, k);
+
+                tempkm   = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+                ldak = BLKLDD(A, k);
+                for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
+                    m = qrtree->getm(qrtree, k, i);
+
+                    tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+                    tempkmin = chameleon_min(tempmm, tempkm);
+                    ldbm = BLKLDD(B, m);
+#if defined(CHAMELEON_COPY_DIAG)
+                    MORSE_TASK_zlacpy(
+                        &options,
+                        MorseUpper, tempkmin, tempmm, A->nb,
+                        A(k, m), ldak,
+                        D(k, m), ldak );
+#if defined(CHAMELEON_USE_CUDA)
+                    MORSE_TASK_zlaset(
+                        &options,
+                        MorseLower, tempmm, tempkmin,
+                        0., 1.,
+                        D(k, m), ldak );
+#endif
+#endif
+                    for (n = 0; n < B->nt; n++) {
+                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                        MORSE_TASK_zunmlq(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkmin, ib, TS->nb,
+                            D( k, m), ldak,
+                            TS(k, m), TS->mb,
+                            B( m, n), ldbm);
+                    }
+                }
+                /* Setting the order of the tiles*/
+                libhqr_treewalk(qrtree, k, tiles);
+
+                for (i = k; i < B->mt-1; i++) {
+                    n = tiles[i];
+                    p = qrtree->currpiv(qrtree, k, n);
+
+                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    ldbp = BLKLDD(B, p);
+
+                    /* TT or TS */
+
+                    if(qrtree->gettype(qrtree, k, n) == 0){
+                        for (m = 0; m < B->mt; m++) {
+                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                            ldbm = BLKLDD(B, m);
+                            MORSE_TASK_ztsmlq(
+                                &options,
+                                side, trans,
+                                B->nb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb,
+                                B( m, p), ldbp,
+                                B( m, n), ldbm,
+                                A( k, n), ldak,
+                                TS(k, n), TS->mb);
+                        }
+                    }
+                    else {
+                        for (m = 0; m < B->mt; m++) {
+                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                            ldbm = BLKLDD(B, m);
+                            MORSE_TASK_zttmlq(
+                                &options,
+                                side, trans,
+                                B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->mb,
+                                B( m, p), ldbm,
+                                B( m, n), ldbm,
+                                A( k, n), ldak,
+                                TT(k, n), TT->mb);
+                        }
+                    }
+                }
+                RUNTIME_iteration_pop(morse);
+            }
+        } else {
+            /*
+             *  MorseLeft / MorseNoTrans
+             */
+            for (k = K-1; k >= 0; k--) {
+                RUNTIME_iteration_push(morse, k);
+
+                tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+                ldak = BLKLDD(A, k);
+                /* Setting the order of the tiles*/
+                libhqr_treewalk(qrtree, k, tiles);
+
+                for (i = B->mt-2; i >= k; i--) {
+                    n = tiles[i];
+                    p = qrtree->currpiv(qrtree, k, n);
+
+                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    ldbp = BLKLDD(B, p);
+
+                    /* TT or TS */
+
+                    if(qrtree->gettype(qrtree, k, n) == 0){
+                        for (m = 0; m < B->mt; m++) {
+                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                            ldbm = BLKLDD(B, m);
+                            MORSE_TASK_ztsmlq(
+                                &options,
+                                side, trans,
+                                B->nb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb,
+                                B( m, p), ldbp,
+                                B( m, n), ldbm,
+                                A( k, n), ldak,
+                                TS(k, n), TS->mb);
+                        }
+                    }
+                    else {
+                        for (m = k; m < B->mt; m++) {
+                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                            MORSE_TASK_zttmlq(
+                                &options,
+                                side, trans,
+                                B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb,
+                                B( m, p), ldbp,
+                                B( m, n), ldbm,
+                                A( k, n), ldak,
+                                TT(k, n), TT->mb);
+                        }
+                    }
+                }
+                for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
+                    m = qrtree->getm(qrtree, k, i);
+
+                    tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+                    tempkmin = chameleon_min(tempmm, tempkm);
+                    ldbm = BLKLDD(B, m);
+
+#if defined(CHAMELEON_COPY_DIAG)
+                    MORSE_TASK_zlacpy(
+                        &options,
+                        MorseUpper, tempkmin, tempmm, A->nb,
+                        A(k, m), ldak,
+                        D(k, m), ldak );
+#if defined(CHAMELEON_USE_CUDA)
+                    MORSE_TASK_zlaset(
+                        &options,
+                        MorseLower, tempkmin, tempmm,
+                        0., 1.,
+                        D(k, m), ldak );
+#endif
+#endif
+                    for (n = 0; n < B->nt; n++) {
+                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                        MORSE_TASK_zunmlq(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkmin, ib, TS->nb,
+                            D( k, m), ldak,
+                            TS(k, m), TS->mb,
+                            B( m, n), ldbm);
+                    }
+                }
+                RUNTIME_iteration_pop(morse);
+            }
+        }
+    } else {
+        if (trans == MorseConjTrans) {
+            /*
+             *  MorseRight / MorseConjTrans
+             */
+            for (k = K-1; k >= 0; k--) {
+                RUNTIME_iteration_push(morse, k);
+                /* NOTE(review): tempkn mirrors the MorseRight / MorseNoTrans branch - confirm */
+                tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
+                ldak = BLKLDD(A, k);
+
+                /* Setting the order of tiles */
+                libhqr_treewalk(qrtree, k, tiles);
+
+                for (i = B->nt-2; i >= k; i--) {
+                    m = tiles[i];
+                    p = qrtree->currpiv(qrtree, k, m);
+
+                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                    ldbm = BLKLDD(B, m);
+                    ldbp = BLKLDD(B, p);
+
+                    /* TS or TT */
+                    if(qrtree->gettype(qrtree, k, m) == 0){
+                        for (n = 0; n < B->nt; n++) {
+                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                            MORSE_TASK_ztsmlq(
+                                &options,
+                                side, trans,
+                                tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb,
+                                B( p, n), ldbp,
+                                B( m, n), ldbm,
+                                A( k, n), ldak,
+                                TS(k, n), TS->mb);
+                        }
+                    }
+                    else{
+                        for (n = 0; n < B->nt; n++) {
+                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                            MORSE_TASK_zttmlq(
+                                &options,
+                                side, trans,
+                                tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb,
+                                B( m, p), ldbp,
+                                B( m, n), ldbm,
+                                A( k, n), ldak,
+                                TT(k, n), TT->mb);
+                        }
+                    }
+                }
+                for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
+                    n = qrtree->getm(qrtree, k, i);
+
+                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    tempkmin = chameleon_min(tempnn, tempkn);
+                    ldan = BLKLDD(A, n);
+
+#if defined(CHAMELEON_COPY_DIAG)
+                    MORSE_TASK_zlacpy(
+                        &options,
+                        MorseUpper, tempnn, tempkmin, A->nb,
+                        A(n, k), ldan,
+                        D(n, k), ldan );
+#if defined(CHAMELEON_USE_CUDA)
+                    MORSE_TASK_zlaset(
+                        &options,
+                        MorseLower, tempnn, tempkmin,
+                        0., 1.,
+                        D(n, k), ldan );
+#endif
+#endif
+                    for (m = 0; m < B->mt; m++) {
+                        ldbm = BLKLDD(B, m);
+                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                        MORSE_TASK_zunmlq(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkmin, ib, TS->nb,
+                            D( n, k), ldan,
+                            TS(n, k), TS->mb,
+                            B( m, n), ldbm);
+                    }
+                }
+
+                RUNTIME_iteration_pop(morse);
+            }
+        } else {
+            /*
+             *  MorseRight / MorseNoTrans
+             */
+            for (k = 0; k < K; k++) {
+                RUNTIME_iteration_push(morse, k);
+
+                tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
+
+                for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
+                    n = qrtree->getm(qrtree, k, i);
+
+                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    tempkmin = chameleon_min(tempnn, tempkn);
+                    ldan = BLKLDD(A, n);
+
+#if defined(CHAMELEON_COPY_DIAG)
+                    MORSE_TASK_zlacpy(
+                        &options,
+                        MorseUpper, tempnn, tempkmin, A->nb,
+                        A(n, k), ldan,
+                        D(n, k), ldan );
+#if defined(CHAMELEON_USE_CUDA)
+                    MORSE_TASK_zlaset(
+                        &options,
+                        MorseLower, tempnn, tempkmin,
+                        0., 1.,
+                        D(n, k), ldan );
+#endif
+#endif
+                    for (m = 0; m < B->mt; m++) {
+                        ldbm = BLKLDD(B, m);
+                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                        MORSE_TASK_zunmlq(
+                            &options,
+                            side, trans,
+                            tempmm, tempnn, tempkmin, ib, TS->nb,
+                            D( n, k), ldan,
+                            TS(n, k), TS->mb,
+                            B( m, n), ldbm);
+                    }
+                }
+                /* Setting the order of tiles */
+                libhqr_treewalk(qrtree, k, tiles);
+
+                for (i = k; i < B->nt-1; i++) {
+                    n = tiles[i];
+                    p = qrtree->currpiv(qrtree, k, n);
+
+                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    ldan = BLKLDD(A, n);
+                    ldbp = BLKLDD(B, p);
+                    if(qrtree->gettype(qrtree, k, n) == 0){
+                        for (m = 0; m < B->mt; m++) {
+                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                            ldbm = BLKLDD(B, m);
+                            MORSE_TASK_ztsmlq(
+                                &options,
+                                side, trans,
+                                tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb,
+                                B( m, p), ldbm,
+                                B( m, n), ldbm,
+                                A( n, k), ldan,
+                                TS(n, k), TS->mb);
+                        }
+                    }
+                    else {
+                        for (m = 0; m < B->mt; m++) {
+                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                            ldbm = BLKLDD(B, m);
+                            MORSE_TASK_zttmlq(
+                                &options,
+                                side, trans,
+                                tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb,
+                                B( m, p), ldbm,
+                                B( m, n), ldbm,
+                                A( n, k), ldan,
+                                TT(n, k), TT->mb);
+                        }
+                    }
+                }
+
+                RUNTIME_iteration_pop(morse);
+            }
+        }
+    }
+    RUNTIME_options_ws_free(&options);
+    RUNTIME_options_finalize(&options, morse);
+    MORSE_TASK_dataflush_all();
+    free(tiles);
+#if defined(CHAMELEON_COPY_DIAG)
+    MORSE_Sequence_Wait(sequence);
+    morse_desc_mat_free(D);
+    free(D);
+#endif
+    (void)D;
+}
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index f45091455..eeddd7a06 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -16,17 +16,10 @@
  *  MORSE is a software package provided by Univ. of Tennessee,
  *  Univ. of California Berkeley and Univ. of Colorado Denver
  *
- * @version 2.5.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for MORSE 1.0.0
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Azzam Haidar
+ * @version 1.0.0
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Raphael Boucherie
- * @date 2010-11-15
+ * @date 2017-05-17
  * @precisions normal z -> s d c
  *
  **/
-- 
GitLab