From 6b5343f841ea2fdcb033a763fa2cb7cc579e1125 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Sun, 4 Dec 2016 22:59:06 +0000
Subject: [PATCH] Fix workspaces sizes

---
 compute/pzgelqf.c   | 18 ++++++++++++------
 compute/pzgelqfrh.c | 18 ++++++++++++------
 compute/pzgeqrf.c   | 14 ++++++++++----
 compute/pzgeqrfrh.c | 14 ++++++++++----
 compute/pzunglq.c   |  4 ++--
 compute/pzunglqrh.c |  4 ++--
 compute/pzungqr.c   |  4 ++--
 compute/pzungqrrh.c |  4 ++--
 compute/pzunmlq.c   | 10 +++++-----
 compute/pzunmqr.c   | 10 +++++-----
 compute/pzunmqrrh.c | 10 +++++-----
 11 files changed, 67 insertions(+), 43 deletions(-)

diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c
index 99c5a88c7..cbbb8e44a 100644
--- a/compute/pzgelqf.c
+++ b/compute/pzgelqf.c
@@ -77,16 +77,22 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
     ws_worker = A->nb * (ib+1);
 
     /* Allocation of temporary (scratch) working space */
+#if defined(CHAMELEON_USE_CUDA)
+    /* Worker space
+     *
+     * zunmlq = A->nb * ib
+     * ztsmlq = 2 * A->nb * ib
+     */
+    ws_worker = max( ws_worker, ib * A->nb * 2 );
+#endif
+
 #if defined(CHAMELEON_USE_MAGMA)
     /* Worker space
      *
-     * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * zunmlq = A->nb * ib
-     * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsmlq = 2 * A->nb * ib
+     * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
+     * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
      */
     ws_worker = max( ws_worker, ib * (ib + A->nb) );
-    ws_worker = max( ws_worker, ib * A->nb * 2 );
 
     /* Host space
      *
@@ -124,7 +130,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
                 MorseUpper, A->mb, A->nb, A->nb,
                 A(k, k), ldak,
                 DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
             MORSE_TASK_zlaset(
                 &options,
                 MorseLower, A->mb, A->nb,
diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c
index ea9484acf..d108869da 100644
--- a/compute/pzgelqfrh.c
+++ b/compute/pzgelqfrh.c
@@ -79,16 +79,22 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     ws_worker = A->nb * (ib+1);
 
     /* Allocation of temporary (scratch) working space */
+#if defined(CHAMELEON_USE_CUDA)
+    /* Worker space
+     *
+     * zunmlq = A->nb * ib
+     * ztsmlq = 2 * A->nb * ib
+     */
+    ws_worker = max( ws_worker, ib * A->nb * 2 );
+#endif
+
 #if defined(CHAMELEON_USE_MAGMA)
     /* Worker space
      *
-     * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * zunmlq = A->nb * ib
-     * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsmlq = 2 * A->nb * ib
+     * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
+     * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
      */
     ws_worker = max( ws_worker, ib * (ib + A->nb) );
-    ws_worker = max( ws_worker, ib * A->nb * 2 );
 
     /* Host space
      *
@@ -129,7 +135,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
                 MorseUpper, tempkm, tempNn, A->nb,
                 A(k, N), ldak,
                 DIAG(k, N), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
             MORSE_TASK_zlaset(
                 &options,
                 MorseLower, tempkm, tempNn,
diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c
index 337796a16..9ab9cc1f3 100644
--- a/compute/pzgeqrf.c
+++ b/compute/pzgeqrf.c
@@ -72,16 +72,22 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
     ws_worker = A->nb * (ib+1);
 
     /* Allocation of temporary (scratch) working space */
+#if defined(CHAMELEON_USE_CUDA)
+    /* Worker space
+     *
+     * zunmqr = A->nb * ib
+     * ztsmqr = 2 * A->nb * ib
+     */
+    ws_worker = max( ws_worker, ib * A->nb * 2 );
+#endif
+
 #if defined(CHAMELEON_USE_MAGMA)
     /* Worker space
      *
      * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * zunmqr = A->nb * ib
      * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsmqr = 2 * A->nb * ib
      */
     ws_worker = max( ws_worker, ib * (ib + A->nb) );
-    ws_worker = max( ws_worker, ib * A->nb * 2 );
 
     /* Host space
      *
@@ -119,7 +125,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
                 MorseLower, A->mb, A->nb, A->nb,
                 A(k, k), ldak,
                 DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
             MORSE_TASK_zlaset(
                 &options,
                 MorseUpper, A->mb, A->nb,
diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c
index e4c1ba524..22c21f5bb 100644
--- a/compute/pzgeqrfrh.c
+++ b/compute/pzgeqrfrh.c
@@ -77,16 +77,22 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     ws_worker = A->nb * (ib+1);
 
     /* Allocation of temporary (scratch) working space */
+#if defined(CHAMELEON_USE_CUDA)
+    /* Worker space
+     *
+     * zunmqr = A->nb * ib
+     * ztsmqr = 2 * A->nb * ib
+     */
+    ws_worker = max( ws_worker, ib * A->nb * 2 );
+#endif
+
 #if defined(CHAMELEON_USE_MAGMA)
     /* Worker space
      *
      * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * zunmqr = A->nb * ib
      * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
-     * ztsmqr = 2 * A->nb * ib
      */
     ws_worker = max( ws_worker, ib * (ib + A->nb) );
-    ws_worker = max( ws_worker, ib * A->nb * 2 );
 
     /* Host space
      *
@@ -128,7 +134,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
                 MorseLower, tempMm, A->nb, A->nb,
                 A(M, k), ldaM,
                 DIAG(M, k), ldaM );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseUpper, tempMm, A->nb,
diff --git a/compute/pzunglq.c b/compute/pzunglq.c
index eb67186f4..7b1343866 100644
--- a/compute/pzunglq.c
+++ b/compute/pzunglq.c
@@ -77,7 +77,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
     ws_worker = A->nb * ib;
 
     /* Allocation of temporary (scratch) working space */
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
     /* Worker space
      *
      * zunmlq = A->nb * ib
@@ -124,7 +124,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
             MorseUpper, tempkmin, tempkn, A->nb,
             A(k, k), ldak,
             DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
         MORSE_TASK_zlaset(
             &options,
             MorseLower, tempkmin, tempkn,
diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c
index e0ca54931..cd4e8abc0 100644
--- a/compute/pzunglqrh.c
+++ b/compute/pzunglqrh.c
@@ -75,7 +75,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
      */
     ws_worker = A->nb * ib;
 
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
     /* Worker space
      *
      * zunmqr = A->nb * ib
@@ -147,7 +147,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
                 MorseUpper, tempkmin, tempNn, A->nb,
                 A(k, N), ldak,
                 DIAG(k, N), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
             MORSE_TASK_zlaset(
                 &options,
                 MorseLower, tempkmin, tempNn,
diff --git a/compute/pzungqr.c b/compute/pzungqr.c
index 77eba2535..8226c8477 100644
--- a/compute/pzungqr.c
+++ b/compute/pzungqr.c
@@ -77,7 +77,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
     ws_worker = A->nb * ib;
 
     /* Allocation of temporary (scratch) working space */
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
     /* Worker space
      *
      * zunmqr = A->nb * ib
@@ -126,7 +126,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
             MorseLower, tempkm, tempkmin, A->nb,
             A(k, k), ldak,
             DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
         MORSE_TASK_zlaset(
             &options,
             MorseUpper, tempkm, tempkmin,
diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c
index 87a910d02..8c2f64ddb 100644
--- a/compute/pzungqrrh.c
+++ b/compute/pzungqrrh.c
@@ -77,7 +77,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
      */
     ws_worker = A->nb * ib;
 
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
     /* Worker space
      *
      * zunmqr = A->nb * ib
@@ -153,7 +153,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
                 MorseLower, tempMm, tempkmin, A->nb,
                 A(M, k), ldaM,
                 DIAG(M, k), ldaM );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
             MORSE_TASK_zlaset(
                 &options,
                 MorseUpper, tempMm, tempkmin,
diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c
index 94a419258..f157fb241 100644
--- a/compute/pzunmlq.c
+++ b/compute/pzunmlq.c
@@ -79,7 +79,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
      */
     ws_worker = A->mb * ib;
 
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
     /* Worker space
      *
      * zunmlq = A->mb * ib
@@ -115,7 +115,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                     MorseUpper, tempkmin, tempkm, A->nb,
                     A(k, k), ldak,
                     DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseLower, tempkmin, tempkm,
@@ -180,7 +180,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                     MorseUpper, tempkmin, tempkm, A->nb,
                     A(k, k), ldak,
                     DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseLower, tempkmin, tempkm,
@@ -231,7 +231,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                     MorseUpper, tempkmin, tempkn, A->nb,
                     A(k, k), ldak,
                     DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseLower, tempkmin, tempkn,
@@ -266,7 +266,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
                     MorseUpper, tempkmin, tempkn, A->nb,
                     A(k, k), ldak,
                     DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseLower, tempkmin, tempkn,
diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c
index 3219aa229..3d53e459c 100644
--- a/compute/pzunmqr.c
+++ b/compute/pzunmqr.c
@@ -79,7 +79,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
      */
     ws_worker = A->nb * ib;
 
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
     /* Worker space
      *
      * zunmqr = A->nb * ib
@@ -115,7 +115,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     MorseLower, tempkm, tempkmin, A->nb,
                     A(k, k), ldak,
                     DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseUpper, tempkm, tempkmin,
@@ -182,7 +182,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     MorseLower, tempkm, tempkmin, A->nb,
                     A(k, k), ldak,
                     DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseUpper, tempkm, tempkmin,
@@ -235,7 +235,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     MorseLower, tempkn, tempkmin, A->nb,
                     A(k, k), ldak,
                     DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseUpper, tempkn, tempkmin,
@@ -270,7 +270,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
                     MorseLower, tempkn, tempkmin, A->nb,
                     A(k, k), ldak,
                     DIAG(k), ldak );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                 MORSE_TASK_zlaset(
                     &options,
                     MorseUpper, tempkn, tempkmin,
diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c
index 174de507f..aee79ee4a 100644
--- a/compute/pzunmqrrh.c
+++ b/compute/pzunmqrrh.c
@@ -77,7 +77,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans,
      */
     ws_worker = A->nb * ib;
 
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
     /* Worker space
      *
      * zunmqr = A->nb * ib
@@ -117,7 +117,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans,
                         MorseLower, tempMm, tempkmin, A->nb,
                         A(M, k), ldaM,
                         DIAG(M, k), ldaM );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                     MORSE_TASK_zlaset(
                         &options,
                         MorseUpper, tempMm, tempkmin,
@@ -232,7 +232,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans,
                         MorseLower, tempMm, tempkmin, A->nb,
                         A(M, k), ldaM,
                         DIAG(M, k), ldaM );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                     MORSE_TASK_zlaset(
                         &options,
                         MorseUpper, tempMm, tempkmin,
@@ -311,7 +311,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans,
                         MorseLower, tempMm, tempkmin, A->nb,
                         A(M, k), ldaM,
                         DIAG(M, k), ldaM );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                     MORSE_TASK_zlaset(
                         &options,
                         MorseUpper, tempMm, tempkmin,
@@ -349,7 +349,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans,
                         MorseLower, tempMm, tempkmin, A->nb,
                         A(M, k), ldaM,
                         DIAG(M, k), ldaM );
-#if defined(CHAMELEON_USE_MAGMA)
+#if defined(CHAMELEON_USE_CUDA)
                     MORSE_TASK_zlaset(
                         &options,
                         MorseUpper, tempMm, tempkmin,
-- 
GitLab