From 6b5343f841ea2fdcb033a763fa2cb7cc579e1125 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Sun, 4 Dec 2016 22:59:06 +0000 Subject: [PATCH] Fix workspaces sizes --- compute/pzgelqf.c | 18 ++++++++++++------ compute/pzgelqfrh.c | 18 ++++++++++++------ compute/pzgeqrf.c | 14 ++++++++++---- compute/pzgeqrfrh.c | 14 ++++++++++---- compute/pzunglq.c | 4 ++-- compute/pzunglqrh.c | 4 ++-- compute/pzungqr.c | 4 ++-- compute/pzungqrrh.c | 4 ++-- compute/pzunmlq.c | 10 +++++----- compute/pzunmqr.c | 10 +++++----- compute/pzunmqrrh.c | 10 +++++----- 11 files changed, 67 insertions(+), 43 deletions(-) diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 99c5a88c7..cbbb8e44a 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -77,16 +77,22 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, ws_worker = A->nb * (ib+1); /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmlq = A->nb * ib + * ztsmlq = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + #if defined(CHAMELEON_USE_MAGMA) /* Worker space * - * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * zunmlq = A->nb * ib - * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsmlq = 2 * A->nb * ib + * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) + * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ ws_worker = max( ws_worker, ib * (ib + A->nb) ); - ws_worker = max( ws_worker, ib * A->nb * 2 ); /* Host space * @@ -124,7 +130,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MorseUpper, A->mb, A->nb, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, A->mb, A->nb, diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index ea9484acf..d108869da 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -79,16 +79,22 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, 
int BS, ws_worker = A->nb * (ib+1); /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmlq = A->nb * ib + * ztsmlq = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + #if defined(CHAMELEON_USE_MAGMA) /* Worker space * - * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * zunmlq = A->nb * ib - * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsmlq = 2 * A->nb * ib + * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) + * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ ws_worker = max( ws_worker, ib * (ib + A->nb) ); - ws_worker = max( ws_worker, ib * A->nb * 2 ); /* Host space * @@ -129,7 +135,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MorseUpper, tempkm, tempNn, A->nb, A(k, N), ldak, DIAG(k, N), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkm, tempNn, diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index 337796a16..9ab9cc1f3 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -72,16 +72,22 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, ws_worker = A->nb * (ib+1); /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + #if defined(CHAMELEON_USE_MAGMA) /* Worker space * * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * zunmqr = A->nb * ib * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsmqr = 2 * A->nb * ib */ ws_worker = max( ws_worker, ib * (ib + A->nb) ); - ws_worker = max( ws_worker, ib * A->nb * 2 ); /* Host space * @@ -119,7 +125,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MorseLower, A->mb, A->nb, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, 
A->mb, A->nb, diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index e4c1ba524..22c21f5bb 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -77,16 +77,22 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ws_worker = A->nb * (ib+1); /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + #if defined(CHAMELEON_USE_MAGMA) /* Worker space * * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * zunmqr = A->nb * ib * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsmqr = 2 * A->nb * ib */ ws_worker = max( ws_worker, ib * (ib + A->nb) ); - ws_worker = max( ws_worker, ib * A->nb * 2 ); /* Host space * @@ -128,7 +134,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MorseLower, tempMm, A->nb, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, A->nb, diff --git a/compute/pzunglq.c b/compute/pzunglq.c index eb67186f4..7b1343866 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -77,7 +77,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ws_worker = A->nb * ib; /* Allocation of temporary (scratch) working space */ -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmlq = A->nb * ib @@ -124,7 +124,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index e0ca54931..cd4e8abc0 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -75,7 +75,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, */ 
ws_worker = A->nb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -147,7 +147,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, DIAG(k, N), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, diff --git a/compute/pzungqr.c b/compute/pzungqr.c index 77eba2535..8226c8477 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -77,7 +77,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ws_worker = A->nb * ib; /* Allocation of temporary (scratch) working space */ -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -126,7 +126,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index 87a910d02..8c2f64ddb 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -77,7 +77,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, */ ws_worker = A->nb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -153,7 +153,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index 94a419258..f157fb241 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -79,7 +79,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, */ ws_worker = A->mb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if 
defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmlq = A->mb * ib @@ -115,7 +115,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkm, @@ -180,7 +180,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkm, @@ -231,7 +231,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, @@ -266,7 +266,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index 3219aa229..3d53e459c 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -79,7 +79,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, */ ws_worker = A->nb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -115,7 +115,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, @@ -182,7 +182,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, 
MorseUpper, tempkm, tempkmin, @@ -235,7 +235,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkn, tempkmin, @@ -270,7 +270,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkn, tempkmin, diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index 174de507f..aee79ee4a 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -77,7 +77,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, */ ws_worker = A->nb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -117,7 +117,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, @@ -232,7 +232,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, @@ -311,7 +311,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, @@ -349,7 +349,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, 
tempMm, tempkmin, -- GitLab