diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 99c5a88c723e4ec07e3b6ebe5de4b262495f46c4..cbbb8e44a210af323ee1ddbd538a100a19416a0b 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -77,16 +77,22 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, ws_worker = A->nb * (ib+1); /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmlq = A->nb * ib + * ztsmlq = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + #if defined(CHAMELEON_USE_MAGMA) /* Worker space * - * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * zunmlq = A->nb * ib - * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsmlq = 2 * A->nb * ib + * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) + * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ ws_worker = max( ws_worker, ib * (ib + A->nb) ); - ws_worker = max( ws_worker, ib * A->nb * 2 ); /* Host space * @@ -124,7 +130,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MorseUpper, A->mb, A->nb, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, A->mb, A->nb, diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index ea9484acf8d167bf23ef34164fe3d1f63a20747c..d108869da821b4e8c7fbebbab0a783d71bca93fa 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -79,16 +79,22 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ws_worker = A->nb * (ib+1); /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmlq = A->nb * ib + * ztsmlq = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + #if defined(CHAMELEON_USE_MAGMA) /* Worker space * - * zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * zunmlq = A->nb * ib - * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsmlq = 2 * A->nb * ib + * zgelqt = max( A->nb * (ib+1), 
ib * (ib + A->nb) ) + * ztslqt = max( A->nb * (ib+1), ib * (ib + A->nb) ) */ ws_worker = max( ws_worker, ib * (ib + A->nb) ); - ws_worker = max( ws_worker, ib * A->nb * 2 ); /* Host space * @@ -129,7 +135,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MorseUpper, tempkm, tempNn, A->nb, A(k, N), ldak, DIAG(k, N), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkm, tempNn, diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index 337796a160627ad6aa358fecae068389a6a72727..9ab9cc1f3d8b9afa51c3ba67ef5141f7c59c4158 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -72,16 +72,22 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, ws_worker = A->nb * (ib+1); /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + #if defined(CHAMELEON_USE_MAGMA) /* Worker space * * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * zunmqr = A->nb * ib * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsmqr = 2 * A->nb * ib */ ws_worker = max( ws_worker, ib * (ib + A->nb) ); - ws_worker = max( ws_worker, ib * A->nb * 2 ); /* Host space * @@ -119,7 +125,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MorseLower, A->mb, A->nb, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, A->mb, A->nb, diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index e4c1ba5247cc872dc563bc5364fcca16a4014ea1..22c21f5bb3545b43713880864c0abf04dfd12bae 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -77,16 +77,22 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ws_worker = A->nb * (ib+1); /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * 
zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + #if defined(CHAMELEON_USE_MAGMA) /* Worker space * * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * zunmqr = A->nb * ib * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsmqr = 2 * A->nb * ib */ ws_worker = max( ws_worker, ib * (ib + A->nb) ); - ws_worker = max( ws_worker, ib * A->nb * 2 ); /* Host space * @@ -128,7 +134,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MorseLower, tempMm, A->nb, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, A->nb, diff --git a/compute/pzunglq.c b/compute/pzunglq.c index eb67186f45b3a88804096851fccb839c40846acd..7b1343866534893e900f092ddc29470c1dc96dad 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -77,7 +77,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ws_worker = A->nb * ib; /* Allocation of temporary (scratch) working space */ -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmlq = A->nb * ib @@ -124,7 +124,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index e0ca54931a8866975d3480cffefe36ebf40d8b0f..cd4e8abc014496e0076f68a05e2720dcd6f81195 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -75,7 +75,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, */ ws_worker = A->nb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -147,7 +147,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, 
DIAG(k, N), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, diff --git a/compute/pzungqr.c b/compute/pzungqr.c index 77eba2535926069915949cfb79ac3a0e0fed43e9..8226c8477bea418ccc59b2e908b78f8e4a4897a7 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -77,7 +77,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ws_worker = A->nb * ib; /* Allocation of temporary (scratch) working space */ -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -126,7 +126,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index 87a910d02aae53bb408e77b19fe618d1cff5ac55..8c2f64ddb809336b45cdacb293639e995adcfef9 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -77,7 +77,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, */ ws_worker = A->nb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -153,7 +153,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index 94a419258046aea7206dda13ef33298b32189c29..f157fb241b2237671735212a9e793e9b90357dfb 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -79,7 +79,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, */ ws_worker = A->mb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmlq = A->mb * ib @@ -115,7 +115,7 
@@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkm, @@ -180,7 +180,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkm, @@ -231,7 +231,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, @@ -266,7 +266,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index 3219aa229d92b6e7af14f17d917dd8f7ff293276..3d53e459cd7d4ddbad667ff88944306796d118b0 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -79,7 +79,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, */ ws_worker = A->nb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -115,7 +115,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, @@ -182,7 +182,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, 
@@ -235,7 +235,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkn, tempkmin, @@ -270,7 +270,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, DIAG(k), ldak ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkn, tempkmin, diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index 174de507fc5c8b1a3fe8e158456283f476e99eff..aee79ee4a90c1129bdaa6ab89ecea83e431c8b5d 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -77,7 +77,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, */ ws_worker = A->nb * ib; -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) /* Worker space * * zunmqr = A->nb * ib @@ -117,7 +117,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, @@ -232,7 +232,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, @@ -311,7 +311,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, @@ -349,7 +349,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, DIAG(M, k), ldaM ); -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) 
MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin,