From 5d03b644fec1ad4c9828b78b2c66d63fcd8aa04f Mon Sep 17 00:00:00 2001 From: Raphael Boucherie <raphael.boucherie@inria.fr> Date: Fri, 30 Jun 2017 16:28:50 +0200 Subject: [PATCH] moved diagonal copies to driver level --- compute/pzgebrd_ge2gb.c | 9 ++++++++ compute/pzgelqf.c | 26 +++++----------------- compute/pzgelqfrh.c | 29 +++++------------------- compute/pzgeqrf.c | 26 +++++----------------- compute/pzgeqrfrh.c | 29 +++++------------------- compute/pzunglq.c | 26 +++++----------------- compute/pzunglqrh.c | 29 +++++------------------- compute/pzungqr.c | 26 +++++----------------- compute/pzungqrrh.c | 29 +++++------------------- compute/pzunmlq.c | 44 +++++++++++++----------------------- compute/pzunmlqrh.c | 47 +++++++++++++-------------------------- compute/pzunmqr.c | 44 +++++++++++++----------------------- compute/pzunmqrrh.c | 48 +++++++++++++--------------------------- compute/zgelqf.c | 18 ++++++++++++--- compute/zgelqs.c | 17 ++++++++++++-- compute/zgels.c | 49 +++++++++++++++++++++++++++++------------ compute/zgeqrf.c | 18 ++++++++++++--- compute/zgeqrs.c | 17 ++++++++++++-- compute/zgesvd.c | 30 ++++++++++++++++++++----- compute/zheevd.c | 23 +++++++++++++++++-- compute/zunglq.c | 24 +++++++++++++++----- compute/zungqr.c | 17 ++++++++++++-- compute/zunmlq.c | 20 +++++++++++++---- compute/zunmqr.c | 22 ++++++++++++++---- control/compute_z.h | 24 ++++++++++---------- 25 files changed, 338 insertions(+), 353 deletions(-) diff --git a/compute/pzgebrd_ge2gb.c b/compute/pzgebrd_ge2gb.c index 710eb1604..443e53212 100644 --- a/compute/pzgebrd_ge2gb.c +++ b/compute/pzgebrd_ge2gb.c @@ -30,6 +30,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, { int k; int tempkm, tempkn; + MORSE_desc_t D; if (A.m >= A.n){ for (k = 0; k < A.nt; k++) { tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; @@ -38,6 +39,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_pzgeqrf( morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn), morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn), + morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn), sequence, request); morse_pzunmqr( @@ -46,6 +48,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn), morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, A.m-k*A.mb, A.n-(k+1)*A.nb), morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn), + morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn), sequence, request); if (k+1 < A.nt){ @@ -54,6 +57,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_pzgelqf( morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb), morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb), + morse_desc_submatrix(&D, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb), sequence, request); morse_pzunmlq( @@ -61,6 +65,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb), morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb), morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb), + morse_desc_submatrix(&D, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb), sequence, request); } } @@ -73,6 +78,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_pzgelqf( morse_desc_submatrix(&A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb), morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb), + morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb), sequence, request); morse_pzunmlq( @@ -80,6 +86,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_desc_submatrix(&A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb), morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, A.n-k*A.nb), morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb), + morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb), sequence, request); if (k+1 < A.mt){ @@ -88,6 +95,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_pzgeqrf( morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn), morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn), + morse_desc_submatrix(&D, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn), sequence, request); morse_pzunmqr( @@ -95,6 +103,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn), morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb), morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn), + morse_desc_submatrix(&D, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn), sequence, request); } } diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 36cf19e1b..46023eb1a 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -33,22 +33,21 @@ #define A(m,n) A, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel tile LQ factorization - dynamic scheduling **/ -void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, +void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldam; @@ -91,12 +90,6 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tslqt and unmlq tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); -#endif - for (k = 0; k < minMNT; k++) { RUNTIME_iteration_push(morse, k); @@ -114,13 +107,13 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, &options, MorseUpper, A->mb, A->nb, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, A->mb, A->nb, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif } @@ -131,7 +124,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, &options, MorseRight, MorseConjTrans, tempmm, tempkn, tempkn, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, A(m, k), ldam); } @@ -162,11 +155,4 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index 0fee71961..0e8afc068 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -34,22 +34,21 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+A->nt #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((n)/BS), 0 +#define D(m,n) D, ((n)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /* * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling */ -void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, +void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, N, RD; @@ -89,15 +88,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - { - int nblk = ( A->nt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); /* The number of the factorization */ @@ -120,13 +110,13 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, &options, MorseUpper, tempkm, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkm, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif } @@ -137,7 +127,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, &options, MorseRight, MorseConjTrans, tempmm, tempNn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, A(m, N), ldam); } @@ -193,11 +183,4 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index 6e061f0ed..f9574d9f3 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -33,22 +33,21 @@ #define A(m,n) A, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel tile QR factorization - dynamic scheduling **/ -void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, +void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldam; @@ -86,12 +85,6 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); -#endif - for (k = 0; k < minMNT; k++) { RUNTIME_iteration_push(morse, k); @@ -109,13 +102,13 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, &options, MorseLower, A->mb, A->nb, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, A->mb, A->nb, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif } @@ -125,7 +118,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, &options, MorseLeft, MorseConjTrans, tempkm, tempnn, tempkm, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, A(k, n), ldak); } @@ -156,11 +149,4 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index c5b026358..5355b97b7 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -35,22 +35,21 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), ((n)+A->nt) #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((m)/BS), 0 +#define D(m,n) D, ((m)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /***************************************************************************//** * Parallel tile QR factorization (reduction Householder) - dynamic scheduling **/ -void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, +void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, M, RD; @@ -90,15 +89,6 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - int nblk = ( A->mt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); @@ -119,13 +109,13 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, &options, MorseLower, tempMm, A->nb, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, A->nb, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif } @@ -135,7 +125,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, &options, MorseLeft, MorseConjTrans, tempMm, tempnn, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, A(M, n), ldaM); } @@ -193,11 +183,4 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunglq.c b/compute/pzunglq.c index 3b50ac59d..9ba778317 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -34,22 +34,21 @@ #define Q(m,n) Q, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling **/ -void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, +void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldqm; @@ -91,12 +90,6 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q); -#endif - for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -125,13 +118,13 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = k; m < Q->mt; m++) { @@ -141,7 +134,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, &options, MorseRight, MorseNoTrans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, Q(m, k), ldqm); } @@ -151,11 +144,4 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index 722613bb4..a3b360d99 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -34,9 +34,9 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+(A->nt) #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((n)/BS), 0 +#define D(m,n) D, ((n)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /** @@ -44,14 +44,13 @@ * reduction Householder) - dynamic scheduling **/ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, - MORSE_desc_t *T, int BS, + MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, N, RD, lastRD; @@ -88,15 +87,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - int nblk = ( A->nt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -149,13 +139,13 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (m = k; m < Q->mt; m++) { @@ -166,7 +156,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, MorseRight, MorseNoTrans, tempmm, tempNn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, Q(m, N), ldqm); } @@ -177,11 +167,4 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzungqr.c b/compute/pzungqr.c index d5a258ac8..da851bcc9 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -34,22 +34,21 @@ #define Q(m,n) Q, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling **/ -void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, +void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldqk, ldam, ldqm; @@ -91,12 +90,6 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q); -#endif - for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -127,13 +120,13 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = k; n < Q->nt; n++) { @@ -142,7 +135,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, &options, MorseLeft, MorseNoTrans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, Q(k, n), ldqk); } @@ -152,11 +145,4 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index c5d2d9c0a..49565edb5 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -36,9 +36,9 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+(A->nt) #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((m)/BS), 0 +#define D(m,n) D, ((m)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /** @@ -46,14 +46,13 @@ * reduction Householder) - dynamic scheduling **/ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, - MORSE_desc_t *T, int BS, + MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, M, RD, lastRD; @@ -90,15 +89,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - int nblk = ( A->mt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -155,13 +145,13 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (n = k; n < Q->nt; n++) { @@ -171,7 +161,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MorseLeft, MorseNoTrans, tempMm, tempnn, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, Q(M, n), ldqM); } @@ -182,11 +172,4 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index 94824c3bc..577ddbfc0 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -35,23 +35,22 @@ #define B(m,n) B, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel application of Q using tile V - LQ factorization - dynamic scheduling **/ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldbk, ldbm; @@ -93,12 +92,6 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q); -#endif - if (side == MorseLeft ) { if (trans == MorseNoTrans) { /* @@ -116,13 +109,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkm, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -131,7 +124,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(k, n), ldbk); } @@ -185,13 +178,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkm, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -200,7 +193,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(k, n), ldbk); } @@ -240,13 +233,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -256,7 +249,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(m, k), ldbm); } @@ -279,13 +272,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -295,7 +288,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(m, k), ldbm); } @@ -322,11 +315,4 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c index 4c637236d..e29ba3964 100644 --- a/compute/pzunmlqrh.c +++ b/compute/pzunmlqrh.c @@ -36,9 +36,9 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+A->nt #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((n)/BS), 0 +#define D(m,n) D, ((n)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /***************************************************************************//** @@ -46,14 +46,13 @@ * Householder) - dynamic scheduling **/ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, N, RD, lastRD; @@ -89,15 +88,6 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - { - int nblk = ( A->nt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); if (side == MorseLeft ) { if (trans == MorseNoTrans) { @@ -118,13 +108,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -134,7 +124,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, side, trans, tempNn, tempnn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, B(N, n), ldbN); } @@ -233,13 +223,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -249,7 +239,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, side, trans, tempNn, tempnn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, B(N, n), ldbN); } @@ -314,13 +304,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -331,7 +321,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, side, trans, tempmm, tempNn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, B(m, N), ldbm); } @@ -356,13 +346,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -373,7 +363,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, side, trans, tempmm, tempNn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, B(m, N), ldbm); } @@ -420,11 +410,4 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index cdf4cbdd3..c0eff1c95 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -35,23 +35,22 @@ #define B(m,n) B, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel application of Q using tile V - QR factorization - dynamic scheduling **/ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldbk, ldam, ldan, ldbm; @@ -93,12 +92,6 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->nb, A->nb, 0, 0, minMT*A->nb, A->nb, A->p, A->q); -#endif - if (side == MorseLeft ) { if (trans == MorseConjTrans) { /* @@ -116,13 +109,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -131,7 +124,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(k, n), ldbk); } @@ -187,13 +180,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -202,7 +195,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(k, n), ldbk); } @@ -244,13 +237,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkn, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -260,7 +253,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(m, k), ldbm); } @@ -283,13 +276,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkn, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -299,7 +292,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(m, k), ldbm); } @@ -327,11 +320,4 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index 777ec71ad..76a6eef59 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -36,9 +36,9 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), ((n)+A->nt) #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((m)/BS), 0 +#define D(m,n) D, ((m)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /***************************************************************************//** @@ -46,14 +46,13 @@ * Householder) - dynamic scheduling **/ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, M, RD, lastRD; @@ -90,16 +89,6 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - { - int nblk = ( A->mt + BS -1 ) / BS; - nblk = ( A->mt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); if (side == MorseLeft ) { if (trans == MorseConjTrans) { @@ -120,13 +109,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -135,7 +124,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempMm, tempnn, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, B(M, n), ldbM); } @@ -238,13 +227,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -254,7 +243,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, side, trans, tempMm, tempnn, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, B(M, n), ldbM); } @@ -320,13 +309,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -336,7 +325,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempMm, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, B(m, M), ldbm); } @@ -361,13 +350,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -377,7 +366,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempMm, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, B(m, M), ldbm); } @@ -426,11 +415,4 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/zgelqf.c b/compute/zgelqf.c index 95fe0544f..6fb6606b1 100644 --- a/compute/zgelqf.c +++ b/compute/zgelqf.c @@ -238,6 +238,7 @@ int MORSE_zgelqf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -277,12 +278,23 @@ int MORSE_zgelqf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgelqf(A, T, sequence, request); + morse_pzgelqf(A, T, Dptr, sequence, request); } else { - morse_pzgelqfrh(A, T, MORSE_RHBLK, sequence, request); + morse_pzgelqfrh(A, T, Dptr, MORSE_RHBLK, sequence, request); } - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgelqs.c b/compute/zgelqs.c index b998694a9..1c8186c47 100644 --- a/compute/zgelqs.c +++ b/compute/zgelqs.c @@ -266,6 +266,7 @@ int MORSE_zgelqs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, MORSE_desc_t *subB; MORSE_desc_t *subA; MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -320,12 +321,24 @@ int MORSE_zgelqs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, free(subA); free(subB); +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, T, sequence, request); + morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, T, Dptr, sequence, request); } else { - morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, T, MORSE_RHBLK, sequence, request); + morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, T, Dptr, MORSE_RHBLK, sequence, request); } + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgels.c b/compute/zgels.c index 8b5093369..53314a3fe 100644 --- a/compute/zgels.c +++ b/compute/zgels.c @@ -29,7 +29,7 @@ **/ #include "control/common.h" -/***************************************************************************//** +/** * * @ingroup MORSE_Complex64_t * @@ -328,6 +328,7 @@ int MORSE_zgels_Tile_Async(MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *subA; MORSE_desc_t *subB; MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -379,45 +380,65 @@ int MORSE_zgels_Tile_Async(MORSE_enum trans, MORSE_desc_t *A, } */ if (A->m >= A->n) { + +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgeqrf(A, T, sequence, request); - morse_pzunmqr(MorseLeft, MorseConjTrans, A, B, T, sequence, request); + morse_pzgeqrf(A, T, Dptr, sequence, request); + + morse_pzunmqr(MorseLeft, MorseConjTrans, A, B, T, Dptr, sequence, request); } else { - morse_pzgeqrfrh(A, T, MORSE_RHBLK, sequence, request); + morse_pzgeqrfrh(A, T, Dptr, MORSE_RHBLK, sequence, request); - morse_pzunmqrrh(MorseLeft, MorseConjTrans, A, B, T, MORSE_RHBLK, sequence, request); + morse_pzunmqrrh(MorseLeft, MorseConjTrans, A, B, T, Dptr, MORSE_RHBLK, sequence, request); } subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); - free(subA); - free(subB); + } else { /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); morse_pztile_zero(subB, sequence, request); free(subB); */ - +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgelqf(A, T, sequence, request); + morse_pzgelqf(A, T, Dptr, sequence, request); } else { - morse_pzgelqfrh(A, T, MORSE_RHBLK, sequence, request); + morse_pzgelqfrh(A, T, Dptr, MORSE_RHBLK, sequence, request); } subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); - free(subA); - free(subB); if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, T, sequence, request); + morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, T, Dptr, sequence, request); } else { - morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, T, MORSE_RHBLK, sequence, request); + morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, T, Dptr, MORSE_RHBLK, sequence, request); } } + + free(subA); + free(subB); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c index f17810f5c..de992ffa7 100644 --- a/compute/zgeqrf.c +++ b/compute/zgeqrf.c @@ -238,6 +238,7 @@ int MORSE_zgeqrf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -277,12 +278,23 @@ int MORSE_zgeqrf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgeqrf(A, T, sequence, request); + morse_pzgeqrf(A, T, Dptr, sequence, request); } else { - morse_pzgeqrfrh(A, T, MORSE_RHBLK, sequence, request); + morse_pzgeqrfrh(A, T, Dptr, MORSE_RHBLK, sequence, request); } - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c index 360d13ae9..757568882 100644 --- a/compute/zgeqrs.c +++ b/compute/zgeqrs.c @@ -263,6 +263,7 @@ int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, MORSE_desc_t *subA; MORSE_desc_t *subB; MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -307,11 +308,19 @@ int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, return MORSE_SUCCESS; } */ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunmqr(MorseLeft, MorseConjTrans, A, B, T, sequence, request); + morse_pzunmqr(MorseLeft, MorseConjTrans, A, B, T, Dptr, sequence, request); } else { - morse_pzunmqrrh(MorseLeft, MorseConjTrans, A, B, T, MORSE_RHBLK, sequence, request); + morse_pzunmqrrh(MorseLeft, MorseConjTrans, A, B, T, Dptr, MORSE_RHBLK, sequence, request); } subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); @@ -320,5 +329,9 @@ int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, free(subA); free(subB); + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgesvd.c b/compute/zgesvd.c index f61ddd9b9..1f312e220 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -398,6 +398,7 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, MORSE_desc_t descT; MORSE_desc_t descU, descVT; MORSE_desc_t descAB; + MORSE_desc_t D, *Dptr = NULL; MORSE_desc_t *subA, *subT, *subUVT; double *E; int M, N, MINMN, NB, LDAB; @@ -551,25 +552,40 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, subT = NULL; subUVT = NULL; if ( jobu != MorseNoVec ) { +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + if ( M < N ){ subA = morse_desc_submatrix(&descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb); subUVT = morse_desc_submatrix(&descU, descU.mb, 0, descU.m-descU.mb, descU.n); subT = morse_desc_submatrix(&descT, descT.mb, 0, descT.m-descT.mb, descT.n-descT.nb); morse_pzunmqr( MorseLeft, MorseNoTrans, - subA, subUVT, subT, + subA, subUVT, subT, Dptr, sequence, request ); } else { morse_pzunmqr( MorseLeft, MorseNoTrans, - &descA, &descU, &descT, + &descA, &descU, &descT, Dptr, sequence, request ); } } if ( jobvt != MorseNoVec ) { - if ( M < N ){ +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + if ( M < N ){ morse_pzunmlq( MorseRight, MorseNoTrans, - &descA, &descVT, &descT, + &descA, &descVT, &descT, Dptr, sequence, request ); } else { @@ -577,7 +593,7 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, subUVT = morse_desc_submatrix(&descVT, 0, descVT.nb, descVT.m, descVT.n-descVT.nb); subT = morse_desc_submatrix(&descT, 0, descT.nb, descT.m-descT.mb, descT.n -descT.nb); morse_pzunmlq( MorseRight, MorseNoTrans, - subA, subUVT, subT, + subA, subUVT, subT, Dptr, sequence, request ); } } @@ -612,5 +628,9 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, if (jobvt != MorseNoVec) morse_desc_mat_free( &descVT ); free(E); + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zheevd.c b/compute/zheevd.c index 664fb2ab3..671ffc5c1 100644 --- a/compute/zheevd.c +++ b/compute/zheevd.c @@ -335,6 +335,7 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, MORSE_context_t *morse; MORSE_desc_t descA; MORSE_desc_t descT; + MORSE_desc_t D, *Dptr = NULL; MORSE_Complex64_t *Q2; int N, NB, status; double *E; @@ -468,13 +469,20 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, morse_desc_mat_free(&(descQ2)); morse_desc_mat_free(&(descV)) ); if (uplo == MorseLower) { +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif subA = morse_desc_submatrix(&descA, descA.mb, 0, descA.m -descA.mb, descA.n-descA.nb); subQ = morse_desc_submatrix(&descQ2, descQ2.mb, 0, descQ2.m-descQ2.mb, descQ2.n ); subT = morse_desc_submatrix(&descT, descT.mb, 0, descT.m -descT.mb, descT.n-descT.nb); /* Compute Q2 = Q1 * Q2 */ morse_pzunmqr( MorseLeft, MorseNoTrans, - subA, subQ, subT, + subA, subQ, subT, Dptr, sequence, request ); /* Compute the final eigenvectors A = (Q1 * Q2) * V */ @@ -485,13 +493,20 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, } else { +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif subA = morse_desc_submatrix(&descA, 0, descA.nb, descA.m -descA.mb, descA.n -descA.nb ); subQ = morse_desc_submatrix(&descQ2, descQ2.mb, 0, descQ2.m-descQ2.mb, descQ2.n ); subT = morse_desc_submatrix(&descT, 0, descT.nb, descT.m -descT.mb, descT.n -descT.nb ); /* Compute Q2 = Q1^h * Q2 */ morse_pzunmlq( MorseLeft, MorseConjTrans, - subA, subQ, subT, + subA, subQ, subT, Dptr, sequence, request ); /* Compute the final eigenvectors A = (Q1^h * Q2) * V */ @@ -511,5 +526,9 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, free(V); free(E); + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunglq.c b/compute/zunglq.c index 78865dd02..0ee4758b6 100644 --- a/compute/zunglq.c +++ b/compute/zunglq.c @@ -160,7 +160,7 @@ int MORSE_zunglq(int M, int N, int K, /* morse_ziptile2lap( descQ, Q, NB, NB, LDQ, N, sequence, &request);*/ /* morse_sequence_wait(morse, sequence);*/ /* }*/ - + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; @@ -216,8 +216,8 @@ int MORSE_zunglq_Tile(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q) MORSE_zunglq_Tile_Async(A, T, Q, sequence, &request); morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); - RUNTIME_desc_getoncpu(Q); - + RUNTIME_desc_getoncpu(Q); + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; @@ -254,6 +254,7 @@ int MORSE_zunglq_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -298,13 +299,26 @@ int MORSE_zunglq_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ + +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunglq(A, Q, T, sequence, request); + morse_pzunglq(A, Q, T, Dptr, sequence, request); } else { - morse_pzunglqrh(A, Q, T, MORSE_RHBLK, sequence, request); + morse_pzunglqrh(A, Q, T, Dptr, MORSE_RHBLK, sequence, request); } + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zungqr.c b/compute/zungqr.c index cb136d417..122b1b911 100644 --- a/compute/zungqr.c +++ b/compute/zungqr.c @@ -253,6 +253,7 @@ int MORSE_zungqr_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -296,13 +297,25 @@ int MORSE_zungqr_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, if (N <= 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzungqr(A, Q, T, sequence, request); + morse_pzungqr(A, Q, T, Dptr, sequence, request); } else { - morse_pzungqrrh(A, Q, T, MORSE_RHBLK, sequence, request); + morse_pzungqrrh(A, Q, T, Dptr, MORSE_RHBLK, sequence, request); } + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunmlq.c b/compute/zunmlq.c index 1138f4aee..33cf72a30 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -311,6 +311,7 @@ int MORSE_zunmlq_Tile_Async(MORSE_enum side, MORSE_enum trans, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -361,17 +362,28 @@ int MORSE_zunmlq_Tile_Async(MORSE_enum side, MORSE_enum trans, if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { if ( (trans == MorseConjTrans) && (side == MorseLeft) ) { - morse_pzunmlq(side, trans, A, C, T, sequence, request); + morse_pzunmlq(side, trans, A, C, T, Dptr, sequence, request); } else { - morse_pzunmlq(side, trans, A, C, T, sequence, request); + morse_pzunmlq(side, trans, A, C, T, Dptr, sequence, request); } } else { - morse_pzunmlqrh(side, trans, A, C, T, MORSE_RHBLK, sequence, request); + morse_pzunmlqrh(side, trans, A, C, T, Dptr, MORSE_RHBLK, sequence, request); } - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunmqr.c b/compute/zunmqr.c index 709947e87..a32d60367 100644 --- a/compute/zunmqr.c +++ b/compute/zunmqr.c @@ -312,6 +312,7 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -362,18 +363,31 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; */ - if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { + +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { if ( (trans == MorseConjTrans) && (side == MorseLeft) ) { - morse_pzunmqr(side, trans, A, C, T, sequence, request); + morse_pzunmqr(side, trans, A, C, T, Dptr, sequence, request); } else { - morse_pzunmqr(side, trans, A, C, T, sequence, request); + morse_pzunmqr(side, trans, A, C, T, Dptr, sequence, request); } } else { - morse_pzunmqrrh(side, trans, A, C, T, MORSE_RHBLK, sequence, request); + morse_pzunmqrrh(side, trans, A, C, T, Dptr, MORSE_RHBLK, sequence, request); } + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/control/compute_z.h b/control/compute_z.h index f9608bdb2..a97a78c97 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -90,11 +90,11 @@ void morse_pzbarrier_tl2pnl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_r void morse_pzbarrier_tl2row(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgebrd_gb2bd(MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgemm(MORSE_enum transA, MORSE_enum transB, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgetmi2(MORSE_enum idep, MORSE_enum odep, MORSE_enum storev, int m, int n, int mb, int nb, MORSE_Complex64_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgetrf_nopiv(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); @@ -144,15 +144,15 @@ void morse_pztrsmrv(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_e void morse_pztrtri(MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzungbr(MORSE_enum side, MORSE_desc_t *A, MORSE_desc_t *O, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzungbrrh(MORSE_enum side, MORSE_desc_t *A, MORSE_desc_t *O, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D,int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzungtr(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* user_build_callback, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pzgelqf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, -- GitLab