diff --git a/compute/pzgebrd_ge2gb.c b/compute/pzgebrd_ge2gb.c index 710eb1604a0eaf34b90e858d7ef5a6f85efd9904..b9d0ec1e631d76f7f383786f194e83fb1f73cf4a 100644 --- a/compute/pzgebrd_ge2gb.c +++ b/compute/pzgebrd_ge2gb.c @@ -25,7 +25,7 @@ **/ #include "control/common.h" -void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, +void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, MORSE_desc_t D, MORSE_sequence_t *sequence, MORSE_request_t *request) { int k; @@ -38,6 +38,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_pzgeqrf( morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn), morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn), + morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn), sequence, request); morse_pzunmqr( @@ -46,6 +47,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn), morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, A.m-k*A.mb, A.n-(k+1)*A.nb), morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn), + morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn), sequence, request); if (k+1 < A.nt){ @@ -54,6 +56,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_pzgelqf( morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb), morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb), + morse_desc_submatrix(&D, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb), sequence, request); morse_pzunmlq( @@ -61,6 +64,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb), morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb), morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb), + morse_desc_submatrix(&D, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb), sequence, request); } } @@ -73,6 +77,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_pzgelqf( morse_desc_submatrix(&A, k*A.mb, 
k*A.nb, tempkm, A.n-k*A.nb), morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb), + morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb), sequence, request); morse_pzunmlq( @@ -80,6 +85,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_desc_submatrix(&A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb), morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, A.n-k*A.nb), morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb), + morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb), sequence, request); if (k+1 < A.mt){ @@ -88,6 +94,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_pzgeqrf( morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn), morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn), + morse_desc_submatrix(&D, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn), sequence, request); morse_pzunmqr( @@ -95,6 +102,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn), morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb), morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn), + morse_desc_submatrix(&D, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn), sequence, request); } } diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 36cf19e1b672bb219fdbd2eb6f28f0e221ebeb28..46023eb1ae2049fc09a1aa0ff1d201a89b33eb0c 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -33,22 +33,21 @@ #define A(m,n) A, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel tile LQ factorization - dynamic scheduling **/ -void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, +void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t 
*request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldam; @@ -91,12 +90,6 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tslqt and unmlq tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); -#endif - for (k = 0; k < minMNT; k++) { RUNTIME_iteration_push(morse, k); @@ -114,13 +107,13 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, &options, MorseUpper, A->mb, A->nb, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, A->mb, A->nb, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif } @@ -131,7 +124,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, &options, MorseRight, MorseConjTrans, tempmm, tempkn, tempkn, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, A(m, k), ldam); } @@ -162,11 +155,4 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index 0fee71961a000b184ea3723471914f464e085546..0e8afc068eafdd010cb3843a8bb7a1effdc7a88b 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -34,22 +34,21 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+A->nt #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((n)/BS), 0 +#define D(m,n) D, ((n)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /* * Parallel tile LQ 
factorization (reduction Householder) - dynamic scheduling */ -void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, +void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, N, RD; @@ -89,15 +88,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - { - int nblk = ( A->nt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); /* The number of the factorization */ @@ -120,13 +110,13 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, &options, MorseUpper, tempkm, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkm, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif } @@ -137,7 +127,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, &options, MorseRight, MorseConjTrans, tempmm, tempNn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, A(m, N), ldam); } @@ -193,11 +183,4 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index 6e061f0ed2fdf05f64649c31c677f55928f7dd51..f9574d9f3fc87c17308fb6b83d1d75d142bd3b39 100644 --- 
a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -33,22 +33,21 @@ #define A(m,n) A, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel tile QR factorization - dynamic scheduling **/ -void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, +void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldam; @@ -86,12 +85,6 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); -#endif - for (k = 0; k < minMNT; k++) { RUNTIME_iteration_push(morse, k); @@ -109,13 +102,13 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, &options, MorseLower, A->mb, A->nb, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, A->mb, A->nb, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif } @@ -125,7 +118,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, &options, MorseLeft, MorseConjTrans, tempkm, tempnn, tempkm, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, A(k, n), ldak); } @@ -156,11 +149,4 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - 
MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index c5b026358376ce0d2e06481bd166e760983aa90c..5355b97b7357926872ecb36e9cc30bafd6f98342 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -35,22 +35,21 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), ((n)+A->nt) #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((m)/BS), 0 +#define D(m,n) D, ((m)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /***************************************************************************//** * Parallel tile QR factorization (reduction Householder) - dynamic scheduling **/ -void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, +void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, M, RD; @@ -90,15 +89,6 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - int nblk = ( A->mt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); @@ -119,13 +109,13 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, &options, MorseLower, tempMm, A->nb, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, A->nb, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif } @@ -135,7 +125,7 @@ void 
morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, &options, MorseLeft, MorseConjTrans, tempMm, tempnn, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, A(M, n), ldaM); } @@ -193,11 +183,4 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c index 2173faa09def0a8a31440baf84f563dbce432f6f..2230bd0c20244c7d7eb397670cfab357d464fab4 100644 --- a/compute/pzhetrd_he2hb.c +++ b/compute/pzhetrd_he2hb.c @@ -41,12 +41,11 @@ * Parallel tile BAND Tridiagonal Reduction - dynamic scheduler **/ void morse_pzhetrd_he2hb(MORSE_enum uplo, - MORSE_desc_t *A, MORSE_desc_t *T, + MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; - MORSE_desc_t *E = NULL; MORSE_desc_t *D = NULL; MORSE_desc_t *AT = NULL; size_t ws_worker = 0; @@ -90,12 +89,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* Copy of the extra-diagonal to generate more parallelism by releasing anti-dependencies on UNMQR/TSMQR triangle conflict */ - E = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*E, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); -#endif - /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */ D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q); @@ -451,10 +444,4 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, 
morse_desc_mat_free(AT); free(AT); - -#if defined(CHAMELEON_COPY_DIAG) - morse_desc_mat_free(E); - free(E); -#endif - (void)E; } diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c index 4f8b5e8b0d09ec1489912fd59518b51782511539..27f2c17018934fee64173c65c5a2dd357d2151f8 100644 --- a/compute/pztpgqrt.c +++ b/compute/pztpgqrt.c @@ -31,9 +31,9 @@ #define Q1(m,n) Q1, m, n #define Q2(m,n) Q2, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) V1, k, k +#define D(k) V1, k, k #endif /***************************************************************************//** @@ -43,19 +43,19 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, + MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request ) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldvk, ldvm; int ldqk, ldqm; int tempkm, tempkn, tempkk, tempnn, tempmm, templm; - int ib, minMT; + int ib; /* Dimension of the first column */ int maxm = chameleon_max( Q2->m - L, 1 ); @@ -68,13 +68,6 @@ void morse_pztpgqrt( int L, RUNTIME_options_init(&options, morse, sequence, request); ib = MORSE_IB; - - if (V1->m > V1->n) { - minMT = V1->nt; - } else { - minMT = V1->mt; - } - /* * ztpmqrt = Q1->nb * ib */ @@ -94,12 +87,6 @@ void morse_pztpgqrt( int L, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q); -#endif - for (k = V1->nt-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -152,13 +139,13 @@ void morse_pztpgqrt( int L, &options, MorseLower, tempkm, tempkk, V1->nb, V1(k, k), ldvk, - DIAG(k), ldvk ); + 
D(k), ldvk ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkk, 0., 1., - DIAG(k), ldvk ); + D(k), ldvk ); #endif #endif for (n = k; n < Q1->nt; n++) { @@ -167,7 +154,7 @@ void morse_pztpgqrt( int L, &options, MorseLeft, MorseNoTrans, tempkm, tempnn, tempkk, ib, T1->nb, - DIAG(k), ldvk, + D(k), ldvk, T1(k, k), T1->mb, Q1(k, n), ldqk); } @@ -178,11 +165,4 @@ void morse_pztpgqrt( int L, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; (void)minMT; } diff --git a/compute/pzunglq.c b/compute/pzunglq.c index 3b50ac59df3a8aaa7ca4e4e5f5bf70ac7c208cb0..9ba7783179b094636cbb3867c47477870d05b3fe 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -34,22 +34,21 @@ #define Q(m,n) Q, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling **/ -void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, +void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldqm; @@ -91,12 +90,6 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, 
A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q); -#endif - for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -125,13 +118,13 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = k; m < Q->mt; m++) { @@ -141,7 +134,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, &options, MorseRight, MorseNoTrans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, Q(m, k), ldqm); } @@ -151,11 +144,4 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index 722613bb426c559496192735af405e1821e0ef8e..a3b360d99d63140869ea3ec81d4300513f3ec231 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -34,9 +34,9 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+(A->nt) #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((n)/BS), 0 +#define D(m,n) D, ((n)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /** @@ -44,14 +44,13 @@ * reduction Householder) - dynamic scheduling **/ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, - MORSE_desc_t *T, int BS, + MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, N, RD, lastRD; @@ -88,15 +87,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, 
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - int nblk = ( A->nt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -149,13 +139,13 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (m = k; m < Q->mt; m++) { @@ -166,7 +156,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, MorseRight, MorseNoTrans, tempmm, tempNn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, Q(m, N), ldqm); } @@ -177,11 +167,4 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzungqr.c b/compute/pzungqr.c index d5a258ac88b9d91f4776697a9647f3ec758adbab..da851bcc94e392cd8dc4e7001a98ce669c4495b1 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -34,22 +34,21 @@ #define Q(m,n) Q, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling **/ -void morse_pzungqr(MORSE_desc_t *A, 
MORSE_desc_t *Q, MORSE_desc_t *T, +void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldqk, ldam, ldqm; @@ -91,12 +90,6 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q); -#endif - for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -127,13 +120,13 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = k; n < Q->nt; n++) { @@ -142,7 +135,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, &options, MorseLeft, MorseNoTrans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, Q(k, n), ldqk); } @@ -152,11 +145,4 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index c5d2d9c0a024f5cb9b6399c5c355f356e52499e4..49565edb59298c7393ff862135b2b2129031478d 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -36,9 +36,9 @@ #define T(m,n) T, 
(m), (n) #define T2(m,n) T, (m), (n)+(A->nt) #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((m)/BS), 0 +#define D(m,n) D, ((m)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /** @@ -46,14 +46,13 @@ * reduction Householder) - dynamic scheduling **/ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, - MORSE_desc_t *T, int BS, + MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, M, RD, lastRD; @@ -90,15 +89,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - int nblk = ( A->mt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -155,13 +145,13 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (n = k; n < Q->nt; n++) { @@ -171,7 +161,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MorseLeft, MorseNoTrans, tempMm, tempnn, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, Q(M, n), ldqM); } @@ -182,11 +172,4 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - 
MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index 94824c3bc7f1a2d0e08d88a097b23af853564034..577ddbfc0eb7bb8052be6d6c26fe89148cfaeb6f 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -35,23 +35,22 @@ #define B(m,n) B, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel application of Q using tile V - LQ factorization - dynamic scheduling **/ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldbk, ldbm; @@ -93,12 +92,6 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q); -#endif - if (side == MorseLeft ) { if (trans == MorseNoTrans) { /* @@ -116,13 +109,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkm, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -131,7 +124,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempkm, tempnn, tempkmin, 
ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(k, n), ldbk); } @@ -185,13 +178,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempkm, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkm, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -200,7 +193,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(k, n), ldbk); } @@ -240,13 +233,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -256,7 +249,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(m, k), ldbm); } @@ -279,13 +272,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempkn, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempkn, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -295,7 +288,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(m, k), ldbm); } @@ -322,11 +315,4 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - 
morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c index 4c637236d1937debbaecef686ddf1c2c878bf91c..e29ba396482cb6bdc7148dc4f142438b5e763025 100644 --- a/compute/pzunmlqrh.c +++ b/compute/pzunmlqrh.c @@ -36,9 +36,9 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+A->nt #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((n)/BS), 0 +#define D(m,n) D, ((n)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /***************************************************************************//** @@ -46,14 +46,13 @@ * Householder) - dynamic scheduling **/ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, N, RD, lastRD; @@ -89,15 +88,6 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - { - int nblk = ( A->nt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); if (side == MorseLeft ) { if (trans == MorseNoTrans) { @@ -118,13 +108,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -134,7 +124,7 @@ 
void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, side, trans, tempNn, tempnn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, B(N, n), ldbN); } @@ -233,13 +223,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -249,7 +239,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, side, trans, tempNn, tempnn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, B(N, n), ldbN); } @@ -314,13 +304,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -331,7 +321,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, side, trans, tempmm, tempNn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, B(m, N), ldbm); } @@ -356,13 +346,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, &options, MorseUpper, tempkmin, tempNn, A->nb, A(k, N), ldak, - DIAG(k, N), ldak ); + D(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseLower, tempkmin, tempNn, 0., 1., - DIAG(k, N), ldak ); + D(k, N), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -373,7 +363,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, side, trans, tempmm, tempNn, tempkmin, ib, T->nb, - DIAG(k, N), ldak, + D(k, N), ldak, T(k, N), T->mb, B(m, N), ldbm); } @@ -420,11 +410,4 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); 
RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index cdf4cbdd3e713f2dd38c866726c0e8720593755c..c0eff1c9579002a1514664ad4be9c4cef47354b0 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -35,23 +35,22 @@ #define B(m,n) B, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) A, k, k +#define D(k) A, k, k #endif /***************************************************************************//** * Parallel application of Q using tile V - QR factorization - dynamic scheduling **/ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldbk, ldam, ldan, ldbm; @@ -93,12 +92,6 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->nb, A->nb, 0, 0, minMT*A->nb, A->nb, A->p, A->q); -#endif - if (side == MorseLeft ) { if (trans == MorseConjTrans) { /* @@ -116,13 +109,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { 
@@ -131,7 +124,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(k, n), ldbk); } @@ -187,13 +180,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempkm, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -202,7 +195,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(k, n), ldbk); } @@ -244,13 +237,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkn, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -260,7 +253,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(m, k), ldbm); } @@ -283,13 +276,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempkn, tempkmin, A->nb, A(k, k), ldak, - DIAG(k), ldak ); + D(k), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkn, tempkmin, 0., 1., - DIAG(k), ldak ); + D(k), ldak ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -299,7 +292,7 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, + D(k), ldak, T(k, k), T->mb, B(m, k), ldbm); } @@ -327,11 +320,4 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, 
morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index 777ec71ad5a96e12e3870131fdefa24611dec60a..76a6eef59a7a1b5c3c4c7370717a544683c01647 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -36,9 +36,9 @@ #define T(m,n) T, (m), (n) #define T2(m,n) T, (m), ((n)+A->nt) #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(m,n) DIAG, ((m)/BS), 0 +#define D(m,n) D, ((m)/BS), 0 #else -#define DIAG(m,n) A, (m), (n) +#define D(m,n) A, (m), (n) #endif /***************************************************************************//** @@ -46,14 +46,13 @@ * Householder) - dynamic scheduling **/ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int K, M, RD, lastRD; @@ -90,16 +89,6 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - { - int nblk = ( A->mt + BS -1 ) / BS; - nblk = ( A->mt + BS -1 ) / BS; - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); - } -#endif - K = chameleon_min(A->mt, A->nt); if (side == MorseLeft ) { if (trans == MorseConjTrans) { @@ -120,13 +109,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( 
&options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -135,7 +124,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempMm, tempnn, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, B(M, n), ldbM); } @@ -238,13 +227,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -254,7 +243,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, side, trans, tempMm, tempnn, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, B(M, n), ldbM); } @@ -320,13 +309,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -336,7 +325,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempMm, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + D(M, k), ldaM, T(M, k), T->mb, B(m, M), ldbm); } @@ -361,13 +350,13 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, MorseLower, tempMm, tempkmin, A->nb, A(M, k), ldaM, - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempMm, tempkmin, 0., 1., - DIAG(M, k), ldaM ); + D(M, k), ldaM ); #endif #endif for (m = 0; m < B->mt; m++) { @@ -377,7 +366,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, &options, side, trans, tempmm, tempMm, tempkmin, ib, T->nb, - DIAG(M, k), ldaM, + 
D(M, k), ldaM, T(M, k), T->mb, B(m, M), ldbm); } @@ -426,11 +415,4 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/zgelqf.c b/compute/zgelqf.c index 95fe0544f6fcbd4098e3c44ef98b443f9d711e19..6fb6606b14c249906af6c883978856eb4afd65ca 100644 --- a/compute/zgelqf.c +++ b/compute/zgelqf.c @@ -238,6 +238,7 @@ int MORSE_zgelqf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -277,12 +278,23 @@ int MORSE_zgelqf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgelqf(A, T, sequence, request); + morse_pzgelqf(A, T, Dptr, sequence, request); } else { - morse_pzgelqfrh(A, T, MORSE_RHBLK, sequence, request); + morse_pzgelqfrh(A, T, Dptr, MORSE_RHBLK, sequence, request); } - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgelqs.c b/compute/zgelqs.c index b998694a910e89f1dd1ca315b35bad2a22bcdf36..1c8186c47914c86502754696f1e243363ec25322 100644 --- a/compute/zgelqs.c +++ b/compute/zgelqs.c @@ -266,6 +266,7 @@ int MORSE_zgelqs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, MORSE_desc_t *subB; MORSE_desc_t *subA; MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -320,12 +321,24 @@ int MORSE_zgelqs_Tile_Async(MORSE_desc_t *A, 
MORSE_desc_t *T, MORSE_desc_t *B, free(subA); free(subB); +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, T, sequence, request); + morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, T, Dptr, sequence, request); } else { - morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, T, MORSE_RHBLK, sequence, request); + morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, T, Dptr, MORSE_RHBLK, sequence, request); } + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgels.c b/compute/zgels.c index 8b5093369e3d088a4359d89a322cd8ddef0d9ae5..53314a3fe1388299402c1672a0fd026dbb8d7fc4 100644 --- a/compute/zgels.c +++ b/compute/zgels.c @@ -29,7 +29,7 @@ **/ #include "control/common.h" -/***************************************************************************//** +/** * * @ingroup MORSE_Complex64_t * @@ -328,6 +328,7 @@ int MORSE_zgels_Tile_Async(MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *subA; MORSE_desc_t *subB; MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -379,45 +380,65 @@ int MORSE_zgels_Tile_Async(MORSE_enum trans, MORSE_desc_t *A, } */ if (A->m >= A->n) { + +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgeqrf(A, T, sequence, request); - morse_pzunmqr(MorseLeft, MorseConjTrans, A, B, T, sequence, request); + morse_pzgeqrf(A, T, Dptr, sequence, request); + + morse_pzunmqr(MorseLeft, MorseConjTrans, A, B, T, Dptr, sequence, request); } else { - morse_pzgeqrfrh(A, T, MORSE_RHBLK, sequence, request); + morse_pzgeqrfrh(A, T, Dptr, MORSE_RHBLK, 
sequence, request); - morse_pzunmqrrh(MorseLeft, MorseConjTrans, A, B, T, MORSE_RHBLK, sequence, request); + morse_pzunmqrrh(MorseLeft, MorseConjTrans, A, B, T, Dptr, MORSE_RHBLK, sequence, request); } subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); - free(subA); - free(subB); + } else { /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); morse_pztile_zero(subB, sequence, request); free(subB); */ - +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgelqf(A, T, sequence, request); + morse_pzgelqf(A, T, Dptr, sequence, request); } else { - morse_pzgelqfrh(A, T, MORSE_RHBLK, sequence, request); + morse_pzgelqfrh(A, T, Dptr, MORSE_RHBLK, sequence, request); } subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); - free(subA); - free(subB); if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, T, sequence, request); + morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, T, Dptr, sequence, request); } else { - morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, T, MORSE_RHBLK, sequence, request); + morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, T, Dptr, MORSE_RHBLK, sequence, request); } } + + free(subA); + free(subB); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c index f17810f5cd0b0ee6a5abf7dc83c2b5e54f5d2b76..de992ffa755e19cac65af0dcb5cc62332ea68a4d 100644 --- a/compute/zgeqrf.c +++ b/compute/zgeqrf.c @@ -238,6 +238,7 @@ int 
MORSE_zgeqrf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -277,12 +278,23 @@ int MORSE_zgeqrf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgeqrf(A, T, sequence, request); + morse_pzgeqrf(A, T, Dptr, sequence, request); } else { - morse_pzgeqrfrh(A, T, MORSE_RHBLK, sequence, request); + morse_pzgeqrfrh(A, T, Dptr, MORSE_RHBLK, sequence, request); } - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c index 360d13ae92960bac2f4396b70abbe06265ca63ad..757568882712e140b66c622f5d461bcaddbbb056 100644 --- a/compute/zgeqrs.c +++ b/compute/zgeqrs.c @@ -263,6 +263,7 @@ int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, MORSE_desc_t *subA; MORSE_desc_t *subB; MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -307,11 +308,19 @@ int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, return MORSE_SUCCESS; } */ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunmqr(MorseLeft, MorseConjTrans, A, B, T, sequence, request); + morse_pzunmqr(MorseLeft, MorseConjTrans, A, B, T, Dptr, sequence, request); } else { - morse_pzunmqrrh(MorseLeft, MorseConjTrans, A, B, T, MORSE_RHBLK, sequence, request); + morse_pzunmqrrh(MorseLeft, MorseConjTrans, A, B, T, Dptr, 
MORSE_RHBLK, sequence, request); } subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); @@ -320,5 +329,9 @@ int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, free(subA); free(subB); + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgesvd.c b/compute/zgesvd.c index f61ddd9b970dba43379b395d10c48b1d4e3298f0..4aea26f51876378bf7435b626237f9c3f31afe1f 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -398,6 +398,7 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, MORSE_desc_t descT; MORSE_desc_t descU, descVT; MORSE_desc_t descAB; + MORSE_desc_t D, *Dptr = NULL; MORSE_desc_t *subA, *subT, *subUVT; double *E; int M, N, MINMN, NB, LDAB; @@ -459,9 +460,14 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, NB = descA.mb; LDAB = NB + 1; uplo = M >= N ? MorseUpper : MorseLower; - +#if defined(CHAMELEON_COPY_DIAG) + { + morse_zdesc_alloc(D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); + Dptr = &D; + } +#endif /* Reduction to band */ - morse_pzgebrd_ge2gb( descA, descT, + morse_pzgebrd_ge2gb( descA, descT, D, sequence, request ); /* Allocate band structure */ @@ -556,12 +562,12 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, subUVT = morse_desc_submatrix(&descU, descU.mb, 0, descU.m-descU.mb, descU.n); subT = morse_desc_submatrix(&descT, descT.mb, 0, descT.m-descT.mb, descT.n-descT.nb); morse_pzunmqr( MorseLeft, MorseNoTrans, - subA, subUVT, subT, + subA, subUVT, subT, Dptr, sequence, request ); } else { morse_pzunmqr( MorseLeft, MorseNoTrans, - &descA, &descU, &descT, + &descA, &descU, &descT, Dptr, sequence, request ); } } @@ -569,7 +575,7 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, if ( jobvt != MorseNoVec ) { if ( M < N ){ morse_pzunmlq( MorseRight, MorseNoTrans, - &descA, &descVT, &descT, + &descA, &descVT, &descT, Dptr, sequence, request ); } else { @@ -577,7 +583,7 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum 
jobu, MORSE_enum jobvt, subUVT = morse_desc_submatrix(&descVT, 0, descVT.nb, descVT.m, descVT.n-descVT.nb); subT = morse_desc_submatrix(&descT, 0, descT.nb, descT.m-descT.mb, descT.n -descT.nb); morse_pzunmlq( MorseRight, MorseNoTrans, - subA, subUVT, subT, + subA, subUVT, subT, Dptr, sequence, request ); } } @@ -612,5 +618,9 @@ int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, if (jobvt != MorseNoVec) morse_desc_mat_free( &descVT ); free(E); + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zheevd.c b/compute/zheevd.c index 664fb2ab3f2968f77fc3fd173a8d2f8b36708225..671ffc5c16a668ce5c046075bde6b9841eee5dda 100644 --- a/compute/zheevd.c +++ b/compute/zheevd.c @@ -335,6 +335,7 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, MORSE_context_t *morse; MORSE_desc_t descA; MORSE_desc_t descT; + MORSE_desc_t D, *Dptr = NULL; MORSE_Complex64_t *Q2; int N, NB, status; double *E; @@ -468,13 +469,20 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, morse_desc_mat_free(&(descQ2)); morse_desc_mat_free(&(descV)) ); if (uplo == MorseLower) { +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif subA = morse_desc_submatrix(&descA, descA.mb, 0, descA.m -descA.mb, descA.n-descA.nb); subQ = morse_desc_submatrix(&descQ2, descQ2.mb, 0, descQ2.m-descQ2.mb, descQ2.n ); subT = morse_desc_submatrix(&descT, descT.mb, 0, descT.m -descT.mb, descT.n-descT.nb); /* Compute Q2 = Q1 * Q2 */ morse_pzunmqr( MorseLeft, MorseNoTrans, - subA, subQ, subT, + subA, subQ, subT, Dptr, sequence, request ); /* Compute the final eigenvectors A = (Q1 * Q2) * V */ @@ -485,13 +493,20 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, } else { +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, 
A->n, ); + Dptr = &D; + } +#endif subA = morse_desc_submatrix(&descA, 0, descA.nb, descA.m -descA.mb, descA.n -descA.nb ); subQ = morse_desc_submatrix(&descQ2, descQ2.mb, 0, descQ2.m-descQ2.mb, descQ2.n ); subT = morse_desc_submatrix(&descT, 0, descT.nb, descT.m -descT.mb, descT.n -descT.nb ); /* Compute Q2 = Q1^h * Q2 */ morse_pzunmlq( MorseLeft, MorseConjTrans, - subA, subQ, subT, + subA, subQ, subT, Dptr, sequence, request ); /* Compute the final eigenvectors A = (Q1^h * Q2) * V */ @@ -511,5 +526,9 @@ int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, free(V); free(E); + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zhetrd.c b/compute/zhetrd.c index b74a90afc880a8a8f044cb0d421e727077e4db53..e3a6179cc2d1f430bdca9ff15308364c7ea27297 100644 --- a/compute/zhetrd.c +++ b/compute/zhetrd.c @@ -334,6 +334,7 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, MORSE_desc_t descAB; int N, NB, LDAB; int status; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -387,9 +388,14 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, N = descA.m; NB = descA.mb; - +#if defined(CHAMELEON_COPY_DIAG) + { + morse_zdesc_alloc_diag(D, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); + Dptr = &D; + } +#endif /* Reduction to band. 
On exit, T contains reflectors */ - morse_pzhetrd_he2hb( uplo, A, T, + morse_pzhetrd_he2hb( uplo, A, T, Dptr, sequence, request ); LDAB = NB+1; @@ -419,7 +425,9 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed"); } #endif /* !defined(CHAMELEON_SIMULATION) */ - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } morse_desc_mat_free(&descAB); return MORSE_SUCCESS; } diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c index 1cdab39d879bfe7d19355e55a7e60e33a8af37fe..3943a31d8b13e452e163789721c1a7017a1e59f7 100644 --- a/compute/ztpgqrt.c +++ b/compute/ztpgqrt.c @@ -341,6 +341,7 @@ int MORSE_ztpgqrt_Tile_Async( int L, MORSE_sequence_t *sequence, MORSE_request_t *request ) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -395,15 +396,29 @@ int MORSE_ztpgqrt_Tile_Async( int L, morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } +#if defined(CHAMELEON_COPY_DIAG) + { + int minMT; + if (V1->m > V1->n) { + minMT = V1->nt; + } else { + minMT = V1->mt; + } + morse_zdesc_alloc_diag(D, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q); + Dptr = &D; + } +#endif /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */ morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request ); morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request ); - morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request ); + morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, Dptr, sequence, request ); /* } */ /* else { */ /* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */ /* } */ - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } return MORSE_SUCCESS; } diff --git a/compute/zunglq.c b/compute/zunglq.c index 78865dd0228bf36a3e672f5fcbff6ab8657b535f..0ee4758b652a79c973ae9c54278ca92be62c155c 100644 --- a/compute/zunglq.c +++ 
b/compute/zunglq.c @@ -160,7 +160,7 @@ int MORSE_zunglq(int M, int N, int K, /* morse_ziptile2lap( descQ, Q, NB, NB, LDQ, N, sequence, &request);*/ /* morse_sequence_wait(morse, sequence);*/ /* }*/ - + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; @@ -216,8 +216,8 @@ int MORSE_zunglq_Tile(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q) MORSE_zunglq_Tile_Async(A, T, Q, sequence, &request); morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); - RUNTIME_desc_getoncpu(Q); - + RUNTIME_desc_getoncpu(Q); + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; @@ -254,6 +254,7 @@ int MORSE_zunglq_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -298,13 +299,26 @@ int MORSE_zunglq_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ + +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunglq(A, Q, T, sequence, request); + morse_pzunglq(A, Q, T, Dptr, sequence, request); } else { - morse_pzunglqrh(A, Q, T, MORSE_RHBLK, sequence, request); + morse_pzunglqrh(A, Q, T, Dptr, MORSE_RHBLK, sequence, request); } + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zungqr.c b/compute/zungqr.c index cb136d4176f92d0ebffcbc451428efe7187ee053..122b1b911adce9a201c93dbfcc62a9c41ef6405a 100644 --- a/compute/zungqr.c +++ b/compute/zungqr.c @@ -253,6 +253,7 @@ int MORSE_zungqr_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, MORSE_sequence_t *sequence, 
MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -296,13 +297,25 @@ int MORSE_zungqr_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, if (N <= 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzungqr(A, Q, T, sequence, request); + morse_pzungqr(A, Q, T, Dptr, sequence, request); } else { - morse_pzungqrrh(A, Q, T, MORSE_RHBLK, sequence, request); + morse_pzungqrrh(A, Q, T, Dptr, MORSE_RHBLK, sequence, request); } + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunmlq.c b/compute/zunmlq.c index 1138f4aee3a86d9f12658ede2fc41f97587140a2..33cf72a309ad39382c9c15b17b60caf59b46dcb0 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -311,6 +311,7 @@ int MORSE_zunmlq_Tile_Async(MORSE_enum side, MORSE_enum trans, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -361,17 +362,28 @@ int MORSE_zunmlq_Tile_Async(MORSE_enum side, MORSE_enum trans, if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { if ( (trans == MorseConjTrans) && (side == MorseLeft) ) { - morse_pzunmlq(side, trans, A, C, T, sequence, request); + morse_pzunmlq(side, trans, A, C, T, Dptr, sequence, request); } else { - morse_pzunmlq(side, trans, A, C, T, sequence, request); + morse_pzunmlq(side, trans, A, C, T, Dptr, 
sequence, request); } } else { - morse_pzunmlqrh(side, trans, A, C, T, MORSE_RHBLK, sequence, request); + morse_pzunmlqrh(side, trans, A, C, T, Dptr, MORSE_RHBLK, sequence, request); } - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunmqr.c b/compute/zunmqr.c index 709947e87ccbe27cf26f30283026ba39b9aea3d6..a32d603674ffd69e6baf2dc2175d923a6b0dd808 100644 --- a/compute/zunmqr.c +++ b/compute/zunmqr.c @@ -312,6 +312,7 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -362,18 +363,31 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; */ - if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { + +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { if ( (trans == MorseConjTrans) && (side == MorseLeft) ) { - morse_pzunmqr(side, trans, A, C, T, sequence, request); + morse_pzunmqr(side, trans, A, C, T, Dptr, sequence, request); } else { - morse_pzunmqr(side, trans, A, C, T, sequence, request); + morse_pzunmqr(side, trans, A, C, T, Dptr, sequence, request); } } else { - morse_pzunmqrrh(side, trans, A, C, T, MORSE_RHBLK, sequence, request); + morse_pzunmqrrh(side, trans, A, C, T, Dptr, MORSE_RHBLK, sequence, request); } + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; return MORSE_SUCCESS; } diff --git a/control/compute_z.h b/control/compute_z.h index f9608bdb219ae7800a2e1c590fc1d7f29e695c2f..2ac2d5090918260460aad57952c551f59ff4e433 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -89,12 +89,12 @@ void morse_pzbarrier_row2tl(MORSE_desc_t *A, 
MORSE_sequence_t *sequence, MORSE_r void morse_pzbarrier_tl2pnl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzbarrier_tl2row(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgebrd_gb2bd(MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, MORSE_desc_t D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgemm(MORSE_enum transA, MORSE_enum transB, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgetmi2(MORSE_enum idep, MORSE_enum odep, MORSE_enum storev, int m, int n, int mb, int nb, MORSE_Complex64_t *A, MORSE_sequence_t *sequence, MORSE_request_t 
*request); void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzgetrf_nopiv(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); @@ -106,7 +106,7 @@ void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR void morse_pzherk(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc_t *A, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzher2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); #endif -void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlag2c(MORSE_desc_t *A, MORSE_desc_t *SB, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request); @@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t 
*T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request ); +void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); @@ -144,15 +144,15 @@ void morse_pztrsmrv(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_e void morse_pztrtri(MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzungbr(MORSE_enum side, MORSE_desc_t *A, MORSE_desc_t *O, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzungbrrh(MORSE_enum side, MORSE_desc_t *A, MORSE_desc_t *O, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, 
MORSE_request_t *request); +void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzungtr(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence,
MORSE_request_t *request); void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* user_build_callback, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pzgelqf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,