diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 2d28d719c13d1855e0ab7fb5c698fc2dc6388743..3cec88b5714ad8d1c0ea2e84084be1062348d7db 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -35,7 +35,7 @@ #if defined(CHAMELEON_COPY_DIAG) #define D(k) D, k, 0 #else -#define D(k) A, k, k +#define D(k) D, k, k #endif /******************************************************************************* @@ -67,6 +67,10 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, minMNT = A->mt; } + if ( D == NULL ) { + D = A; + } + /* * zgelqt = A->nb * (ib+1) * zunmlq = A->nb * ib @@ -130,26 +134,40 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, } for (n = k+1; n < A->nt; n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - MORSE_TASK_ztslqt( + + RUNTIME_data_migrate( sequence, A(k, k), + A->get_rankof( A, k, n ) ); + + MORSE_TASK_ztplqt( &options, - tempkm, tempnn, ib, T->nb, + tempkm, tempnn, 0, ib, T->nb, A(k, k), ldak, A(k, n), ldak, T(k, n), T->mb); for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, A(m, k), + A->get_rankof( A, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, MorseRight, MorseConjTrans, - tempmm, A->nb, tempmm, tempnn, A->mb, ib, T->nb, - A(m, k), ldam, - A(m, n), ldam, + tempmm, tempnn, A->mb, 0, ib, T->nb, A(k, n), ldak, - T(k, n), T->mb); + T(k, n), T->mb, + A(m, k), ldam, + A(m, n), ldam); } } + /* Restore the original location of the tiles */ + for (m = k; m < A->mt; m++) { + RUNTIME_data_migrate( sequence, A(m, k), + A->get_rankof( A, m, k ) ); + } + RUNTIME_iteration_pop(morse); } diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c index 8a889b05c659d96c106362d8b1c82d7fbf0ff569..0977e086927cada79e5ee8c62302507d9c126f69 100644 --- a/compute/pzgelqf_param.c +++ b/compute/pzgelqf_param.c @@ -26,14 +26,10 @@ #include <stdlib.h> #include "libhqr.h" -#define A(m,n) A, (m), (n) -#define TS(m,n) TS, (m), (n) -#define TT(m,n) TT, (m), (n) -#if defined(CHAMELEON_COPY_DIAG) -#define D(m,n) D, (m), (n) -#else -#define D(m,n) A, (m), (n) -#endif +#define A(m,n) A, (m), (n) +#define T(m,n) T, (m), (n) +#define D(m,n) D, (m), (n) + /* * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling @@ -44,11 +40,12 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, { MORSE_context_t *morse; MORSE_option_t options; + MORSE_desc_t *T; size_t ws_worker = 0; size_t ws_host = 0; int k, m, n, i, p; - int K; + int K, L; int ldak, ldam; int tempkmin, tempkm, tempnn, tempmm, temppn; int ib; @@ -61,6 +58,10 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, ib = MORSE_IB; + if ( D == NULL ) { + D = A; + } + /* * zgelqt = A->nb * (ib+1) * zunmlq = A->nb * ib @@ -98,6 +99,8 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); + + T = TS; /* The number of geqrt to apply */ for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); @@ -106,9 +109,9 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_TASK_zgelqt( &options, - tempkm, temppn, ib, TS->nb, + tempkm, temppn, ib, T->nb, A( k, p), ldak, - TS(k, p), TS->mb); + T(k, p), T->mb); if ( k < (A->mt-1) ) { #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -131,10 +134,10 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_TASK_zunmlq( &options, MorseRight, MorseConjTrans, - tempmm, temppn, tempkmin, ib, TS->nb, - D( k, p), ldak, - TS(k, p), TS->mb, - A( m, p), ldam); + tempmm, temppn, tempkmin, ib, T->nb, + D(k, p), ldak, + T(k, p), T->mb, + A(m, p), ldam); } } @@ -146,58 +149,60 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, p = qrtree->currpiv(qrtree, k, n); tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; /* Tiles killed is a TS */ - if(qrtree->gettype(qrtree, k, n) == 0){ - MORSE_TASK_ztslqt( - &options, - tempkm, tempnn, ib, TS->nb, - A( k, p), ldak, - A( k, n), ldak, - TS(k, n), TS->mb); - - for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - MORSE_TASK_ztsmlq( - &options, - MorseRight, MorseConjTrans, - tempmm, A->nb, tempmm, tempnn, tempkm, ib, TS->nb, - A( m, p), ldam, - A( m, n), ldam, - A( k, n), ldak, - TS(k, n), TS->mb); - } + if (qrtree->gettype(qrtree, k, n) == 0) { + T = TS; + L = 0; } - - /* Tiles killed is a TT */ else { - MORSE_TASK_zttlqt( + T = TT; + L = tempnn; + } + + RUNTIME_data_migrate( sequence, A(k, p), + A->get_rankof( A, k, n ) ); + RUNTIME_data_migrate( sequence, A(k, n), + A->get_rankof( A, k, n ) ); + + MORSE_TASK_ztplqt( + &options, + tempkm, tempnn, chameleon_min(L, tempkm), ib, T->nb, + A(k, p), ldak, + A(k, n), ldak, + T(k, n), T->mb); + + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + ldam = BLKLDD(A, m); + + RUNTIME_data_migrate( sequence, A(m, p), + A->get_rankof( A, m, n ) ); + RUNTIME_data_migrate( sequence, A(m, n), + A->get_rankof( A, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, - tempkm, tempnn, ib, TT->nb, - A( k, p), ldak, - A( k, n), ldak, - TT(k, n), TT->mb); - - for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - MORSE_TASK_zttmlq( - &options, - MorseRight, MorseConjTrans, - tempmm, A->nb, tempmm, tempnn, tempkm, ib, TT->nb, - A( m, p), ldam, - A( m, n), ldam, - A( k, n), ldak, - TT(k, n), TT->mb); - } + MorseRight, MorseConjTrans, + tempmm, tempnn, tempkm, L, ib, T->nb, + A(k, n), ldak, + T(k, n), T->mb, + A(m, p), ldam, + A(m, n), ldam); } } + + /* Restore the original location of the tiles */ + for (m = k; m < A->mt; m++) { + RUNTIME_data_migrate( sequence, A(m, k), + A->get_rankof( A, m, k ) ); + } + RUNTIME_iteration_pop(morse); } free(tiles); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - (void)D; } diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index ad11204ad69da663e4c680896ffe286436fb90cf..0ff1d84ddb0c2ff61c7d4a4e1d63f319471114a8 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -131,11 +131,16 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, T(k, N), T->mb, A(m, N), ldam); } + for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - MORSE_TASK_ztslqt( + + RUNTIME_data_migrate( sequence, A(k, N), + A->get_rankof( A, k, n ) ); + + MORSE_TASK_ztplqt( &options, - tempkm, tempnn, ib, T->nb, + tempkm, tempnn, 0, ib, T->nb, A(k, N), ldak, A(k, n), ldak, T(k, n), T->mb); @@ -143,23 +148,33 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, A(m, N), + A->get_rankof( A, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, MorseRight, MorseConjTrans, - tempmm, A->nb, tempmm, tempnn, tempkm, ib, T->nb, - A(m, N), ldam, - A(m, n), ldam, + tempmm, tempnn, tempkm, 0, ib, T->nb, A(k, n), ldak, - T(k, n), T->mb); + T(k, n), T->mb, + A(m, N), ldam, + A(m, n), ldam); } } } for (RD = BS; RD < A->nt-k; RD *= 2) { for (N = k; N+RD < A->nt; N += 2*RD) { tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb; - MORSE_TASK_zttlqt( + + RUNTIME_data_migrate( sequence, A(k, N), + A->get_rankof( A, k, N+RD ) ); + RUNTIME_data_migrate( sequence, A(k, N+RD), + A->get_rankof( A, k, N+RD ) ); + + MORSE_TASK_ztplqt( &options, - tempkm, tempNRDn, ib, T->nb, + tempkm, tempNRDn, chameleon_min(tempNRDn, tempkm), ib, T->nb, A (k, N ), ldak, A (k, N+RD), ldak, T2(k, N+RD), T->mb); @@ -167,17 +182,30 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m ); - MORSE_TASK_zttmlq( + + RUNTIME_data_migrate( sequence, A(m, N), + A->get_rankof( A, m, N+RD ) ); + RUNTIME_data_migrate( sequence, A(m, N+RD), + A->get_rankof( A, m, N+RD ) ); + + MORSE_TASK_ztpmlqt( &options, MorseRight, MorseConjTrans, - tempmm, A->nb, tempmm, tempNRDn, tempkm, ib, T->nb, - A (m, N ), ldam, - A (m, N+RD), ldam, + tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb, A (k, N+RD), ldak, - T2(k, N+RD), T->mb); + T2(k, N+RD), T->mb, + A (m, N ), ldam, + A (m, N+RD), ldam); } } } + + /* Restore the original location of the tiles */ + for (m = k; m < A->mt; m++) { + RUNTIME_data_migrate( sequence, A(m, k), + A->get_rankof( A, m, k ) ); + } + RUNTIME_iteration_pop(morse); }