diff --git a/compute/pzunglq.c b/compute/pzunglq.c index 13a034ce3d64ecf8f1c5f1abf47e41a09e494141..de9c1409fe92f64fddc26dc0de39554ac0cc8ee9 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -34,9 +34,9 @@ #define Q(m,n) Q, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define D(k) D, k, 0 +#define D(k) D, k, 0 #else -#define D(k) A, k, k +#define D(k) D, k, k #endif /******************************************************************************* @@ -69,6 +69,10 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc minMT = A->mt; } + if (D == NULL) { + D = A; + } + /* * zunmlq = A->nb * ib * ztsmlq = A->nb * ib @@ -103,14 +107,18 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc for (m = 0; m < Q->mt; m++) { tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldqm = BLKLDD(Q, m); - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, Q(m, k), + Q->get_rankof( Q, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, MorseRight, MorseNoTrans, - tempmm, Q->nb, tempmm, tempnn, tempAkm, ib, T->nb, - Q(m, k), ldqm, - Q(m, n), ldqm, + tempmm, tempnn, tempAkm, 0, ib, T->nb, A(k, n), ldak, - T(k, n), T->mb); + T(k, n), T->mb, + Q(m, k), ldqm, + Q(m, n), ldqm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -130,6 +138,10 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc for (m = k; m < Q->mt; m++) { tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldqm = BLKLDD(Q, m); + + RUNTIME_data_migrate( sequence, Q(m, k), + Q->get_rankof( Q, m, k ) ); + MORSE_TASK_zunmlq( &options, MorseRight, MorseNoTrans, @@ -144,5 +156,4 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - (void)D; } diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c index adc3512ff57fa88d9b79b1272eddee14cdad87c4..8346cbd39d92b3917276a6043db912eefeb785de 100644 --- a/compute/pzunglq_param.c +++ b/compute/pzunglq_param.c @@ -25,15 +25,10 @@ #include "control/common.h" #include <stdlib.h> -#define A(m,n) A, (m), (n) -#define Q(m,n) Q, (m), (n) -#define TS(m,n) TS, (m), (n) -#define TT(m,n) TT, (m), (n) -#if defined(CHAMELEON_COPY_DIAG) +#define A(m,n) A, (m), (n) +#define Q(m,n) Q, (m), (n) +#define T(m,n) T, (m), (n) #define D(m,n) D, (m), (n) -#else -#define D(m,n) A, (m), (n) -#endif /** * Parallel construction of Q using tile V - dynamic scheduling @@ -44,11 +39,12 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des { MORSE_context_t *morse; MORSE_option_t options; + MORSE_desc_t *T; size_t ws_worker = 0; size_t ws_host = 0; int k, m, n, i, p; - int K; + int K, L; int ldak, ldqm; int tempkm, tempkmin, temppn, tempnn, tempmm; int ib; @@ -61,6 +57,10 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des ib = MORSE_IB; + if (D == NULL) { + D = A; + } + /* * zunmqr = A->nb * ib * ztsmqr = A->nb * ib @@ -103,37 +103,36 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; - /* TT or TS */ - + /* TS or TT */ if(qrtree->gettype(qrtree, k, n) == 0){ - for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; - ldqm = BLKLDD(Q, m); - MORSE_TASK_ztsmlq( - &options, - MorseRight, MorseNoTrans, - tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TS->nb, - Q( m, p), ldqm, - Q( m, n), ldqm, - A( k, n), ldak, - TS(k, n), TS->mb); - } + L = 0; + T = TS; } else { - for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; - ldqm = BLKLDD(Q, m); - MORSE_TASK_zttmlq( - &options, - MorseRight, MorseNoTrans, - tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TT->nb, - Q( m, p), ldqm, - Q( m, n), ldqm, - A( k, n), ldak, - TT(k, n), TT->mb); - } + L = tempnn; + T = TT; + } + for (m = k; m < Q->mt; m++) { + tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + ldqm = BLKLDD(Q, m); + + RUNTIME_data_migrate( sequence, Q(m, p), + Q->get_rankof( Q, m, n ) ); + RUNTIME_data_migrate( sequence, Q(m, n), + Q->get_rankof( Q, m, n ) ); + + MORSE_TASK_ztpmlqt( + &options, + MorseRight, MorseNoTrans, + tempmm, tempnn, tempkm, L, ib, T->nb, + A(k, n), ldak, + T(k, n), T->mb, + Q(m, p), ldqm, + Q(m, n), ldqm); } } + + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); @@ -157,13 +156,17 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des for (m = k; m < Q->mt; m++) { tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldqm = BLKLDD(Q, m); + + RUNTIME_data_migrate( sequence, Q(m, p), + Q->get_rankof( Q, m, p ) ); + MORSE_TASK_zunmlq( &options, MorseRight, MorseNoTrans, - tempmm, temppn, tempkmin, ib, TS->nb, - D( k, p), ldak, - TS(k, p), TS->mb, - Q( m, p), ldqm); + tempmm, temppn, tempkmin, ib, T->nb, + D(k, p), ldak, + T(k, p), T->mb, + Q(m, p), ldqm); } } RUNTIME_iteration_pop(morse); @@ -172,5 +175,4 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des free(tiles); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - (void)D; } diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index 72836940f2ef8f4e87a7b218357e2f2b17e9fff2..2ae69ce0328649c2c0386eddd646f95c9976477a 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -102,15 +102,20 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, for (m = k; m < Q->mt; m++) { tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldqm = BLKLDD(Q, m ); - MORSE_TASK_zttmlq( + + RUNTIME_data_migrate( sequence, Q(m, N), + Q->get_rankof( Q, m, N+RD ) ); + RUNTIME_data_migrate( sequence, Q(m, N+RD), + Q->get_rankof( Q, m, N+RD ) ); + + MORSE_TASK_ztpmlqt( &options, MorseRight, MorseNoTrans, - tempmm, Q->nb, tempmm, tempNRDn, - tempkm, ib, T->nb, - Q (m, N ), ldqm, - Q (m, N+RD), ldqm, + tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb, A (k, N+RD), ldak, - T2(k, N+RD), T->mb); + T2(k, N+RD), T->mb, + Q (m, N ), ldqm, + Q (m, N+RD), ldqm); } } } @@ -123,15 +128,20 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, for (m = k; m < Q->mt; m++) { tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldqm = BLKLDD(Q, m); - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, Q(m, N), + Q->get_rankof( Q, m, n ) ); + RUNTIME_data_migrate( sequence, Q(m, n), + Q->get_rankof( Q, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, MorseRight, MorseNoTrans, - tempmm, Q->nb, tempmm, tempnn, - tempkm, ib, T->nb, - Q(m, N), ldqm, - Q(m, n), ldqm, + tempmm, tempnn, tempkm, 0, ib, T->nb, A(k, n), ldak, - T(k, n), T->mb); + T(k, n), T->mb, + Q(m, N), ldqm, + Q(m, n), ldqm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -151,6 +161,10 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, for (m = k; m < Q->mt; m++) { tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldqm = BLKLDD(Q, m); + + RUNTIME_data_migrate( sequence, Q(m, N), + Q->get_rankof( Q, m, N ) ); + MORSE_TASK_zunmlq( &options, MorseRight, MorseNoTrans, diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c index 0b4af0cb85ff14a8f1cef95700f8e72dac5d03ea..6dfffc232f00eb15fde9455834b5c23a1636c4eb 100644 --- a/compute/pzunmlqrh.c +++ b/compute/pzunmlqrh.c @@ -259,6 +259,10 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, #endif for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(N, n), + B->get_rankof( B, N, n ) ); + MORSE_TASK_zunmlq( &options, side, trans,