From fd2fcb03ada8abca7e588871b321fa5c7fa7b4d5 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 31 Jan 2018 17:20:46 +0100 Subject: [PATCH] Add migration and switch to TP kernels in unmlq algorithms --- compute/pzunmlq.c | 117 +++++++++++------ compute/pzunmlq_param.c | 283 +++++++++++++++++++++------------------- compute/pzunmlqrh.c | 162 +++++++++++++++-------- 3 files changed, 334 insertions(+), 228 deletions(-) diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index 9c9cfd679..28def3d11 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -3,8 +3,7 @@ * @copyright (c) 2009-2014 The University of Tennessee and The University * of Tennessee Research Foundation. * All rights reserved. - * @copyright (c) 2012-2016 Inria. All rights reserved. - * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. * **/ @@ -35,12 +34,12 @@ #define B(m,n) B, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define D(k) D, k, 0 +#define D(k) D, k, 0 #else -#define D(k) A, k, k +#define D(k) D, k, k #endif -/******************************************************************************* +/** * Parallel application of Q using tile V - LQ factorization - dynamic scheduling **/ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, @@ -72,6 +71,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, minMT = A->mt; } + if (D == NULL) { + D = A; + } + /* * zunmlq = A->mb * ib * ztsmlq = A->mb * ib @@ -133,24 +136,34 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, ldbm = BLKLDD(B, m); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb, - B(k, n), ldbk, - B(m, n), ldbm, + tempmm, tempnn, tempkmin, 0, ib, T->nb, A(k, m), ldak, - T(k, m), T->mb); + T(k, m), T->mb, + B(k, n), ldbk, + B(m, n), ldbm); } } + /* Restore the original location of the tiles */ + for (n = 0; n < B->nt; n++) { + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, k, n ) ); + } + RUNTIME_iteration_pop(morse); } } + /* + * MorseLeft / MorseConjTrans + */ else { - /* - * MorseLeft / MorseConjTrans - */ for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -162,15 +175,19 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmlq( + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb, - B(k, n), ldbk, - B(m, n), ldbm, + tempmm, tempnn, tempkmin, 0, ib, T->nb, A(k, m), ldak, - T(k, m), T->mb); + T(k, m), T->mb, + B(k, n), ldbk, + B(m, n), ldbm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -189,6 +206,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, #endif for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, k, n ) ); + MORSE_TASK_zunmlq( &options, side, trans, @@ -197,35 +218,38 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, T(k, k), T->mb, B(k, n), ldbk); } - RUNTIME_iteration_pop(morse); } } } + /* + * MorseRight / MorseNoTrans + */ else { if (trans == MorseNoTrans) { - /* - * MorseRight / MorseNoTrans - */ for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); - tempkn = k == B->nt -1 ? B->n -k*B->nb : B->nb; - tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; + tempkn = k == B->nt - 1 ? B->n - k * B->nb : B->nb; + tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb; ldak = BLKLDD(A, k); for (n = B->nt-1; n > k; n--) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb, - B(m, k), ldbm, - B(m, n), ldbm, + tempmm, tempnn, tempkmin, 0, ib, T->nb, A(k, n), ldak, - T(k, n), T->mb); + T(k, n), T->mb, + B(m, k), ldbm, + B(m, n), ldbm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -245,6 +269,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); + + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, k ) ); + MORSE_TASK_zunmlq( &options, side, trans, @@ -257,14 +285,14 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, RUNTIME_iteration_pop(morse); } } + /* + * MorseRight / MorseConjTrans + */ else { - /* - * MorseRight / MorseConjTrans - */ for (k = 0; k < minMT; k++) { RUNTIME_iteration_push(morse, k); - tempkn = k == B->nt -1 ? B->n -k*B->nb : B->nb; + tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; tempkmin = k == minMT-1 ? 
minM-k*A->mb : A->mb; ldak = BLKLDD(A, k); #if defined(CHAMELEON_COPY_DIAG) @@ -297,17 +325,27 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb, - B(m, k), ldbm, - B(m, n), ldbm, + tempmm, tempnn, tempkmin, 0, ib, T->nb, A(k, n), ldak, - T(k, n), T->mb); + T(k, n), T->mb, + B(m, k), ldbm, + B(m, n), ldbm); } } + /* Restore the original location of the tiles */ + for (m = 0; m < B->mt; m++) { + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, k ) ); + } + RUNTIME_iteration_pop(morse); } } @@ -315,5 +353,4 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - (void)D; } diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c index 5ad102180..8fce43522 100644 --- a/compute/pzunmlq_param.c +++ b/compute/pzunmlq_param.c @@ -27,13 +27,8 @@ #define A(m,n) A, m, n #define B(m,n) B, m, n -#define TS(m,n) TS, m, n -#define TT(m,n) TT, m, n -#if defined(CHAMELEON_COPY_DIAG) -#define D(m,n) D, m, n -#else -#define D(m,n) A, m, n -#endif +#define T(m,n) T, m, n +#define D(m,n) D, m, n /** * Parallel application of Q using tile V - LQ factorization - dynamic scheduling @@ -46,13 +41,14 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, { MORSE_context_t *morse; MORSE_option_t options; + MORSE_desc_t *T; size_t ws_worker = 0; size_t ws_host = 0; int k, m, n, i, p; int ldbm, ldak, ldbp; int tempnn, temppn, tempkmin, tempmm, tempkm; - int ib, K; + int ib, K, L; int *tiles; morse = morse_context_self(); @@ -64,6 +60,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, K = chameleon_min(A->mt, A->nt); + if (D == NULL) { + D = A; + } + /* * zunmlq = A->nb * ib * ztsmlq = 
A->nb * ib @@ -99,6 +99,7 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); @@ -125,10 +126,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_TASK_zunmlq( &options, side, trans, - temppn, tempnn, tempkmin, ib, TS->nb, - D( k, p), ldak, - TS(k, p), TS->mb, - B( p, n), ldbp); + temppn, tempnn, tempkmin, ib, T->nb, + D(k, p), ldak, + T(k, p), T->mb, + B(p, n), ldbp); } } @@ -145,40 +146,45 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, /* TT or TS */ if(qrtree->gettype(qrtree, k, m) == 0){ - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - - MORSE_TASK_ztsmlq( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( k, m), ldak, - TS(k, m), TS->mb); - } + L = 0; + T = TS; } else { - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - - MORSE_TASK_zttmlq( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( k, m), ldak, - TT(k, m), TS->mb); - } + L = A->nb; + T = TT; + } + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(p, n), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( + &options, + side, trans, + tempmm, tempnn, tempkm, chameleon_min( L, tempnn ), ib, T->nb, + A(k, m), ldak, + T(k, m), T->mb, + B(p, n), ldbp, + B(m, n), ldbm); } } + + /* Restore the original location of the tiles */ + for (n = 0; n < B->nt; n++) { + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, k, n ) ); + } + RUNTIME_iteration_pop(morse); } - } else { - /* - * MorseLeft / MorseConjTrans - */ + } + /* + * MorseLeft / MorseConjTrans + */ + else { for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -198,32 +204,33 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, /* TT or TS */ if(qrtree->gettype(qrtree, k, m) == 0){ - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmlq( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( k, m), ldak, - TS(k, m), TS->mb); - } + L = 0; + T = TS; } else { - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_zttmlq( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( k, m), ldak, - TT(k, m), TT->mb); - } + L = A->nb; + T = TT; + } + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(p, n), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( + &options, + side, trans, + tempmm, tempnn, tempkm, chameleon_min(L, tempnn), ib, T->nb, + A(k, m), ldak, + T(k, m), T->mb, + B(p, n), ldbp, + B(m, n), ldbm); } } + + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); @@ -247,23 +254,28 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, #endif for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(p, n), + B->get_rankof( B, p, n ) ); + MORSE_TASK_zunmlq( &options, side, trans, - temppn, tempnn, tempkmin, ib, TS->nb, - D( k, p), ldak, - TS(k, p), TS->mb, - B( p, n), ldbp); + temppn, tempnn, tempkmin, ib, T->nb, + D(k, p), ldak, + T(k, p), T->mb, + B(p, n), ldbp); } } RUNTIME_iteration_pop(morse); } } - } else { + } + /* + * MorseRight / MorseNoTrans + */ + else { if (trans == MorseNoTrans) { - /* - * MorseRight / MorseNoTrans - */ for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -280,37 +292,36 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldbp = BLKLDD(B, p); - /* TT or TS */ - + /* TS or TT */ if(qrtree->gettype(qrtree, k, n) == 0){ - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmlq( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb, - B( m, p), ldbm, - B( m, n), ldbm, - A( k, n), ldak, - TS(k, n), TS->mb); - } + L = 0; + T = TS; } else { - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_zttmlq( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb, - B( m, p), ldbm, - B( m, n), ldbm, - A( k, n), ldak, - TT(k, n), TT->mb); - } + L = tempnn; + T = TT; + } + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + + RUNTIME_data_migrate( sequence, B(m, p), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( + &options, + side, trans, + tempmm, tempnn, tempkm, L, ib, T->nb, + A(k, n), ldak, + T(k, n), T->mb, + B(m, p), ldbm, + B(m, n), ldbm); } } + + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); @@ -334,26 +345,33 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, for (m = 0; m < B->mt; m++) { ldbm = BLKLDD(B, m); tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + + RUNTIME_data_migrate( sequence, B(m, p), + B->get_rankof( B, m, p ) ); + MORSE_TASK_zunmlq( &options, side, trans, - tempmm, temppn, tempkmin, ib, TS->nb, - D( k, p), ldak, - TS(k, p), TS->mb, - B( m, p), ldbm); + tempmm, temppn, tempkmin, ib, T->nb, + D(k, p), ldak, + T(k, p), T->mb, + B(m, p), ldbm); } } RUNTIME_iteration_pop(morse); } - } else { - /* - * MorseRight / MorseConjTrans - */ + } + /* + * MorseRight / MorseConjTrans + */ + else { for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); tempkm = k == A->mt-1 ? 
A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); + + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); @@ -381,10 +399,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_TASK_zunmlq( &options, side, trans, - tempmm, temppn, tempkmin, ib, TS->nb, - D( k, p), ldak, - TS(k, p), TS->mb, - B( m, p), ldbm); + tempmm, temppn, tempkmin, ib, T->nb, + D(k, p), ldak, + T(k, p), T->mb, + B(m, p), ldbm); } } /* Setting the order of tiles */ @@ -398,32 +416,31 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, ldbp = BLKLDD(B, p); if(qrtree->gettype(qrtree, k, n) == 0){ - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmlq( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( k, n), ldak, - TS(k, n), TS->mb); - } + L = 0; + T = TS; } else { - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_zttmlq( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( k, n), ldak, - TT(k, n), TT->mb); - } + L = tempnn; + T = TT; + } + + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + + RUNTIME_data_migrate( sequence, B(m, p), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( + &options, + side, trans, + tempmm, tempnn, tempkm, L, ib, T->nb, + A(k, n), ldak, + T(k, n), T->mb, + B(m, p), ldbm, + B(m, n), ldbm); } } @@ -435,6 +452,4 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, free(tiles); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - - (void)D; } diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c index 8444f0bb3..0b4af0cb8 100644 --- a/compute/pzunmlqrh.c +++ b/compute/pzunmlqrh.c @@ -34,7 +34,7 @@ #define A(m,n) A, (m), (n) #define B(m,n) B, (m), (n) #define T(m,n) T, (m), (n) -#define T2(m,n) T, (m), (n)+A->nt +#define T2(m,n) T, (m), ((n)+A->nt) #if defined(CHAMELEON_COPY_DIAG) #define D(m,n) D, ((n)/BS), 0 #else @@ -133,15 +133,19 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ldbm = BLKLDD(B, m); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmlq( - &options, - side, trans, - B->nb, tempnn, tempmm, tempnn, - tempkm, ib, T->nb, - B(N, n), ldbN, - B(m, n), ldbm, + + RUNTIME_data_migrate( sequence, B(N, n), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( + &options, side, trans, + tempmm, tempnn, tempkm, 0, ib, T->nb, A(k, m), ldak, - T(k, m), T->mb); + T(k, m), T->mb, + B(N, n), ldbN, + B(m, n), ldbm); } } } @@ -152,19 +156,30 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ldbNRD = BLKLDD(B, N+RD); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; - MORSE_TASK_zttmlq( + + RUNTIME_data_migrate( sequence, B(N, n), + B->get_rankof( B, N+RD, n ) ); + RUNTIME_data_migrate( sequence, B(N+RD, n), + B->get_rankof( B, N+RD, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - B->mb, tempnn, tempNRDn, tempnn, - tempkm, ib, T->nb, - B (N, n), ldbN, - B (N+RD, n), ldbNRD, + tempNRDn, tempnn, tempkm, tempnn, ib, T->nb, A (k, N+RD), ldak, - T2(k, N+RD), T->mb); + T2(k, N+RD), T->mb, + B (N, n), ldbN, + B (N+RD, n), ldbNRD); } } } + /* Restore the original location of the tiles */ + for (n = 0; n < B->nt; n++) { + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, k, n ) ); + } + RUNTIME_iteration_pop(morse); } } else { @@ -186,15 +201,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ldbNRD = BLKLDD(B, N+RD); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_zttmlq( + + RUNTIME_data_migrate( sequence, B(N, n), + B->get_rankof( B, N+RD, n ) ); + RUNTIME_data_migrate( sequence, B(N+RD, n), + B->get_rankof( B, N+RD, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - B->nb, tempnn, tempNRDn, tempnn, - tempkm, ib, T->nb, - B (N, n), ldbN, - B (N+RD, n), ldbNRD, + tempNRDn, tempnn, tempkm, tempnn, ib, T->nb, A (k, N+RD), ldak, - T2(k, N+RD), T->mb); + T2(k, N+RD), T->mb, + B (N, n), ldbN, + B (N+RD, n), ldbNRD); } } } @@ -207,15 +227,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ldbm = BLKLDD(B, m); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, B(N, n), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - B->mb, tempnn, tempmm, tempnn, - tempkm, ib, T->nb, - B(N, n), ldbN, - B(m, n), ldbm, + tempmm, tempnn, tempkm, 0, ib, T->nb, A(k, m), ldak, - T(k, m), T->mb); + T(k, m), T->mb, + B(N, n), ldbN, + B(m, n), ldbm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -244,12 +269,11 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, B(N, n), ldbN); } } - RUNTIME_iteration_pop(morse); } - } - } else { + } + else { if (trans == MorseNoTrans) { /* * MorseRight / MorseNoTrans @@ -268,15 +292,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { ldbm = BLKLDD(B, m); tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - MORSE_TASK_zttmlq( + + RUNTIME_data_migrate( sequence, B(m, N), + B->get_rankof( B, m, N+RD ) ); + RUNTIME_data_migrate( sequence, B(m, N+RD), + B->get_rankof( B, m, N+RD ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - tempmm, B->nb, tempmm, tempNRDn, - tempkm, ib, T->nb, - B (m, N ), ldbm, - B (m, N+RD), ldbm, + tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb, A (k, N+RD), ldak, - T2(k, N+RD), T->mb); + T2(k, N+RD), T->mb, + B (m, N ), ldbm, + B (m, N+RD), ldbm); } } } @@ -288,15 +317,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, B(m, N), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - tempmm, B->nb, tempmm, tempnn, - tempkm, ib, T->nb, - B(m, N), ldbm, - B(m, n), ldbm, + tempmm, tempnn, tempkm, 0, ib, T->nb, A(k, n), ldak, - T(k, n), T->mb); + T(k, n), T->mb, + B(m, N), ldbm, + B(m, n), ldbm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -316,6 +350,10 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); + + RUNTIME_data_migrate( sequence, B(m, N), + B->get_rankof( B, m, N ) ); + MORSE_TASK_zunmlq( &options, side, trans, @@ -372,15 +410,20 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmlq( + + RUNTIME_data_migrate( sequence, B(m, N), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - tempmm, tempNn, tempmm, tempnn, - tempkm, ib, T->nb, - B(m, N), ldbm, - B(m, n), ldbm, + tempmm, tempnn, tempkm, 0, ib, T->nb, A(k, n), ldak, - T(k, n), T->mb); + T(k, n), T->mb, + B(m, N), ldbm, + B(m, n), ldbm); } } } @@ -390,19 +433,30 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_zttmlq( + + RUNTIME_data_migrate( sequence, B(m, N), + B->get_rankof( B, m, N+RD ) ); + RUNTIME_data_migrate( sequence, B(m, N+RD), + B->get_rankof( B, m, N+RD ) ); + + MORSE_TASK_ztpmlqt( &options, side, trans, - tempmm, B->nb, tempmm, tempNRDn, - tempkm, ib, T->nb, - B (m, N ), ldbm, - B (m, N+RD), ldbm, + tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb, A (k, N+RD), ldak, - T2(k, N+RD), T->mb); + T2(k, N+RD), T->mb, + B (m, N ), ldbm, + B (m, N+RD), ldbm); } } } + /* Restore the original location of the tiles */ + for (m = 0; m < B->mt; m++) { + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, k ) ); + } + RUNTIME_iteration_pop(morse); } } -- GitLab