From 7e28d2d5ecc0200745ed1cab5cf9f76bee582ace Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 31 Jan 2018 13:49:50 +0100 Subject: [PATCH] Add migration and tp kernels to unmqr functions --- compute/pzunmqr.c | 92 +++++++++---- compute/pzunmqr_param.c | 281 +++++++++++++++++++++------------------- compute/pzunmqrrh.c | 186 ++++++++++++++++---------- 3 files changed, 331 insertions(+), 228 deletions(-) diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index 4e81716dd..e9d74dea3 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -3,8 +3,7 @@ * @copyright (c) 2009-2014 The University of Tennessee and The University * of Tennessee Research Foundation. * All rights reserved. - * @copyright (c) 2012-2016 Inria. All rights reserved. - * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. * **/ @@ -35,12 +34,12 @@ #define B(m,n) B, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define D(k) D, k, 0 +#define D(k) D, k, 0 #else -#define D(k) A, k, k +#define D(k) D, k, k #endif -/******************************************************************************* +/** * Parallel application of Q using tile V - QR factorization - dynamic scheduling **/ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, @@ -72,6 +71,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, minMT = A->mt; } + if (D == NULL) { + D = A; + } + /* * zunmqr = A->nb * ib * ztsmqr = A->nb * ib @@ -134,17 +137,27 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ldbm = BLKLDD(B, m); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmqr( + + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, side, trans, - B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb, - B(k, n), ldbk, - B(m, n), ldbm, + tempmm, tempnn, tempkmin, 0, ib, T->nb, A(m, k), ldam, - T(m, k), T->mb); + T(m, k), T->mb, + B(k, n), ldbk, + B(m, n), ldbm); } } + /* Restore the original location of the tiles */ + for (n = 0; n < B->nt; n++) { + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, k, n ) ); + } + RUNTIME_iteration_pop(morse); } } @@ -165,14 +178,18 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ldbm = BLKLDD(B, m); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmqr( + + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, side, trans, - B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb, - B(k, n), ldbk, - B(m, n), ldbm, + tempmm, tempnn, tempkmin, 0, ib, T->nb, A(m, k), ldam, - T(m, k), T->mb); + T(m, k), T->mb, + B(k, n), ldbk, + B(m, n), ldbm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -189,8 +206,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, D(k), ldak ); #endif #endif + for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, k, n ) ); + MORSE_TASK_zunmqr( &options, side, trans, @@ -199,7 +221,6 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, T(k, k), T->mb, B(k, n), ldbk); } - RUNTIME_iteration_pop(morse); } } @@ -222,14 +243,18 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmqr( + + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, side, trans, - tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb, - B(m, k), ldbm, - B(m, n), ldbm, + tempmm, tempnn, tempkmin, 0, ib, T->nb, A(n, k), ldan, - T(n, k), T->mb); + T(n, k), T->mb, + B(m, k), ldbm, + B(m, n), ldbm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -249,6 +274,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); + + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, k ) ); + MORSE_TASK_zunmqr( &options, side, trans, @@ -302,17 +331,27 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmqr( + + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, side, trans, - tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb, - B(m, k), ldbm, - B(m, n), ldbm, + tempmm, tempnn, tempkmin, 0, ib, T->nb, A(n, k), ldan, - T(n, k), T->mb); + T(n, k), T->mb, + B(m, k), ldbm, + B(m, n), ldbm); } } + /* Restore the original location of the tiles */ + for (m = 0; m < B->mt; m++) { + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, k ) ); + } + RUNTIME_iteration_pop(morse); } } @@ -320,5 +359,4 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - (void)D; } diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c index ae9b46d5e..2390cb761 100644 --- a/compute/pzunmqr_param.c +++ b/compute/pzunmqr_param.c @@ -27,13 +27,8 @@ #define A(m,n) A, m, n #define B(m,n) B, m, n -#define TS(m,n) TS, m, n -#define TT(m,n) TT, m, n -#if defined(CHAMELEON_COPY_DIAG) -#define D(m,n) D, m, n -#else -#define D(m,n) A, m, n -#endif 
+#define T(m,n) T, m, n +#define D(m,n) D, m, n /** * Parallel application of Q using tile V - QR factorization - dynamic scheduling @@ -45,13 +40,14 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, { MORSE_context_t *morse; MORSE_option_t options; + MORSE_desc_t *T; size_t ws_worker = 0; size_t ws_host = 0; int k, m, n, i, p; int ldam, ldan, ldbm, ldbp; int tempnn, tempkmin, tempmm, tempkn; - int ib, K; + int ib, K, L; int *tiles; morse = morse_context_self(); @@ -63,6 +59,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, K = chameleon_min(A->mt, A->nt); + if (D == NULL) { + D = A; + } + /* * zunmqr = A->nb * ib * ztsmqr = A->nb * ib @@ -80,7 +80,6 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, #endif /* Initialisation of tiles */ - tiles = (int*)calloc( qrtree->mt, sizeof(int) ); ws_worker *= sizeof(MORSE_Complex64_t); @@ -98,6 +97,7 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { m = qrtree->getm(qrtree, k, i); @@ -125,10 +125,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_TASK_zunmqr( &options, side, trans, - tempmm, tempnn, tempkmin, ib, TS->nb, - D( m, k), ldam, - TS(m, k), TS->mb, - B( m, n), ldbm); + tempmm, tempnn, tempkmin, ib, T->nb, + D(m, k), ldam, + T(m, k), T->mb, + B(m, n), ldbm); } } /* Setting the order of the tiles*/ @@ -143,38 +143,45 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ldbm = BLKLDD(B, m); ldbp = BLKLDD(B, p); if(qrtree->gettype(qrtree, k, m) == 0){ - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmqr( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( m, k), ldam, - TS(m, k), TS->mb); - } + L = 0; + T = TS; } else { - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; - MORSE_TASK_zttmqr( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( m, k), ldam, - TT(m, k), TT->mb); - } + L = tempmm; + T = TT; + } + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(p, n), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, + side, trans, + tempmm, tempnn, tempkn, L, ib, T->nb, + A(m, k), ldam, + T(m, k), T->mb, + B(p, n), ldbp, + B(m, n), ldbm); } } + + /* Restore the original location of the tiles */ + for (n = 0; n < B->nt; n++) { + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, k, n ) ); + } + RUNTIME_iteration_pop(morse); } - } else { - /* - * MorseLeft / MorseNoTrans - */ + } + /* + * MorseLeft / MorseNoTrans + */ + else { for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -193,34 +200,34 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ldbp = BLKLDD(B, p); /* TT or TS */ - if(qrtree->gettype(qrtree, k, m) == 0){ - for (n = k; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmqr( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( m, k), ldam, - TS(m, k), TS->mb); - } + L = 0; + T = TS; } else { - for (n = k; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_zttmqr( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, - B( p, n), ldbp, - B( m, n), ldbm, - A( m, k), ldam, - TT(m, k), TT->mb); - } + L = tempmm; + T = TT; + } + for (n = k; n < B->nt; n++) { + tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(p, n), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, + side, trans, + tempmm, tempnn, tempkn, L, ib, T->nb, + A(m, k), ldam, + T(m, k), T->mb, + B(p, n), ldbp, + B(m, n), ldbm); } } + + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { m = qrtree->getm(qrtree, k, i); @@ -245,23 +252,28 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, #endif for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + MORSE_TASK_zunmqr( &options, side, trans, - tempmm, tempnn, tempkmin, ib, TS->nb, - D( m, k), ldam, - TS(m, k), TS->mb, - B( m, n), ldbm); + tempmm, tempnn, tempkmin, ib, T->nb, + D(m, k), ldam, + T(m, k), T->mb, + B(m, n), ldbm); } } RUNTIME_iteration_pop(morse); } } - } else { + } + /* + * MorseRight / MorseConjTrans + */ + else { if (trans == MorseConjTrans) { - /* - * MorseRight / MorseConjTrans - */ for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -280,34 +292,34 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, /* TS or TT */ if(qrtree->gettype(qrtree, k, n) == 0){ - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmqr( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb, - B( m, p), ldbm, - B( m, n), ldbm, - A( n, k), ldan, - TS(n, k), TS->mb); - } + L = 0; + T = TS; } - else{ - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_zttmqr( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb, - B( m, p), ldbm, - B( m, n), ldbm, - A( n, k), ldan, - TT(n, k), TT->mb); - } + else { + L = tempmm; + T = TT; + } + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + + RUNTIME_data_migrate( sequence, B(m, p), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, + side, trans, + tempmm, tempnn, tempkn, L, ib, T->nb, + A(n, k), ldan, + T(n, k), T->mb, + B(m, p), ldbm, + B(m, n), ldbm); } } + + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { n = qrtree->getm(qrtree, k, i); @@ -332,27 +344,33 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, for (m = 0; m < B->mt; m++) { ldbm = BLKLDD(B, m); tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + MORSE_TASK_zunmqr( &options, side, trans, - tempmm, tempnn, tempkmin, ib, TS->nb, - D( n, k), ldan, - TS(n, k), TS->mb, - B( m, n), ldbm); + tempmm, tempnn, tempkmin, ib, T->nb, + D(n, k), ldan, + T(n, k), T->mb, + B(m, n), ldbm); } } RUNTIME_iteration_pop(morse); } - } else { - /* - * MorseRight / MorseNoTrans - */ + } + /* + * MorseRight / MorseNoTrans + */ + else { for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { n = qrtree->getm(qrtree, k, i); @@ -380,10 +398,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_TASK_zunmqr( &options, side, trans, - tempmm, tempnn, tempkmin, ib, TS->nb, - D( n, k), ldan, - TS(n, k), TS->mb, - B( m, n), ldbm); + tempmm, tempnn, tempkmin, ib, T->nb, + D(n, k), ldan, + T(n, k), T->mb, + B(m, n), ldbm); } } /* Setting the order of tiles */ @@ -397,32 +415,31 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ldan = BLKLDD(A, n); ldbp = BLKLDD(B, p); if(qrtree->gettype(qrtree, k, n) == 0){ - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmqr( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb, - B( m, p), ldbm, - B( m, n), ldbm, - A( n, k), ldan, - TS(n, k), TS->mb); - } + L = 0; + T = TS; } else { - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_zttmqr( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb, - B( m, p), ldbm, - B( m, n), ldbm, - A( n, k), ldan, - TT(n, k), TT->mb); - } + L = tempmm; + T = TT; + } + + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + + RUNTIME_data_migrate( sequence, B(m, p), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, + side, trans, + tempmm, tempnn, tempkn, L, ib, T->nb, + A(n, k), ldan, + T(n, k), T->mb, + B(m, p), ldbm, + B(m, n), ldbm); } } @@ -434,6 +451,4 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, free(tiles); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - - (void)D; } diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index ca7097ca9..00b4d3ded 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -134,15 +137,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, ldam = BLKLDD(A, m); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmqr( - &options, - side, trans, - A->nb, tempnn, tempmm, tempnn, - tempkn, ib, T->nb, - B(M, n), ldbM, - B(m, n), ldbm, + + RUNTIME_data_migrate( sequence, B(M, n), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, side, trans, + tempmm, tempnn, tempkn, 0, ib, T->nb, A(m, k), ldam, - T(m, k), T->mb); + T(m, k), T->mb, + B(M, n), ldbM, + B(m, n), ldbm); } } } @@ -154,19 +158,29 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, ldaMRD = BLKLDD(A, M+RD); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_zttmqr( - &options, - side, trans, - A->nb, tempnn, tempMRDm, tempnn, - tempkn, ib, T->nb, - B (M, n), ldbM, - B (M+RD, n), ldbMRD, + + RUNTIME_data_migrate( sequence, B(M, n), + B->get_rankof( B, M+RD, n ) ); + RUNTIME_data_migrate( sequence, B(M+RD, n), + B->get_rankof( B, M+RD, n ) ); + + MORSE_TASK_ztpmqrt( + &options, side, trans, + tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb, A (M+RD, k), ldaMRD, - T2(M+RD, k), T->mb); + T2(M+RD, k), T->mb, + B (M, n), ldbM, + B (M+RD, n), ldbMRD); } } } + /* Restore the original location of the tiles */ + for (n = 0; n < B->nt; n++) { + RUNTIME_data_migrate( sequence, B(k, n), + B->get_rankof( B, k, n ) ); + } + RUNTIME_iteration_pop(morse); } } else { @@ -188,15 +202,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, ldaMRD = BLKLDD(A, M+RD); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; - MORSE_TASK_zttmqr( - &options, - side, trans, - A->nb, tempnn, tempMRDm, tempnn, - tempkn, ib, T->nb, - B (M, n), ldbM, - B (M+RD, n), ldbMRD, + + RUNTIME_data_migrate( sequence, B(M, n), + B->get_rankof( B, M+RD, n ) ); + RUNTIME_data_migrate( sequence, B(M+RD, n), + B->get_rankof( B, M+RD, n ) ); + + MORSE_TASK_ztpmqrt( + &options, side, trans, + tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb, A (M+RD, k), ldaMRD, - T2(M+RD, k), T->mb); + T2(M+RD, k), T->mb, + B (M, n), ldbM, + B (M+RD, n), ldbMRD); } } } @@ -211,15 +229,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, ldam = BLKLDD(A, m); for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmqr( - &options, - side, trans, - A->nb, tempnn, tempmm, tempnn, - tempkn, ib, T->nb, - B(M, n), ldbM, - B(m, n), ldbm, + + RUNTIME_data_migrate( sequence, B(M, n), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, side, trans, + tempmm, tempnn, tempkn, 0, ib, T->nb, A(m, k), ldam, - T(m, k), T->mb); + T(m, k), T->mb, + B(M, n), ldbM, + B(m, n), ldbm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -238,11 +260,13 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, #endif for (n = 0; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + RUNTIME_data_migrate( sequence, B(M, n), + B->get_rankof( B, M, n ) ); + MORSE_TASK_zunmqr( - &options, - side, trans, - tempMm, tempnn, - tempkmin, ib, T->nb, + &options, side, trans, + tempMm, tempnn, tempkmin, ib, T->nb, D(M, k), ldaM, T(M, k), T->mb, B(M, n), ldbM); @@ -270,15 +294,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { ldbm = BLKLDD(B, m); tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; - MORSE_TASK_zttmqr( - &options, - side, trans, - tempmm, B->nb, tempmm, tempMRDm, - tempkn, ib, T->nb, - B (m, M), ldbm, - B (m, M+RD), ldbm, + + RUNTIME_data_migrate( sequence, B(m, M), + B->get_rankof( B, m, M+RD ) ); + RUNTIME_data_migrate( sequence, B(m, M+RD), + B->get_rankof( B, m, M+RD ) ); + + MORSE_TASK_ztpmqrt( + &options, side, trans, + tempmm, tempMRDm, tempkn, tempmm, ib, T->nb, A (M+RD, k), ldaMRD, - T2(M+RD, k), T->mb); + T2(M+RD, k), T->mb, + B (m, M), ldbm, + B (m, M+RD), ldbm); } } } @@ -293,15 +321,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { ldbm = BLKLDD(B, m); tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - MORSE_TASK_ztsmqr( - &options, - side, trans, - tempmm, tempMm, tempmm, tempnn, - tempkn, ib, T->nb, - B(m, M), ldbm, - B(m, n), ldbm, + + RUNTIME_data_migrate( sequence, B(m, M), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, side, trans, + tempmm, tempnn, tempkn, 0, ib, T->nb, A(n, k), ldan, - T(n, k), T->mb); + T(n, k), T->mb, + B(m, M), ldbm, + B(m, n), ldbm); } } #if defined(CHAMELEON_COPY_DIAG) @@ -321,6 +353,10 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { ldbm = BLKLDD(B, m); tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + + RUNTIME_data_migrate( sequence, B(m, M), + B->get_rankof( B, m, M ) ); + MORSE_TASK_zunmqr( &options, side, trans, @@ -376,15 +412,19 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmqr( - &options, - side, trans, - tempmm, tempMm, tempmm, tempnn, - tempkn, ib, T->nb, - B(m, M), ldbm, - B(m, n), ldbm, + + RUNTIME_data_migrate( sequence, B(m, M), + B->get_rankof( B, m, n ) ); + RUNTIME_data_migrate( sequence, B(m, n), + B->get_rankof( B, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, side, trans, + tempmm, tempnn, tempkn, 0, ib, T->nb, A(n, k), ldan, - T(n, k), T->mb); + T(n, k), T->mb, + B(m, M), ldbm, + B(m, n), ldbm); } } } @@ -395,19 +435,29 @@ void morse_pzunmqrrh( MORSE_enum side, MORSE_enum trans, for (m = 0; m < B->mt; m++) { tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbm = BLKLDD(B, m); - MORSE_TASK_zttmqr( - &options, - side, trans, - tempmm, B->nb, tempmm, tempMRDm, - tempkn, ib, T->nb, - B (m, M ), ldbm, - B (m, M+RD), ldbm, + + RUNTIME_data_migrate( sequence, B(m, M), + B->get_rankof( B, m, M+RD ) ); + RUNTIME_data_migrate( sequence, B(m, M+RD), + B->get_rankof( B, m, M+RD ) ); + + MORSE_TASK_ztpmqrt( + &options, side, trans, + tempmm, tempMRDm, tempkn, tempmm, ib, T->nb, A (M+RD, k), ldaMRD, - T2(M+RD, k), T->mb); + T2(M+RD, k), T->mb, + B (m, M ), ldbm, + B (m, M+RD), ldbm); } } } + /* Restore the original location of the tiles */ + for (m = 0; m < B->mt; m++) { + RUNTIME_data_migrate( sequence, B(m, k), + B->get_rankof( B, m, k ) ); + } + RUNTIME_iteration_pop(morse); } } -- GitLab