From b586e208fb1a2a20d2cbc991976ce78ca1b2db84 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 31 Jan 2018 13:13:44 +0100 Subject: [PATCH] Add migration and tp kernels to ungqr functions --- compute/pzungqr.c | 31 +++++++++++----- compute/pzungqr_param.c | 79 ++++++++++++++++++++++------------------- compute/pzungqrrh.c | 40 ++++++++++++++------- 3 files changed, 92 insertions(+), 58 deletions(-) diff --git a/compute/pzungqr.c b/compute/pzungqr.c index 389f79e85..e305f329f 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -34,9 +34,9 @@ #define Q(m,n) Q, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define D(k) D, k, 0 +#define D(k) D, k, 0 #else -#define D(k) A, k, k +#define D(k) D, k, k #endif /******************************************************************************* @@ -69,6 +69,10 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc minMT = A->mt; } + if (D == NULL) { + D = A; + } + /* * zunmqr = A->nb * ib * ztsmqr = A->nb * ib @@ -105,16 +109,21 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc ldqm = BLKLDD(Q, m); for (n = k; n < Q->nt; n++) { tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; - MORSE_TASK_ztsmqr( + + RUNTIME_data_migrate( sequence, Q(k, n), + Q->get_rankof( Q, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, MorseLeft, MorseNoTrans, - Q->mb, tempnn, tempmm, tempnn, tempAkn, ib, T->nb, - Q(k, n), ldqk, - Q(m, n), ldqm, + tempmm, tempnn, tempAkn, 0, ib, T->nb, A(m, k), ldam, - T(m, k), T->mb); + T(m, k), T->mb, + Q(k, n), ldqk, + Q(m, n), ldqm); } } + #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, @@ -131,11 +140,16 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc #endif for (n = k; n < Q->nt; n++) { tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + + /* Restore the original location of the tiles */ + RUNTIME_data_migrate( sequence, Q(k, n), + Q->get_rankof( Q, k, n ) ); + MORSE_TASK_zunmqr( &options, MorseLeft, MorseNoTrans, tempkm, tempnn, tempkmin, ib, T->nb, - D(k), ldak, + D(k), ldak, T(k, k), T->mb, Q(k, n), ldqk); } @@ -144,5 +158,4 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - (void)D; } diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c index a0849b634..d78459ae9 100644 --- a/compute/pzungqr_param.c +++ b/compute/pzungqr_param.c @@ -25,15 +25,10 @@ #include "control/common.h" #include <stdlib.h> -#define A(m,n) A, m, n -#define Q(m,n) Q, m, n -#define TS(m,n) TS, m, n -#define TT(m,n) TT, m, n -#if defined(CHAMELEON_COPY_DIAG) -#define D(m,n) D, m, n -#else -#define D(m,n) A, m, n -#endif +#define A(m,n) A, m, n +#define Q(m,n) Q, m, n +#define T(m,n) T, m, n +#define D(m,n) D, m, n /** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling @@ -45,10 +40,11 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, { MORSE_context_t *morse; MORSE_option_t options; + MORSE_desc_t *T; size_t ws_worker = 0; size_t ws_host = 0; - int k, m, n, i, p; + int k, m, n, i, p, L; int ldam, ldqm, ldqp; int tempmm, tempnn, tempkmin, tempkn; int ib, minMT; @@ -67,6 +63,10 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, minMT = A->mt; } + if (D == NULL) { + D = A; + } + /* * zunmqr = A->nb * ib * ztsmqr = A->nb * ib @@ -110,35 +110,35 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, ldqp = BLKLDD(Q, p); /* TT or TS */ - - if(qrtree->gettype(qrtree, k , m) == 0){ - for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; - MORSE_TASK_ztsmqr( - &options, - MorseLeft, MorseNoTrans, - Q->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, - Q(p, n), ldqp, - Q(m, n), ldqm, - A(m, k), ldam, - TS(m, k), TS->mb); - } + if(qrtree->gettype(qrtree, k , m) == 0) { + T = TS; + L = 0; } else { - for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; - MORSE_TASK_zttmqr( - &options, - MorseLeft, MorseNoTrans, - Q->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, - Q(p, n), ldqp, - Q(m, n), ldqm, - A(m, k), ldam, - TT(m, k), TT->mb); - } + T = TT; + L = tempmm; + } + + for (n = k; n < Q->nt; n++) { + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + + RUNTIME_data_migrate( sequence, Q(p, n), + Q->get_rankof( Q, m, n ) ); + RUNTIME_data_migrate( sequence, Q(m, n), + Q->get_rankof( Q, m, n ) ); + + MORSE_TASK_ztpmqrt( + &options, + MorseLeft, MorseNoTrans, + tempmm, tempnn, tempkn, L, ib, T->nb, + A(m, k), ldam, + T(m, k), T->mb, + Q(p, n), ldqp, + Q(m, n), ldqm); } } + T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { m = qrtree->getm(qrtree, k, i); @@ -161,14 +161,20 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, D(m, k), ldam ); #endif #endif + for (n = k; n < Q->nt; n++) { tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + + /* Restore the original location of the tiles */ + RUNTIME_data_migrate( sequence, Q(m, n), + Q->get_rankof( Q, m, n ) ); + MORSE_TASK_zunmqr( &options, MorseLeft, MorseNoTrans, - tempmm, tempnn, tempkmin, ib, TS->nb, + tempmm, tempnn, tempkmin, ib, T->nb, D(m, k), ldam, - TS(m, k), TS->mb, + T(m, k), T->mb, Q(m, n), ldqm); } } @@ -178,5 +184,4 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, free(tiles); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - (void)D; } diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index 1d0516298..89853c6cf 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -105,15 +105,20 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ldaMRD = BLKLDD(A, M+RD); for (n = k; n < Q->nt; n++) { tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; - MORSE_TASK_zttmqr( + + RUNTIME_data_migrate( sequence, Q(M, n), + Q->get_rankof( Q, M+RD, n ) ); + RUNTIME_data_migrate( sequence, Q(M+RD, n), + Q->get_rankof( Q, M+RD, n ) ); + + MORSE_TASK_ztpmqrt( &options, MorseLeft, MorseNoTrans, - Q->mb, tempnn, tempMRDm, tempnn, - tempkn, ib, T->nb, - Q (M, n), ldqM, - Q (M+RD, n), ldqMRD, + tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb, A (M+RD, k), ldaMRD, - T2(M+RD, k), T->mb); + T2(M+RD, k), T->mb, + Q (M, n), ldqM, + Q (M+RD, n), ldqMRD); } } } @@ -129,17 +134,23 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, for (n = k; n < Q->nt; n++) { tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; - MORSE_TASK_ztsmqr( + + RUNTIME_data_migrate( sequence, Q(M, n), + Q->get_rankof( Q, m, n ) ); + RUNTIME_data_migrate( sequence, Q(m, n), + Q->get_rankof( Q, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, MorseLeft, MorseNoTrans, - Q->mb, tempnn, tempmm, tempnn, - tempkn, ib, T->nb, - Q(M, n), ldqM, - Q(m, n), ldqm, + tempmm, tempnn, tempkn, 0, ib, T->nb, A(m, k), ldam, - T(m, k), T->mb); + T(m, k), T->mb, + Q(M, n), ldqM, + Q(m, n), ldqm); } } + #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, @@ -156,6 +167,11 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, #endif for (n = k; n < Q->nt; n++) { tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + + /* Restore the original location of the tiles */ + RUNTIME_data_migrate( sequence, Q(M, n), + Q->get_rankof( Q, M, n ) ); + MORSE_TASK_zunmqr( &options, MorseLeft, MorseNoTrans, -- GitLab