From f14b1e776dd951d719eec1a8b0c66f492341de62 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 31 Jan 2018 12:42:09 +0100 Subject: [PATCH] Add migration and switch to TP kernels in QRF algorithms --- compute/pzgeqrf.c | 38 +++++++++---- compute/pzgeqrf_param.c | 117 ++++++++++++++++++++-------------------- compute/pzgeqrfrh.c | 57 +++++++++++++++----- 3 files changed, 132 insertions(+), 80 deletions(-) diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index 849d7a005..f5b3b744c 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -33,9 +33,9 @@ #define A(m,n) A, m, n #define T(m,n) T, m, n #if defined(CHAMELEON_COPY_DIAG) -#define D(k) D, k, 0 +#define D(k) D, k, 0 #else -#define D(k) A, k, k +#define D(k) D, k, k #endif /******************************************************************************* @@ -62,6 +62,10 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, ib = MORSE_IB; + if ( D == NULL ) { + D = A; + } + /* * zgeqrt = A->nb * (ib+1) * zunmqr = A->nb * ib @@ -122,28 +126,44 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, T(k, k), T->mb, A(k, n), ldak); } + for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); - MORSE_TASK_ztsqrt( + + RUNTIME_data_migrate( sequence, A(k, k), + A->get_rankof( A, m, k ) ); + + MORSE_TASK_ztpqrt( &options, - tempmm, tempkn, ib, T->nb, + tempmm, tempkn, 0, ib, T->nb, A(k, k), ldak, A(m, k), ldam, T(m, k), T->mb); + for (n = k+1; n < A->nt; n++) { tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; - MORSE_TASK_ztsmqr( + + RUNTIME_data_migrate( sequence, A(k, n), + A->get_rankof( A, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, MorseLeft, MorseConjTrans, - A->mb, tempnn, tempmm, tempnn, A->nb, ib, T->nb, - A(k, n), ldak, - A(m, n), ldam, + tempmm, tempnn, A->nb, 0, ib, T->nb, A(m, k), ldam, - T(m, k), T->mb); + T(m, k), T->mb, + A(k, n), ldak, + A(m, n), ldam); } } + /* Restore the original location of the tiles */ + for (n = k; n < A->nt; n++) { + RUNTIME_data_migrate( sequence, A(k, n), + A->get_rankof( A, k, n ) ); + } + RUNTIME_iteration_pop(morse); } diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c index 3845f4627..99027c88f 100644 --- a/compute/pzgeqrf_param.c +++ b/compute/pzgeqrf_param.c @@ -26,14 +26,9 @@ #include <stdlib.h> #include "libhqr.h" -#define A(m,n) A, (m), (n) -#define TS(m,n) TS, (m), (n) -#define TT(m,n) TT, (m), (n) -#if defined(CHAMELEON_COPY_DIAG) -#define D(m,n) D, (m), (n) -#else -#define D(m,n) A, (m), (n) -#endif +#define A(m,n) A, (m), (n) +#define T(m,n) T, (m), (n) +#define D(m,n) D, (m), (n) /** * Parallel tile QR factorization (reduction Householder) - dynamic scheduling @@ -44,11 +39,12 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, { MORSE_context_t *morse; MORSE_option_t options; + MORSE_desc_t *T; size_t ws_worker = 0; size_t ws_host = 0; int k, m, n, i, p; - int K; + int K, L; int ldap, ldam; int tempkmin, tempkn, tempnn, tempmm; int ib; @@ -61,6 +57,10 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, ib = MORSE_IB; + if ( D == NULL ) { + D = A; + } + /* * zgeqrt = A->nb * (ib+1) * zunmqr = A->nb * ib @@ -81,8 +81,7 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif - /* Initialisation of tiles */ - + /* Initialisation of temporary tiles array */ tiles = (int*)calloc(qrtree->mt, sizeof(int)); ws_worker *= sizeof(MORSE_Complex64_t); @@ -104,11 
+103,13 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, tempkmin = chameleon_min(tempmm, tempkn); ldam = BLKLDD(A, m); + T = TS; + MORSE_TASK_zgeqrt( &options, - tempmm, tempkn, ib, TS->nb, - A( m, k), ldam, - TS(m, k), TS->mb); + tempmm, tempkn, ib, T->nb, + A(m, k), ldam, + T(m, k), T->mb); if ( k < (A->nt-1) ) { #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( @@ -130,10 +131,10 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_TASK_zunmqr( &options, MorseLeft, MorseConjTrans, - tempmm, tempnn, tempkmin, ib, TS->nb, - D( m, k), ldam, - TS(m, k), TS->mb, - A( m, n), ldam); + tempmm, tempnn, tempkmin, ib, T->nb, + D(m, k), ldam, + T(m, k), T->mb, + A(m, n), ldam); } } @@ -149,54 +150,56 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, ldam = BLKLDD(A, m); /* Tiles killed is a TS */ - if(qrtree->gettype(qrtree, k, m) == 0){ - MORSE_TASK_ztsqrt( - &options, - tempmm, tempkn, ib, TS->nb, - A( p, k), ldap, - A( m, k), ldam, - TS(m, k), TS->mb); - - for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - MORSE_TASK_ztsmqr( - &options, - MorseLeft, MorseConjTrans, - A->nb, tempnn, tempmm, tempnn, A->nb, ib, TS->nb, - A( p, n), ldap, - A( m, n), ldam, - A( m, k), ldam, - TS(m, k), TS->mb); - } + if (qrtree->gettype(qrtree, k, m) == 0) { + T = TS; + L = 0; } - - /* Tiles killed is a TT */ else { - MORSE_TASK_zttqrt( + T = TT; + L = tempmm; + } + + RUNTIME_data_migrate( sequence, A(p, k), + A->get_rankof( A, m, k ) ); + RUNTIME_data_migrate( sequence, A(m, k), + A->get_rankof( A, m, k ) ); + + MORSE_TASK_ztpqrt( + &options, + tempmm, tempkn, L, ib, T->nb, + A(p, k), ldap, + A(m, k), ldam, + T(m, k), T->mb); + + for (n = k+1; n < A->nt; n++) { + tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; + + RUNTIME_data_migrate( sequence, A(p, n), + A->get_rankof( A, m, n ) ); + RUNTIME_data_migrate( sequence, A(m, n), + A->get_rankof( A, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, - tempmm, tempkn, ib, TT->nb, - A( p, k), ldap, - A( m, k), ldam, - TT(m, k), TT->mb); - - for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - MORSE_TASK_zttmqr( - &options, - MorseLeft, MorseConjTrans, - A->mb, tempnn, tempmm, tempnn, A->nb, ib, TT->nb, - A( p, n), ldap, - A( m, n), ldam, - A( m, k), ldam, - TT(m, k), TT->mb); - } + MorseLeft, MorseConjTrans, + tempmm, tempnn, A->nb, L, ib, T->nb, + A(m, k), ldam, + T(m, k), T->mb, + A(p, n), ldap, + A(m, n), ldam); } } + + /* Restore the original location of the tiles */ + for (n = k; n < A->nt; n++) { + RUNTIME_data_migrate( sequence, A(k, n), + A->get_rankof( A, k, n ) ); + } + RUNTIME_iteration_pop(morse); } free(tiles); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); - (void)D; } diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index 30d6a1273..1024a6a23 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -98,6 +98,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; tempkmin = chameleon_min(tempMm, tempkn); ldaM = BLKLDD(A, M); + MORSE_TASK_zgeqrt( &options, tempMm, tempkn, ib, T->nb, @@ -129,26 +130,35 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, T(M, k), T->mb, A(M, n), ldaM); } + for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); - MORSE_TASK_ztsqrt( + + RUNTIME_data_migrate( sequence, A(M, k), + A->get_rankof( A, m, k ) ); + + MORSE_TASK_ztpqrt( &options, - tempmm, tempkn, ib, T->nb, + tempmm, tempkn, 0, ib, T->nb, A(M, k), ldaM, A(m, k), ldam, T(m, k), T->mb); for (n = k+1; n < A->nt; n++) { tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; - MORSE_TASK_ztsmqr( + + RUNTIME_data_migrate( sequence, A(M, n), + A->get_rankof( A, m, n ) ); + + MORSE_TASK_ztpmqrt( &options, MorseLeft, MorseConjTrans, - A->nb, tempnn, tempmm, tempnn, A->nb, ib, T->nb, - A(M, n), ldaM, - A(m, n), ldam, + tempmm, tempnn, A->nb, 0, ib, T->nb, A(m, k), ldam, - T(m, k), T->mb); + T(m, k), T->mb, + A(M, n), ldaM, + A(m, n), ldam); } } } @@ -157,26 +167,45 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb; ldaM = BLKLDD(A, M ); ldaMRD = BLKLDD(A, M+RD); - MORSE_TASK_zttqrt( + + RUNTIME_data_migrate( sequence, A(M, k), + A->get_rankof( A, M+RD, k ) ); + RUNTIME_data_migrate( sequence, A(M+RD, k), + A->get_rankof( A, M+RD, k ) ); + + MORSE_TASK_ztpqrt( &options, - tempMRDm, tempkn, ib, T->nb, + tempMRDm, tempkn, tempMRDm, ib, T->nb, A (M , k), ldaM, A (M+RD, k), ldaMRD, T2(M+RD, k), T->mb); for (n = k+1; n < A->nt; n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - MORSE_TASK_zttmqr( + + RUNTIME_data_migrate( sequence, A(M, n), + A->get_rankof( A, M+RD, n ) ); + RUNTIME_data_migrate( sequence, A(M+RD, n), + A->get_rankof( A, M+RD, n ) ); + + MORSE_TASK_ztpmqrt( &options, MorseLeft, MorseConjTrans, - A->nb, tempnn, tempMRDm, tempnn, A->nb, ib, T->nb, - A (M, n), ldaM, - A (M+RD, n), ldaMRD, + tempMRDm, tempnn, A->nb, tempMRDm, ib, T->nb, A (M+RD, k), ldaMRD, - T2(M+RD, k), T->mb); + T2(M+RD, k), T->mb, + A (M, n), ldaM, + A (M+RD, n), ldaMRD); } } } + + /* Restore the original location of the tiles */ + for (n = k; n < A->nt; n++) { + RUNTIME_data_migrate( sequence, A(k, n), + A->get_rankof( A, k, n ) ); + } + RUNTIME_iteration_pop(morse); } -- GitLab