Commit 7e28d2d5 authored by Mathieu Faverge

Add migration and tp kernels to unmqr functions

parent b586e208
...@@ -3,8 +3,7 @@ ...@@ -3,8 +3,7 @@
* @copyright (c) 2009-2014 The University of Tennessee and The University * @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. * of Tennessee Research Foundation.
* All rights reserved. * All rights reserved.
* @copyright (c) 2012-2016 Inria. All rights reserved. * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
* *
**/ **/
...@@ -35,12 +34,12 @@ ...@@ -35,12 +34,12 @@
#define B(m,n) B, m, n #define B(m,n) B, m, n
#define T(m,n) T, m, n #define T(m,n) T, m, n
#if defined(CHAMELEON_COPY_DIAG) #if defined(CHAMELEON_COPY_DIAG)
#define D(k) D, k, 0 #define D(k) D, k, 0
#else #else
#define D(k) A, k, k #define D(k) D, k, k
#endif #endif
/******************************************************************************* /**
* Parallel application of Q using tile V - QR factorization - dynamic scheduling * Parallel application of Q using tile V - QR factorization - dynamic scheduling
**/ **/
void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
...@@ -72,6 +71,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -72,6 +71,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
minMT = A->mt; minMT = A->mt;
} }
if (D == NULL) {
D = A;
}
/* /*
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = A->nb * ib * ztsmqr = A->nb * ib
...@@ -134,17 +137,27 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -134,17 +137,27 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
ldbm = BLKLDD(B, m); ldbm = BLKLDD(B, m);
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
MORSE_TASK_ztsmqr(
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmqrt(
&options, &options,
side, trans, side, trans,
B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb, tempmm, tempnn, tempkmin, 0, ib, T->nb,
B(k, n), ldbk,
B(m, n), ldbm,
A(m, k), ldam, A(m, k), ldam,
T(m, k), T->mb); T(m, k), T->mb,
B(k, n), ldbk,
B(m, n), ldbm);
} }
} }
/* Restore the original location of the tiles */
for (n = 0; n < B->nt; n++) {
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, k, n ) );
}
RUNTIME_iteration_pop(morse); RUNTIME_iteration_pop(morse);
} }
} }
...@@ -165,14 +178,18 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -165,14 +178,18 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
ldbm = BLKLDD(B, m); ldbm = BLKLDD(B, m);
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
MORSE_TASK_ztsmqr(
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmqrt(
&options, &options,
side, trans, side, trans,
B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb, tempmm, tempnn, tempkmin, 0, ib, T->nb,
B(k, n), ldbk,
B(m, n), ldbm,
A(m, k), ldam, A(m, k), ldam,
T(m, k), T->mb); T(m, k), T->mb,
B(k, n), ldbk,
B(m, n), ldbm);
} }
} }
#if defined(CHAMELEON_COPY_DIAG) #if defined(CHAMELEON_COPY_DIAG)
...@@ -189,8 +206,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -189,8 +206,13 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
D(k), ldak ); D(k), ldak );
#endif #endif
#endif #endif
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, k, n ) );
MORSE_TASK_zunmqr( MORSE_TASK_zunmqr(
&options, &options,
side, trans, side, trans,
...@@ -199,7 +221,6 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -199,7 +221,6 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
T(k, k), T->mb, T(k, k), T->mb,
B(k, n), ldbk); B(k, n), ldbk);
} }
RUNTIME_iteration_pop(morse); RUNTIME_iteration_pop(morse);
} }
} }
...@@ -222,14 +243,18 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -222,14 +243,18 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m); ldbm = BLKLDD(B, m);
MORSE_TASK_ztsmqr(
RUNTIME_data_migrate( sequence, B(m, k),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmqrt(
&options, &options,
side, trans, side, trans,
tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb, tempmm, tempnn, tempkmin, 0, ib, T->nb,
B(m, k), ldbm,
B(m, n), ldbm,
A(n, k), ldan, A(n, k), ldan,
T(n, k), T->mb); T(n, k), T->mb,
B(m, k), ldbm,
B(m, n), ldbm);
} }
} }
#if defined(CHAMELEON_COPY_DIAG) #if defined(CHAMELEON_COPY_DIAG)
...@@ -249,6 +274,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -249,6 +274,10 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m); ldbm = BLKLDD(B, m);
RUNTIME_data_migrate( sequence, B(m, k),
B->get_rankof( B, m, k ) );
MORSE_TASK_zunmqr( MORSE_TASK_zunmqr(
&options, &options,
side, trans, side, trans,
...@@ -302,17 +331,27 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -302,17 +331,27 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m); ldbm = BLKLDD(B, m);
MORSE_TASK_ztsmqr(
RUNTIME_data_migrate( sequence, B(m, k),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmqrt(
&options, &options,
side, trans, side, trans,
tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb, tempmm, tempnn, tempkmin, 0, ib, T->nb,
B(m, k), ldbm,
B(m, n), ldbm,
A(n, k), ldan, A(n, k), ldan,
T(n, k), T->mb); T(n, k), T->mb,
B(m, k), ldbm,
B(m, n), ldbm);
} }
} }
/* Restore the original location of the tiles */
for (m = 0; m < B->mt; m++) {
RUNTIME_data_migrate( sequence, B(m, k),
B->get_rankof( B, m, k ) );
}
RUNTIME_iteration_pop(morse); RUNTIME_iteration_pop(morse);
} }
} }
...@@ -320,5 +359,4 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans, ...@@ -320,5 +359,4 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
RUNTIME_options_ws_free(&options); RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
(void)D;
} }
...@@ -27,13 +27,8 @@ ...@@ -27,13 +27,8 @@
#define A(m,n) A, m, n #define A(m,n) A, m, n
#define B(m,n) B, m, n #define B(m,n) B, m, n
#define TS(m,n) TS, m, n #define T(m,n) T, m, n
#define TT(m,n) TT, m, n #define D(m,n) D, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define D(m,n) D, m, n
#else
#define D(m,n) A, m, n
#endif
/** /**
* Parallel application of Q using tile V - QR factorization - dynamic scheduling * Parallel application of Q using tile V - QR factorization - dynamic scheduling
...@@ -45,13 +40,14 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -45,13 +40,14 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
{ {
MORSE_context_t *morse; MORSE_context_t *morse;
MORSE_option_t options; MORSE_option_t options;
MORSE_desc_t *T;
size_t ws_worker = 0; size_t ws_worker = 0;
size_t ws_host = 0; size_t ws_host = 0;
int k, m, n, i, p; int k, m, n, i, p;
int ldam, ldan, ldbm, ldbp; int ldam, ldan, ldbm, ldbp;
int tempnn, tempkmin, tempmm, tempkn; int tempnn, tempkmin, tempmm, tempkn;
int ib, K; int ib, K, L;
int *tiles; int *tiles;
morse = morse_context_self(); morse = morse_context_self();
...@@ -63,6 +59,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -63,6 +59,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
K = chameleon_min(A->mt, A->nt); K = chameleon_min(A->mt, A->nt);
if (D == NULL) {
D = A;
}
/* /*
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = A->nb * ib * ztsmqr = A->nb * ib
...@@ -80,7 +80,6 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -80,7 +80,6 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
#endif #endif
/* Initialisation of tiles */ /* Initialisation of tiles */
tiles = (int*)calloc( qrtree->mt, sizeof(int) ); tiles = (int*)calloc( qrtree->mt, sizeof(int) );
ws_worker *= sizeof(MORSE_Complex64_t); ws_worker *= sizeof(MORSE_Complex64_t);
...@@ -98,6 +97,7 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -98,6 +97,7 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
m = qrtree->getm(qrtree, k, i); m = qrtree->getm(qrtree, k, i);
...@@ -125,10 +125,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -125,10 +125,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
MORSE_TASK_zunmqr( MORSE_TASK_zunmqr(
&options, &options,
side, trans, side, trans,
tempmm, tempnn, tempkmin, ib, TS->nb, tempmm, tempnn, tempkmin, ib, T->nb,
D( m, k), ldam, D(m, k), ldam,
TS(m, k), TS->mb, T(m, k), T->mb,
B( m, n), ldbm); B(m, n), ldbm);
} }
} }
/* Setting the order of the tiles*/ /* Setting the order of the tiles*/
...@@ -143,38 +143,45 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -143,38 +143,45 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
ldbm = BLKLDD(B, m); ldbm = BLKLDD(B, m);
ldbp = BLKLDD(B, p); ldbp = BLKLDD(B, p);
if(qrtree->gettype(qrtree, k, m) == 0){ if(qrtree->gettype(qrtree, k, m) == 0){
for (n = 0; n < B->nt; n++) { L = 0;
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; T = TS;
MORSE_TASK_ztsmqr(
&options,
side, trans,
B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb,
B( p, n), ldbp,
B( m, n), ldbm,
A( m, k), ldam,
TS(m, k), TS->mb);
}
} }
else { else {
for (n = 0; n < B->nt; n++) { L = tempmm;
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; T = TT;
MORSE_TASK_zttmqr( }
&options, for (n = 0; n < B->nt; n++) {
side, trans, tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb,
B( p, n), ldbp, RUNTIME_data_migrate( sequence, B(p, n),
B( m, n), ldbm, B->get_rankof( B, m, n ) );
A( m, k), ldam, RUNTIME_data_migrate( sequence, B(m, n),
TT(m, k), TT->mb); B->get_rankof( B, m, n ) );
}
MORSE_TASK_ztpmqrt(
&options,
side, trans,
tempmm, tempnn, tempkn, L, ib, T->nb,
A(m, k), ldam,
T(m, k), T->mb,
B(p, n), ldbp,
B(m, n), ldbm);
} }
} }
/* Restore the original location of the tiles */
for (n = 0; n < B->nt; n++) {
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, k, n ) );
}
RUNTIME_iteration_pop(morse); RUNTIME_iteration_pop(morse);
} }
} else { }
/* /*
* MorseLeft / MorseNoTrans * MorseLeft / MorseNoTrans
*/ */
else {
for (k = K-1; k >= 0; k--) { for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k); RUNTIME_iteration_push(morse, k);
...@@ -193,34 +200,34 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -193,34 +200,34 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
ldbp = BLKLDD(B, p); ldbp = BLKLDD(B, p);
/* TT or TS */ /* TT or TS */
if(qrtree->gettype(qrtree, k, m) == 0){ if(qrtree->gettype(qrtree, k, m) == 0){
for (n = k; n < B->nt; n++) { L = 0;
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; T = TS;
MORSE_TASK_ztsmqr(
&options,
side, trans,
B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb,
B( p, n), ldbp,
B( m, n), ldbm,
A( m, k), ldam,
TS(m, k), TS->mb);
}
} }
else { else {
for (n = k; n < B->nt; n++) { L = tempmm;
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; T = TT;
MORSE_TASK_zttmqr( }
&options, for (n = k; n < B->nt; n++) {
side, trans, tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb,
B( p, n), ldbp, RUNTIME_data_migrate( sequence, B(p, n),
B( m, n), ldbm, B->get_rankof( B, m, n ) );
A( m, k), ldam, RUNTIME_data_migrate( sequence, B(m, n),
TT(m, k), TT->mb); B->get_rankof( B, m, n ) );
}
MORSE_TASK_ztpmqrt(
&options,
side, trans,
tempmm, tempnn, tempkn, L, ib, T->nb,
A(m, k), ldam,
T(m, k), T->mb,
B(p, n), ldbp,
B(m, n), ldbm);
} }
} }
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
m = qrtree->getm(qrtree, k, i); m = qrtree->getm(qrtree, k, i);
...@@ -245,23 +252,28 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -245,23 +252,28 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
#endif #endif
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
RUNTIME_data_migrate( sequence, B(m, n),
B->get_rankof( B, m, n ) );
MORSE_TASK_zunmqr( MORSE_TASK_zunmqr(
&options, &options,
side, trans, side, trans,
tempmm, tempnn, tempkmin, ib, TS->nb, tempmm, tempnn, tempkmin, ib, T->nb,
D( m, k), ldam, D(m, k), ldam,
TS(m, k), TS->mb, T(m, k), T->mb,
B( m, n), ldbm); B(m, n), ldbm);
} }
} }
RUNTIME_iteration_pop(morse); RUNTIME_iteration_pop(morse);
} }
} }
} else { }
/*
* MorseRight / MorseConjTrans
*/
else {
if (trans == MorseConjTrans) { if (trans == MorseConjTrans) {
/*
* MorseRight / MorseConjTrans
*/
for (k = K-1; k >= 0; k--) { for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k); RUNTIME_iteration_push(morse, k);
...@@ -280,34 +292,34 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -280,34 +292,34 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
/* TS or TT */ /* TS or TT */
if(qrtree->gettype(qrtree, k, n) == 0){ if(qrtree->gettype(qrtree, k, n) == 0){
for (m = 0; m < B->mt; m++) { L = 0;
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; T = TS;
ldbm = BLKLDD(B, m);
MORSE_TASK_ztsmqr(
&options,
side, trans,
tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb,
B( m, p), ldbm,
B( m, n), ldbm,
A( n, k), ldan,
TS(n, k), TS->mb);
}
} }
else{ else {
for (m = 0; m < B->mt; m++) { L = tempmm;
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; T = TT;
ldbm = BLKLDD(B, m); }
MORSE_TASK_zttmqr( for (m = 0; m < B->mt; m++) {
&options, tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
side, trans, ldbm = BLKLDD(B, m);
tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb,
B( m, p), ldbm, RUNTIME_data_migrate( sequence, B(m, p),
B( m, n), ldbm, B->get_rankof( B, m, n ) );
A( n, k), ldan, RUNTIME_data_migrate( sequence, B(m, n),
TT(n, k), TT->mb); B->get_rankof( B, m, n ) );
}
MORSE_TASK_ztpmqrt(
&options,
side, trans,
tempmm, tempnn, tempkn, L, ib, T->nb,
A(n, k), ldan,
T(n, k), T->mb,
B(m, p), ldbm,
B(m, n), ldbm);
} }
} }
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
n = qrtree->getm(qrtree, k, i); n = qrtree->getm(qrtree, k, i);
...@@ -332,27 +344,33 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ...@@ -332,27 +344,33 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
for (m = 0;