Commit fd2fcb03 authored by Mathieu Faverge
Browse files

Add migration and switch to TP kernels in unmlq algorithms

parent ab460c8d
......@@ -3,8 +3,7 @@
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
* @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
......@@ -35,12 +34,12 @@
#define B(m,n) B, m, n
#define T(m,n) T, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define D(k) D, k, 0
#define D(k) D, k, 0
#else
#define D(k) A, k, k
#define D(k) D, k, k
#endif
/*******************************************************************************
/**
* Parallel application of Q using tile V - LQ factorization - dynamic scheduling
**/
void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
......@@ -72,6 +71,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
minMT = A->mt;
}
if (D == NULL) {
D = A;
}
/*
* zunmlq = A->mb * ib
* ztsmlq = A->mb * ib
......@@ -133,24 +136,34 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
ldbm = BLKLDD(B, m);
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
MORSE_TASK_ztsmlq(
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmlqt(
&options,
side, trans,
B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb,
B(k, n), ldbk,
B(m, n), ldbm,
tempmm, tempnn, tempkmin, 0, ib, T->nb,
A(k, m), ldak,
T(k, m), T->mb);
T(k, m), T->mb,
B(k, n), ldbk,
B(m, n), ldbm);
}
}
/* Restore the original location of the tiles */
for (n = 0; n < B->nt; n++) {
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, k, n ) );
}
RUNTIME_iteration_pop(morse);
}
}
/*
* MorseLeft / MorseConjTrans
*/
else {
/*
* MorseLeft / MorseConjTrans
*/
for (k = minMT-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
......@@ -162,15 +175,19 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
MORSE_TASK_ztsmlq(
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmlqt(
&options,
side, trans,
B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb,
B(k, n), ldbk,
B(m, n), ldbm,
tempmm, tempnn, tempkmin, 0, ib, T->nb,
A(k, m), ldak,
T(k, m), T->mb);
T(k, m), T->mb,
B(k, n), ldbk,
B(m, n), ldbm);
}
}
#if defined(CHAMELEON_COPY_DIAG)
......@@ -189,6 +206,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
#endif
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, k, n ) );
MORSE_TASK_zunmlq(
&options,
side, trans,
......@@ -197,35 +218,38 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
T(k, k), T->mb,
B(k, n), ldbk);
}
RUNTIME_iteration_pop(morse);
}
}
}
/*
* MorseRight / MorseNoTrans
*/
else {
if (trans == MorseNoTrans) {
/*
* MorseRight / MorseNoTrans
*/
for (k = minMT-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
tempkn = k == B->nt -1 ? B->n -k*B->nb : B->nb;
tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
tempkn = k == B->nt - 1 ? B->n - k * B->nb : B->nb;
tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb;
ldak = BLKLDD(A, k);
for (n = B->nt-1; n > k; n--) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
MORSE_TASK_ztsmlq(
RUNTIME_data_migrate( sequence, B(m, k),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmlqt(
&options,
side, trans,
tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb,
B(m, k), ldbm,
B(m, n), ldbm,
tempmm, tempnn, tempkmin, 0, ib, T->nb,
A(k, n), ldak,
T(k, n), T->mb);
T(k, n), T->mb,
B(m, k), ldbm,
B(m, n), ldbm);
}
}
#if defined(CHAMELEON_COPY_DIAG)
......@@ -245,6 +269,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
RUNTIME_data_migrate( sequence, B(m, k),
B->get_rankof( B, m, k ) );
MORSE_TASK_zunmlq(
&options,
side, trans,
......@@ -257,14 +285,14 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
RUNTIME_iteration_pop(morse);
}
}
/*
* MorseRight / MorseConjTrans
*/
else {
/*
* MorseRight / MorseConjTrans
*/
for (k = 0; k < minMT; k++) {
RUNTIME_iteration_push(morse, k);
tempkn = k == B->nt -1 ? B->n -k*B->nb : B->nb;
tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
tempkmin = k == minMT-1 ? minM-k*A->mb : A->mb;
ldak = BLKLDD(A, k);
#if defined(CHAMELEON_COPY_DIAG)
......@@ -297,17 +325,27 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
MORSE_TASK_ztsmlq(
RUNTIME_data_migrate( sequence, B(m, k),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmlqt(
&options,
side, trans,
tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb,
B(m, k), ldbm,
B(m, n), ldbm,
tempmm, tempnn, tempkmin, 0, ib, T->nb,
A(k, n), ldak,
T(k, n), T->mb);
T(k, n), T->mb,
B(m, k), ldbm,
B(m, n), ldbm);
}
}
/* Restore the original location of the tiles */
for (m = 0; m < B->mt; m++) {
RUNTIME_data_migrate( sequence, B(m, k),
B->get_rankof( B, m, k ) );
}
RUNTIME_iteration_pop(morse);
}
}
......@@ -315,5 +353,4 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
(void)D;
}
......@@ -27,13 +27,8 @@
#define A(m,n) A, m, n
#define B(m,n) B, m, n
#define TS(m,n) TS, m, n
#define TT(m,n) TT, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define D(m,n) D, m, n
#else
#define D(m,n) A, m, n
#endif
#define T(m,n) T, m, n
#define D(m,n) D, m, n
/**
* Parallel application of Q using tile V - LQ factorization - dynamic scheduling
......@@ -46,13 +41,14 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
{
MORSE_context_t *morse;
MORSE_option_t options;
MORSE_desc_t *T;
size_t ws_worker = 0;
size_t ws_host = 0;
int k, m, n, i, p;
int ldbm, ldak, ldbp;
int tempnn, temppn, tempkmin, tempmm, tempkm;
int ib, K;
int ib, K, L;
int *tiles;
morse = morse_context_self();
......@@ -64,6 +60,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
K = chameleon_min(A->mt, A->nt);
if (D == NULL) {
D = A;
}
/*
* zunmlq = A->nb * ib
* ztsmlq = A->nb * ib
......@@ -99,6 +99,7 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
ldak = BLKLDD(A, k);
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
......@@ -125,10 +126,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
MORSE_TASK_zunmlq(
&options,
side, trans,
temppn, tempnn, tempkmin, ib, TS->nb,
D( k, p), ldak,
TS(k, p), TS->mb,
B( p, n), ldbp);
temppn, tempnn, tempkmin, ib, T->nb,
D(k, p), ldak,
T(k, p), T->mb,
B(p, n), ldbp);
}
}
......@@ -145,40 +146,45 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
/* TT or TS */
if(qrtree->gettype(qrtree, k, m) == 0){
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
MORSE_TASK_ztsmlq(
&options,
side, trans,
B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb,
B( p, n), ldbp,
B( m, n), ldbm,
A( k, m), ldak,
TS(k, m), TS->mb);
}
L = 0;
T = TS;
}
else {
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
MORSE_TASK_zttmlq(
&options,
side, trans,
B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb,
B( p, n), ldbp,
B( m, n), ldbm,
A( k, m), ldak,
TT(k, m), TS->mb);
}
L = A->nb;
T = TT;
}
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
RUNTIME_data_migrate( sequence, B(p, n),
B->get_rankof( B, m, n ) );
RUNTIME_data_migrate( sequence, B(m, n),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmlqt(
&options,
side, trans,
tempmm, tempnn, tempkm, chameleon_min( L, tempnn ), ib, T->nb,
A(k, m), ldak,
T(k, m), T->mb,
B(p, n), ldbp,
B(m, n), ldbm);
}
}
/* Restore the original location of the tiles */
for (n = 0; n < B->nt; n++) {
RUNTIME_data_migrate( sequence, B(k, n),
B->get_rankof( B, k, n ) );
}
RUNTIME_iteration_pop(morse);
}
} else {
/*
* MorseLeft / MorseConjTrans
*/
}
/*
* MorseLeft / MorseConjTrans
*/
else {
for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
......@@ -198,32 +204,33 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
/* TT or TS */
if(qrtree->gettype(qrtree, k, m) == 0){
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
MORSE_TASK_ztsmlq(
&options,
side, trans,
B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb,
B( p, n), ldbp,
B( m, n), ldbm,
A( k, m), ldak,
TS(k, m), TS->mb);
}
L = 0;
T = TS;
}
else {
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
MORSE_TASK_zttmlq(
&options,
side, trans,
B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb,
B( p, n), ldbp,
B( m, n), ldbm,
A( k, m), ldak,
TT(k, m), TT->mb);
}
L = A->nb;
T = TT;
}
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
RUNTIME_data_migrate( sequence, B(p, n),
B->get_rankof( B, m, n ) );
RUNTIME_data_migrate( sequence, B(m, n),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmlqt(
&options,
side, trans,
tempmm, tempnn, tempkm, chameleon_min(L, tempnn), ib, T->nb,
A(k, m), ldak,
T(k, m), T->mb,
B(p, n), ldbp,
B(m, n), ldbm);
}
}
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
......@@ -247,23 +254,28 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
#endif
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
RUNTIME_data_migrate( sequence, B(p, n),
B->get_rankof( B, p, n ) );
MORSE_TASK_zunmlq(
&options,
side, trans,
temppn, tempnn, tempkmin, ib, TS->nb,
D( k, p), ldak,
TS(k, p), TS->mb,
B( p, n), ldbp);
temppn, tempnn, tempkmin, ib, T->nb,
D(k, p), ldak,
T(k, p), T->mb,
B(p, n), ldbp);
}
}
RUNTIME_iteration_pop(morse);
}
}
} else {
}
/*
* MorseRight / MorseNoTrans
*/
else {
if (trans == MorseNoTrans) {
/*
* MorseRight / MorseNoTrans
*/
for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
......@@ -280,37 +292,36 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
ldbp = BLKLDD(B, p);
/* TT or TS */
/* TS or TT */
if(qrtree->gettype(qrtree, k, n) == 0){
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
MORSE_TASK_ztsmlq(
&options,
side, trans,
tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb,
B( m, p), ldbm,
B( m, n), ldbm,
A( k, n), ldak,
TS(k, n), TS->mb);
}
L = 0;
T = TS;
}
else {
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
MORSE_TASK_zttmlq(
&options,
side, trans,
tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb,
B( m, p), ldbm,
B( m, n), ldbm,
A( k, n), ldak,
TT(k, n), TT->mb);
}
L = tempnn;
T = TT;
}
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
RUNTIME_data_migrate( sequence, B(m, p),
B->get_rankof( B, m, n ) );
RUNTIME_data_migrate( sequence, B(m, n),
B->get_rankof( B, m, n ) );
MORSE_TASK_ztpmlqt(
&options,
side, trans,
tempmm, tempnn, tempkm, L, ib, T->nb,
A(k, n), ldak,
T(k, n), T->mb,
B(m, p), ldbm,
B(m, n), ldbm);
}
}
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
......@@ -334,26 +345,33 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
for (m = 0; m < B->mt; m++) {
ldbm = BLKLDD(B, m);
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
RUNTIME_data_migrate( sequence, B(m, p),
B->get_rankof( B, m, p ) );
MORSE_TASK_zunmlq(
&options,
side, trans,
tempmm, temppn, tempkmin, ib, TS->nb,
D( k, p), ldak,
TS(k, p), TS->mb,
B( m, p), ldbm);
tempmm, temppn, tempkmin, ib, T->nb,
D(k, p), ldak,
T(k, p), T->mb,
B(m, p), ldbm);
}
}
RUNTIME_iteration_pop(morse);
}
} else {
/*
* MorseRight / MorseConjTrans
*/
}
/*
* MorseRight / MorseConjTrans
*/
else {
for (k = 0; k < K; k++) {
RUNTIME_iteration_push(morse, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
ldak = BLKLDD(A, k);
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
......@@ -381,10 +399,10 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
MORSE_TASK_zunmlq(
&options,
side, trans,
tempmm, temppn, tempkmin, ib, TS->nb,
D( k, p), ldak,
TS(k, p), TS->mb,
B( m, p), ldbm);
tempmm, temppn, tempkmin, ib, T->nb,
D(k, p), ldak,
T(k, p), TS->mb,
B(m, p), ldbm);
}
}
/* Setting the order of tiles */
......@@ -398,32 +416,31 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
ldbp = BLKLDD(B, p);
if(qrtree->gettype(qrtree, k, n) == 0){
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
MORSE_TASK_ztsmlq(
&options,
side, trans,
tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb,
B( p, n), ldbp,
B( m, n), ldbm,
A( k, n), ldak,
TS(k, n), TS->mb);
}
L = 0;
T = TS;
}
else {
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
ldbm = BLKLDD(B, m);
MORSE_TASK_zttmlq(
&options,
side, trans,
tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb,
B( p, n), ldbp,
B( m, n), ldbm,