Attention : une mise à jour du serveur sera effectuée le vendredi 16 avril entre 12h et 12h30. Cette mise à jour entraînera une interruption du service de quelques minutes.

Commit f14b1e77 authored by Mathieu Faverge

Add migration and switch to TP kernels in QRF algorithms

parent d2f214d3
......@@ -33,9 +33,9 @@
#define A(m,n) A, m, n
#define T(m,n) T, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define D(k) D, k, 0
#define D(k) D, k, 0
#else
#define D(k) A, k, k
#define D(k) D, k, k
#endif
/*******************************************************************************
......@@ -62,6 +62,10 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D,
ib = MORSE_IB;
if ( D == NULL ) {
D = A;
}
/*
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
......@@ -122,28 +126,44 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D,
T(k, k), T->mb,
A(k, n), ldak);
}
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
MORSE_TASK_ztsqrt(
RUNTIME_data_migrate( sequence, A(k, k),
A->get_rankof( A, m, k ) );
MORSE_TASK_ztpqrt(
&options,
tempmm, tempkn, ib, T->nb,
tempmm, tempkn, 0, ib, T->nb,
A(k, k), ldak,
A(m, k), ldam,
T(m, k), T->mb);
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_ztsmqr(
RUNTIME_data_migrate( sequence, A(k, n),
A->get_rankof( A, m, n ) );
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseConjTrans,
A->mb, tempnn, tempmm, tempnn, A->nb, ib, T->nb,
A(k, n), ldak,
A(m, n), ldam,
tempmm, tempnn, A->nb, 0, ib, T->nb,
A(m, k), ldam,
T(m, k), T->mb);
T(m, k), T->mb,
A(k, n), ldak,
A(m, n), ldam);
}
}
/* Restore the original location of the tiles */
for (n = k; n < A->nt; n++) {
RUNTIME_data_migrate( sequence, A(k, n),
A->get_rankof( A, k, n ) );
}
RUNTIME_iteration_pop(morse);
}
......
......@@ -26,14 +26,9 @@
#include <stdlib.h>
#include "libhqr.h"
#define A(m,n) A, (m), (n)
#define TS(m,n) TS, (m), (n)
#define TT(m,n) TT, (m), (n)
#if defined(CHAMELEON_COPY_DIAG)
#define D(m,n) D, (m), (n)
#else
#define D(m,n) A, (m), (n)
#endif
#define A(m,n) A, (m), (n)
#define T(m,n) T, (m), (n)
#define D(m,n) D, (m), (n)
/**
* Parallel tile QR factorization (reduction Householder) - dynamic scheduling
......@@ -44,11 +39,12 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A,
{
MORSE_context_t *morse;
MORSE_option_t options;
MORSE_desc_t *T;
size_t ws_worker = 0;
size_t ws_host = 0;
int k, m, n, i, p;
int K;
int K, L;
int ldap, ldam;
int tempkmin, tempkn, tempnn, tempmm;
int ib;
......@@ -61,6 +57,10 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A,
ib = MORSE_IB;
if ( D == NULL ) {
D = A;
}
/*
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
......@@ -81,8 +81,7 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A,
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
/* Initialisation of tiles */
/* Initialisation of temporary tiles array */
tiles = (int*)calloc(qrtree->mt, sizeof(int));
ws_worker *= sizeof(MORSE_Complex64_t);
......@@ -104,11 +103,13 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A,
tempkmin = chameleon_min(tempmm, tempkn);
ldam = BLKLDD(A, m);
T = TS;
MORSE_TASK_zgeqrt(
&options,
tempmm, tempkn, ib, TS->nb,
A( m, k), ldam,
TS(m, k), TS->mb);
tempmm, tempkn, ib, T->nb,
A(m, k), ldam,
T(m, k), T->mb);
if ( k < (A->nt-1) ) {
#if defined(CHAMELEON_COPY_DIAG)
MORSE_TASK_zlacpy(
......@@ -130,10 +131,10 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A,
MORSE_TASK_zunmqr(
&options,
MorseLeft, MorseConjTrans,
tempmm, tempnn, tempkmin, ib, TS->nb,
D( m, k), ldam,
TS(m, k), TS->mb,
A( m, n), ldam);
tempmm, tempnn, tempkmin, ib, T->nb,
D(m, k), ldam,
T(m, k), T->mb,
A(m, n), ldam);
}
}
......@@ -149,54 +150,56 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A,
ldam = BLKLDD(A, m);
/* Tiles killed is a TS */
if(qrtree->gettype(qrtree, k, m) == 0){
MORSE_TASK_ztsqrt(
&options,
tempmm, tempkn, ib, TS->nb,
A( p, k), ldap,
A( m, k), ldam,
TS(m, k), TS->mb);
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_ztsmqr(
&options,
MorseLeft, MorseConjTrans,
A->nb, tempnn, tempmm, tempnn, A->nb, ib, TS->nb,
A( p, n), ldap,
A( m, n), ldam,
A( m, k), ldam,
TS(m, k), TS->mb);
}
if (qrtree->gettype(qrtree, k, m) == 0) {
T = TS;
L = 0;
}
/* Tiles killed is a TT */
else {
MORSE_TASK_zttqrt(
T = TT;
L = tempmm;
}
RUNTIME_data_migrate( sequence, A(p, k),
A->get_rankof( A, m, k ) );
RUNTIME_data_migrate( sequence, A(m, k),
A->get_rankof( A, m, k ) );
MORSE_TASK_ztpqrt(
&options,
tempmm, tempkn, L, ib, T->nb,
A(p, k), ldap,
A(m, k), ldam,
T(m, k), T->mb);
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
RUNTIME_data_migrate( sequence, A(p, n),
A->get_rankof( A, m, n ) );
RUNTIME_data_migrate( sequence, A(m, n),
A->get_rankof( A, m, n ) );
MORSE_TASK_ztpmqrt(
&options,
tempmm, tempkn, ib, TT->nb,
A( p, k), ldap,
A( m, k), ldam,
TT(m, k), TT->mb);
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_zttmqr(
&options,
MorseLeft, MorseConjTrans,
A->mb, tempnn, tempmm, tempnn, A->nb, ib, TT->nb,
A( p, n), ldap,
A( m, n), ldam,
A( m, k), ldam,
TT(m, k), TT->mb);
}
MorseLeft, MorseConjTrans,
tempmm, tempnn, A->nb, L, ib, T->nb,
A(m, k), ldam,
T(m, k), T->mb,
A(p, n), ldap,
A(m, n), ldam);
}
}
/* Restore the original location of the tiles */
for (n = k; n < A->nt; n++) {
RUNTIME_data_migrate( sequence, A(k, n),
A->get_rankof( A, k, n ) );
}
RUNTIME_iteration_pop(morse);
}
free(tiles);
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
(void)D;
}
......@@ -98,6 +98,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS,
tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
tempkmin = chameleon_min(tempMm, tempkn);
ldaM = BLKLDD(A, M);
MORSE_TASK_zgeqrt(
&options,
tempMm, tempkn, ib, T->nb,
......@@ -129,26 +130,35 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS,
T(M, k), T->mb,
A(M, n), ldaM);
}
for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
MORSE_TASK_ztsqrt(
RUNTIME_data_migrate( sequence, A(M, k),
A->get_rankof( A, m, k ) );
MORSE_TASK_ztpqrt(
&options,
tempmm, tempkn, ib, T->nb,
tempmm, tempkn, 0, ib, T->nb,
A(M, k), ldaM,
A(m, k), ldam,
T(m, k), T->mb);
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_ztsmqr(
RUNTIME_data_migrate( sequence, A(M, n),
A->get_rankof( A, m, n ) );
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseConjTrans,
A->nb, tempnn, tempmm, tempnn, A->nb, ib, T->nb,
A(M, n), ldaM,
A(m, n), ldam,
tempmm, tempnn, A->nb, 0, ib, T->nb,
A(m, k), ldam,
T(m, k), T->mb);
T(m, k), T->mb,
A(M, n), ldaM,
A(m, n), ldam);
}
}
}
......@@ -157,26 +167,45 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS,
tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
ldaM = BLKLDD(A, M );
ldaMRD = BLKLDD(A, M+RD);
MORSE_TASK_zttqrt(
RUNTIME_data_migrate( sequence, A(M, k),
A->get_rankof( A, M+RD, k ) );
RUNTIME_data_migrate( sequence, A(M+RD, k),
A->get_rankof( A, M+RD, k ) );
MORSE_TASK_ztpqrt(
&options,
tempMRDm, tempkn, ib, T->nb,
tempMRDm, tempkn, tempMRDm, ib, T->nb,
A (M , k), ldaM,
A (M+RD, k), ldaMRD,
T2(M+RD, k), T->mb);
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_zttmqr(
RUNTIME_data_migrate( sequence, A(M, n),
A->get_rankof( A, M+RD, n ) );
RUNTIME_data_migrate( sequence, A(M+RD, n),
A->get_rankof( A, M+RD, n ) );
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseConjTrans,
A->nb, tempnn, tempMRDm, tempnn, A->nb, ib, T->nb,
A (M, n), ldaM,
A (M+RD, n), ldaMRD,
tempMRDm, tempnn, A->nb, tempMRDm, ib, T->nb,
A (M+RD, k), ldaMRD,
T2(M+RD, k), T->mb);
T2(M+RD, k), T->mb,
A (M, n), ldaM,
A (M+RD, n), ldaMRD);
}
}
}
/* Restore the original location of the tiles */
for (n = k; n < A->nt; n++) {
RUNTIME_data_migrate( sequence, A(k, n),
A->get_rankof( A, k, n ) );
}
RUNTIME_iteration_pop(morse);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment