Commit b8a9bf6d authored by BOUCHERIE Raphael's avatar BOUCHERIE Raphael

last files for diag copy support

parent 5d03b644
......@@ -41,12 +41,11 @@
* Parallel tile BAND Tridiagonal Reduction - dynamic scheduler
**/
void morse_pzhetrd_he2hb(MORSE_enum uplo,
MORSE_desc_t *A, MORSE_desc_t *T,
MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
MORSE_desc_t *E = NULL;
MORSE_desc_t *D = NULL;
MORSE_desc_t *AT = NULL;
size_t ws_worker = 0;
......@@ -90,12 +89,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* Copy of the extra-diagonal to generate more parallelism by releasing anti-dependencies on UNMQR/TSMQR triangle conflict */
E = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*E, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
#endif
/* Copy of the diagonal tiles to keep the general version of the tile all along the computation */
D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q);
......@@ -451,10 +444,4 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
morse_desc_mat_free(AT);
free(AT);
#if defined(CHAMELEON_COPY_DIAG)
morse_desc_mat_free(E);
free(E);
#endif
(void)E;
}
......@@ -31,9 +31,9 @@
#define Q1(m,n) Q1, m, n
#define Q2(m,n) Q2, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#define D(k) D, k, 0
#else
#define DIAG(k) V1, k, k
#define D(k) V1, k, k
#endif
/***************************************************************************//**
......@@ -43,19 +43,19 @@ void morse_pztpgqrt( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_desc_t *D,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldvk, ldvm;
int ldqk, ldqm;
int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
int ib, minMT;
int ib;
/* Dimension of the first column */
int maxm = chameleon_max( Q2->m - L, 1 );
......@@ -68,13 +68,6 @@ void morse_pztpgqrt( int L,
RUNTIME_options_init(&options, morse, sequence, request);
ib = MORSE_IB;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
/*
* ztpmqrt = Q1->nb * ib
*/
......@@ -94,12 +87,6 @@ void morse_pztpgqrt( int L,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
#endif
for (k = V1->nt-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
......@@ -152,13 +139,13 @@ void morse_pztpgqrt( int L,
&options,
MorseLower, tempkm, tempkk, V1->nb,
V1(k, k), ldvk,
DIAG(k), ldvk );
D(k), ldvk );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempkm, tempkk,
0., 1.,
DIAG(k), ldvk );
D(k), ldvk );
#endif
#endif
for (n = k; n < Q1->nt; n++) {
......@@ -167,7 +154,7 @@ void morse_pztpgqrt( int L,
&options,
MorseLeft, MorseNoTrans,
tempkm, tempnn, tempkk, ib, T1->nb,
DIAG(k), ldvk,
D(k), ldvk,
T1(k, k), T1->mb,
Q1(k, n), ldqk);
}
......@@ -178,11 +165,4 @@ void morse_pztpgqrt( int L,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG; (void)minMT;
}
......@@ -334,6 +334,7 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
MORSE_desc_t descAB;
int N, NB, LDAB;
int status;
MORSE_desc_t D, *Dptr = NULL;
morse = morse_context_self();
if (morse == NULL) {
......@@ -387,9 +388,14 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
N = descA.m;
NB = descA.mb;
#if defined(CHAMELEON_COPY_DIAG)
{
morse_zdesc_alloc_diag(D, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
Dptr = &D;
}
#endif
/* Reduction to band. On exit, T contains reflectors */
morse_pzhetrd_he2hb( uplo, A, T,
morse_pzhetrd_he2hb( uplo, A, T, Dptr,
sequence, request );
LDAB = NB+1;
......@@ -419,7 +425,9 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed");
}
#endif /* !defined(CHAMELEON_SIMULATION) */
if (Dptr != NULL) {
morse_desc_mat_free(Dptr);
}
morse_desc_mat_free(&descAB);
return MORSE_SUCCESS;
}
......@@ -341,6 +341,7 @@ int MORSE_ztpgqrt_Tile_Async( int L,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
MORSE_desc_t D, *Dptr = NULL;
morse = morse_context_self();
if (morse == NULL) {
......@@ -395,15 +396,29 @@ int MORSE_ztpgqrt_Tile_Async( int L,
morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
#if defined(CHAMELEON_COPY_DIAG)
{
int minMT;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
morse_zdesc_alloc_diag(D, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
Dptr = &D;
}
#endif
/* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */
morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request );
morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request );
morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request );
morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, Dptr, sequence, request );
/* } */
/* else { */
/* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */
/* } */
if (Dptr != NULL) {
morse_desc_mat_free(Dptr);
}
return MORSE_SUCCESS;
}
......@@ -106,7 +106,7 @@ void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR
void morse_pzherk(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc_t *A, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzher2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
#endif
void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlag2c(MORSE_desc_t *A, MORSE_desc_t *SB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request);
......@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO
void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment