Commit b8a9bf6d authored by BOUCHERIE Raphael

last files for diag copy support

parent 5d03b644
...@@ -41,12 +41,11 @@ ...@@ -41,12 +41,11 @@
* Parallel tile BAND Tridiagonal Reduction - dynamic scheduler * Parallel tile BAND Tridiagonal Reduction - dynamic scheduler
**/ **/
void morse_pzhetrd_he2hb(MORSE_enum uplo, void morse_pzhetrd_he2hb(MORSE_enum uplo,
MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E,
MORSE_sequence_t *sequence, MORSE_request_t *request) MORSE_sequence_t *sequence, MORSE_request_t *request)
{ {
MORSE_context_t *morse; MORSE_context_t *morse;
MORSE_option_t options; MORSE_option_t options;
MORSE_desc_t *E = NULL;
MORSE_desc_t *D = NULL; MORSE_desc_t *D = NULL;
MORSE_desc_t *AT = NULL; MORSE_desc_t *AT = NULL;
size_t ws_worker = 0; size_t ws_worker = 0;
...@@ -90,12 +89,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, ...@@ -90,12 +89,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* Copy of the extra-diagonal to generate more parallelism by releasing anti-dependencies on UNMQR/TSMQR triangle conflict */
E = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*E, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
#endif
/* Copy of the diagonal tiles to keep the general version of the tile all along the computation */ /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */
D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q); morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q);
...@@ -451,10 +444,4 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, ...@@ -451,10 +444,4 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
morse_desc_mat_free(AT); morse_desc_mat_free(AT);
free(AT); free(AT);
#if defined(CHAMELEON_COPY_DIAG)
morse_desc_mat_free(E);
free(E);
#endif
(void)E;
} }
...@@ -31,9 +31,9 @@ ...@@ -31,9 +31,9 @@
#define Q1(m,n) Q1, m, n #define Q1(m,n) Q1, m, n
#define Q2(m,n) Q2, m, n #define Q2(m,n) Q2, m, n
#if defined(CHAMELEON_COPY_DIAG) #if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0 #define D(k) D, k, 0
#else #else
#define DIAG(k) V1, k, k #define D(k) V1, k, k
#endif #endif
/***************************************************************************//** /***************************************************************************//**
...@@ -43,19 +43,19 @@ void morse_pztpgqrt( int L, ...@@ -43,19 +43,19 @@ void morse_pztpgqrt( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_desc_t *D,
MORSE_sequence_t *sequence, MORSE_request_t *request ) MORSE_sequence_t *sequence, MORSE_request_t *request )
{ {
MORSE_context_t *morse; MORSE_context_t *morse;
MORSE_option_t options; MORSE_option_t options;
size_t ws_worker = 0; size_t ws_worker = 0;
size_t ws_host = 0; size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n; int k, m, n;
int ldvk, ldvm; int ldvk, ldvm;
int ldqk, ldqm; int ldqk, ldqm;
int tempkm, tempkn, tempkk, tempnn, tempmm, templm; int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
int ib, minMT; int ib;
/* Dimension of the first column */ /* Dimension of the first column */
int maxm = chameleon_max( Q2->m - L, 1 ); int maxm = chameleon_max( Q2->m - L, 1 );
...@@ -68,13 +68,6 @@ void morse_pztpgqrt( int L, ...@@ -68,13 +68,6 @@ void morse_pztpgqrt( int L,
RUNTIME_options_init(&options, morse, sequence, request); RUNTIME_options_init(&options, morse, sequence, request);
ib = MORSE_IB; ib = MORSE_IB;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
/* /*
* ztpmqrt = Q1->nb * ib * ztpmqrt = Q1->nb * ib
*/ */
...@@ -94,12 +87,6 @@ void morse_pztpgqrt( int L, ...@@ -94,12 +87,6 @@ void morse_pztpgqrt( int L,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
#endif
for (k = V1->nt-1; k >= 0; k--) { for (k = V1->nt-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k); RUNTIME_iteration_push(morse, k);
...@@ -152,13 +139,13 @@ void morse_pztpgqrt( int L, ...@@ -152,13 +139,13 @@ void morse_pztpgqrt( int L,
&options, &options,
MorseLower, tempkm, tempkk, V1->nb, MorseLower, tempkm, tempkk, V1->nb,
V1(k, k), ldvk, V1(k, k), ldvk,
DIAG(k), ldvk ); D(k), ldvk );
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseUpper, tempkm, tempkk, MorseUpper, tempkm, tempkk,
0., 1., 0., 1.,
DIAG(k), ldvk ); D(k), ldvk );
#endif #endif
#endif #endif
for (n = k; n < Q1->nt; n++) { for (n = k; n < Q1->nt; n++) {
...@@ -167,7 +154,7 @@ void morse_pztpgqrt( int L, ...@@ -167,7 +154,7 @@ void morse_pztpgqrt( int L,
&options, &options,
MorseLeft, MorseNoTrans, MorseLeft, MorseNoTrans,
tempkm, tempnn, tempkk, ib, T1->nb, tempkm, tempnn, tempkk, ib, T1->nb,
DIAG(k), ldvk, D(k), ldvk,
T1(k, k), T1->mb, T1(k, k), T1->mb,
Q1(k, n), ldqk); Q1(k, n), ldqk);
} }
...@@ -178,11 +165,4 @@ void morse_pztpgqrt( int L, ...@@ -178,11 +165,4 @@ void morse_pztpgqrt( int L,
RUNTIME_options_ws_free(&options); RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG; (void)minMT;
} }
...@@ -334,6 +334,7 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, ...@@ -334,6 +334,7 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
MORSE_desc_t descAB; MORSE_desc_t descAB;
int N, NB, LDAB; int N, NB, LDAB;
int status; int status;
MORSE_desc_t D, *Dptr = NULL;
morse = morse_context_self(); morse = morse_context_self();
if (morse == NULL) { if (morse == NULL) {
...@@ -387,9 +388,14 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, ...@@ -387,9 +388,14 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
N = descA.m; N = descA.m;
NB = descA.mb; NB = descA.mb;
#if defined(CHAMELEON_COPY_DIAG)
{
morse_zdesc_alloc_diag(D, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
Dptr = &D;
}
#endif
/* Reduction to band. On exit, T contains reflectors */ /* Reduction to band. On exit, T contains reflectors */
morse_pzhetrd_he2hb( uplo, A, T, morse_pzhetrd_he2hb( uplo, A, T, Dptr,
sequence, request ); sequence, request );
LDAB = NB+1; LDAB = NB+1;
...@@ -419,7 +425,9 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, ...@@ -419,7 +425,9 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed"); morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed");
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
if (Dptr != NULL) {
morse_desc_mat_free(Dptr);
}
morse_desc_mat_free(&descAB); morse_desc_mat_free(&descAB);
return MORSE_SUCCESS; return MORSE_SUCCESS;
} }
...@@ -341,6 +341,7 @@ int MORSE_ztpgqrt_Tile_Async( int L, ...@@ -341,6 +341,7 @@ int MORSE_ztpgqrt_Tile_Async( int L,
MORSE_sequence_t *sequence, MORSE_request_t *request ) MORSE_sequence_t *sequence, MORSE_request_t *request )
{ {
MORSE_context_t *morse; MORSE_context_t *morse;
MORSE_desc_t D, *Dptr = NULL;
morse = morse_context_self(); morse = morse_context_self();
if (morse == NULL) { if (morse == NULL) {
...@@ -395,15 +396,29 @@ int MORSE_ztpgqrt_Tile_Async( int L, ...@@ -395,15 +396,29 @@ int MORSE_ztpgqrt_Tile_Async( int L,
morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles"); morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
} }
#if defined(CHAMELEON_COPY_DIAG)
{
int minMT;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
morse_zdesc_alloc_diag(D, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
Dptr = &D;
}
#endif
/* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */ /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */
morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request ); morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request );
morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request ); morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request );
morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request ); morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, Dptr, sequence, request );
/* } */ /* } */
/* else { */ /* else { */
/* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */ /* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */
/* } */ /* } */
if (Dptr != NULL) {
morse_desc_mat_free(Dptr);
}
return MORSE_SUCCESS; return MORSE_SUCCESS;
} }
...@@ -106,7 +106,7 @@ void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR ...@@ -106,7 +106,7 @@ void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR
void morse_pzherk(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc_t *A, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzherk(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc_t *A, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzher2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzher2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
#endif #endif
void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlag2c(MORSE_desc_t *A, MORSE_desc_t *SB, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlag2c(MORSE_desc_t *A, MORSE_desc_t *SB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request);
...@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO ...@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO
void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment