Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b8a9bf6d authored by BOUCHERIE Raphael
Browse files

last files for diag copy support

parent 5d03b644
No related branches found
No related tags found
1 merge request: !54 "Diagonal copy support"
...@@ -41,12 +41,11 @@ ...@@ -41,12 +41,11 @@
* Parallel tile BAND Tridiagonal Reduction - dynamic scheduler * Parallel tile BAND Tridiagonal Reduction - dynamic scheduler
**/ **/
void morse_pzhetrd_he2hb(MORSE_enum uplo, void morse_pzhetrd_he2hb(MORSE_enum uplo,
MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E,
MORSE_sequence_t *sequence, MORSE_request_t *request) MORSE_sequence_t *sequence, MORSE_request_t *request)
{ {
MORSE_context_t *morse; MORSE_context_t *morse;
MORSE_option_t options; MORSE_option_t options;
MORSE_desc_t *E = NULL;
MORSE_desc_t *D = NULL; MORSE_desc_t *D = NULL;
MORSE_desc_t *AT = NULL; MORSE_desc_t *AT = NULL;
size_t ws_worker = 0; size_t ws_worker = 0;
...@@ -90,12 +89,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, ...@@ -90,12 +89,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* Copy of the extra-diagonal to generate more parallelism by releasing anti-dependencies on UNMQR/TSMQR triangle conflict */
E = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*E, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
#endif
/* Copy of the diagonal tiles to keep the general version of the tile all along the computation */ /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */
D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q); morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q);
...@@ -451,10 +444,4 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, ...@@ -451,10 +444,4 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
morse_desc_mat_free(AT); morse_desc_mat_free(AT);
free(AT); free(AT);
#if defined(CHAMELEON_COPY_DIAG)
morse_desc_mat_free(E);
free(E);
#endif
(void)E;
} }
...@@ -31,9 +31,9 @@ ...@@ -31,9 +31,9 @@
#define Q1(m,n) Q1, m, n #define Q1(m,n) Q1, m, n
#define Q2(m,n) Q2, m, n #define Q2(m,n) Q2, m, n
#if defined(CHAMELEON_COPY_DIAG) #if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0 #define D(k) D, k, 0
#else #else
#define DIAG(k) V1, k, k #define D(k) V1, k, k
#endif #endif
/***************************************************************************//** /***************************************************************************//**
...@@ -43,19 +43,19 @@ void morse_pztpgqrt( int L, ...@@ -43,19 +43,19 @@ void morse_pztpgqrt( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_desc_t *D,
MORSE_sequence_t *sequence, MORSE_request_t *request ) MORSE_sequence_t *sequence, MORSE_request_t *request )
{ {
MORSE_context_t *morse; MORSE_context_t *morse;
MORSE_option_t options; MORSE_option_t options;
size_t ws_worker = 0; size_t ws_worker = 0;
size_t ws_host = 0; size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n; int k, m, n;
int ldvk, ldvm; int ldvk, ldvm;
int ldqk, ldqm; int ldqk, ldqm;
int tempkm, tempkn, tempkk, tempnn, tempmm, templm; int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
int ib, minMT; int ib;
/* Dimension of the first column */ /* Dimension of the first column */
int maxm = chameleon_max( Q2->m - L, 1 ); int maxm = chameleon_max( Q2->m - L, 1 );
...@@ -68,13 +68,6 @@ void morse_pztpgqrt( int L, ...@@ -68,13 +68,6 @@ void morse_pztpgqrt( int L,
RUNTIME_options_init(&options, morse, sequence, request); RUNTIME_options_init(&options, morse, sequence, request);
ib = MORSE_IB; ib = MORSE_IB;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
/* /*
* ztpmqrt = Q1->nb * ib * ztpmqrt = Q1->nb * ib
*/ */
...@@ -94,12 +87,6 @@ void morse_pztpgqrt( int L, ...@@ -94,12 +87,6 @@ void morse_pztpgqrt( int L,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
#endif
for (k = V1->nt-1; k >= 0; k--) { for (k = V1->nt-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k); RUNTIME_iteration_push(morse, k);
...@@ -152,13 +139,13 @@ void morse_pztpgqrt( int L, ...@@ -152,13 +139,13 @@ void morse_pztpgqrt( int L,
&options, &options,
MorseLower, tempkm, tempkk, V1->nb, MorseLower, tempkm, tempkk, V1->nb,
V1(k, k), ldvk, V1(k, k), ldvk,
DIAG(k), ldvk ); D(k), ldvk );
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseUpper, tempkm, tempkk, MorseUpper, tempkm, tempkk,
0., 1., 0., 1.,
DIAG(k), ldvk ); D(k), ldvk );
#endif #endif
#endif #endif
for (n = k; n < Q1->nt; n++) { for (n = k; n < Q1->nt; n++) {
...@@ -167,7 +154,7 @@ void morse_pztpgqrt( int L, ...@@ -167,7 +154,7 @@ void morse_pztpgqrt( int L,
&options, &options,
MorseLeft, MorseNoTrans, MorseLeft, MorseNoTrans,
tempkm, tempnn, tempkk, ib, T1->nb, tempkm, tempnn, tempkk, ib, T1->nb,
DIAG(k), ldvk, D(k), ldvk,
T1(k, k), T1->mb, T1(k, k), T1->mb,
Q1(k, n), ldqk); Q1(k, n), ldqk);
} }
...@@ -178,11 +165,4 @@ void morse_pztpgqrt( int L, ...@@ -178,11 +165,4 @@ void morse_pztpgqrt( int L,
RUNTIME_options_ws_free(&options); RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG; (void)minMT;
} }
...@@ -334,6 +334,7 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, ...@@ -334,6 +334,7 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
MORSE_desc_t descAB; MORSE_desc_t descAB;
int N, NB, LDAB; int N, NB, LDAB;
int status; int status;
MORSE_desc_t D, *Dptr = NULL;
morse = morse_context_self(); morse = morse_context_self();
if (morse == NULL) { if (morse == NULL) {
...@@ -387,9 +388,14 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, ...@@ -387,9 +388,14 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
N = descA.m; N = descA.m;
NB = descA.mb; NB = descA.mb;
#if defined(CHAMELEON_COPY_DIAG)
{
morse_zdesc_alloc_diag(D, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
Dptr = &D;
}
#endif
/* Reduction to band. On exit, T contains reflectors */ /* Reduction to band. On exit, T contains reflectors */
morse_pzhetrd_he2hb( uplo, A, T, morse_pzhetrd_he2hb( uplo, A, T, Dptr,
sequence, request ); sequence, request );
LDAB = NB+1; LDAB = NB+1;
...@@ -419,7 +425,9 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, ...@@ -419,7 +425,9 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed"); morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed");
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
if (Dptr != NULL) {
morse_desc_mat_free(Dptr);
}
morse_desc_mat_free(&descAB); morse_desc_mat_free(&descAB);
return MORSE_SUCCESS; return MORSE_SUCCESS;
} }
...@@ -341,6 +341,7 @@ int MORSE_ztpgqrt_Tile_Async( int L, ...@@ -341,6 +341,7 @@ int MORSE_ztpgqrt_Tile_Async( int L,
MORSE_sequence_t *sequence, MORSE_request_t *request ) MORSE_sequence_t *sequence, MORSE_request_t *request )
{ {
MORSE_context_t *morse; MORSE_context_t *morse;
MORSE_desc_t D, *Dptr = NULL;
morse = morse_context_self(); morse = morse_context_self();
if (morse == NULL) { if (morse == NULL) {
...@@ -395,15 +396,29 @@ int MORSE_ztpgqrt_Tile_Async( int L, ...@@ -395,15 +396,29 @@ int MORSE_ztpgqrt_Tile_Async( int L,
morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles"); morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
} }
#if defined(CHAMELEON_COPY_DIAG)
{
int minMT;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
morse_zdesc_alloc_diag(D, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
Dptr = &D;
}
#endif
/* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */ /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */
morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request ); morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request );
morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request ); morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request );
morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request ); morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, Dptr, sequence, request );
/* } */ /* } */
/* else { */ /* else { */
/* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */ /* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */
/* } */ /* } */
if (Dptr != NULL) {
morse_desc_mat_free(Dptr);
}
return MORSE_SUCCESS; return MORSE_SUCCESS;
} }
...@@ -106,7 +106,7 @@ void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR ...@@ -106,7 +106,7 @@ void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR
void morse_pzherk(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc_t *A, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzherk(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc_t *A, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzher2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzher2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
#endif #endif
void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlag2c(MORSE_desc_t *A, MORSE_desc_t *SB, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlag2c(MORSE_desc_t *A, MORSE_desc_t *SB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request);
...@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO ...@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO
void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment