diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c index 2173faa09def0a8a31440baf84f563dbce432f6f..2230bd0c20244c7d7eb397670cfab357d464fab4 100644 --- a/compute/pzhetrd_he2hb.c +++ b/compute/pzhetrd_he2hb.c @@ -41,12 +41,11 @@ * Parallel tile BAND Tridiagonal Reduction - dynamic scheduler **/ void morse_pzhetrd_he2hb(MORSE_enum uplo, - MORSE_desc_t *A, MORSE_desc_t *T, + MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; - MORSE_desc_t *E = NULL; MORSE_desc_t *D = NULL; MORSE_desc_t *AT = NULL; size_t ws_worker = 0; @@ -90,12 +89,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* Copy of the extra-diagonal to generate more parallelism by releasing anti-dependencies on UNMQR/TSMQR triangle conflict */ - E = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*E, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); -#endif - /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */ D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q); @@ -451,10 +444,4 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, morse_desc_mat_free(AT); free(AT); - -#if defined(CHAMELEON_COPY_DIAG) - morse_desc_mat_free(E); - free(E); -#endif - (void)E; } diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c index 4f8b5e8b0d09ec1489912fd59518b51782511539..27f2c17018934fee64173c65c5a2dd357d2151f8 100644 --- a/compute/pztpgqrt.c +++ b/compute/pztpgqrt.c @@ -31,9 +31,9 @@ #define Q1(m,n) Q1, m, n #define Q2(m,n) Q2, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(k) D, k, 0 #else -#define DIAG(k) V1, k, k +#define D(k) V1, k, k 
#endif /***************************************************************************//** @@ -43,19 +43,19 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, + MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request ) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldvk, ldvm; int ldqk, ldqm; int tempkm, tempkn, tempkk, tempnn, tempmm, templm; - int ib, minMT; + int ib; /* Dimension of the first column */ int maxm = chameleon_max( Q2->m - L, 1 ); @@ -68,13 +68,6 @@ void morse_pztpgqrt( int L, RUNTIME_options_init(&options, morse, sequence, request); ib = MORSE_IB; - - if (V1->m > V1->n) { - minMT = V1->nt; - } else { - minMT = V1->mt; - } - /* * ztpmqrt = Q1->nb * ib */ @@ -94,12 +87,6 @@ void morse_pztpgqrt( int L, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q); -#endif - for (k = V1->nt-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -152,13 +139,13 @@ void morse_pztpgqrt( int L, &options, MorseLower, tempkm, tempkk, V1->nb, V1(k, k), ldvk, - DIAG(k), ldvk ); + D(k), ldvk ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, MorseUpper, tempkm, tempkk, 0., 1., - DIAG(k), ldvk ); + D(k), ldvk ); #endif #endif for (n = k; n < Q1->nt; n++) { @@ -167,7 +154,7 @@ void morse_pztpgqrt( int L, &options, MorseLeft, MorseNoTrans, tempkm, tempnn, tempkk, ib, T1->nb, - DIAG(k), ldvk, + D(k), ldvk, T1(k, k), T1->mb, Q1(k, n), ldqk); } @@ -178,11 +165,4 @@ void morse_pztpgqrt( int L, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - 
-#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; (void)minMT; } diff --git a/compute/zhetrd.c b/compute/zhetrd.c index b74a90afc880a8a8f044cb0d421e727077e4db53..e3a6179cc2d1f430bdca9ff15308364c7ea27297 100644 --- a/compute/zhetrd.c +++ b/compute/zhetrd.c @@ -334,6 +334,7 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, MORSE_desc_t descAB; int N, NB, LDAB; int status; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -387,9 +388,14 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, N = descA.m; NB = descA.mb; - +#if defined(CHAMELEON_COPY_DIAG) + { + morse_zdesc_alloc_diag(D, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q); + Dptr = &D; + } +#endif /* Reduction to band. On exit, T contains reflectors */ - morse_pzhetrd_he2hb( uplo, A, T, + morse_pzhetrd_he2hb( uplo, A, T, Dptr, sequence, request ); LDAB = NB+1; @@ -419,7 +425,9 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed"); } #endif /* !defined(CHAMELEON_SIMULATION) */ - + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } morse_desc_mat_free(&descAB); return MORSE_SUCCESS; } diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c index 1cdab39d879bfe7d19355e55a7e60e33a8af37fe..3943a31d8b13e452e163789721c1a7017a1e59f7 100644 --- a/compute/ztpgqrt.c +++ b/compute/ztpgqrt.c @@ -341,6 +341,7 @@ int MORSE_ztpgqrt_Tile_Async( int L, MORSE_sequence_t *sequence, MORSE_request_t *request ) { MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; morse = morse_context_self(); if (morse == NULL) { @@ -395,15 +396,31 @@ int MORSE_ztpgqrt_Tile_Async( int L, morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } +#if defined(CHAMELEON_COPY_DIAG) + { + int minMT; + if (V1->m > V1->n) { + minMT = V1->nt; + 
} else { + minMT = V1->mt; + } + morse_zdesc_alloc_diag(D, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q); + Dptr = &D; + } +#endif /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */ morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request ); morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request ); - morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request ); + morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, Dptr, sequence, request ); /* } */ /* else { */ /* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */ /* } */ - + if (Dptr != NULL) { + /* Tasks submitted by morse_pztpgqrt may still use D: wait for the sequence before freeing it */ + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(Dptr); + } return MORSE_SUCCESS; } diff --git a/control/compute_z.h b/control/compute_z.h index a97a78c97198cb5c3750b73087387fd7a6a6a802..4327e583f51dece572a44db85378ee9ff7bea9ee 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -106,7 +106,7 @@ void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR void morse_pzherk(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc_t *A, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzher2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); #endif -void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlag2c(MORSE_desc_t *A, MORSE_desc_t *SB, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request); @@ -134,7 +134,7 
@@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request ); +void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);