From 624de04738c4f63ca7f8f50ef423f053b36fc5eb Mon Sep 17 00:00:00 2001 From: Raphael Boucherie <raphael.boucherie@inria.fr> Date: Wed, 14 Jun 2017 13:52:38 +0200 Subject: [PATCH] 1 of 3 tests works --- compute/pzgelqf_param.c | 18 ++----------- compute/pzunglq_param.c | 17 ++---------- compute/pzunmlq_param.c | 16 ++--------- compute/zgelqf_param.c | 11 ++++++-- compute/zgelqs_param.c | 13 ++++++--- compute/zgels_param.c | 46 +++++++++++++++++++------------- compute/zunglq_param.c | 11 +++++++- compute/zunmlq_param.c | 10 ++++++- testing/testing_zgels_systolic.c | 16 +++++------ 9 files changed, 79 insertions(+), 79 deletions(-) diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c index d85123145..2bebdfea4 100644 --- a/compute/pzgelqf_param.c +++ b/compute/pzgelqf_param.c @@ -37,14 +37,14 @@ /** * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling */ -void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int K; @@ -90,14 +90,6 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); - } -#endif - K = chameleon_min(A->mt, A->nt); /* The number of the factorization */ @@ -208,11 +200,5 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c index 4a2e3da00..2dbe8f83a 100644 --- a/compute/pzunglq_param.c +++ b/compute/pzunglq_param.c @@ -38,14 +38,13 @@ * Parallel construction of Q using tile V - dynamic scheduling */ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, - MORSE_desc_t *TS, MORSE_desc_t *TT, - MORSE_sequence_t *sequence, MORSE_request_t *request) + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int K; @@ -89,12 +88,6 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des K = chameleon_min(A->mt, A->nt); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->mb, A->nb, 0, 0, K*A->mb, A->nb, A->p, A->q); -#endif - for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -178,11 +171,5 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c index 3bcec90f3..92a900324 100644 --- a/compute/pzunmlq_param.c +++ b/compute/pzunmlq_param.c @@ -39,14 +39,14 @@ */ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_desc_t *A, MORSE_desc_t *B, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int ldbm, ldak, ldbp; @@ -88,12 +88,6 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->mb, A->nb, 0, 0, K*A->mb, A->nb, A->p, A->q); -#endif - if (side == MorseLeft ) { if (trans == MorseNoTrans) { /* @@ -440,11 +434,5 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c index 4b893047a..5d07b6eed 100644 --- a/compute/zgelqf_param.c +++ b/compute/zgelqf_param.c @@ -238,6 +238,7 @@ int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -281,7 +282,13 @@ int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ - morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request); - +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgelqf_param(qrtree, A, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzgelqf_param(qrtree, A, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c index 71550a71b..581d74139 100644 --- a/compute/zgelqs_param.c +++ b/compute/zgelqs_param.c @@ -270,6 +270,7 @@ int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *subB; MORSE_desc_t *subA; MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -324,12 +325,18 @@ int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); - morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); free(subA); free(subB); - morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); - +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgels_param.c b/compute/zgels_param.c index de1a1b80f..92f63d817 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -393,37 +393,45 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, if (A->m >= A->n) { #if defined(CHAMELEON_COPY_DIAG) - morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); - morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request); - morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); - morse_desc_mat_free(&D); + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); #else - morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request); - morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); + morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); #endif - subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); - subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); - morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); - free(subA); - free(subB); + subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); } else { - /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); - morse_pztile_zero(subB, sequence, request); - free(subB); */ - - morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request); +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgelqf_param(qrtree, A, TS, TT, &D, sequence, request); subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); - morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); free(subA); free(subB); + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzgelqf_param(qrtree, A, TS, TT, NULL, sequence, request); + subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); +#endif - morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); - //morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request); } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c index 24294d56b..c0e13b6df 100644 --- a/compute/zunglq_param.c +++ b/compute/zunglq_param.c @@ -254,6 +254,7 @@ int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -302,7 +303,15 @@ int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); - morse_pzunglq_param(qrtree, A, Q, TS, TT, sequence, request); + morse_pzunglq_param(qrtree, A, Q, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); + morse_pzunglq_param(qrtree, A, Q, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c index d00961827..4aac83e45 100644 --- a/compute/zunmlq_param.c +++ b/compute/zunmlq_param.c @@ -310,6 +310,7 @@ int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -364,7 +365,14 @@ int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; */ - morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, sequence, request); +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/testing/testing_zgels_systolic.c b/testing/testing_zgels_systolic.c index ef9db84d5..fce16bfc8 100644 --- a/testing/testing_zgels_systolic.c +++ b/testing/testing_zgels_systolic.c @@ -218,10 +218,10 @@ int testing_zgels_systolic(int argc, char **argv) /* Morse routines */ MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); - MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); - // MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); - //MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); - MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB); + //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); + //MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB); /* Check the orthogonality, factorization and the solution */ info_ortho = check_orthogonality(M, N, LDA, Q, eps); @@ -284,10 +284,10 @@ int testing_zgels_systolic(int argc, char **argv) MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); MORSE_ztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, M, NRHS, 1.0, A2, LDA, B2, LDB); - //MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); - MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); - //MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB); - MORSE_zunmlq(MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, B2, LDB); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB); + //MORSE_zunmlq(MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, B2, LDB); } /* Check the orthogonality, factorization and the solution */ -- GitLab