diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c index d85123145364e34e8b41bcb179b2f78aaa085b87..2bebdfea40f415741c912833ab7c8ae9a1bd7dc6 100644 --- a/compute/pzgelqf_param.c +++ b/compute/pzgelqf_param.c @@ -37,14 +37,14 @@ /** * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling */ -void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int K; @@ -90,14 +90,6 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); - } -#endif - K = chameleon_min(A->mt, A->nt); /* The number of the factorization */ @@ -208,11 +200,5 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c index 4a2e3da006b7154e804406ace7ff777da582bc13..2dbe8f83a54c4606c9d68150422a6bae95785f66 100644 --- a/compute/pzunglq_param.c +++ b/compute/pzunglq_param.c @@ -38,14 +38,13 @@ * Parallel construction of Q using tile V - dynamic scheduling */ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, - MORSE_desc_t *TS, MORSE_desc_t *TT, - MORSE_sequence_t *sequence, MORSE_request_t *request) + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int K; @@ -89,12 +88,6 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des K = chameleon_min(A->mt, A->nt); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->mb, A->nb, 0, 0, K*A->mb, A->nb, A->p, A->q); -#endif - for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -178,11 +171,5 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c index 3bcec90f391e6024ffab81cb2ac2dcbf6a09b2f5..92a9003244d61ee99a5ca9b46b4d5051f4192bb2 100644 --- a/compute/pzunmlq_param.c +++ b/compute/pzunmlq_param.c @@ -39,14 +39,14 @@ */ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_desc_t *A, MORSE_desc_t *B, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int ldbm, ldak, ldbp; @@ -88,12 +88,6 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->mb, A->nb, 0, 0, K*A->mb, A->nb, A->p, A->q); -#endif - if (side == MorseLeft ) { if (trans == MorseNoTrans) { /* @@ -440,11 +434,5 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c index 4b893047a5cf81770ae47be7d7ecb452f140d10e..5d07b6eed45be7c9d34abca44597d0b04e083a15 100644 --- a/compute/zgelqf_param.c +++ b/compute/zgelqf_param.c @@ -238,6 +238,7 @@ int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -281,7 +282,13 @@ int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ - morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request); - +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgelqf_param(qrtree, A, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzgelqf_param(qrtree, A, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c index 71550a71bf9c987c1fdea6174c5a1b1a600455ad..581d741390d84f63abb2683be65456a2bf5baf99 100644 --- a/compute/zgelqs_param.c +++ b/compute/zgelqs_param.c @@ -270,6 +270,7 @@ int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *subB; MORSE_desc_t *subA; MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -324,12 +325,18 @@ int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); - morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); free(subA); free(subB); - morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); - +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgels_param.c b/compute/zgels_param.c index de1a1b80f7da8aeda0d20d819d00df96a2e3c3ee..92f63d817ba370243ed73e57a067572c9f4b705e 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -393,37 +393,45 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, if (A->m >= A->n) { #if defined(CHAMELEON_COPY_DIAG) - morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); - morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request); - morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); - morse_desc_mat_free(&D); + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); #else - morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request); - morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); + morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); #endif - subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); - subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); - morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); - free(subA); - free(subB); + subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); } else { - /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); - morse_pztile_zero(subB, sequence, request); - free(subB); */ - - morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request); +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgelqf_param(qrtree, A, TS, TT, &D, sequence, request); subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); - morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); free(subA); free(subB); + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzgelqf_param(qrtree, A, TS, TT, NULL, sequence, request); + subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); +#endif - morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); - //morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request); } + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c index 24294d56b6dac3ff37d39c0a87e7a280df5de39c..c0e13b6df33849e7fe2f91c5bee1bd8b2adc7622 100644 --- a/compute/zunglq_param.c +++ b/compute/zunglq_param.c @@ -254,6 +254,7 @@ int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -302,7 +303,15 @@ int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); - morse_pzunglq_param(qrtree, A, Q, TS, TT, sequence, request); + morse_pzunglq_param(qrtree, A, Q, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); + morse_pzunglq_param(qrtree, A, Q, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c index d009618274fbabe31594063f9a9cf9f23ec78c99..4aac83e45b3d83f8c67bbea2b8a70dc5834ba1a3 100644 --- a/compute/zunmlq_param.c +++ b/compute/zunmlq_param.c @@ -310,6 +310,7 @@ int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -364,7 +365,14 @@ int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, if (chameleon_min(M, chameleon_min(N, K)) == 0) return MORSE_SUCCESS; */ - morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, sequence, request); +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/testing/testing_zgels_systolic.c b/testing/testing_zgels_systolic.c index ef9db84d505a942eaa33acd1a92b2b0582402db6..fce16bfc8fa637f8616d1da1b2475345567ca569 100644 --- a/testing/testing_zgels_systolic.c +++ b/testing/testing_zgels_systolic.c @@ -218,10 +218,10 @@ int testing_zgels_systolic(int argc, char **argv) /* Morse routines */ MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); - MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); - // MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); - //MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); - MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB); + //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); + //MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB); /* Check the orthogonality, factorization and the solution */ info_ortho = check_orthogonality(M, N, LDA, Q, eps); @@ -284,10 +284,10 @@ int testing_zgels_systolic(int argc, char **argv) MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); MORSE_ztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, M, NRHS, 1.0, A2, LDA, B2, LDB); - //MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); - MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); - //MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB); - MORSE_zunmlq(MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, B2, LDB); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB); + //MORSE_zunmlq(MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, B2, LDB); } /* Check the orthogonality, factorization and the solution */