diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c index 360a2d408f08fab411be9497289a9cf1428b1129..44fead1c4b9b1a99a46e37985bb7e24240f5e5da 100644 --- a/compute/pzgeqrf_param.c +++ b/compute/pzgeqrf_param.c @@ -42,15 +42,15 @@ /** * Parallel tile QR factorization (reduction Householder) - dynamic scheduling */ -void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; - + int k, m, n, i, p; int K; int ldap, ldam; @@ -95,14 +95,6 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); - } -#endif - K = chameleon_min(A->mt, A->nt); /* The number of the factorization */ @@ -212,11 +204,5 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c index 3434c8f6998f4284ed3f52f53b86fedc074850b5..ef0ecd41fd84de44e96282e0ae7fb1939135e959 100644 --- a/compute/pzungqr_param.c +++ b/compute/pzungqr_param.c @@ -44,14 +44,14 @@ * Parallel construction of Q using tile V (application to identity) - dynamic scheduling */ void morse_pzungqr_param(const libhqr_tree_t *qrtree, - MORSE_desc_t *A, 
MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int ldam, ldqm, ldqp; @@ -98,14 +98,6 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); - } -#endif - for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -192,10 +184,5 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c index 678172f7e7010fe32131d8e54398ee0f0ac21dca..7e27339855ba5be585439f58be19da65009480ec 100644 --- a/compute/pzunmqr_param.c +++ b/compute/pzunmqr_param.c @@ -40,14 +40,13 @@ */ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int ldam, ldan, ldbm, ldbp; @@ -90,12 +89,6 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - /* necessary to avoid dependencies between tasks regarding the diag tile */ 
-#if defined(CHAMELEON_COPY_DIAG) - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->nb, A->nb, 0, 0, K*A->nb, A->nb, A->p, A->q); -#endif - if (side == MorseLeft ) { if (trans == MorseConjTrans) { /* @@ -442,10 +435,5 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/zgels_param.c b/compute/zgels_param.c index 6d78f62c1a9c17de8ce9b7535886f6e1ecb4aa89..cd76d61b052eb127dbb7a52271ba94c133b11931 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -331,7 +331,7 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *subA; MORSE_desc_t *subB; MORSE_context_t *morse; - + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { morse_fatal_error("MORSE_zgels_param_Tile", "MORSE not initialized"); @@ -386,15 +386,24 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, } */ if (A->m >= A->n) { - morse_pzgeqrf_param(qrtree, A, TS, TT, sequence, request); - morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + /* D is used by the tasks submitted above: wait for the sequence before releasing it */ + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(&D); +#else + morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); +#endif - subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); - subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); - morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, 
subA, subB, sequence, request); - free(subA); - free(subB); + subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); } else { /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); @@ -409,7 +418,8 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, free(subA); free(subB); - morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); + //morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); + morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request); } return MORSE_SUCCESS; } diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c index 3e3113fb6584d0449e8b1d47e7cba6df1786b467..4525309bc8eec78e436cd8f38dea0c27a7b323ce 100644 --- a/compute/zgeqrf_param.c +++ b/compute/zgeqrf_param.c @@ -253,6 +253,7 @@ int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -296,7 +297,15 @@ int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ - morse_pzgeqrf_param(qrtree, A, TS, TT, sequence, request); - +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request); + /* D is used by the tasks submitted above: wait for the sequence before releasing it */ + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(&D); +#else + morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c index 7dddbdcf0e961479e1c9d4c5ad14a49f8be8057e..1b86b10be599c9f03fd4e1994729f8c03c3f039e 100644 --- a/compute/zgeqrs_param.c +++ 
b/compute/zgeqrs_param.c @@ -268,7 +268,8 @@ int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *subA; MORSE_desc_t *subB; MORSE_context_t *morse; - + MORSE_desc_t D; + morse = morse_context_self(); if (morse == NULL) { morse_fatal_error("MORSE_zgeqrs_param_Tile", "MORSE not initialized"); @@ -316,7 +317,15 @@ int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree, return MORSE_SUCCESS; } */ - morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + /* D is used by the tasks submitted above: wait for the sequence before releasing it */ + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(&D); +#else + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); +#endif subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c index 24bec0c623a53b11dceef761161a4882f6aef67e..513c50af4b4781758076aaa5112188f20467f3bf 100644 --- a/compute/zungqr_param.c +++ b/compute/zungqr_param.c @@ -257,11 +257,10 @@ int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_ * @sa MORSE_zgeqrf_Tile_Async * ******************************************************************************/ -int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, - MORSE_sequence_t *sequence, MORSE_request_t *request) +int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; - + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { morse_fatal_error("MORSE_zungqr_param_Tile", "MORSE not initialized"); @@ -308,8 +307,16 @@ int 
MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, if (N <= 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); - morse_pzungqr_param(qrtree, A, Q, TS, TT, sequence, request); - + morse_pzungqr_param(qrtree, A, Q, TS, TT, &D, sequence, request); + /* D is used by the tasks submitted above: wait for the sequence before releasing it */ + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(&D); +#else + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); + morse_pzungqr_param(qrtree, A, Q, TS, TT, NULL, sequence, request); +#endif return MORSE_SUCCESS; } diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c index 8a5a42cd47431a9a31ff377e9b172f1338015a03..1e9af1473ea66033988871ec29fe5ed817a2ae1c 100644 --- a/compute/zunmqr_param.c +++ b/compute/zunmqr_param.c @@ -317,6 +317,7 @@ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -372,7 +373,15 @@ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, return MORSE_SUCCESS; */ - morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, sequence, request); - +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, &D, sequence, request); + /* D is used by the tasks submitted above: wait for the sequence before releasing it */ + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(&D); +#else + morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/control/compute_z.h b/control/compute_z.h index a6b77619a63db2fe5118328f1344e7d6157530da..65a8547b0bbc6d16fa14208106bd724ee1f8d335 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -158,16 +158,18 @@ void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* use #if 
defined(CHAMELEON_USE_LIBHQR) void morse_pzgelqf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgeqrf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzgeqrf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); #endif /* defined(CHAMELEON_USE_LIBHQR) */ diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt index 61334fa4209b3e44a91e7381968027af4fe67b1e..500dda7f9120fc74ccf8af11bc7f3507d40f7744 100644 --- a/timing/CMakeLists.txt +++ b/timing/CMakeLists.txt @@ -85,6 +85,7 @@ if (NOT CHAMELEON_SIMULATION) time_zgels.c time_zgels_tile.c time_zgeqrf.c + time_zgeqrf_hqr.c 
time_zgeqrf_tile.c time_zgelqf.c time_zgelqf_tile.c @@ -237,6 +238,7 @@ if(NOT CHAMELEON_SIMULATION) ${CBLAS_LIBRARIES} ${LAPACK_SEQ_LIBRARIES} ${BLAS_SEQ_LIBRARIES} + ${LIBHQR_LIBRARIES} ${HWLOC_LIBRARIES} ${EXTRA_LIBRARIES} ) @@ -246,6 +248,7 @@ if(NOT CHAMELEON_SIMULATION) link_directories(${LAPACK_LIBRARY_DIRS}) link_directories(${CBLAS_LIBRARY_DIRS}) link_directories(${BLAS_LIBRARY_DIRS}) + link_directories(${LIBHQR_LIBRARY_DIRS}) else() diff --git a/timing/time_zgeqrf_hqr.c b/timing/time_zgeqrf_hqr.c index a475afb035becb399fc5d328678dcab1bdfc9324..5c17848b1df613384050e59e5ad4fc66a8933918 100644 --- a/timing/time_zgeqrf_hqr.c +++ b/timing/time_zgeqrf_hqr.c @@ -30,6 +30,9 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) { MORSE_desc_t *TS; MORSE_desc_t *TT; + libhqr_tree_t qrtree; + libhqr_tiledesc_t matrix; + PASTE_CODE_IPARAM_LOCALS( iparam ); if ( M != N && check ) { @@ -46,14 +49,25 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) /* Allocate Workspace */ MORSE_Alloc_Workspace_zgels(M, N, &TS, P, Q); memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(MorseComplexDouble)); - MORSE_Alloc_Workspace_zgels(M, N, &TT P, Q); + MORSE_Alloc_Workspace_zgels(M, N, &TT, P, Q); memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MorseComplexDouble)); /* Save AT in lapack layout for check */ PASTE_CODE_ALLOCATE_COPY( Acpy, check, MORSE_Complex64_t, A, LDA, N ); + /* Initialize matrix */ + matrix.mt = TS->mt; + matrix.nt = TS->nt; + matrix.nodes = 1; + matrix.p = 1; + + /* Initialize qrtree */ + libhqr_hqr_init( &qrtree, + ( matrix.mt >= matrix.nt ) ? 
LIBHQR_QR : LIBHQR_LQ, + &matrix, -1, -1, 1, -1, 0, 0); + START_TIMING(); - MORSE_zgeqrf( M, N, A, LDA, TS ); + MORSE_zgeqrf_param(&qrtree, M, N, A, LDA, TS, TT ); STOP_TIMING(); /* Check the solution */ @@ -63,7 +77,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) MORSE_zplrnt( N, NRHS, X, LDB, 5673 ); PASTE_CODE_ALLOCATE_COPY( B, 1, MORSE_Complex64_t, X, LDB, NRHS ); - MORSE_zgeqrs(M, N, NRHS, A, LDA, TS, X, LDB); + MORSE_zgeqrs_param(&qrtree, M, N, NRHS, A, LDA, TS, TT, X, LDB); dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, Acpy, LDA, B, X, LDB, &(dparam[IPARAM_ANORM]),