From 70ba5fe4b8cc67f3c000510c512689f59cf59db7 Mon Sep 17 00:00:00 2001 From: Raphael Boucherie <raphael.boucherie@inria.fr> Date: Wed, 7 Jun 2017 10:29:37 +0200 Subject: [PATCH] Updated for StarPU --- compute/pzgeqrf_param.c | 20 +++----------------- compute/pzungqr_param.c | 17 ++--------------- compute/pzunmqr_param.c | 14 +------------- compute/zgels_param.c | 26 +++++++++++++++++--------- compute/zgeqrf_param.c | 11 +++++++++-- compute/zgeqrs_param.c | 11 +++++++++-- compute/zungqr_param.c | 15 ++++++++++----- compute/zunmqr_param.c | 11 +++++++++-- control/compute_z.h | 10 ++++++---- timing/CMakeLists.txt | 3 +++ timing/time_zgeqrf_hqr.c | 20 +++++++++++++++++--- 11 files changed, 86 insertions(+), 72 deletions(-) diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c index 360a2d408..44fead1c4 100644 --- a/compute/pzgeqrf_param.c +++ b/compute/pzgeqrf_param.c @@ -42,15 +42,15 @@ /** * Parallel tile QR factorization (reduction Householder) - dynamic scheduling */ -void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; - + int k, m, n, i, p; int K; int ldap, ldam; @@ -95,14 +95,6 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); - } -#endif - K = chameleon_min(A->mt, A->nt); /* The number of the factorization */ @@ -212,11 +204,5 @@ void morse_pzgeqrf_param( const libhqr_tree_t 
*qrtree, MORSE_desc_t *A, MORSE_de RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c index 3434c8f69..ef0ecd41f 100644 --- a/compute/pzungqr_param.c +++ b/compute/pzungqr_param.c @@ -44,14 +44,14 @@ * Parallel construction of Q using tile V (application to identity) - dynamic scheduling */ void morse_pzungqr_param(const libhqr_tree_t *qrtree, - MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int ldam, ldqm, ldqp; @@ -98,14 +98,6 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - { - /* necessary to avoid dependencies between tasks regarding the diag tile */ - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); - } -#endif - for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); @@ -192,10 +184,5 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c index 678172f7e..7e2733985 100644 --- a/compute/pzunmqr_param.c +++ b/compute/pzunmqr_param.c @@ -40,14 +40,13 @@ */ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, 
MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *D = NULL; int k, m, n, i, p; int ldam, ldan, ldbm, ldbp; @@ -90,12 +89,6 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - /* necessary to avoid dependencies between tasks regarding the diag tile */ -#if defined(CHAMELEON_COPY_DIAG) - D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->nb, A->nb, 0, 0, K*A->nb, A->nb, A->p, A->q); -#endif - if (side == MorseLeft ) { if (trans == MorseConjTrans) { /* @@ -442,10 +435,5 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(D); - free(D); -#endif (void)D; } diff --git a/compute/zgels_param.c b/compute/zgels_param.c index 6d78f62c1..cd76d61b0 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -331,7 +331,7 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *subA; MORSE_desc_t *subB; MORSE_context_t *morse; - + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { morse_fatal_error("MORSE_zgels_param_Tile", "MORSE not initialized"); @@ -386,15 +386,22 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, } */ if (A->m >= A->n) { - morse_pzgeqrf_param(qrtree, A, TS, TT, sequence, request); - morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzgeqrf_param(qrtree, A, TS, TT, &D, 
sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); +#endif - subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); - subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); - morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); - free(subA); - free(subB); + subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); } else { /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); @@ -409,7 +416,8 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, free(subA); free(subB); - morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); + //morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); + morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request); } return MORSE_SUCCESS; } diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c index 3e3113fb6..4525309bc 100644 --- a/compute/zgeqrf_param.c +++ b/compute/zgeqrf_param.c @@ -253,6 +253,7 @@ int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -296,7 +297,13 @@ int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, if (chameleon_min(M, N) == 0) return MORSE_SUCCESS; */ - morse_pzgeqrf_param(qrtree, A, TS, TT, sequence, request); - +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, 
A->m, chameleon_min(A->m, A->n), ); + morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c index 7dddbdcf0..1b86b10be 100644 --- a/compute/zgeqrs_param.c +++ b/compute/zgeqrs_param.c @@ -268,7 +268,8 @@ int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *subA; MORSE_desc_t *subB; MORSE_context_t *morse; - + MORSE_desc_t D; + morse = morse_context_self(); if (morse == NULL) { morse_fatal_error("MORSE_zgeqrs_param_Tile", "MORSE not initialized"); @@ -316,7 +317,13 @@ int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree, return MORSE_SUCCESS; } */ - morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request); +#endif subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c index 24bec0c62..513c50af4 100644 --- a/compute/zungqr_param.c +++ b/compute/zungqr_param.c @@ -257,11 +257,10 @@ int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_ * @sa MORSE_zgeqrf_Tile_Async * ******************************************************************************/ -int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, - MORSE_sequence_t *sequence, MORSE_request_t *request) +int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, 
MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; - + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { morse_fatal_error("MORSE_zungqr_param_Tile", "MORSE not initialized"); @@ -308,8 +307,14 @@ int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, if (N <= 0) return MORSE_SUCCESS; */ +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); - morse_pzungqr_param(qrtree, A, Q, TS, TT, sequence, request); - + morse_pzungqr_param(qrtree, A, Q, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); + morse_pzungqr_param(qrtree, A, Q, TS, TT, NULL, sequence, request); +#endif return MORSE_SUCCESS; } diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c index 8a5a42cd4..1e9af1473 100644 --- a/compute/zunmqr_param.c +++ b/compute/zunmqr_param.c @@ -317,6 +317,7 @@ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; + MORSE_desc_t D; morse = morse_context_self(); if (morse == NULL) { @@ -372,7 +373,13 @@ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, return MORSE_SUCCESS; */ - morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, sequence, request); - +#if defined(CHAMELEON_COPY_DIAG) + morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), ); + morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, &D, sequence, request); + morse_desc_mat_free(&D); +#else + morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, NULL, sequence, request); +#endif + (void)D; return MORSE_SUCCESS; } diff --git a/control/compute_z.h b/control/compute_z.h index 
a6b77619a..65a8547b0 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -158,16 +158,18 @@ void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* use #if defined(CHAMELEON_USE_LIBHQR) void morse_pzgelqf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzgeqrf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzgeqrf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, +void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request); #endif /* defined(CHAMELEON_USE_LIBHQR) */ diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt index 61334fa42..500dda7f9 100644 --- a/timing/CMakeLists.txt +++ 
b/timing/CMakeLists.txt @@ -85,6 +85,7 @@ if (NOT CHAMELEON_SIMULATION) time_zgels.c time_zgels_tile.c time_zgeqrf.c + time_zgeqrf_hqr.c time_zgeqrf_tile.c time_zgelqf.c time_zgelqf_tile.c @@ -237,6 +238,7 @@ if(NOT CHAMELEON_SIMULATION) ${CBLAS_LIBRARIES} ${LAPACK_SEQ_LIBRARIES} ${BLAS_SEQ_LIBRARIES} + ${LIBHQR_LIBRARIES} ${HWLOC_LIBRARIES} ${EXTRA_LIBRARIES} ) @@ -246,6 +248,7 @@ if(NOT CHAMELEON_SIMULATION) link_directories(${LAPACK_LIBRARY_DIRS}) link_directories(${CBLAS_LIBRARY_DIRS}) link_directories(${BLAS_LIBRARY_DIRS}) + link_directories(${LIBHQR_LIBRARY_DIRS}) else() diff --git a/timing/time_zgeqrf_hqr.c b/timing/time_zgeqrf_hqr.c index a475afb03..5c17848b1 100644 --- a/timing/time_zgeqrf_hqr.c +++ b/timing/time_zgeqrf_hqr.c @@ -30,6 +30,9 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) { MORSE_desc_t *TS; MORSE_desc_t *TT; + libhqr_tree_t qrtree; + libhqr_tiledesc_t matrix; + PASTE_CODE_IPARAM_LOCALS( iparam ); if ( M != N && check ) { @@ -46,14 +49,25 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) /* Allocate Workspace */ MORSE_Alloc_Workspace_zgels(M, N, &TS, P, Q); memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(MorseComplexDouble)); - MORSE_Alloc_Workspace_zgels(M, N, &TT P, Q); + MORSE_Alloc_Workspace_zgels(M, N, &TT, P, Q); memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MorseComplexDouble)); /* Save AT in lapack layout for check */ PASTE_CODE_ALLOCATE_COPY( Acpy, check, MORSE_Complex64_t, A, LDA, N ); + /* Initialize matrix */ + matrix.mt = TS->mt; + matrix.nt = TS->nt; + matrix.nodes = 1; + matrix.p = 1; + + /* Initialize qrtree */ + libhqr_hqr_init( &qrtree, + ( matrix.mt >= matrix.nt ) ? 
LIBHQR_QR : LIBHQR_LQ, + &matrix, -1, -1, 1, -1, 0, 0); + START_TIMING(); - MORSE_zgeqrf( M, N, A, LDA, TS ); + MORSE_zgeqrf_param(&qrtree, M, N, A, LDA, TS, TT ); STOP_TIMING(); /* Check the solution */ @@ -63,7 +77,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) MORSE_zplrnt( N, NRHS, X, LDB, 5673 ); PASTE_CODE_ALLOCATE_COPY( B, 1, MORSE_Complex64_t, X, LDB, NRHS ); - MORSE_zgeqrs(M, N, NRHS, A, LDA, TS, X, LDB); + MORSE_zgeqrs_param(&qrtree, M, N, NRHS, A, LDA, TS, TT, X, LDB); dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, Acpy, LDA, B, X, LDB, &(dparam[IPARAM_ANORM]), -- GitLab