From 7982e209431d8220a689d53d8852ed4e6ffe04ee Mon Sep 17 00:00:00 2001 From: Alycia Lisito <alycia.lisito@inria.fr> Date: Fri, 23 Feb 2024 11:21:14 +0100 Subject: [PATCH] zgetrf: correct filter --- compute/pzgetrf.c | 10 ++++++---- control/descriptor_helpers.c | 18 ++++++++++++++++++ include/chameleon/descriptor_helpers.h | 8 ++++++++ .../starpu/codelets/codelet_zgetrf_blocked.c | 6 +++--- .../starpu/codelets/codelet_zgetrf_percol.c | 5 ++--- 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 83d9accfd..108c4a201 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -131,7 +131,6 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, * Algorithm per column with pivoting */ for (h=0; h<=minmn; h++){ - INSERT_TASK_zgetrf_percol_diag( options, h, k * A->mb, @@ -139,8 +138,6 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, ipiv ); for (m = k+1; m < A->mt; m++) { - //tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - INSERT_TASK_zgetrf_percol_offdiag( options, h, m * A->mb, @@ -193,7 +190,6 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, ipiv ); for (m = k+1; m < A->mt; m++) { - INSERT_TASK_zgetrf_blocked_offdiag( options, j, m * A->mb, ws->ib, @@ -371,7 +367,13 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, RUNTIME_iteration_push( chamctxt, k ); options.priority = A->nt; + /* + * Do the panel factorization only if the current proc contributes in the + * block column k. 
+ */ + options.forcesub = chameleon_involved_in_panelk_2dbc( A, k ); chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options ); + options.forcesub = 0; for (n = k+1; n < A->nt; n++) { options.priority = A->nt-n; diff --git a/control/descriptor_helpers.c b/control/descriptor_helpers.c index 775748014..9cae18835 100644 --- a/control/descriptor_helpers.c +++ b/control/descriptor_helpers.c @@ -82,6 +82,24 @@ int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n ) return (mm % A->p) * A->q + (mm % A->q); } +/** + * @brief Test whether the current MPI process is involved in panel k of a 2D block-cyclic (2DBC) distribution, i.e. whether myrank % A->q equals k % A->q. + * + * @param[in] A + * The matrix descriptor; its myrank and q fields are read. + * + * @param[in] k + * The index of the panel (block column) to test. + * + * @return 1 if the current MPI process contributes to panel k. + * 0 if the current MPI process does not contribute to panel k. + * @note Presumably relies on ranks being numbered row by row over the P x Q grid, as in chameleon_getrankof_2d_diag() - to be confirmed for other distributions. + */ +int chameleon_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k ) { + int myrank = A->myrank; + return ( myrank % A->q == k % A->q ); +} + /** * @brief Initializes a custom distribution based on an external file. 
* diff --git a/include/chameleon/descriptor_helpers.h b/include/chameleon/descriptor_helpers.h index 5716b2582..da79d0486 100644 --- a/include/chameleon/descriptor_helpers.h +++ b/include/chameleon/descriptor_helpers.h @@ -57,6 +57,14 @@ int chameleon_getrankof_custom_init ( custom_dist_t **dist, const char *filena int chameleon_getrankof_custom_destroy( custom_dist_t **dist ); int chameleon_getrankof_custom ( const CHAM_desc_t *A, int m, int n ); +/** + * @} + * @name Panel involvement functions + * @{ + */ + +int chameleon_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k ); + /** * @} * @name Block address functions diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c index 03f4e5553..832132f4f 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c +++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c @@ -114,7 +114,7 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A, Am, An); + CHAMELEON_ACCESS_RW( A, Am, An ); CHAMELEON_END_ACCESS_DECLARATION; /* Refine name */ @@ -197,7 +197,7 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A, Am, An); + CHAMELEON_ACCESS_RW( A, Am, An ); CHAMELEON_END_ACCESS_DECLARATION; /* Refine name */ @@ -275,7 +275,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(U, Um, Un); + CHAMELEON_ACCESS_RW( U, Um, Un ); CHAMELEON_END_ACCESS_DECLARATION; /* Refine name */ diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c index 4b2de691a..7829c3f3e 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_percol.c +++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c @@ -92,7 +92,7 @@ void 
INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A, Am, An); + CHAMELEON_ACCESS_RW( A, Am, An ); CHAMELEON_END_ACCESS_DECLARATION; /* Refine name */ @@ -159,10 +159,9 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL; char *cl_name = "zgetrf_percol_offdiag"; - /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A, Am, An); + CHAMELEON_ACCESS_RW( A, Am, An ); CHAMELEON_END_ACCESS_DECLARATION; /* Refine name */ -- GitLab