diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 83d9accfd825ff9c7ff30450738fbdc8dd87806a..108c4a2010849721a5ae0f2efc1291c4a017746f 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -131,7 +131,6 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
      * Algorithm per column with pivoting
      */
     for (h=0; h<=minmn; h++){
-
         INSERT_TASK_zgetrf_percol_diag(
             options,
             h, k * A->mb,
@@ -139,8 +138,6 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
             ipiv );
 
         for (m = k+1; m < A->mt; m++) {
-            //tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-
             INSERT_TASK_zgetrf_percol_offdiag(
                 options,
                 h, m * A->mb,
@@ -193,7 +190,6 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
             ipiv );
 
         for (m = k+1; m < A->mt; m++) {
-
             INSERT_TASK_zgetrf_blocked_offdiag(
                 options,
                 j, m * A->mb, ws->ib,
@@ -371,7 +367,13 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
         RUNTIME_iteration_push( chamctxt, k );
 
         options.priority = A->nt;
+        /*
+         * Do the panel factorization only if the current proc contributes to the
+         * block column k; forcesub is cleared again right after the call.
+         */
+        options.forcesub = chameleon_involved_in_panelk_2dbc( A, k );
         chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options );
+        options.forcesub = 0;
 
         for (n = k+1; n < A->nt; n++) {
             options.priority = A->nt-n;
diff --git a/control/descriptor_helpers.c b/control/descriptor_helpers.c
index 775748014d6bd0e8ce8b8e7e3f3497b514edef3d..9cae1883552fc8f418aca49140cf904dbcdcbed8 100644
--- a/control/descriptor_helpers.c
+++ b/control/descriptor_helpers.c
@@ -82,6 +82,24 @@ int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n )
     return (mm % A->p) * A->q + (mm % A->q);
 }
 
+/**
+ * @brief Test if the current MPI process is involved in the panel k for 2DBC distributions.
+ *
+ * @param[in] A
+ *      The matrix descriptor; only its myrank and q fields are read.
+ *
+ * @param[in] k
+ *      The index of the panel (block column) to test.
+ *
+ * @return 1 if the current MPI process contributes to the panel k.
+ *         0 if the current MPI process doesn't contribute to the panel k.
+ *         The test performed is ( A->myrank % A->q ) == ( k % A->q ).
+ */
+int chameleon_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k ) {
+    int myrank = A->myrank;
+    return ( myrank % A->q == k % A->q );
+}
+
 /**
  * @brief Initializes a custom distribution based on an external file.
  *
diff --git a/include/chameleon/descriptor_helpers.h b/include/chameleon/descriptor_helpers.h
index 5716b2582b1a4d5962b51347d042f8134055f2e3..da79d04863f4180e6c6ce929fee6b33235998fc3 100644
--- a/include/chameleon/descriptor_helpers.h
+++ b/include/chameleon/descriptor_helpers.h
@@ -57,6 +57,14 @@ int chameleon_getrankof_custom_init ( custom_dist_t **dist, const char *filena
 int chameleon_getrankof_custom_destroy( custom_dist_t **dist );
 int chameleon_getrankof_custom ( const CHAM_desc_t *A, int m, int n );
 
+/**
+ * @}
+ * @name Panel involvement functions
+ * @{
+ */
+
+int chameleon_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k );
+
 /**
  * @}
  * @name Block address functions
diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
index 03f4e55539c231bfeff6c8248b11390fa3fd5e54..832132f4f1498c19d6cd1d7982772044658d16fc 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
@@ -114,7 +114,7 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
@@ -197,7 +197,7 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
@@ -275,7 +275,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(U, Um, Un);
+    CHAMELEON_ACCESS_RW( U, Um, Un );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
index 4b2de691a1b8b8312e70fde5afe29f46150c9881..7829c3f3e70bf28bd57495b2fecd1bf55be3cb43 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -92,7 +92,7 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
@@ -159,10 +159,9 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
     void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL;
     char *cl_name = "zgetrf_percol_offdiag";
 
-
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */