diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index e6f3d107a911b0a29ed40fffe204f7f84ad5d259..584a965969186b0b16ea12122ac8944d6929283e 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -16,6 +16,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Matthieu Kuhn + * @author Alycia Lisito * @date 2024-03-16 * @precisions normal z -> s d c * @@ -218,6 +219,10 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, int m, h, b, nbblock; int tempkm, tempkn, tempmm, minmn; + if ( ! ws->involved ) { + return; + } + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; minmn = chameleon_min( tempkm, tempkn ); @@ -233,7 +238,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, int hmax = b == nbblock-1 ? minmn + 1 - b * ws->ib : ws->ib; for (h=0; h<hmax; h++){ - int j = h + b * ws->ib; + int j = h + b * ws->ib; INSERT_TASK_zgetrf_blocked_diag( options, @@ -306,10 +311,7 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, for ( h = 0; h < hmax; h++ ) { j = h + b * ws->ib; - INSERT_TASK_zgetrf_panel_blocked_batched( options, tempkm, tempkn, j, k * A->mb, (void *)ws, - A(k, k), Up(k, k), clargs, ipiv ); - - for ( m = k + 1; m < A->mt; m++ ) { + for ( m = k; m < A->mt; m++ ) { tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; INSERT_TASK_zgetrf_panel_blocked_batched( options, tempmm, tempkn, j, m * A->mb, (void *)ws, A(m, k), Up(k, k), clargs, ipiv ); @@ -347,6 +349,24 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, int k, RUNTIME_option_t *options ) { + int *proc_involved = malloc( sizeof( int ) * chameleon_min( A->p, A->mt - k) ); + int b; + + /* 2DBC only */ + ws->involved = 0; + for ( b = k; (b < A->mt) && ((b-k) < A->p); b ++ ) { + int rank = chameleon_getrankof_2d( A, b, k ); + proc_involved[ b-k ] = rank; + if ( rank == A->myrank ) { + ws->involved = 1; + } + } + ws->proc_involved = proc_involved; + if ( ws->involved == 0 ) { + free( proc_involved ); + return; + } + /* TODO: Should be replaced by a function pointer */ switch( ws->alg ) { case ChamGetrfNoPivPerColumn: @@ -354,7 +374,7 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, break; case ChamGetrfPPivPerColumn: - if ( ws->batch_size > 1 ) { + if ( ws->batch_size > 0 ) { chameleon_pzgetrf_panel_facto_percol_batched( ws, A, ipiv, k, options ); } else { @@ -363,7 +383,7 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, break; case ChamGetrfPPiv: - if ( ws->batch_size > 1 ) { + if ( ws->batch_size > 0 ) { chameleon_pzgetrf_panel_facto_blocked_batched( ws, A, ipiv, k, options ); } else { @@ -376,6 +396,7 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, default: chameleon_pzgetrf_panel_facto_nopiv( ws, A, ipiv, k, options ); } + free( proc_involved ); } /** @@ -503,7 +524,9 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, * block column k. */ options.forcesub = chameleon_involved_in_panelk_2dbc( A, k ); - chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options ); + if ( chameleon_involved_in_panelk_2dbc( A, k ) ) { + chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options ); + } options.forcesub = 0; for (n = k+1; n < A->nt; n++) { diff --git a/control/compute_z.h b/control/compute_z.h index 088e03140baff5b167727931a6fb9e6b7a1641f0..06c8854c1b05a7c1bffbca0fa8e615218363672d 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -48,6 +48,8 @@ struct chameleon_pzgetrf_s { int batch_size; /**< Batch size for the panel */ CHAM_desc_t U; CHAM_desc_t Up; + int *proc_involved; + int involved:1; }; /**