Mentions légales du service

Skip to content
Snippets Groups Projects
Commit fb6ccd8c authored by LISITO Alycia, committed by Mathieu Faverge
Browse files

zgetrf batched: add batched percol algorithm

parent 680943fd
No related branches found
No related tags found
1 merge request: !426 "Batched panel shared memory"
...@@ -157,6 +157,57 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, ...@@ -157,6 +157,57 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
RUNTIME_ipiv_flushk( options->sequence, ipiv, k ); RUNTIME_ipiv_flushk( options->sequence, ipiv, k );
} }
/**
 * Factorization of panel k - dynamic scheduling - batched version.
 *
 * Per-column algorithm with pivoting (no recursion). For every column index
 * h of the panel, one task is inserted on the diagonal tile A(k, k), then
 * every tile A(m, k) below it (m > k) is handed to
 * INSERT_TASK_zgetrf_panel_offdiag_batched(), and
 * INSERT_TASK_zgetrf_panel_offdiag_batched_flush() is called once at the
 * end of the column.
 *
 * @param[in]     ws       Workspace of the factorization, forwarded untouched
 *                         to the batched off-diagonal task insertion.
 * @param[in,out] A        Descriptor of the matrix being factorized.
 * @param[in,out] ipiv     Pivot descriptor; its n field is set to
 *                         min( tempkm, tempkn ) for the current panel.
 * @param[in]     k        Index of the panel (tile row and tile column).
 * @param[in,out] options  Runtime options used for every task insertion.
 */
static inline void
chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws,
                                              CHAM_desc_t                *A,
                                              CHAM_ipiv_t                *ipiv,
                                              int                         k,
                                              RUNTIME_option_t           *options )
{
    int m, h;
    int tempkm, tempkn, tempmm, minmn;

    /*
     * Single pointer slot shared by the batched insertion calls and the flush
     * call below. It only has to outlive those calls, so an automatic
     * variable is enough: no heap allocation, hence no allocation failure to
     * handle and nothing to free.
     * NOTE(review): presumably the callees store the in-flight batch
     * arguments in this slot -- confirm against their implementation.
     */
    void *clargs = NULL;

    /* Sizes of the diagonal tile; the last tile row/column may be smaller */
    tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
    tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
    minmn  = chameleon_min( tempkm, tempkn );

    /* Update the number of columns */
    ipiv->n = minmn;

    /*
     * Algorithm per column with pivoting (no recursion)
     */
    /* Iterate on current panel column */
    /* Since index h scales column h-1, we need to iterate up to minmn (included) */
    for ( h = 0; h <= minmn; h++ ) {

        INSERT_TASK_zgetrf_percol_diag( options, tempkm, tempkn, h, k * A->mb, A(k, k), ipiv );

        for ( m = k+1; m < A->mt; m++ ) {
            tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
            INSERT_TASK_zgetrf_panel_offdiag_batched( options, tempmm, tempkn, h, m * A->mb,
                                                      (void *)ws, A(m, k), &clargs, ipiv );
        }
        INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, k, &clargs, ipiv );

        /* The extra iteration h == minmn has no pivot left to reduce */
        if ( h < minmn ) {
            /* Reduce globally (between MPI processes) */
            INSERT_TASK_ipiv_reducek( options, ipiv, k, h );
        }
    }

    /* Flush temporary data used for the pivoting */
    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k );
    RUNTIME_ipiv_flushk( options->sequence, ipiv, k );
}
static inline void static inline void
chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A, CHAM_desc_t *A,
...@@ -235,7 +286,12 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, ...@@ -235,7 +286,12 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
break; break;
case ChamGetrfPPivPerColumn: case ChamGetrfPPivPerColumn:
chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options ); if ( ws->batch_size > 1 ) {
chameleon_pzgetrf_panel_facto_percol_batched( ws, A, ipiv, k, options );
}
else {
chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options );
}
break; break;
case ChamGetrfPPiv: case ChamGetrfPPiv:
......
...@@ -94,6 +94,12 @@ if (NOT CHAMELEON_SIMULATION) ...@@ -94,6 +94,12 @@ if (NOT CHAMELEON_SIMULATION)
set_tests_properties( test_${cat}_${prec}getrf_ppivpercol set_tests_properties( test_${cat}_${prec}getrf_ppivpercol
PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppivpercolumn;CHAMELEON_GETRF_BATCH_SIZE=1" ) PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppivpercolumn;CHAMELEON_GETRF_BATCH_SIZE=1" )
# Per-column partial pivoting test run with CHAMELEON_GETRF_BATCH_SIZE=6
# instead of 1; only registered when the test category is "shm".
if ( ${cat} STREQUAL "shm" )
add_test( test_${cat}_${prec}getrf_ppivpercol_batch ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf_nopiv.in )
set_tests_properties( test_${cat}_${prec}getrf_ppivpercol_batch
PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppivpercolumn;CHAMELEON_GETRF_BATCH_SIZE=6" )
endif()
add_test( test_${cat}_${prec}getrf_ppiv ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf.in ) add_test( test_${cat}_${prec}getrf_ppiv ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf.in )
set_tests_properties( test_${cat}_${prec}getrf_ppiv set_tests_properties( test_${cat}_${prec}getrf_ppiv
PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=1" ) PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=1" )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment