Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 1e552c91 authored by LISITO Alycia
Browse files

zlaswp batched: add batched zlaswp algorithm

parent 0ff3b520
No related branches found
No related tags found
1 merge request: !493 — zgetrf: Batch of the permutation (swap)
...@@ -447,6 +447,58 @@ chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws, ...@@ -447,6 +447,58 @@ chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws,
} }
} }
/**
 * Batched variant of the panel permutation: inserts the tasks that apply the
 * row swaps of block row k to tile column n, submitting the swaps on the
 * tiles below the diagonal through the INSERT_TASK_zlaswp_batched* helpers.
 *
 * Only the ChamGetrfPPiv and ChamGetrfPPivPerColumn algorithms are handled;
 * for any other value of ws->alg no task is inserted.
 *
 * @param ws       getrf workspace; ws->alg selects the algorithm, and both ws
 *                 and ws->U are forwarded to the batched swap tasks.
 * @param A        tile descriptor; its mb/nb, mt/nt and m/n fields are used to
 *                 compute the sizes of the tiles involved.
 * @param ipiv     pivot descriptor forwarded to the swap tasks.
 * @param k        index of the block row whose pivots are applied.
 * @param n        index of the tile column being permuted.
 * @param options  runtime options forwarded to every inserted task.
 */
static inline void
chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws,
                                         CHAM_desc_t                *A,
                                         CHAM_ipiv_t                *ipiv,
                                         int                         k,
                                         int                         n,
                                         RUNTIME_option_t           *options )
{
    switch( ws->alg ) {
    case ChamGetrfPPiv:
        chameleon_attr_fallthrough;
    case ChamGetrfPPivPerColumn:
    {
        int m;
        int tempkm, tempkn, tempnn, minmn;
        /*
         * Slot shared by every batched insertion of this (k, n) pair. It only
         * has to live for the duration of this call, so it is kept on the
         * stack: no allocation that could fail, nothing to free.
         * NOTE(review): presumably the INSERT_TASK_zlaswp_batched* helpers
         * accumulate pending swaps through this slot - confirm.
         */
        void *clargs = NULL;

        /* Edge tiles may be smaller than the regular tile size */
        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
        tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
        minmn  = chameleon_min( tempkm, tempkn );

        /* Extract selected rows into U */
        INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
                            A(k, n), U(k, n) );

        /*
         * perm array is made of size tempkm for the first row especially.
         * Otherwise, the final copy back to the tile may copy only a partial tile
         */
        INSERT_TASK_zlaswp_get( options, k*A->mb, tempkm,
                                ipiv, k, A(k, n), U(k, n) );

        /* Submit the swaps for every tile row below the diagonal one */
        for ( m = k+1; m < A->mt; m++ ) {
            INSERT_TASK_zlaswp_batched( options, m*A->mb, minmn, k, m, n, (void *)ws,
                                        ipiv, k, A, &(ws->U), &clargs );
        }
        INSERT_TASK_zlaswp_batched_flush( options, k, n, ipiv, k, A, &(ws->U), &clargs );

        /* Copy the permuted rows back from U into the diagonal-row tile */
        INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
                            U(k, n), A(k, n) );

        RUNTIME_data_flush( options->sequence, U(k, n) );
    }
    break;
    default:
        ;
    }
}
static inline void static inline void
chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A, CHAM_desc_t *A,
...@@ -463,7 +515,12 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, ...@@ -463,7 +515,12 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
chameleon_pzgetrf_panel_permute( ws, A, ipiv, k, n, options ); if ( ws->batch_size > 0 ) {
chameleon_pzgetrf_panel_permute_batched( ws, A, ipiv, k, n, options );
}
else {
chameleon_pzgetrf_panel_permute( ws, A, ipiv, k, n, options );
}
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
options, options,
...@@ -536,11 +593,21 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, ...@@ -536,11 +593,21 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
} }
/* Backward pivoting */ /* Backward pivoting */
for (k = 1; k < min_mnt; k++) { if ( ws->batch_size > 0 ) {
for (n = 0; n < k; n++) { for (k = 1; k < min_mnt; k++) {
chameleon_pzgetrf_panel_permute( ws, A, IPIV, k, n, &options ); for (n = 0; n < k; n++) {
chameleon_pzgetrf_panel_permute_batched( ws, A, IPIV, k, n, &options );
}
RUNTIME_perm_flushk( sequence, IPIV, k );
}
}
else {
for (k = 1; k < min_mnt; k++) {
for (n = 0; n < k; n++) {
chameleon_pzgetrf_panel_permute( ws, A, IPIV, k, n, &options );
}
RUNTIME_perm_flushk( sequence, IPIV, k );
} }
RUNTIME_perm_flushk( sequence, IPIV, k );
} }
/* Initialize IPIV with default values if needed */ /* Initialize IPIV with default values if needed */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment