Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 470e5c5f authored by LISITO Alycia's avatar LISITO Alycia
Browse files

zgetrf: clean permutation

parent 77a4aaeb
No related branches found
No related tags found
1 merge request!516Clean the getrf permutation
...@@ -338,10 +338,12 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, ...@@ -338,10 +338,12 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
int k, int k,
RUNTIME_option_t *options ) RUNTIME_option_t *options )
{ {
#if defined(CHAMELEON_USE_MPI)
chameleon_get_proc_involved_in_panelk_2dbc( A, k, k, ws ); chameleon_get_proc_involved_in_panelk_2dbc( A, k, k, ws );
if ( !ws->involved ) { if ( !ws->involved ) {
return; return;
} }
#endif
/* TODO: Should be replaced by a function pointer */ /* TODO: Should be replaced by a function pointer */
switch( ws->alg ) { switch( ws->alg ) {
...@@ -392,19 +394,6 @@ chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws, ...@@ -392,19 +394,6 @@ chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws,
int tempkm, tempkn, tempnn, minmn; int tempkm, tempkn, tempnn, minmn;
int withlacpy; int withlacpy;
chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
INSERT_TASK_zperm_allreduce_send_perm( options, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
INSERT_TASK_zperm_allreduce_send_invp( options, ipiv, k, A, k, n );
}
if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
}
if ( !ws->involved ) {
return;
}
tempkm = A->get_blkdim( A, k, DIM_m, A->m ); tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = A->get_blkdim( A, k, DIM_n, A->n ); tempkn = A->get_blkdim( A, k, DIM_n, A->n );
tempnn = A->get_blkdim( A, n, DIM_n, A->n ); tempnn = A->get_blkdim( A, n, DIM_n, A->n );
...@@ -457,19 +446,6 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws, ...@@ -457,19 +446,6 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws,
int tempkm, tempkn, tempnn, minmn; int tempkm, tempkn, tempnn, minmn;
int withlacpy; int withlacpy;
chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
INSERT_TASK_zperm_allreduce_send_perm( options, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
INSERT_TASK_zperm_allreduce_send_invp( options, ipiv, k, A, k, n );
}
if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
}
if ( !ws->involved ) {
return;
}
void **clargs = malloc( sizeof(char *) ); void **clargs = malloc( sizeof(char *) );
*clargs = NULL; *clargs = NULL;
...@@ -508,6 +484,80 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws, ...@@ -508,6 +484,80 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws,
} }
} }
static inline void
chameleon_pzgetrf_panel_permute_forward( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A,
CHAM_ipiv_t *ipiv,
int k,
int n,
RUNTIME_option_t *options )
{
#if defined(CHAMELEON_USE_MPI)
chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
INSERT_TASK_zperm_allreduce_send_perm( options, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
INSERT_TASK_zperm_allreduce_send_invp( options, ipiv, k, A, k, n );
}
if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
}
if ( !ws->involved ) {
return;
}
#endif
if ( ws->batch_size_swap > 0 ) {
chameleon_pzgetrf_panel_permute_batched( ws, A, ipiv, k, n, options );
}
else {
chameleon_pzgetrf_panel_permute( ws, A, ipiv, k, n, options );
}
}
static inline void
chameleon_pzgetrf_panel_permute_backward( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A,
CHAM_ipiv_t *ipiv,
int k,
int n,
RUNTIME_option_t *options,
RUNTIME_sequence_t *sequence )
{
int tempkm, tempnn;
#if defined(CHAMELEON_USE_MPI)
chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
INSERT_TASK_zperm_allreduce_send_perm( options, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
INSERT_TASK_zperm_allreduce_send_invp( options, ipiv, k, A, k, n );
}
if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
}
if ( !ws->involved ) {
return;
}
#endif
if ( ws->batch_size_swap > 0 ) {
chameleon_pzgetrf_panel_permute_batched( ws, A, ipiv, k, n, options );
}
else {
chameleon_pzgetrf_panel_permute( ws, A, ipiv, k, n, options );
}
if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
Wu(A->myrank, n), A(k, n) );
RUNTIME_data_flush( sequence, A(k, n) );
}
}
static inline void static inline void
chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws, chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A, CHAM_desc_t *A,
...@@ -515,7 +565,7 @@ chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws, ...@@ -515,7 +565,7 @@ chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws,
RUNTIME_option_t *options ) RUNTIME_option_t *options )
{ {
CHAM_context_t *chamctxt = chameleon_context_self(); CHAM_context_t *chamctxt = chameleon_context_self();
int m, tempmm, tempkn, q; int m, n, tempmm, tempkn, q;
int lookahead = chamctxt->lookahead; int lookahead = chamctxt->lookahead;
int P = chameleon_desc_datadist_get_iparam(A, 0); int P = chameleon_desc_datadist_get_iparam(A, 0);
int Q = chameleon_desc_datadist_get_iparam(A, 1); int Q = chameleon_desc_datadist_get_iparam(A, 1);
...@@ -583,12 +633,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, ...@@ -583,12 +633,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
tempkm = A->get_blkdim( A, k, DIM_m, A->m ); tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempnn = A->get_blkdim( A, n, DIM_n, A->n ); tempnn = A->get_blkdim( A, n, DIM_n, A->n );
if ( ws->batch_size_swap > 0 ) { chameleon_pzgetrf_panel_permute_forward( ws, A, ipiv, k, n, options );
chameleon_pzgetrf_panel_permute_batched( ws, A, ipiv, k, n, options );
}
else {
chameleon_pzgetrf_panel_permute( ws, A, ipiv, k, n, options );
}
if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) { if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
for ( p = 0; p < ws->np_involved; p++ ) { for ( p = 0; p < ws->np_involved; p++ ) {
...@@ -607,6 +652,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, ...@@ -607,6 +652,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
ChamLeft, ChamLower, ChamNoTrans, ChamUnit, ChamLeft, ChamLower, ChamNoTrans, ChamUnit,
tempkm, tempnn, A->mb, tempkm, tempnn, A->mb,
zone, A(k, k), zone, A(k, k),
zone, Wu(A->myrank, k),
Wu(A->myrank, n) ); Wu(A->myrank, n) );
} }
...@@ -682,54 +728,26 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, ...@@ -682,54 +728,26 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
} }
/* Flush panel k */ /* Flush panel k */
for (m = k; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
RUNTIME_data_flush( sequence, A(m, k) ); RUNTIME_data_flush( sequence, A(m, k) );
} }
RUNTIME_data_flush( sequence, Wu(A->myrank, k) );
RUNTIME_iteration_pop( chamctxt ); RUNTIME_iteration_pop( chamctxt );
} }
CHAMELEON_Desc_Flush( &(ws->Wl), sequence ); CHAMELEON_Desc_Flush( &(ws->Wl), sequence );
/* Backward pivoting */ /* Backward pivoting */
if ( ws->batch_size > 0 ) { for (k = 1; k < min_mnt; k++) {
for (k = 1; k < min_mnt; k++) { for (n = 0; n < k; n++) {
for (n = 0; n < k; n++) { if ( chameleon_involved_in_panelk_2dbc( A, k ) ||
if ( chameleon_involved_in_panelk_2dbc( A, k ) || chameleon_involved_in_panelk_2dbc( A, n ) )
chameleon_involved_in_panelk_2dbc( A, n ) ) {
{ chameleon_pzgetrf_panel_permute_backward( ws, A, IPIV, k, n, &options, sequence );
chameleon_pzgetrf_panel_permute_batched( ws, A, IPIV, k, n, &options );
if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempnn,
Wu(A->myrank, n), A(k, n) );
RUNTIME_data_flush( sequence, A(k, n) );
}
}
RUNTIME_data_flush( sequence, Wu(A->myrank, n) );
}
RUNTIME_perm_flushk( sequence, IPIV, k );
}
}
else {
for (k = 1; k < min_mnt; k++) {
for (n = 0; n < k; n++) {
if ( chameleon_involved_in_panelk_2dbc( A, k ) ||
chameleon_involved_in_panelk_2dbc( A, n ) )
{
chameleon_pzgetrf_panel_permute( ws, A, IPIV, k, n, &options );
if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempnn,
Wu(A->myrank, n), A(k, n) );
RUNTIME_data_flush( sequence, A(k, n) );
}
}
RUNTIME_data_flush( sequence, Wu(A->myrank, n) );
} }
RUNTIME_perm_flushk( sequence, IPIV, k ); RUNTIME_data_flush( sequence, Wu(A->myrank, n) );
} }
RUNTIME_perm_flushk( sequence, IPIV, k );
} }
CHAMELEON_Desc_Flush( &(ws->Wu), sequence ); CHAMELEON_Desc_Flush( &(ws->Wu), sequence );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment