Mentions légales du service

Skip to content
Snippets Groups Projects
Commit e40dd027 authored by Mathieu Faverge
Browse files

Merge branch 'getrf/bcast' into 'master'

GETRF: Add a bcast version of the workspace without ring

See merge request !504
parents 422dc322 1bef67ec
No related branches found
No related tags found
1 merge request: !504 GETRF: Add a bcast version of the workspace without ring
...@@ -518,29 +518,48 @@ chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws, ...@@ -518,29 +518,48 @@ chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws,
CHAM_context_t *chamctxt = chameleon_context_self(); CHAM_context_t *chamctxt = chameleon_context_self();
int m, tempmm, tempkn, q; int m, tempmm, tempkn, q;
int lookahead = chamctxt->lookahead; int lookahead = chamctxt->lookahead;
int lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(A, 1); int P = chameleon_desc_datadist_get_iparam(A, 0);
int myp = A->myrank / chameleon_desc_datadist_get_iparam(A, 1); int Q = chameleon_desc_datadist_get_iparam(A, 1);
int lq = (k % lookahead) * Q;
int myp = A->myrank / Q;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
for ( m = k+1; m < A->mt; m++ ) { if ( k >= ws->ringswitch ) {
if ( m % chameleon_desc_datadist_get_iparam(A, 0) != myp ) continue; for ( m = k+1; m < A->mt; m++ ) {
if ( ( m % P ) != myp ) continue;
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempmm, tempkn,
A( m, k ),
Wl( m, ( k % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ) );
for ( q = 1; q < chameleon_desc_datadist_get_iparam(A, 1); q++ ) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempmm, tempkn, ChamUpperLower, tempmm, tempkn,
Wl( m, ( ( k + q - 1 ) % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ), A( m, k ),
Wl( m, ( ( k + q ) % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ) ); Wl( m, ( k % Q ) + lq ) );
for ( q = 1; q < Q; q++ ) {
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempmm, tempkn,
Wl( m, ( ( k + q - 1 ) % Q ) + lq ),
Wl( m, ( ( k + q ) % Q ) + lq ) );
}
RUNTIME_data_flush( options->sequence, A(m, k) );
}
}
else {
for ( m = k+1; m < A->mt; m++ ) {
if ( ( m % P ) != myp ) continue;
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
for ( q = 0; q < Q; q++ ) {
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempmm, tempkn,
A( m, k ),
Wl( m, ( ( k + q )% Q ) + lq ) );
}
RUNTIME_data_flush( options->sequence, A(m, k) );
} }
RUNTIME_data_flush( options->sequence, A(m, k) );
} }
} }
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
* *
*/ */
#include "control/common.h" #include "control/common.h"
#include <limits.h>
/** /**
******************************************************************************** ********************************************************************************
...@@ -103,6 +104,8 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) ...@@ -103,6 +104,8 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
ws->batch_size = CHAMELEON_BATCH_SIZE; ws->batch_size = CHAMELEON_BATCH_SIZE;
} }
ws->ringswitch = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_RINGSWITCH", INT_MAX );
/* Allocation of U for permutation of the panels */ /* Allocation of U for permutation of the panels */
if ( ws->alg == ChamGetrfNoPivPerColumn ) { if ( ws->alg == ChamGetrfNoPivPerColumn ) {
chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE, chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
......
...@@ -44,12 +44,13 @@ struct chameleon_pzgemm_s { ...@@ -44,12 +44,13 @@ struct chameleon_pzgemm_s {
*/ */
struct chameleon_pzgetrf_s { struct chameleon_pzgetrf_s {
cham_getrf_t alg; cham_getrf_t alg;
int ib; /**< Internal blocking parameter */ int ib; /**< Internal blocking parameter */
int batch_size; /**< Batch size for the panel */ int batch_size; /**< Batch size for the panel */
int ringswitch; /**< Define when to switch to ring bcast */
CHAM_desc_t U; CHAM_desc_t U;
CHAM_desc_t Up; /**< Workspace used for the panel factorization */ CHAM_desc_t Up; /**< Workspace used for the panel factorization */
CHAM_desc_t Wu; /**< Workspace used for the permutation and update */ CHAM_desc_t Wu; /**< Workspace used for the permutation and update */
CHAM_desc_t Wl; /**< Workspace used the update */ CHAM_desc_t Wl; /**< Workspace used the update */
int *proc_involved; int *proc_involved;
unsigned int involved; unsigned int involved;
int np_involved; int np_involved;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment