Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 59227043 authored by LISITO Alycia, committed by Mathieu Faverge
Browse files

zgetrf batched: add codelet batched for getrf blocked

parent fb6ccd8c
No related branches found
No related tags found
1 merge request: !426 "Batched panel shared memory"
...@@ -541,6 +541,20 @@ void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *opt ...@@ -541,6 +541,20 @@ void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *opt
void **clargs_ptr, void **clargs_ptr,
CHAM_ipiv_t *ipiv ); CHAM_ipiv_t *ipiv );
/**
 * Add the tile A(Am, An) to the pending batch of blocked getrf panel
 * operations, and submit the batch as one task once it is full.
 *
 * @param options     Runtime options; priority and workerid are forwarded to
 *                    the submitted task.
 * @param m, n, m0    Per-tile values recorded in the batch and handed to the
 *                    panel kernel for this tile.
 * @param h           Value recorded in the batch when it is created, and
 *                    handed to the panel kernels.
 * @param ws          Cast to struct chameleon_pzgetrf_s by the definition,
 *                    which reads its batch_size and ib fields.
 * @param A, Am, An   Descriptor and coordinates of the tile added to the
 *                    batch (registered in read/write mode).
 * @param U, Um, Un   Descriptor and coordinates of the U tile attached to the
 *                    submitted task.
 * @param clargs_ptr  In/out pointer to the pending batch. A batch is allocated
 *                    when *clargs_ptr is NULL; *clargs_ptr is reset to NULL
 *                    once the batch has been submitted.
 * @param ipiv        Pivot structure from which the ipiv and pivot handles of
 *                    the submitted task are obtained.
 */
void INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
int m, int n, int h, int m0,
void *ws,
CHAM_desc_t *A, int Am, int An,
CHAM_desc_t *U, int Um, int Un,
void **clargs_ptr,
CHAM_ipiv_t *ipiv );
/**
 * Submit the pending batch of blocked getrf panel operations, if any, even
 * when it is not full.
 *
 * Does nothing when *clargs_ptr is NULL. Otherwise the batch is submitted as
 * one task and *clargs_ptr is reset to NULL.
 *
 * @param options     Runtime options; priority and workerid are forwarded to
 *                    the submitted task.
 * @param A           Descriptor of the panel matrix.
 *                    NOTE(review): not read by the definition visible in this
 *                    commit - confirm whether it is kept for API symmetry.
 * @param An          Column index used to fetch the ipiv and pivot handles.
 * @param U, Um, Un   Descriptor and coordinates of the U tile attached to the
 *                    submitted task.
 * @param clargs_ptr  In/out pointer to the pending batch.
 * @param ipiv        Pivot structure from which the ipiv and pivot handles of
 *                    the submitted task are obtained.
 */
void INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
CHAM_desc_t *A, int An,
CHAM_desc_t *U, int Um, int Un,
void **clargs_ptr,
CHAM_ipiv_t *ipiv );
void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
int m, int n, int h, int ib, int m, int n, int h, int ib,
CHAM_desc_t *U, int Um, int Un, CHAM_desc_t *U, int Um, int Un,
......
...@@ -157,3 +157,202 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, ...@@ -157,3 +157,202 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
/* clargs is freed by starpu. */ /* clargs is freed by starpu. */
*clargs_ptr = NULL; *clargs_ptr = NULL;
} }
#if !defined(CHAMELEON_SIMULATION)
/**
 * CPU kernel of the batched blocked getrf panel codelet.
 *
 * The data buffers are laid out as follows in descr[]:
 *   - descr[0 .. tasks_nbr-1] : the tiles of the batch,
 *   - descr[tasks_nbr]        : the ipiv vector,
 *   - descr[tasks_nbr + 1]    : the pivot structure written by this step,
 *   - descr[tasks_nbr + 2]    : the pivot structure of the previous step,
 *   - descr[tasks_nbr + 3]    : the U tile (only dereferenced in the cases
 *                               tested below).
 *
 * When the batch is flagged as holding a diagonal tile, that tile is the
 * first one and goes through CORE_zgetrf_panel_diag(); every remaining tile
 * goes through CORE_zgetrf_panel_offdiag().
 *
 * @param descr   Array of data interfaces attached to the task.
 * @param cl_arg  Pointer to the struct cl_getrf_batched_args_t of the batch.
 */
static void
cl_zgetrf_panel_blocked_batched_cpu_func( void *descr[],
                                          void *cl_arg )
{
    struct cl_getrf_batched_args_t *clargs = cl_arg;
    const int nbtiles = clargs->tasks_nbr;
    const int h       = clargs->h;
    const int ib      = clargs->ib;

    /* Buffers stored right after the batched tiles */
    int              *ipiv    = (int *)STARPU_VECTOR_GET_PTR( descr[nbtiles] );
    cppi_interface_t *nextpiv = descr[nbtiles + 1];
    cppi_interface_t *prevpiv = descr[nbtiles + 2];

    CHAM_tile_t           *tileA, *tileU;
    CHAMELEON_Complex64_t *U   = NULL;
    int                    ldu = -1;
    int                    t   = 0;

    nextpiv->h = h;

    /* The diagonal tile, when present, is always the first of the batch */
    if ( clargs->diag ) {
        /* The diagonal kernel receives U for every step but the first one */
        if ( h != 0 ) {
            tileU = cti_interface_get( descr[nbtiles + 3] );
            U     = CHAM_tile_get_ptr( tileU );
            ldu   = tileU->ld;
        }

        tileA = cti_interface_get( descr[0] );
        nextpiv->has_diag = 1;

        CORE_zgetrf_panel_diag( clargs->m[0], clargs->n[0], h, clargs->m0[0], ib,
                                CHAM_tile_get_ptr( tileA ), tileA->ld,
                                U, ldu,
                                ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
        t = 1;
    }

    /* Off-diagonal kernels receive U only when h is a positive multiple of ib */
    U   = NULL;
    ldu = -1;
    if ( ( h % ib == 0 ) && ( h > 0 ) ) {
        tileU = cti_interface_get( descr[nbtiles + 3] );
        U     = CHAM_tile_get_ptr( tileU );
        ldu   = tileU->ld;
    }

    while ( t < nbtiles ) {
        tileA = cti_interface_get( descr[t] );
        CORE_zgetrf_panel_offdiag( clargs->m[t], clargs->n[t], h, clargs->m0[t], ib,
                                   CHAM_tile_get_ptr( tileA ), tileA->ld,
                                   U, ldu,
                                   &( nextpiv->pivot ), &( prevpiv->pivot ) );
        t++;
    }
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
 * Register the CPU kernel above for the zgetrf_panel_blocked_batched codelet.
 * NOTE(review): presumably this macro defines cl_zgetrf_panel_blocked_batched,
 * which the INSERT_TASK functions below pass to rt_starpu_insert_task() -
 * confirm against the CODELETS_CPU definition.
 */
CODELETS_CPU( zgetrf_panel_blocked_batched, cl_zgetrf_panel_blocked_batched_cpu_func )
/**
 * Submit the batch described by clargs as a single StarPU task.
 *
 * Shared by INSERT_TASK_zgetrf_panel_blocked_batched() and its _flush()
 * variant so that both paths compute the access modes the same way. All the
 * modes and the pivot handles are derived from clargs->h and clargs->ib,
 * which are the values the CPU kernel reads.
 *
 * The caller is responsible for resetting its batch pointer afterwards.
 *
 * @param options     Runtime options (priority and workerid are forwarded).
 * @param clargs      Batch to submit; must not be NULL.
 * @param An          Column index used to fetch the ipiv and pivot handles.
 * @param U, Um, Un   Descriptor and coordinates of the U tile.
 * @param ipiv        Pivot structure providing the ipiv and pivot handles.
 */
static void
zgetrf_panel_blocked_batched_submit( const RUNTIME_option_t         *options,
                                     struct cl_getrf_batched_args_t *clargs,
                                     int An,
                                     CHAM_desc_t *U, int Um, int Un,
                                     CHAM_ipiv_t *ipiv )
{
    void (*callback)(void*) = NULL;
    const int h  = clargs->h;
    const int ib = clargs->ib;

    /* Default access modes */
    int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
    int access_ipiv = STARPU_RW;
    int access_ppiv = STARPU_R;
    int accessU     = STARPU_RW;

    if ( h == 0 ) {
        /* First step: no previous pivot and no U to read */
        access_ipiv = STARPU_W;
        access_ppiv = STARPU_NONE;
        accessU     = STARPU_NONE;
    }
    else if ( h % ib == 0 ) {
        accessU = STARPU_R;
    }
    else if ( h % ib == 1 ) {
        accessU = STARPU_W;
    }

    /* If there isn't a diag task then use offdiag access */
    if ( clargs->diag == 0 ) {
        accessU = ( ( h % ib == 0 ) && ( h > 0 ) ) ? STARPU_R : STARPU_NONE;
    }

    rt_starpu_insert_task(
        &cl_zgetrf_panel_blocked_batched,
        /* Task codelet arguments */
        STARPU_CL_ARGS, clargs, sizeof(struct cl_getrf_batched_args_t),
        STARPU_PRIORITY, options->priority,
        STARPU_CALLBACK, callback,
        STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
        STARPU_NAME, clargs->cl_name,
#endif
        STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr,
        access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ),
        access_npiv, RUNTIME_pivot_getaddr( ipiv, An, h ),
        access_ppiv, RUNTIME_pivot_getaddr( ipiv, An, h - 1 ),
        accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un ),
        0);
}

/**
 * Add the tile A(Am, An) to the pending batch of blocked getrf panel
 * operations, and submit the batch as one task once it holds batch_size
 * tiles.
 *
 * @param options     Runtime options (priority and workerid are forwarded).
 * @param m, n, m0    Per-tile values recorded in the batch for the kernel.
 * @param h           Recorded in the batch when it is created.
 * @param ws          Pointer to a struct chameleon_pzgetrf_s providing
 *                    batch_size and ib.
 * @param A, Am, An   Descriptor and coordinates of the tile to add; it is
 *                    registered in read/write mode.
 * @param U, Um, Un   Descriptor and coordinates of the U tile.
 * @param clargs_ptr  In/out pointer to the pending batch. A new batch is
 *                    allocated when *clargs_ptr is NULL; *clargs_ptr is reset
 *                    to NULL once the batch has been submitted.
 * @param ipiv        Pivot structure providing the ipiv and pivot handles.
 */
void
INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
                                          int m, int n, int h, int m0,
                                          void *ws,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
                                          void **clargs_ptr,
                                          CHAM_ipiv_t *ipiv )
{
    CHAM_tile_t *tileA      = A->get_blktile( A, Am, An );
    int          batch_size = ((struct chameleon_pzgetrf_s *)ws)->batch_size;
    int          ib         = ((struct chameleon_pzgetrf_s *)ws)->ib;
    int          task_num   = 0;
    int          exec       = 0;
    struct cl_getrf_batched_args_t *clargs = *clargs_ptr;

    /* Handle cache */
    CHAMELEON_BEGIN_ACCESS_DECLARATION;
    CHAMELEON_ACCESS_RW(A, Am, An);
    exec = __chameleon_need_exec;
    CHAMELEON_END_ACCESS_DECLARATION;

    /* The execution flag is not used to filter the batch content here */
    (void)exec;

    if ( clargs == NULL ) {
        /*
         * NOTE(review): the allocation result is not checked. No error
         * reporting facility is visible from this file, so the failure
         * policy is left to the maintainers.
         */
        clargs = malloc( sizeof( *clargs ) );
        clargs->tasks_nbr = 0;
        clargs->diag      = ( Am == An );
        clargs->ib        = ib;
        clargs->h         = h;
        clargs->cl_name   = "zgetrf_panel_blocked_batched";

        *clargs_ptr = clargs;
    }

    /* Register the tile in the next free slot of the batch */
    task_num = clargs->tasks_nbr;
    clargs->m[ task_num ]  = m;
    clargs->n[ task_num ]  = n;
    clargs->m0[ task_num ] = m0;
    clargs->handle_mode[ task_num ].handle = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
    clargs->handle_mode[ task_num ].mode   = STARPU_RW;
    clargs->tasks_nbr ++;

    /* Refine name */
    clargs->cl_name = chameleon_codelet_name( clargs->cl_name, 1, tileA );

    /* Submit as soon as the batch is full */
    if ( clargs->tasks_nbr == batch_size ) {
        zgetrf_panel_blocked_batched_submit( options, clargs, An,
                                             U, Um, Un, ipiv );

        /* clargs is freed by starpu. */
        *clargs_ptr = NULL;
    }
}

/**
 * Submit the pending batch of blocked getrf panel operations, if any, even
 * when it is not full.
 *
 * @param options     Runtime options (priority and workerid are forwarded).
 * @param A           Descriptor of the panel matrix (not used here).
 * @param An          Column index used to fetch the ipiv and pivot handles.
 * @param U, Um, Un   Descriptor and coordinates of the U tile.
 * @param clargs_ptr  In/out pointer to the pending batch. Nothing is done
 *                    when *clargs_ptr is NULL; otherwise it is reset to NULL
 *                    after the submission.
 * @param ipiv        Pivot structure providing the ipiv and pivot handles.
 */
void
INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
                                                CHAM_desc_t *A, int An,
                                                CHAM_desc_t *U, int Um, int Un,
                                                void **clargs_ptr,
                                                CHAM_ipiv_t *ipiv )
{
    struct cl_getrf_batched_args_t *clargs = *clargs_ptr;

    (void)A;

    /* No pending batch: nothing to submit */
    if ( clargs == NULL ) {
        return;
    }

    zgetrf_panel_blocked_batched_submit( options, clargs, An,
                                         U, Um, Un, ipiv );

    /* clargs is freed by starpu. */
    *clargs_ptr = NULL;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment