Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b726e724 authored by LISITO Alycia
Browse files

starpu/codelet: Add new task submit to codelet_zgetrf_blocked.c

parent f7fa32e7
No related branches found
No related tags found
1 merge request: !523 Restructuration of the codelets getrf and getrf_nopiv
......@@ -22,6 +22,16 @@
#include "chameleon_starpu_internal.h"
#include "runtime_codelet_z.h"
/**
 * Argument bundle attached to the zgetrf_blocked tasks (diag, offdiag, trsm).
 *
 * The INSERT_TASK_* routines of this file allocate one instance per task and
 * hand it over through the task's cl_arg pointer; the CPU kernels cast cl_arg
 * back to this structure instead of unpacking individual values.
 */
struct cl_zgetrf_blocked_args_s {
    /* Forwarded unchanged as the two leading arguments of the
     * CORE_zgetrf_panel_* kernels; presumably the tile dimensions - TODO confirm. */
    int m, n;
    /* Step inside the panel: compared against 0 and the tile width by the
     * kernels, and combined with ib to choose the access mode of U. */
    int h;
    /* Forwarded unchanged to the CORE_zgetrf_panel_* kernels.
     * NOTE(review): looks like a row offset - confirm against the kernel. */
    int m0;
    /* Inner blocking size: the submitters test h % ib, and the trsm kernel
     * uses it as the number of rows of the triangular solve. */
    int ib;
    /* Copies of options->sequence and options->request taken at submission
     * time (not filled by the trsm submitter). */
    RUNTIME_sequence_t *sequence;
    RUNTIME_request_t  *request;
};
/*
 * One CHAMELEON_CL_CB instantiation per zgetrf_blocked codelet (diag, offdiag,
 * trsm). All three pass the same arguments: cti_handle_get_m() applied to the
 * first handle of the task, then 0, 0 and M.
 * NOTE(review): presumably this macro generates the cl_<name>_callback
 * functions that the INSERT_TASK_* routines of this file install as task
 * callbacks - confirm against the macro definition.
 */
CHAMELEON_CL_CB( zgetrf_blocked_diag, cti_handle_get_m(task->handles[0]), 0, 0, M )
CHAMELEON_CL_CB( zgetrf_blocked_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M )
CHAMELEON_CL_CB( zgetrf_blocked_trsm, cti_handle_get_m(task->handles[0]), 0, 0, M )
......@@ -29,9 +39,7 @@ CHAMELEON_CL_CB( zgetrf_blocked_trsm, cti_handle_get_m(task->handles[0]), 0,
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
{
int m, n, h, m0, ib;
RUNTIME_sequence_t *sequence;
RUNTIME_request_t *request;
struct cl_zgetrf_blocked_args_s *clargs = (struct cl_zgetrf_blocked_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileU;
int *ipiv;
......@@ -40,9 +48,6 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
CHAMELEON_Complex64_t *U = NULL;
int ldu = -1;;
starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &ib,
&sequence, &request );
tileA = cti_interface_get(descr[0]);
ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[1]);
nextpiv = (cppi_interface_t*) descr[2];
......@@ -53,10 +58,10 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
ldu = tileU->ld;
}
if ( h > 0 ) {
if ( clargs->h > 0 ) {
cppi_display_dbg( prevpiv, stderr, "Prevpiv before call: " );
}
if ( h < tileA->n ) {
if ( clargs->h < tileA->n ) {
cppi_display_dbg( nextpiv, stderr, "Nextpiv before call: " );
}
......@@ -64,19 +69,19 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
* Make sure the nextpiv interface store the right information about the
* column and diagonal row for the reduction
*/
nextpiv->h = h;
nextpiv->h = clargs->h;
nextpiv->has_diag = 1;
coreblas_kernel_trace( tileA );
CORE_zgetrf_panel_diag( m, n, h, m0, ib,
CORE_zgetrf_panel_diag( clargs->m, clargs->n, clargs->h, clargs->m0, clargs->ib,
CHAM_tile_get_ptr( tileA ), tileA->ld,
U, ldu,
ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
if ( h > 0 ) {
if ( clargs->h > 0 ) {
cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " );
}
if ( h < tileA->n ) {
if ( clargs->h < tileA->n ) {
cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " );
}
}
......@@ -87,21 +92,22 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
*/
CODELETS_CPU( zgetrf_blocked_diag, cl_zgetrf_blocked_diag_cpu_func )
#if defined(CHAMELEON_STARPU_USE_INSERT)
void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
int m, int n, int h, int m0, int ib,
CHAM_desc_t *A, int Am, int An,
CHAM_desc_t *U, int Um, int Un,
CHAM_ipiv_t *ipiv )
{
struct starpu_codelet *codelet = &cl_zgetrf_blocked_diag;
void (*callback)(void*) = options->profiling ? cl_zgetrf_blocked_diag_callback : NULL;
const char *cl_name = "zgetrf_blocked_diag";
int rankA = A->get_rankof(A, Am, An);
#if !defined(HAVE_STARPU_NONE_NONZERO)
/* STARPU_NONE can't be equal to 0 */
fprintf( stderr, "INSERT_TASK_zgetrf_percol_diag: STARPU_NONE can not be equal to 0\n" );
fprintf( stderr, "INSERT_TASK_zgetrf_blocked_diag: STARPU_NONE can not be equal to 0\n" );
assert( 0 );
#endif
void (*callback)(void*) = options->profiling ? cl_zgetrf_blocked_diag_callback : NULL;
const char *cl_name = "zgetrf_blocked_diag";
int rankA = A->get_rankof(A, Am, An);
#if defined ( CHAMELEON_USE_MPI )
if ( ( h % ib == 0 ) && ( h > 0 ) ) {
......@@ -113,6 +119,17 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
}
#endif
/* Set codelet parameters */
struct cl_zgetrf_blocked_args_s *clargs;
clargs = malloc( sizeof( struct cl_zgetrf_blocked_args_s ) );
clargs->m = m;
clargs->n = n;
clargs->h = h;
clargs->m0 = m0;
clargs->ib = ib;
clargs->sequence = options->sequence;
clargs->request = options->request;
int access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW;
int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R;
......@@ -139,19 +156,18 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
A->get_blktile( A, Am, An ) );
rt_starpu_insert_task(
codelet,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &h, sizeof(int),
STARPU_VALUE, &m0, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*),
STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*),
&cl_zgetrf_blocked_diag,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_blocked_args_s),
/* Task handles */
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ),
access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),
access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
......@@ -159,12 +175,97 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
0 );
}
#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
/**
 * Submit the zgetrf_blocked_diag task with an explicitly built StarPU task
 * (variant compiled when CHAMELEON_STARPU_USE_INSERT is not defined).
 *
 * @param options  Runtime options; its sequence and request are stored in the
 *                 codelet arguments and it is forwarded to the starpu_cham_*
 *                 helpers.
 * @param m, n, h, m0, ib
 *                 Kernel parameters, copied unchanged into the codelet
 *                 arguments. h and ib also select the access modes below.
 * @param A, Am, An  Descriptor and tile indices of the tile accessed in
 *                   STARPU_RW mode.
 * @param U, Um, Un  Descriptor and tile indices of the U tile; its access
 *                   mode depends on h and ib.
 * @param ipiv     Pivot structure providing the ipiv vector of column An and
 *                 the pivot handles of steps h and h-1.
 *
 * Nothing is submitted (MPI builds only) when the calling rank does not own
 * the tile A(Am, An).
 */
void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0, int ib,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_desc_t *U, int Um, int Un,
                                      CHAM_ipiv_t *ipiv )
{
    struct starpu_task *task;
    int rc;
    int rankA = A->get_rankof( A, Am, An );

#if defined ( CHAMELEON_USE_MPI )
    /* The flush of U is requested on every rank, before the ownership test. */
    if ( ( h % ib == 0 ) && ( h > 0 ) ) {
        starpu_mpi_cache_flush( options->sequence->comm, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un) );
    }
    if ( rankA != A->myrank ) {
        return;
    }
#endif

    INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_blocked_diag, zgetrf_blocked_diag, zgetrf_blocked, 5 );

    /* Access modes of the pivot-related handles */
    int ipiv_mode = ( h != 0 ) ? STARPU_RW : STARPU_W;
    int npiv_mode = ( h != ipiv->n ) ? STARPU_REDUX : STARPU_R;
    int ppiv_mode = ( h != 0 ) ? STARPU_R : STARPU_NONE;

    /* Access mode of U, STARPU_RW unless one of the cases below applies */
    int u_mode;
    if ( h == 0 ) {
        /* U can be set after ppiv because they are both none together, so it won't shift the buffers */
        u_mode = STARPU_NONE;
    }
    else {
        int hmod = h % ib;

        if ( hmod == 0 ) {
            u_mode = STARPU_R;
        }
        else if ( ( hmod == 1 ) || ( ib == 1 ) ) {
            u_mode = STARPU_W;
        }
        else {
            u_mode = STARPU_RW;
        }
    }

    /*
     * Register the data handles, no exchange needed.
     * Order: A, ipiv, next pivot (step h), previous pivot (step h-1), U.
     */
    starpu_cham_exchange_init_params( options, &params, rankA );
    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_RW );
    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An), ipiv_mode );
    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), npiv_mode );
    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), ppiv_mode );
    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ), u_mode );

    task     = starpu_task_create();
    task->cl = cl;

    /* Codelet arguments: heap copy of the kernel parameters */
    clargs = malloc( sizeof( struct cl_zgetrf_blocked_args_s ) );
    clargs->m        = m;
    clargs->n        = n;
    clargs->h        = h;
    clargs->m0       = m0;
    clargs->ib       = ib;
    clargs->sequence = options->sequence;
    clargs->request  = options->request;

    /* cl_arg_free is set so that the task owns the clargs allocation */
    task->cl_arg      = clargs;
    task->cl_arg_size = sizeof( struct cl_zgetrf_blocked_args_s );
    task->cl_arg_free = 1;

    /* Common parameters */
    starpu_cham_task_set_options( options, task, nbdata, descrs, cl_zgetrf_blocked_diag_callback );

    /* Flops */
    // task->flops = TODO;

    /* Refined task name */
    task->name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );

    rc = starpu_task_submit( task );
    if ( rc != -ENODEV ) {
        starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
        return;
    }

    /* Submission refused with -ENODEV: release the task and report */
    task->destroy = 0;
    starpu_task_destroy( task );
    chameleon_error( "INSERT_TASK_zgetrf_blocked_diag", "Failed to submit the task to StarPU" );
}
#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
{
int m, n, h, k, m0, ib;
RUNTIME_sequence_t *sequence;
RUNTIME_request_t *request;
struct cl_zgetrf_blocked_args_s *clargs = (struct cl_zgetrf_blocked_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileU;
cppi_interface_t *nextpiv;
......@@ -172,8 +273,6 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
CHAMELEON_Complex64_t *U = NULL;
int ldu = -1;
starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &k, &m0, &ib, &sequence, &request );
tileA = cti_interface_get(descr[0]);
nextpiv = (cppi_interface_t*) descr[1];
prevpiv = (cppi_interface_t*) descr[2];
......@@ -183,26 +282,26 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
ldu = tileU->ld;
}
if ( h > 0 ) {
if ( clargs->h > 0 ) {
cppi_display_dbg( prevpiv, stderr, "Prevpiv offdiag before call: " );
}
if ( h < tileA->n ) {
if ( clargs->h < tileA->n ) {
cppi_display_dbg( nextpiv, stderr, "Nextpiv offdiag before call: " );
}
nextpiv->h = h; /* Initialize in case it uses a copy */
nextpiv->h = clargs->h; /* Initialize in case it uses a copy */
nextpiv->has_diag = chameleon_max( -1, nextpiv->has_diag);
coreblas_kernel_trace( tileA );
CORE_zgetrf_panel_offdiag( m, n, h, m0, ib,
CORE_zgetrf_panel_offdiag( clargs->m, clargs->n, clargs->h, clargs->m0, clargs->ib,
CHAM_tile_get_ptr(tileA), tileA->ld,
U, ldu,
&(nextpiv->pivot), &(prevpiv->pivot) );
if ( h > 0 ) {
if ( clargs->h > 0 ) {
cppi_display_dbg( prevpiv, stderr, "Prevpiv offdiag after call: " );
}
if ( h < tileA->n ) {
if ( clargs->h < tileA->n ) {
cppi_display_dbg( nextpiv, stderr, "Nextpiv offdiag after call: " );
}
}
......@@ -213,23 +312,23 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
*/
CODELETS_CPU(zgetrf_blocked_offdiag, cl_zgetrf_blocked_offdiag_cpu_func)
#if defined(CHAMELEON_STARPU_USE_INSERT)
void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
int m, int n, int h, int m0, int ib,
CHAM_desc_t *A, int Am, int An,
CHAM_desc_t *U, int Um, int Un,
CHAM_ipiv_t *ipiv )
{
struct starpu_codelet *codelet = &cl_zgetrf_blocked_offdiag;
int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R;
int accessU = ((h%ib == 0) && (h > 0)) ? STARPU_R : STARPU_NONE;
int rankA = A->get_rankof(A, Am, An);
#if !defined(HAVE_STARPU_NONE_NONZERO)
/* STARPU_NONE can't be equal to 0 */
fprintf( stderr, "INSERT_TASK_zgetrf_percol_diag: STARPU_NONE can not be equal to 0\n" );
fprintf( stderr, "INSERT_TASK_zgetrf_blocked_diag: STARPU_NONE can not be equal to 0\n" );
assert( 0 );
#endif
int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R;
int accessU = ((h%ib == 0) && (h > 0)) ? STARPU_R : STARPU_NONE;
int rankA = A->get_rankof(A, Am, An);
#if defined ( CHAMELEON_USE_MPI )
if ( rankA != A->myrank ) {
......@@ -244,6 +343,17 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
}
#endif
/* Set codelet parameters */
struct cl_zgetrf_blocked_args_s *clargs;
clargs = malloc( sizeof( struct cl_zgetrf_blocked_args_s ) );
clargs->m = m;
clargs->n = n;
clargs->h = h;
clargs->m0 = m0;
clargs->ib = ib;
clargs->sequence = options->sequence;
clargs->request = options->request;
void (*callback)(void*) = options->profiling ? cl_zgetrf_blocked_offdiag_callback : NULL;
const char *cl_name = "zgetrf_blocked_offdiag";
......@@ -260,19 +370,17 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
A->get_blktile( A, Am, An ) );
rt_starpu_insert_task(
codelet,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &h, sizeof(int),
STARPU_VALUE, &An, sizeof(int),
STARPU_VALUE, &m0, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t *),
STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t *),
&cl_zgetrf_blocked_offdiag,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_blocked_args_s),
/* Task handles */
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),
access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
......@@ -280,19 +388,96 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
0 );
}
#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
/**
 * Submit the zgetrf_blocked_offdiag task with an explicitly built StarPU task
 * (variant compiled when CHAMELEON_STARPU_USE_INSERT is not defined).
 *
 * @param options  Runtime options; its sequence and request are stored in the
 *                 codelet arguments and it is forwarded to the starpu_cham_*
 *                 helpers.
 * @param m, n, h, m0, ib
 *                 Kernel parameters, copied unchanged into the codelet
 *                 arguments. h and ib also select the access modes below.
 * @param A, Am, An  Descriptor and tile indices of the tile accessed in
 *                   STARPU_RW mode.
 * @param U, Um, Un  Descriptor and tile indices of the U tile, read only when
 *                   h is a positive multiple of ib, otherwise not accessed.
 * @param ipiv     Pivot structure providing the pivot handles of steps h and
 *                 h-1.
 *
 * In MPI builds, a rank that does not own A(Am, An) submits no task. When U
 * is needed and this rank owns tile (An, An), it first requests a detached
 * transfer of U to the owner of A(Am, An).
 */
void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0, int ib,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_desc_t *U, int Um, int Un,
                                         CHAM_ipiv_t *ipiv )
{
    struct starpu_task *task;
    int rc;
    int rankA     = A->get_rankof( A, Am, An );
    int npiv_mode = ( h != ipiv->n ) ? STARPU_REDUX : STARPU_R;
    int ppiv_mode = ( h != 0 ) ? STARPU_R : STARPU_NONE;
    int u_mode    = STARPU_NONE;

    /* U is only read at the first step of a new block of ib columns */
    if ( ( h % ib == 0 ) && ( h > 0 ) ) {
        u_mode = STARPU_R;
    }

#if defined ( CHAMELEON_USE_MPI )
    if ( rankA != A->myrank ) {
        if ( u_mode != STARPU_NONE ) {
            if ( A->myrank == A->get_rankof( A, An, An ) ) {
                starpu_mpi_get_data_on_node_detached( options->sequence->comm,
                                                      RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
                                                      rankA, NULL, NULL );
            }
        }
        return;
    }
#endif

    INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_blocked_offdiag, zgetrf_blocked_offdiag, zgetrf_blocked, 4 );

    /*
     * Register the data handles, exchange needed only for U.
     * Order: A, next pivot (step h), previous pivot (step h-1), U.
     */
    starpu_cham_exchange_init_params( options, &params, rankA );
    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_RW );
    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), npiv_mode );
    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), ppiv_mode );
    starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
                                                  RTBLKADDR( U, ChamComplexDouble, Um, Un ),
                                                  u_mode );

    task     = starpu_task_create();
    task->cl = cl;

    /* Codelet arguments: heap copy of the kernel parameters */
    clargs = malloc( sizeof( struct cl_zgetrf_blocked_args_s ) );
    clargs->m        = m;
    clargs->n        = n;
    clargs->h        = h;
    clargs->m0       = m0;
    clargs->ib       = ib;
    clargs->sequence = options->sequence;
    clargs->request  = options->request;

    /* cl_arg_free is set so that the task owns the clargs allocation */
    task->cl_arg      = clargs;
    task->cl_arg_size = sizeof( struct cl_zgetrf_blocked_args_s );
    task->cl_arg_free = 1;

    /* Common parameters */
    starpu_cham_task_set_options( options, task, nbdata, descrs, cl_zgetrf_blocked_offdiag_callback );

    /* Flops */
    // task->flops = TODO;

    /* Refined task name */
    task->name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );

    rc = starpu_task_submit( task );
    if ( rc != -ENODEV ) {
        starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
        return;
    }

    /* Submission refused with -ENODEV: release the task and report */
    task->destroy = 0;
    starpu_task_destroy( task );
    chameleon_error( "INSERT_TASK_zgetrf_blocked_offdiag", "Failed to submit the task to StarPU" );
}
#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
#if !defined(CHAMELEON_SIMULATION)
static const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
static void cl_zgetrf_blocked_trsm_cpu_func(void *descr[], void *cl_arg)
{
int m, n, h, ib;
struct cl_zgetrf_blocked_args_s *clargs = (struct cl_zgetrf_blocked_args_s *)cl_arg;
CHAM_tile_t *tileU;
cppi_interface_t *prevpiv;
CHAMELEON_Complex64_t *U;
int ldu;
starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &ib );
tileU = cti_interface_get(descr[0]);
prevpiv = (cppi_interface_t*) descr[1];
U = CHAM_tile_get_ptr( tileU );
......@@ -301,16 +486,16 @@ static void cl_zgetrf_blocked_trsm_cpu_func(void *descr[], void *cl_arg)
coreblas_kernel_trace( tileU );
/* Copy the final max line of the block and solve */
cblas_zcopy( n, prevpiv->pivot.pivrow, 1,
U + m - 1, ldu );
cblas_zcopy( clargs->n, prevpiv->pivot.pivrow, 1,
U + clargs->m - 1, ldu );
if ( (n-h) > 0 ) {
if ( ( clargs->n - clargs->h ) > 0 ) {
cblas_ztrsm( CblasColMajor,
CblasLeft, CblasLower,
CblasNoTrans, CblasUnit,
ib, n - h,
CBLAS_SADDR(zone), U + (h-ib) * ldu, ldu,
U + h * ldu, ldu );
clargs->ib, clargs->n - clargs->h,
CBLAS_SADDR(zone), U + (clargs->h-clargs->ib) * ldu, ldu,
U + clargs->h * ldu, ldu );
}
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -320,13 +505,13 @@ static void cl_zgetrf_blocked_trsm_cpu_func(void *descr[], void *cl_arg)
*/
CODELETS_CPU(zgetrf_blocked_trsm, cl_zgetrf_blocked_trsm_cpu_func)
#if defined(CHAMELEON_STARPU_USE_INSERT)
void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
int m, int n, int h, int ib,
CHAM_desc_t *U, int Um, int Un,
CHAM_ipiv_t *ipiv )
{
struct starpu_codelet *codelet = &cl_zgetrf_blocked_trsm;
void (*callback)(void*) = options->profiling ? cl_zgetrf_blocked_trsm_callback : NULL;
const char *cl_name = "zgetrf_blocked_trsm";
int rankU = U->get_rankof(U, Um, Un);
......@@ -344,17 +529,86 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
return;
}
/* Set codelet parameters */
struct cl_zgetrf_blocked_args_s *clargs;
clargs = malloc( sizeof( struct cl_zgetrf_blocked_args_s ) );
clargs->m = m;
clargs->n = n;
clargs->h = h;
clargs->ib = ib;
rt_starpu_insert_task(
codelet,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &h, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
&cl_zgetrf_blocked_trsm,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_blocked_args_s),
/* Task handles */
STARPU_RW, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
STARPU_R, RUNTIME_pivot_getaddr( ipiv, rankU, Un, h-1 ),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
STARPU_NAME, cl_name,
0 );
}
#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
/**
 * Submit the zgetrf_blocked_trsm task with an explicitly built StarPU task
 * (variant compiled when CHAMELEON_STARPU_USE_INSERT is not defined).
 *
 * @param options  Runtime options, forwarded to the starpu_cham_* helpers.
 * @param m, n, h, ib
 *                 Kernel parameters, copied unchanged into the codelet
 *                 arguments.
 * @param U, Um, Un  Descriptor and tile indices of the U tile, accessed in
 *                   STARPU_RW mode.
 * @param ipiv     Pivot structure providing the pivot handle of step h-1,
 *                 accessed in STARPU_R mode.
 *
 * Nothing is submitted when the calling rank does not own U(Um, Un).
 */
void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
                                      int m, int n, int h, int ib,
                                      CHAM_desc_t *U, int Um, int Un,
                                      CHAM_ipiv_t *ipiv )
{
    int ret;
    struct starpu_task *task;
    int rankU = U->get_rankof(U, Um, Un);

    if ( U->myrank != rankU ) {
        return;
    }

    INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_blocked_trsm, zgetrf_blocked_trsm, zgetrf_blocked, 2 );

    /*
     * Register the data handles, no exchange needed.
     * Order: U, previous pivot (step h-1).
     */
    starpu_cham_exchange_init_params( options, &params, rankU );
    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ), STARPU_RW );
    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankU, Un, h-1 ), STARPU_R );

    task = starpu_task_create();
    task->cl = cl;

    /*
     * Set codelet parameters.
     * Only m, n, h and ib are filled: they are the only fields the trsm CPU
     * kernel of this file reads; m0, sequence and request are left unset.
     */
    clargs = malloc( sizeof( struct cl_zgetrf_blocked_args_s ) );
    clargs->m  = m;
    clargs->n  = n;
    clargs->h  = h;
    clargs->ib = ib;

    /* cl_arg_free is set so that the task owns the clargs allocation */
    task->cl_arg      = clargs;
    task->cl_arg_size = sizeof( struct cl_zgetrf_blocked_args_s );
    task->cl_arg_free = 1;

    /* Set common parameters */
    starpu_cham_task_set_options( options, task, nbdata, descrs, cl_zgetrf_blocked_trsm_callback );

    /* Flops */
    // task->flops = TODO;

    /* Refine name */
    task->name = chameleon_codelet_name( cl_name, 1, U->get_blktile( U, Um, Un ) );

    ret = starpu_task_submit( task );
    if ( ret == -ENODEV ) {
        task->destroy = 0;
        starpu_task_destroy( task );
        /* Report under this routine's own name (was mislabelled as the diag routine) */
        chameleon_error( "INSERT_TASK_zgetrf_blocked_trsm", "Failed to submit the task to StarPU" );
        return;
    }

    starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
}
#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment