From f7fa32e7c5284c0798c93c140acb358294ef32e7 Mon Sep 17 00:00:00 2001
From: Alycia Lisito <alycia.lisito@inria.fr>
Date: Wed, 5 Mar 2025 12:47:23 +0100
Subject: [PATCH] starpu/codelet: Add new task submit to codelet_zgetrf_percol.c

---
Review notes (text between "---" and the diff is ignored by git-am):
 * Restored "&params" in both starpu_cham_exchange_init_params() calls; the
   ampersand sequence had been corrupted into a pilcrow character.
 * The submit variant of INSERT_TASK_zgetrf_percol_offdiag now reports its
   own name in chameleon_error() instead of the diag one.
 * The CHAMELEON_STARPU_USE_INSERT variant of
   INSERT_TASK_zgetrf_percol_offdiag keeps its `callback` declaration, which
   is still passed with STARPU_CALLBACK. Hunk headers and the diffstat were
   adjusted accordingly (34 deletions instead of 35).
 * Line breaks were rebuilt from the hunk counts. Column alignment inside
   lines could not be recovered, so context lines may need
   `git apply --ignore-whitespace`.

 .../starpu/codelets/codelet_zgetrf_percol.c   | 223 +++++++++++++++---
 1 file changed, 189 insertions(+), 34 deletions(-)

diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
index 1ee38e8a1..8e6f541a0 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -22,31 +22,36 @@
 #include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
+struct cl_zgetrf_percol_args_s {
+    int m;
+    int n;
+    int h;
+    int m0;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+};
+
 CHAMELEON_CL_CB( zgetrf_percol_diag, cti_handle_get_m(task->handles[0]), 0, 0, M )
 CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M )
 
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
 {
-    int m, n, h, m0;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
+    struct cl_zgetrf_percol_args_s *clargs = (struct cl_zgetrf_percol_args_s *)cl_arg;
     CHAM_tile_t *tileA;
     int *ipiv;
     cppi_interface_t *nextpiv;
     cppi_interface_t *prevpiv;
 
-    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
-
     tileA = cti_interface_get(descr[0]);
     ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[1]);
     nextpiv = (cppi_interface_t*) descr[2];
     prevpiv = (cppi_interface_t*) descr[3];
 
-    if ( h > 0 ) {
+    if ( clargs->h > 0 ) {
         cppi_display_dbg( prevpiv, stderr, "Prevpiv before call: " );
     }
-    if ( h < tileA->n ) {
+    if ( clargs->h < tileA->n ) {
         cppi_display_dbg( nextpiv, stderr, "Nextpiv before call: " );
     }
 
@@ -54,18 +59,18 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
      * Make sure the nextpiv interface store the right information about the
      * column and diagonal row for the reduction
      */
-    nextpiv->h = h;
+    nextpiv->h = clargs->h;
     nextpiv->has_diag = 1;
 
-    CORE_zgetrf_panel_diag( m, n, h, m0, tileA->n,
+    CORE_zgetrf_panel_diag( clargs->m, clargs->n, clargs->h, clargs->m0, tileA->n,
                             CHAM_tile_get_ptr( tileA ), tileA->ld,
                             NULL, -1,
                             ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
 
-    if ( h > 0 ) {
+    if ( clargs->h > 0 ) {
         cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " );
     }
-    if ( h < n ) {
+    if ( clargs->h < clargs->n ) {
         cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " );
     }
 }
@@ -76,12 +81,13 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func )
 
+#if defined(CHAMELEON_STARPU_USE_INSERT)
+
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ipiv )
 {
-    struct starpu_codelet *codelet = &cl_zgetrf_percol_diag;
     void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_diag_callback : NULL;
     const char *cl_name = "zgetrf_percol_diag";
     int rankA = A->get_rankof(A, Am, An);
@@ -105,21 +111,31 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
     CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Set codelet parameters */
+    struct cl_zgetrf_percol_args_s *clargs;
+    clargs = malloc( sizeof( struct cl_zgetrf_percol_args_s ) );
+    clargs->m = m;
+    clargs->n = n;
+    clargs->h = h;
+    clargs->m0 = m0;
+    clargs->sequence = options->sequence;
+    clargs->request = options->request;
+
     /* Refine name */
     cl_name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );
 
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE, &m, sizeof(int),
-        STARPU_VALUE, &n, sizeof(int),
-        STARPU_VALUE, &h, sizeof(int),
-        STARPU_VALUE, &m0, sizeof(int),
-        STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*),
-        STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*),
+        &cl_zgetrf_percol_diag,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_percol_args_s),
+
+        /* Task handles */
         STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ),
         access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),
         access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
+
+        /* Common task arguments */
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
@@ -127,26 +143,89 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
         0 );
 }
 
+#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
+void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
+                                     int m, int n, int h, int m0,
+                                     CHAM_desc_t *A, int Am, int An,
+                                     CHAM_ipiv_t *ipiv )
+{
+    int ret, access_ipiv, access_npiv, access_ppiv;
+    struct starpu_task *task;
+    int rankA = A->get_rankof(A, Am, An);
+
+    if ( rankA != A->myrank ) {
+        return;
+    }
+
+    INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_percol_diag, zgetrf_percol_diag, zgetrf_percol, 4 );
+
+    access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW;
+    access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
+    access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R;
+
+    /*
+     * Register the data handles, no exchange needed
+     */
+    starpu_cham_exchange_init_params( options, &params, rankA );
+    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_RW );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An), access_ipiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv );
+
+    task = starpu_task_create();
+    task->cl = cl;
+
+    /* Set codelet parameters */
+    clargs = malloc( sizeof( struct cl_zgetrf_percol_args_s ) );
+    clargs->m = m;
+    clargs->n = n;
+    clargs->h = h;
+    clargs->m0 = m0;
+    clargs->sequence = options->sequence;
+    clargs->request = options->request;
+
+    task->cl_arg = clargs;
+    task->cl_arg_size = sizeof( struct cl_zgetrf_percol_args_s );
+    task->cl_arg_free = 1;
+
+    /* Set common parameters */
+    starpu_cham_task_set_options( options, task, nbdata, descrs, cl_zgetrf_percol_diag_callback );
+
+    /* Flops */
+    // task->flops = TODO;
+
+    /* Refine name */
+    task->name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );
+
+    ret = starpu_task_submit( task );
+    if ( ret == -ENODEV ) {
+        task->destroy = 0;
+        starpu_task_destroy( task );
+        chameleon_error( "INSERT_TASK_zgetrf_percol_diag", "Failed to submit the task to StarPU" );
+        return;
+    }
+    starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
+}
+
+#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
 {
-    int m, n, h, m0;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
+    struct cl_zgetrf_percol_args_s *clargs = (struct cl_zgetrf_percol_args_s *)cl_arg;
     CHAM_tile_t *tileA;
     cppi_interface_t *nextpiv;
     cppi_interface_t *prevpiv;
 
-    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
-
     tileA = cti_interface_get(descr[0]);
     nextpiv = (cppi_interface_t*) descr[1];
     prevpiv = (cppi_interface_t*) descr[2];
 
-    nextpiv->h = h; /* Initialize in case it uses a copy */
+    nextpiv->h = clargs->h; /* Initialize in case it uses a copy */
     nextpiv->has_diag = chameleon_max( -1, nextpiv->has_diag);
 
-    CORE_zgetrf_panel_offdiag( m, n, h, m0, tileA->n,
+    CORE_zgetrf_panel_offdiag( clargs->m, clargs->n, clargs->h, clargs->m0, tileA->n,
                                CHAM_tile_get_ptr(tileA), tileA->ld,
                                NULL, -1,
                                &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -158,13 +237,14 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func)
 
+#if defined(CHAMELEON_STARPU_USE_INSERT)
+
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ipiv )
 {
-    struct starpu_codelet *codelet = &cl_zgetrf_percol_offdiag;
     void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL;
     const char *cl_name = "zgetrf_percol_offdiag";
 
     int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
@@ -185,23 +265,98 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
     CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Set codelet parameters */
+    struct cl_zgetrf_percol_args_s *clargs;
+    clargs = malloc( sizeof( struct cl_zgetrf_percol_args_s ) );
+    clargs->m = m;
+    clargs->n = n;
+    clargs->h = h;
+    clargs->m0 = m0;
+    clargs->sequence = options->sequence;
+    clargs->request = options->request;
+
     /* Refine name */
     cl_name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );
 
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE, &m, sizeof(int),
-        STARPU_VALUE, &n, sizeof(int),
-        STARPU_VALUE, &h, sizeof(int),
-        STARPU_VALUE, &m0, sizeof(int),
-        STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t *),
-        STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t *),
+        &cl_zgetrf_percol_offdiag,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_percol_args_s),
+
+        /* Task handles */
         STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),
         access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
+
+        /* Common task arguments */
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
         STARPU_NAME, cl_name,
         0 );
 }
+
+#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
+void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
+                                        int m, int n, int h, int m0,
+                                        CHAM_desc_t *A, int Am, int An,
+                                        CHAM_ipiv_t *ipiv )
+{
+    int ret, access_npiv, access_ppiv;
+    struct starpu_task *task;
+    int rankA = A->get_rankof(A, Am, An);
+
+    if ( rankA != A->myrank ) {
+        return;
+    }
+
+    INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_percol_offdiag, zgetrf_percol_offdiag, zgetrf_percol, 3 );
+
+    access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
+    access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R;
+
+    /*
+     * Register the data handles, no exchange needed
+     */
+    starpu_cham_exchange_init_params( options, &params, rankA );
+    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_RW );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv );
+
+    task = starpu_task_create();
+    task->cl = cl;
+
+    /* Set codelet parameters */
+    clargs = malloc( sizeof( struct cl_zgetrf_percol_args_s ) );
+    clargs->m = m;
+    clargs->n = n;
+    clargs->h = h;
+    clargs->m0 = m0;
+    clargs->sequence = options->sequence;
+    clargs->request = options->request;
+
+    task->cl_arg = clargs;
+    task->cl_arg_size = sizeof( struct cl_zgetrf_percol_args_s );
+    task->cl_arg_free = 1;
+
+    /* Set common parameters */
+    starpu_cham_task_set_options( options, task, nbdata, descrs, cl_zgetrf_percol_offdiag_callback );
+
+    /* Flops */
+    // task->flops = TODO;
+
+    /* Refine name */
+    task->name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );
+
+    ret = starpu_task_submit( task );
+    if ( ret == -ENODEV ) {
+        task->destroy = 0;
+        starpu_task_destroy( task );
+        chameleon_error( "INSERT_TASK_zgetrf_percol_offdiag", "Failed to submit the task to StarPU" );
+        return;
+    }
+    starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
+}
+
+#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
-- 
GitLab