From f7fa32e7c5284c0798c93c140acb358294ef32e7 Mon Sep 17 00:00:00 2001
From: Alycia Lisito <alycia.lisito@inria.fr>
Date: Wed, 5 Mar 2025 12:47:23 +0100
Subject: [PATCH] starpu/codelet: Add new task submit to
 codelet_zgetrf_percol.c

---
 .../starpu/codelets/codelet_zgetrf_percol.c   | 224 +++++++++++++++---
 1 file changed, 189 insertions(+), 35 deletions(-)

diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
index 1ee38e8a1..8e6f541a0 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -22,31 +22,36 @@
 #include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
+struct cl_zgetrf_percol_args_s { /* Codelet arguments (cl_arg) shared by the percol diag and offdiag tasks */
+        int                 m;        /* Forwarded as 1st argument of CORE_zgetrf_panel_{diag,offdiag} */
+        int                 n;        /* Forwarded as 2nd argument of CORE_zgetrf_panel_{diag,offdiag} */
+        int                 h;        /* Forwarded as 3rd argument; also stored into nextpiv->h by the kernels */
+        int                 m0;       /* Forwarded as 4th argument of CORE_zgetrf_panel_{diag,offdiag} */
+        RUNTIME_sequence_t *sequence; /* Copied from options->sequence when the task is submitted */
+        RUNTIME_request_t  *request;  /* Copied from options->request when the task is submitted */
+};
+
 CHAMELEON_CL_CB( zgetrf_percol_diag,    cti_handle_get_m(task->handles[0]), 0, 0, M )
 CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M )
 
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                 m, n, h, m0;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t  *request;
+    struct cl_zgetrf_percol_args_s *clargs = (struct cl_zgetrf_percol_args_s *)cl_arg;
     CHAM_tile_t        *tileA;
     int                *ipiv;
     cppi_interface_t   *nextpiv;
     cppi_interface_t   *prevpiv;
 
-    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
-
     tileA   = cti_interface_get(descr[0]);
     ipiv    = (int *)STARPU_VECTOR_GET_PTR(descr[1]);
     nextpiv = (cppi_interface_t*) descr[2];
     prevpiv = (cppi_interface_t*) descr[3];
 
-    if ( h > 0 ) {
+    if ( clargs->h > 0 ) {
         cppi_display_dbg( prevpiv, stderr, "Prevpiv before call: " );
     }
-    if ( h < tileA->n ) {
+    if ( clargs->h < tileA->n ) {
         cppi_display_dbg( nextpiv, stderr, "Nextpiv before call: " );
     }
 
@@ -54,18 +59,18 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
      * Make sure the nextpiv interface store the right information about the
      * column and diagonal row for the reduction
      */
-    nextpiv->h        = h;
+    nextpiv->h        = clargs->h;
     nextpiv->has_diag = 1;
 
-    CORE_zgetrf_panel_diag( m, n, h, m0, tileA->n,
+    CORE_zgetrf_panel_diag( clargs->m, clargs->n, clargs->h, clargs->m0, tileA->n,
                             CHAM_tile_get_ptr( tileA ), tileA->ld,
                             NULL, -1,
                             ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
 
-    if ( h > 0 ) {
+    if ( clargs->h > 0 ) {
         cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " );
     }
-    if ( h < n ) {
+    if ( clargs->h < clargs->n ) {
         cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " );
     }
 }
@@ -76,12 +81,13 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func )
 
+#if defined(CHAMELEON_STARPU_USE_INSERT)
+
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ipiv )
 {
-    struct starpu_codelet *codelet = &cl_zgetrf_percol_diag;
     void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_diag_callback : NULL;
     const char *cl_name = "zgetrf_percol_diag";
     int rankA           = A->get_rankof(A, Am, An);
@@ -105,21 +111,31 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
     CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Set codelet parameters */
+    struct cl_zgetrf_percol_args_s *clargs;
+    clargs = malloc( sizeof( struct cl_zgetrf_percol_args_s ) );
+    clargs->m        = m;
+    clargs->n        = n;
+    clargs->h        = h;
+    clargs->m0       = m0;
+    clargs->sequence = options->sequence;
+    clargs->request  = options->request;
+
     /* Refine name */
     cl_name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );
 
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,             &m,                   sizeof(int),
-        STARPU_VALUE,             &n,                   sizeof(int),
-        STARPU_VALUE,             &h,                   sizeof(int),
-        STARPU_VALUE,             &m0,                  sizeof(int),
-        STARPU_VALUE,             &(options->sequence), sizeof(RUNTIME_sequence_t*),
-        STARPU_VALUE,             &(options->request),  sizeof(RUNTIME_request_t*),
+        &cl_zgetrf_percol_diag,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_percol_args_s),
+
+        /* Task handles */
         STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         access_ipiv,              RUNTIME_ipiv_getaddr( ipiv, An ),
         access_npiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h   ),
         access_ppiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
+
+        /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
         STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
@@ -127,26 +143,89 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
         0 );
 }
 
+#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
+void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
+                                     int m, int n, int h, int m0,
+                                     CHAM_desc_t *A, int Am, int An,
+                                     CHAM_ipiv_t *ipiv )
+{
+    int ret, access_ipiv, access_npiv, access_ppiv;
+    struct starpu_task *task;
+    int rankA = A->get_rankof(A, Am, An);
+    /* Hand-built submission of the zgetrf_percol_diag task, compiled when CHAMELEON_STARPU_USE_INSERT is not defined */
+    if ( rankA != A->myrank ) {
+        return; /* Tile (Am, An) is mapped to another rank: nothing to submit here */
+    }
+    /* NOTE(review): the macro presumably declares cl, cl_name, clargs, params, nbdata and descrs used below - confirm */
+    INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_percol_diag, zgetrf_percol_diag, zgetrf_percol, 4 );
+    /* Access modes depend on h: write-only ipiv and no previous pivot at h == 0, read-only next pivot at h == ipiv->n */
+    access_ipiv = ( h == 0 )       ? STARPU_W    : STARPU_RW;
+    access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
+    access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
+
+    /*
+     * Register the data handles, no exchange needed (A tile, ipiv, pivot h, pivot h-1: the order the CPU kernel reads descr[0..3])
+     */
+    starpu_cham_exchange_init_params( options, &params, rankA );
+    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ),     STARPU_RW );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An),               access_ipiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),   access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv );
+
+    task = starpu_task_create();
+    task->cl = cl;
+
+    /* Set codelet parameters: a heap-allocated struct handed to the task as cl_arg */
+    clargs = malloc( sizeof( struct cl_zgetrf_percol_args_s ) );
+    clargs->m        = m;
+    clargs->n        = n;
+    clargs->h        = h;
+    clargs->m0       = m0;
+    clargs->sequence = options->sequence;
+    clargs->request  = options->request;
+
+    task->cl_arg      = clargs;
+    task->cl_arg_size = sizeof( struct cl_zgetrf_percol_args_s );
+    task->cl_arg_free = 1; /* Ownership of clargs goes to StarPU, which frees cl_arg itself (see StarPU task documentation) */
+
+    /* Set common parameters (options, registered handles and the codelet callback) */
+    starpu_cham_task_set_options( options, task, nbdata, descrs, cl_zgetrf_percol_diag_callback );
+
+    /* Flops: not provided for this task yet */
+    // task->flops = TODO;
+
+    /* Refine name */
+    task->name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );
+
+    ret = starpu_task_submit( task );
+    if ( ret == -ENODEV ) {
+        task->destroy = 0; /* Submission failed: clear the destroy flag, then release the task explicitly */
+        starpu_task_destroy( task );
+        chameleon_error( "INSERT_TASK_zgetrf_percol_diag", "Failed to submit the task to StarPU" );
+        return;
+    }
+    starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
+}
+
+#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                 m, n, h, m0;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t  *request;
+    struct cl_zgetrf_percol_args_s *clargs = (struct cl_zgetrf_percol_args_s *)cl_arg;
     CHAM_tile_t        *tileA;
     cppi_interface_t   *nextpiv;
     cppi_interface_t   *prevpiv;
 
-    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
-
     tileA   = cti_interface_get(descr[0]);
     nextpiv = (cppi_interface_t*) descr[1];
     prevpiv = (cppi_interface_t*) descr[2];
 
-    nextpiv->h = h; /* Initialize in case it uses a copy */
+    nextpiv->h = clargs->h; /* Initialize in case it uses a copy */
     nextpiv->has_diag = chameleon_max( -1, nextpiv->has_diag);
 
-    CORE_zgetrf_panel_offdiag( m, n, h, m0, tileA->n,
+    CORE_zgetrf_panel_offdiag( clargs->m, clargs->n, clargs->h, clargs->m0, tileA->n,
                                CHAM_tile_get_ptr(tileA), tileA->ld,
                                NULL, -1,
                                &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -158,13 +237,13 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func)
 
+#if defined(CHAMELEON_STARPU_USE_INSERT)
+
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ipiv )
 {
-    struct starpu_codelet *codelet = &cl_zgetrf_percol_offdiag;
-
     void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL;
     const char *cl_name = "zgetrf_percol_offdiag";
     int access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
@@ -185,23 +264,98 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
     CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Set codelet parameters */
+    struct cl_zgetrf_percol_args_s *clargs;
+    clargs = malloc( sizeof( struct cl_zgetrf_percol_args_s ) );
+    clargs->m        = m;
+    clargs->n        = n;
+    clargs->h        = h;
+    clargs->m0       = m0;
+    clargs->sequence = options->sequence;
+    clargs->request  = options->request;
+
     /* Refine name */
     cl_name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );
 
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,             &m,                   sizeof(int),
-        STARPU_VALUE,             &n,                   sizeof(int),
-        STARPU_VALUE,             &h,                   sizeof(int),
-        STARPU_VALUE,             &m0,                  sizeof(int),
-        STARPU_VALUE,             &(options->sequence), sizeof(RUNTIME_sequence_t *),
-        STARPU_VALUE,             &(options->request),  sizeof(RUNTIME_request_t *),
+        &cl_zgetrf_percol_offdiag,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_percol_args_s),
+
+        /* Task handles */
         STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         access_npiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h   ),
         access_ppiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
+
+        /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
         STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
         STARPU_NAME,              cl_name,
         0 );
 }
+
+#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
+void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
+                                        int m, int n, int h, int m0,
+                                        CHAM_desc_t *A, int Am, int An,
+                                        CHAM_ipiv_t *ipiv )
+{
+    int ret, access_npiv, access_ppiv;
+    struct starpu_task *task;
+    int rankA = A->get_rankof(A, Am, An);
+    /* Hand-built submission of the zgetrf_percol_offdiag task, compiled when CHAMELEON_STARPU_USE_INSERT is not defined */
+    if ( rankA != A->myrank ) {
+        return; /* Tile (Am, An) is mapped to another rank: nothing to submit here */
+    }
+    /* NOTE(review): the macro presumably declares cl, cl_name, clargs, params, nbdata and descrs used below - confirm */
+    INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_percol_offdiag, zgetrf_percol_offdiag, zgetrf_percol, 3 );
+    /* Access modes depend on h: no previous pivot at h == 0, read-only next pivot (instead of a reduction) at h == ipiv->n */
+    access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
+    access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
+
+    /*
+     * Register the data handles, no exchange needed (A tile, pivot h, pivot h-1: the order the CPU kernel reads descr[0..2])
+     */
+    starpu_cham_exchange_init_params( options, &params, rankA );
+    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ),     STARPU_RW );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),   access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv );
+
+    task = starpu_task_create();
+    task->cl = cl;
+
+    /* Set codelet parameters: a heap-allocated struct handed to the task as cl_arg */
+    clargs = malloc( sizeof( struct cl_zgetrf_percol_args_s ) );
+    clargs->m        = m;
+    clargs->n        = n;
+    clargs->h        = h;
+    clargs->m0       = m0;
+    clargs->sequence = options->sequence;
+    clargs->request  = options->request;
+
+    task->cl_arg      = clargs;
+    task->cl_arg_size = sizeof( struct cl_zgetrf_percol_args_s );
+    task->cl_arg_free = 1; /* Ownership of clargs goes to StarPU, which frees cl_arg itself (see StarPU task documentation) */
+
+    /* Set common parameters (options, registered handles and the codelet callback) */
+    starpu_cham_task_set_options( options, task, nbdata, descrs, cl_zgetrf_percol_offdiag_callback );
+
+    /* Flops: not provided for this task yet */
+    // task->flops = TODO;
+
+    /* Refine name */
+    task->name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) );
+
+    ret = starpu_task_submit( task );
+    if ( ret == -ENODEV ) {
+        task->destroy = 0; /* Submission failed: clear the destroy flag, then release the task explicitly */
+        starpu_task_destroy( task );
+        chameleon_error( "INSERT_TASK_zgetrf_percol_offdiag", "Failed to submit the task to StarPU" );
+        return;
+    }
+    starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
+}
+
+#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
-- 
GitLab