diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index fcd8177a0a0cc56a6e21080463956c98918d1a46..37d352bfdd0f3f7cabdac2f2b614aea98c126b0b 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -24,7 +24,7 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Ana Hourcau
- * @date 2024-10-17
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -371,6 +371,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t
 int CHAMELEON_zTile_to_Lapack( CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA ) __attribute__((deprecated("Please refer to CHAMELEON_zDesc2Lap() instead")));
 int CHAMELEON_zLap2Desc( cham_uplo_t uplo, CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t *A );
 int CHAMELEON_zDesc2Lap( cham_uplo_t uplo, CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA );
+void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA, CHAM_ipiv_t *descIPIV );
 
 /**
  *  User Builder function prototypes
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index 7ba9ee093056b21f3d3c8aa975dc304043a8d3a7..b9cd9fcb4be875946d42537f3c75a1997b9a8826 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -168,8 +168,10 @@ void INSERT_TASK_hgemm( const RUNTIME_option_t *options,
                                                   const CHAM_desc_t *B, int Bm, int Bn,
                         CHAMELEON_Real16_t beta,  const CHAM_desc_t *C, int Cm, int Cn );
 
-void INSERT_TASK_ipiv_init   ( const RUNTIME_option_t *options,
-                               CHAM_ipiv_t *ipiv );
+void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
+                            CHAM_ipiv_t *ipiv );
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t *ipiv );
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ws, int k, int h, int rank );
 void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_ipiv.c b/runtime/starpu/codelets/codelet_ipiv.c
index 7d7045edc9d70710f74deee861b436b745b974a4..5a16c6e2dda5d2e411415bf368f214bbbc8ec71b 100644
--- a/runtime/starpu/codelets/codelet_ipiv.c
+++ b/runtime/starpu/codelets/codelet_ipiv.c
@@ -18,17 +18,23 @@
  *
  */
 #include "chameleon_starpu_internal.h"
-#include "runtime_codelets.h"
 
-static void cl_ipiv_init_cpu_func(void *descr[], void *cl_arg)
+struct cl_laswp_args_s { /* Arguments handed to cl_ipiv_init_data through STARPU_CL_ARGS */
+    int   m0;   /* Index of the tile's first entry (m * mb); not read by cl_ipiv_init_data_cpu_func */
+    int   n;    /* Number of entries copied into the tile */
+    int   m;    /* Copy of ipiv->desc->m; not read by cl_ipiv_init_data_cpu_func */
+    int  *data; /* Source values for the tile (ipiv->data + m0) */
+};
+
+static void cl_ipiv_init_cpu_func( void *descr[], void *cl_arg )
 {
 #if !defined(CHAMELEON_SIMULATION)
-    int *ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
+    int *ipiv = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
     int i, m0, n;
 
     starpu_codelet_unpack_args( cl_arg, &m0, &n );
 
-    for( i=0; i<n; i++ ) {
+    for( i = 0; i < n; i++ ) {
         ipiv[i] = m0 + i + 1;
     }
 #endif
@@ -47,10 +53,10 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     int64_t mb = ipiv->mb;
     int     m;
 
-    for (m = 0; m < mt; m++) {
+    for ( m = 0; m < mt; m++ ) {
         starpu_data_handle_t ipiv_src = RUNTIME_ipiv_getaddr( ipiv, m );
         int m0 = m * mb;
-        int n  = (m == (mt-1)) ? ipiv->m - m0 : mb;
+        int n  = ( m == ( mt - 1 ) ) ? ipiv->m - m0 : mb;
 
         rt_starpu_insert_task(
             &cl_ipiv_init,
@@ -61,6 +67,62 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     }
 }
 
+/* CPU kernel: copies n ints from the cl_arg data pointer into the descr[0] vector (empty if CHAMELEON_SIMULATION). */
+static void cl_ipiv_init_data_cpu_func( void *descr[], void *cl_arg )
+{
+#if !defined(CHAMELEON_SIMULATION)
+    const struct cl_laswp_args_s *args = cl_arg;
+    const int *src  = args->data;
+    int       *dst  = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
+    int        left = args->n;
+
+    for ( ; left > 0; left-- ) {
+        *dst++ = *src++;
+    }
+#endif
+}
+
+struct starpu_codelet cl_ipiv_init_data = {
+    .name      = "ipiv_init_data",               /* Identifies the codelet in StarPU traces/debug output */
+    .where     = STARPU_CPU, .nbuffers = 1,      /* CPU-only kernel working on a single data buffer */
+    .cpu_funcs = { cl_ipiv_init_data_cpu_func }, /* cpu_funcs replaces the deprecated cpu_func field */
+};
+
+/* Submits one cl_ipiv_init_data task per ipiv tile to copy the values of ipiv->data into that tile. */
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t            *ipiv )
+{
+    int64_t mt = ipiv->mt;
+    int64_t mb = ipiv->mb;
+    int     m;
+
+    /* Without a data array there is nothing to copy: no task is submitted. */
+    if ( ipiv->data == NULL ) {
+        return;
+    }
+
+    for ( m = 0; m < mt; m++ ) {
+        starpu_data_handle_t    ipiv_src = RUNTIME_ipiv_getaddr( ipiv, m );
+        struct cl_laswp_args_s *cl_args  = malloc( sizeof( *cl_args ) );
+        int                     m0       = m * mb;
+
+        /* Stop with a message instead of dereferencing NULL when the allocation fails. */
+        if ( cl_args == NULL ) {
+            STARPU_ABORT_MSG( "failed to allocate the codelet arguments" );
+        }
+        cl_args->m0   = m0;
+        cl_args->n    = ( m == ( mt - 1 ) ) ? ipiv->m - m0 : mb; /* last tile takes the remainder */
+        cl_args->m    = ipiv->desc->m;
+        cl_args->data = ipiv->data + m0;
+
+        rt_starpu_insert_task(
+            &cl_ipiv_init_data,
+            STARPU_CL_ARGS, cl_args, sizeof( *cl_args ),
+            STARPU_W,       ipiv_src,
+            0 );
+    }
+}
+
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ipiv, int k, int h, int rank )
 {
@@ -68,7 +130,7 @@ void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
 
 #if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI)
 #if !defined(HAVE_STARPU_MPI_REDUX_WRAPUP)
-    starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h   );
+    starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h );
     if ( h < ipiv->n ) {
         starpu_mpi_redux_data_prio_tree( options->sequence->comm, nextpiv,
                                          options->priority, 2 /* Binary tree */ );
@@ -135,3 +197,4 @@ void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
         0 );
 }
+