diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 1c7b5a0b2e623d1091819c72004998b1cd673ac1..d60af1f6018b8c13545da8e74bd5d8477dd1d69e 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -27,7 +27,7 @@
 #  @author Alycia Lisito
 #  @author Loris Lucido
 #  @author Matthieu Kuhn
-#  @date 2023-07-06
+#  @date 2023-08-22
 #
 ###
 
@@ -41,6 +41,7 @@ set(CHAMELEON_CONTROL
     ../control/descriptor.c
     ../control/descriptor_rec.c
     ../control/descriptor_helpers.c
+    ../control/descriptor_ipiv.c
     ../control/workspace.c
     ../control/tile.c
     ../control/chameleon_f77.c
diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 1d93acc5ed3ec411fea4396c85012689e6560ee3..1060432565ee5d581cd82bc74da7c53df0829383 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -11,63 +11,19 @@
  *
  * @brief Chameleon zgetrf parallel algorithm
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Omar Zenati
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Matthieu Kuhn
- * @date 2023-02-21
+ * @date 2023-08-22
  * @precisions normal z -> s d c
  *
  */
 #include "control/common.h"
 
-#define A(m,n) A,  m,  n
-#define U(m,n) &(ws->U),  m,  n
-#define IPIV(m) IPIV,  m,  1
-
-/*
- * Static variable to know how to handle the data within the kernel
- * This assumes that only one runtime is enabled at a time.
- */
-static RUNTIME_id_t zgetrf_runtime_id = RUNTIME_SCHED_STARPU;
-
-static inline int
-zgetrf_ipiv_init( const CHAM_desc_t *descIPIV,
-                  cham_uplo_t uplo, int m, int n,
-                  CHAM_tile_t *tileIPIV, void *op_args )
-{
-    int *IPIV;
-    (void)op_args;
-
-    if ( zgetrf_runtime_id == RUNTIME_SCHED_PARSEC ) {
-        IPIV = (int*)tileIPIV;
-    }
-    else {
-        IPIV = CHAM_tile_get_ptr( tileIPIV );
-    }
-
-#if !defined(CHAMELEON_SIMULATION)
-    {
-        int tempmm = m == descIPIV->mt-1 ? descIPIV->m - m * descIPIV->mb : descIPIV->mb;
-        int i;
-
-        for( i=0; i<tempmm; i++ ) {
-            IPIV[i] = m * descIPIV->mb + i + 1;
-        }
-    }
-#endif
-
-    return 0;
-}
-
-static inline void
-chameleon_pzgetrf_ipiv_init( CHAM_desc_t        *IPIV,
-                             RUNTIME_sequence_t *sequence,
-                             RUNTIME_request_t  *request )
-{
-    chameleon_pmap( ChamW, ChamUpperLower, IPIV, zgetrf_ipiv_init, NULL, sequence, request );
-}
+#define A(m,n)  A,        m, n
+#define U(m,n)  &(ws->U), m, n
 
 /*
  * All the functions below are panel factorization variant.
@@ -79,10 +35,10 @@ chameleon_pzgetrf_ipiv_init( CHAM_desc_t        *IPIV,
  *   @param[inout] A
  *      The descriptor of the full matrix A (not just the panel)
  *
- *   @param[in] k
- *      The index of the column to factorize
+ *   @param[inout] ipiv
+ *      The descriptor of the pivot array associated to A.
  *
- *   @param[in] ib
+ *   @param[in] k
  *      The index of the column to factorize
  *
  *   @param[inout] options
@@ -91,6 +47,7 @@ chameleon_pzgetrf_ipiv_init( CHAM_desc_t        *IPIV,
 static inline void
 chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws,
                                      CHAM_desc_t                *A,
+                                     CHAM_ipiv_t                *ipiv,
                                      int                         k,
                                      RUNTIME_option_t           *options )
 {
@@ -122,6 +79,7 @@ chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws,
 static inline void
 chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws,
                                             CHAM_desc_t                *A,
+                                            CHAM_ipiv_t                *ipiv,
                                             int                         k,
                                             RUNTIME_option_t           *options )
 {
@@ -136,13 +94,13 @@ chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws,
      * Algorithm per column without pivoting
      */
     for(h=0; h<minmn; h++){
-        INSERT_TASK_zgetrf_panel_nopiv_percol_diag(
+        INSERT_TASK_zgetrf_nopiv_percol_diag(
             options, tempkm, tempkn, h,
             A( k, k ), U( k, k ), A->mb * k );
 
         for (m = k+1; m < A->mt; m++) {
             tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
-            INSERT_TASK_zgetrf_panel_nopiv_percol_trsm(
+            INSERT_TASK_zgetrf_nopiv_percol_trsm(
                 options, tempmm, tempkn, h,
                 A( m, k ), U( k, k ) );
         }
@@ -151,18 +109,79 @@ chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws,
     RUNTIME_data_flush( options->sequence, U(k, k) );
 }
 
+static inline void
+chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
+                                      CHAM_desc_t                *A,
+                                      CHAM_ipiv_t                *ipiv,
+                                      int                         k,
+                                      RUNTIME_option_t           *options )
+{
+    int m, h;
+    int tempkm, tempkn, minmn;
+    /* Size of the diagonal tile (k,k): the last tile row/column may be smaller */
+    tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+    tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+    minmn  = chameleon_min( tempkm, tempkn );
+
+    /* Update the number of columns stored in the ipiv descriptor for panel k */
+    ipiv->n = minmn;
+
+    /*
+     * Per-column algorithm with pivoting: h goes from 0 to minmn included
+     */
+    for (h=0; h<=minmn; h++){
+        /* Step h on the diagonal tile (k,k), with k * A->mb as second index */
+        INSERT_TASK_zgetrf_percol_diag(
+            options,
+            h, k * A->mb,
+            A(k, k),
+            ipiv );
+
+        for (m = k+1; m < A->mt; m++) {
+            /* Step h on every tile (m,k) below the diagonal, with m * A->mb */
+
+            INSERT_TASK_zgetrf_percol_offdiag(
+                options,
+                h, m * A->mb,
+                A(m, k),
+                ipiv );
+        }
+
+        if ( h < minmn ) {
+            /* Reduce globally (between MPI processes); skipped on the last step */
+            RUNTIME_ipiv_reducek( options, ipiv, k, h );
+        }
+    }
+
+    /* Flush temporary data used for the pivoting */
+    RUNTIME_ipiv_flushk( options->sequence, ipiv, k );
+}
+
 static inline void
 chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
                                CHAM_desc_t                *A,
+                               CHAM_ipiv_t                *ipiv,
                                int                         k,
                                RUNTIME_option_t           *options )
 {
     /* TODO: Should be replaced by a function pointer */
-    if ( ws->alg == ChamGetrfNoPivPerColumn ) {
-        chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
-    }
-    else {
-        chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
+    switch( ws->alg ) {
+    case ChamGetrfNoPivPerColumn:
+        chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, ipiv, k, options );
+        break;
+
+    case ChamGetrfPPivPerColumn:
+        chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options );
+        break;
+
+    case ChamGetrfPPiv:
+        chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options );
+        break;
+
+    case ChamGetrfNoPiv:
+        chameleon_attr_fallthrough;
+    default:
+        chameleon_pzgetrf_panel_facto_nopiv( ws, A, ipiv, k, options );
     }
 }
 
@@ -227,7 +246,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
  */
 void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
                         CHAM_desc_t                *A,
-                        CHAM_desc_t                *IPIV,
+                        CHAM_ipiv_t                *IPIV,
                         RUNTIME_sequence_t         *sequence,
                         RUNTIME_request_t          *request )
 {
@@ -243,14 +262,11 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
     }
     RUNTIME_options_init( &options, chamctxt, sequence, request );
 
-    /* Initialize IPIV */
-    chameleon_pzgetrf_ipiv_init( IPIV, sequence, request );
-
     for (k = 0; k < min_mnt; k++) {
         RUNTIME_iteration_push( chamctxt, k );
 
         options.priority = A->nt;
-        chameleon_pzgetrf_panel_facto( ws, A, k, &options );
+        chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options );
 
         for (n = k+1; n < A->nt; n++) {
             options.priority = A->nt-n;
@@ -272,5 +288,12 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
         }
     }
 
+    /* Initialize IPIV */
+    if ( (ws->alg == ChamGetrfNoPivPerColumn) ||
+         (ws->alg == ChamGetrfNoPiv ) )
+    {
+        RUNTIME_ipiv_init( IPIV );
+    }
+
     RUNTIME_options_finalize( &options, chamctxt );
 }
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index 44d57502a84287db609f701b64a74f8a489aada0..73c810be2c1f3294583b6599aff49e726f8f049d 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -19,7 +19,7 @@
  * @author Florent Pruvost
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-05
+ * @date 2023-08-22
  *
  * @precisions normal z -> s d c
  *
@@ -68,15 +68,21 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
     {
         char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" );
 
-        if ( algostr ) {
-            if ( strcasecmp( algostr, "nopiv" ) ) {
+        if ( algostr != NULL ) {
+            if ( strcasecmp( algostr, "nopiv" ) == 0 ) {
                 ws->alg = ChamGetrfNoPiv;
             }
             else if ( strcasecmp( algostr, "nopivpercolumn" ) == 0  ) {
                 ws->alg = ChamGetrfNoPivPerColumn;
             }
+            else if ( strcasecmp( algostr, "ppiv" )  == 0 ) {
+                ws->alg = ChamGetrfPPiv;
+            }
+            else if ( strcasecmp( algostr, "ppivpercolumn" ) == 0  ) {
+                ws->alg = ChamGetrfPPivPerColumn;
+            }
             else {
-                fprintf( stderr, "ERROR: CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn => Switch back to NoPiv\n" );
+                chameleon_error( "CHAMELEON_zgetrf_WS_Alloc", "CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn, PPiv, PPivPerColumn => Switch back to NoPiv\n" );
             }
         }
         chameleon_cleanenv( algostr );
@@ -90,6 +96,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
                              NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
     }
 
+    /* Set ib to 1 if per column algorithm */
+    if ( ( ws->alg == ChamGetrfNoPivPerColumn ) ||
+         ( ws->alg == ChamGetrfPPivPerColumn  ) )
+    {
+        ws->ib = 1;
+    }
+
     return ws;
 }
 
@@ -123,7 +136,6 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
     free( ws );
 }
 
-#if defined(NOT_AVAILABLE_YET)
 /**
  ********************************************************************************
  *
@@ -149,6 +161,11 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
  * @param[in] LDA
  *          The leading dimension of the array A. LDA >= max(1,M).
  *
+ * @param[out] IPIV
+ *          Integer array of dimension min(M,N).
+ *          The pivot indices; for 1 <= i <= min(M,N), row i of the
+ *          matrix was interchanged with row IPIV(i).
+ *
  *******************************************************************************
  *
  * @retval CHAMELEON_SUCCESS successful exit
@@ -173,10 +190,11 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
     int                 NB;
     int                 status;
     CHAM_desc_t         descAl, descAt;
+    CHAM_ipiv_t         descIPIV;
     CHAM_context_t     *chamctxt;
     RUNTIME_sequence_t *sequence = NULL;
     RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
-    void               *ws;
+    struct chameleon_pzgetrf_s *ws;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
@@ -218,25 +236,36 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
 
     /* Allocate workspace for partial pivoting */
     ws = CHAMELEON_zgetrf_WS_Alloc( &descAt );
+
+    if ( ws->alg == ChamGetrfPPivPerColumn ) {
+        chameleon_ipiv_init( &descIPIV, &descAt, IPIV );
+    }
+
     /* Call the tile interface */
-    CHAMELEON_zgetrf_Tile_Async( &descAt, ws, sequence, &request );
+    CHAMELEON_zgetrf_Tile_Async( &descAt, &descIPIV, ws, sequence, &request );
 
     /* Submit the matrix conversion back */
     chameleon_ztile2lap( chamctxt, &descAl, &descAt,
                          ChamDescInout, ChamUpperLower, sequence, &request );
 
+    if ( ws->alg == ChamGetrfPPivPerColumn ) {
+        RUNTIME_ipiv_gather( &descIPIV, IPIV, 0 );
+    }
     chameleon_sequence_wait( chamctxt, sequence );
 
     /* Cleanup the temporary data */
+    /* ws->alg must be read before ws is released by CHAMELEON_zgetrf_WS_Free() */
+    if ( ws->alg == ChamGetrfPPivPerColumn ) {
+        chameleon_ipiv_destroy( &descIPIV );
+    }
     CHAMELEON_zgetrf_WS_Free( ws );
     chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
 
     status = sequence->status;
     chameleon_sequence_destroy( chamctxt, sequence );
 
     return status;
 }
-#endif
 
 /**
  ********************************************************************************
@@ -254,12 +283,19 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
  *          On entry, the M-by-N matrix to be factored.
  *          On exit, the tile factors L and U from the factorization.
  *
+ * @param[in,out] IPIV
+ *          On entry, ipiv descriptor associated to A and created with
+ *          CHAMELEON_Ipiv_Create().
+ *          On exit, it contains the pivot indices associated to the PLU
+ *          factorization of A.
+ *
  *******************************************************************************
  *
  * @retval CHAMELEON_SUCCESS successful exit
- * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
- *               but the factor U is exactly singular, and division by zero will occur
- *               if it is used to solve a system of equations.
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been
+ *               completed, but the factor U is exactly singular, and division
+ *               by zero will occur if it is used to solve a system of
+ *               equations.
  *
  *******************************************************************************
  *
@@ -272,7 +308,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
  *
  */
 int
-CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
+CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV )
 {
     CHAM_context_t     *chamctxt;
     RUNTIME_sequence_t *sequence = NULL;
@@ -316,9 +352,10 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
  *          On exit, the tile factors L and U from the factorization.
  *
  * @param[in,out] IPIV
- *          On entry, the descriptor of an min(M, N)-by-1 matrix that may not
- *          have been initialized.
- *          On exit, the pivot vector generated during the factorization.
+ *          On entry, ipiv descriptor associated to A and created with
+ *          CHAMELEON_Ipiv_Create().
+ *          On exit, it contains the pivot indices associated to the PLU
+ *          factorization of A.
  *
  * @param[in,out] user_ws
  *          The opaque pointer to pre-allocated getrf workspace through
@@ -345,7 +382,7 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
  */
 int
 CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
-                             CHAM_desc_t        *IPIV,
+                             CHAM_ipiv_t        *IPIV,
                              void               *user_ws,
                              RUNTIME_sequence_t *sequence,
                              RUNTIME_request_t  *request )
@@ -383,10 +420,6 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
         chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid first descriptor" );
         return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
     }
-    if ( chameleon_desc_check( IPIV ) != CHAMELEON_SUCCESS ) {
-        chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid second descriptor" );
-        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
-    }
 
     /* Check input arguments */
     if ( A->nb != A->mb ) {
@@ -397,10 +430,6 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
         chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must have the number of rows as tiles of A" );
         return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
     }
-    if ( IPIV->nb != 1 ) {
-        chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must be vectore with only one column per tile" );
-        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
-    }
 
     if ( user_ws == NULL ) {
         ws = CHAMELEON_zgetrf_WS_Alloc( A );
@@ -409,7 +438,7 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
         ws = user_ws;
     }
 
-    chameleon_pzgetrf( user_ws, A, IPIV, sequence, request );
+    chameleon_pzgetrf( ws, A, IPIV, sequence, request );
 
     if ( user_ws == NULL ) {
         CHAMELEON_Desc_Flush( A, sequence );
diff --git a/control/compute_z.h b/control/compute_z.h
index 06eae17b508c012918bbd011bad9cbb25a7bb7d4..9032c20f24666d4e751b1e422e9afd07b41d047f 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -22,7 +22,7 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-06
+ * @date 2023-08-22
  * @precisions normal z -> c d s
  *
  */
@@ -94,7 +94,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc
 void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_desc_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/control/descriptor.h b/control/descriptor.h
index 38153b6b2bc444155315d249139236a827a2b1de..d3d65c20c20fed8f9836c8a10aaf5a02572b3ca6 100644
--- a/control/descriptor.h
+++ b/control/descriptor.h
@@ -20,7 +20,7 @@
  * @author Raphael Boucherie
  * @author Samuel Thibault
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-05
+ * @date 2023-08-22
  *
  */
 #ifndef _chameleon_descriptor_h_
@@ -76,6 +76,9 @@ CHAM_desc_t* chameleon_desc_submatrix( CHAM_desc_t *descA, int i, int j, int m,
 void         chameleon_desc_destroy  ( CHAM_desc_t *desc );
 int          chameleon_desc_check    ( const CHAM_desc_t *desc );
 
+int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, void *data );
+void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv );
+
 /**
  *  Internal function to return address of block (m,n) with m,n = block indices
  */
diff --git a/control/descriptor_ipiv.c b/control/descriptor_ipiv.c
new file mode 100644
index 0000000000000000000000000000000000000000..54c9fec4068dfdf0c05ae9b18262d2e179c3995b
--- /dev/null
+++ b/control/descriptor_ipiv.c
@@ -0,0 +1,245 @@
+/**
+ *
+ * @file descriptor_ipiv.c
+ *
+ * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon descriptors routines
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ *
+ ***
+ *
+ * @defgroup Descriptor
+ * @brief Group descriptor routines exposed to users to manipulate IPIV data structures
+ *
+ */
+#define _GNU_SOURCE 1
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include "control/common.h"
+#include "control/descriptor.h"
+#include "chameleon/runtime.h"
+
+/**
+ ******************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Internal function to create tiled descriptor associated to a pivot array.
+ *
+ ******************************************************************************
+ *
+ * @param[in,out] ipiv
+ *          The pointer to the ipiv descriptor to initialize.
+ *
+ * @param[in] desc
+ *          The tile descriptor for which an associated ipiv descriptor must be generated.
+ *
+ * @param[in] data
+ *          The pointer to the original vector where to store the pivot values.
+ *
+ ******************************************************************************
+ *
+ * @return CHAMELEON_SUCCESS on success, CHAMELEON_ERR_NOT_INITIALIZED otherwise.
+ *
+ */
+int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, void *data )
+{
+    CHAM_context_t *chamctxt;
+    int rc = CHAMELEON_SUCCESS;
+    /* Start from a zeroed descriptor: fields not assigned below stay at 0 */
+    memset( ipiv, 0, sizeof(CHAM_ipiv_t) );
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_error("chameleon_ipiv_init", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+
+    ipiv->desc = desc;
+    ipiv->data = data;
+    ipiv->i    = 0;
+    ipiv->m    = chameleon_min( desc->m, desc->n );
+    ipiv->mb   = desc->mb;
+    ipiv->mt   = chameleon_ceil( ipiv->m, ipiv->mb );
+
+    /* Create runtime specific structure like registering data */
+    RUNTIME_ipiv_create( ipiv );
+
+    return rc;
+}
+
+/**
+ ******************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Internal function to destroy a tiled descriptor associated to a pivot array.
+ *
+ ******************************************************************************
+ *
+ * @param[in,out] ipiv
+ *          The pointer to the ipiv descriptor to destroy. It is handed to
+ *          RUNTIME_ipiv_destroy(); this function does not call free() on it.
+ */
+void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv )
+{
+    RUNTIME_ipiv_destroy( ipiv );
+}
+
+/**
+ *****************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Create a tiled ipiv descriptor associated to a given matrix.
+ *
+ ******************************************************************************
+ *
+ * @param[out] ipivptr
+ *          On success, set to the newly allocated and initialized ipiv descriptor.
+ *
+ * @param[in] desc
+ *          The tile descriptor for which an associated ipiv descriptor must be generated.
+ *
+ * @param[in] data
+ *          The pointer to the original vector where to store the pivot values.
+ *
+ ******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS on successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if failed to initialize the descriptor.
+ * @retval CHAMELEON_ERR_OUT_OF_RESOURCES if failed to allocate some resources.
+ *
+ */
+int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void *data )
+{
+    CHAM_ipiv_t *ipiv;
+
+    if ( chameleon_context_self() == NULL ) {
+        chameleon_error("CHAMELEON_Ipiv_Create", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+
+    /* Allocate memory and initialize the ipiv descriptor */
+    ipiv = (CHAM_ipiv_t*)malloc(sizeof(CHAM_ipiv_t));
+    if (ipiv == NULL) {
+        chameleon_error("CHAMELEON_Ipiv_Create", "malloc() failed");
+        return CHAMELEON_ERR_OUT_OF_RESOURCES;
+    }
+
+    if ( chameleon_ipiv_init( ipiv, desc, data ) != CHAMELEON_SUCCESS ) {
+        free( ipiv ); /* Neither leak nor publish a descriptor that failed to initialize */
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    *ipivptr = ipiv;
+    return CHAMELEON_SUCCESS;
+}
+
+/**
+ *****************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Destroys an ipiv tile descriptor.
+ *
+ ******************************************************************************
+ *
+ * @param[in,out] ipivptr
+ *          The Ipiv tile descriptor to destroy. *ipivptr is freed and set to NULL.
+ *
+ ******************************************************************************
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if chameleon_context_self() returns NULL
+ * @retval CHAMELEON_ERR_UNALLOCATED if ipivptr or *ipivptr is NULL
+ */
+int CHAMELEON_Ipiv_Destroy(CHAM_ipiv_t **ipivptr)
+{
+    CHAM_context_t *chamctxt;
+    CHAM_ipiv_t *ipiv;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_error("CHAMELEON_Ipiv_Destroy", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+
+    if ((ipivptr == NULL) || (*ipivptr == NULL)) {
+        chameleon_error("CHAMELEON_Ipiv_Destroy", "attempting to destroy a NULL descriptor");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+
+    ipiv = *ipivptr;
+    chameleon_ipiv_destroy( ipiv );
+    free(ipiv);
+    *ipivptr = NULL;
+    return CHAMELEON_SUCCESS;
+}
+
+/**
+ *****************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Flushes the data in the sequence when they won't be reused. The call is
+ * forwarded to RUNTIME_ipiv_flush(), which is expected to clean up the distributed
+ * communication caches and to transfer the data back to the CPU.
+ *
+ ******************************************************************************
+ *
+ * @param[in] ipiv
+ *          ipiv vector descriptor.
+ *
+ * @param[in] sequence
+ *          The sequence in which to submit the calls to flush the data.
+ *
+ ******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit (the only value returned)
+ *
+ */
+int CHAMELEON_Ipiv_Flush( const CHAM_ipiv_t        *ipiv,
+                          const RUNTIME_sequence_t *sequence )
+{
+    RUNTIME_ipiv_flush( ipiv, sequence );
+    return CHAMELEON_SUCCESS;
+}
+
+/**
+ *****************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Gathers an IPIV tile descriptor in a single vector on the given root node.
+ *
+ ******************************************************************************
+ *
+ * @param[in] ipivdesc
+ *          The ipiv vector descriptor to gather; forwarded to RUNTIME_ipiv_gather().
+ *
+ * @param[out] ipiv
+ *          The ipiv vector where to store the result. Allocated vector of size
+ *          ipivdesc->m on root, not referenced on other nodes.
+ *
+ * @param[in] root
+ *          root node on which to gather the data.
+ *
+ ******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit (the only value returned)
+ *
+ */
+int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc, int *ipiv, int root )
+{
+    RUNTIME_ipiv_gather( ipivdesc, ipiv, root );
+    return CHAMELEON_SUCCESS;
+}
diff --git a/coreblas/compute/core_zgetrf.c b/coreblas/compute/core_zgetrf.c
index 9c3a8fb7c0ec6a0b360dce5f8e9c6627bb710c5b..3c65462504d3792ca61a48d423b99b9efff0d89d 100644
--- a/coreblas/compute/core_zgetrf.c
+++ b/coreblas/compute/core_zgetrf.c
@@ -11,14 +11,15 @@
  *
  * @brief Chameleon core_zgetrf CPU kernel
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @author Matthieu Kuhn
+ * @date 2023-07-26
  * @precisions normal z -> c d s
  *
  */
@@ -26,12 +27,10 @@
 #include "coreblas.h"
 
 
-int CORE_zgetrf(int m, int n,
+int CORE_zgetrf( int m, int n,
                  CHAMELEON_Complex64_t *A, int lda,
-                 int *IPIV, int *info)
+                 int *IPIV, int *info )
 {
     *info = LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, m, n, A, lda, IPIV );
     return CHAMELEON_SUCCESS;
 }
-
-
diff --git a/coreblas/compute/core_zgetrf_panel.c b/coreblas/compute/core_zgetrf_panel.c
index 2ec4b23a8f3e2f7cb37ff08f0721d96f1ed6dd9e..68911699b39b62aa2e12007048bab72311a620f6 100644
--- a/coreblas/compute/core_zgetrf_panel.c
+++ b/coreblas/compute/core_zgetrf_panel.c
@@ -9,10 +9,10 @@
  *
  * @brief Chameleon core_zgetrf with partial pivoting CPU kernel
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
- * @date 2022-02-22
+ * @date 2023-08-22
  * @precisions normal z -> c d s
  *
  */
@@ -42,6 +42,10 @@ static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
  * @param[in] h
  *         The index of the column to factorize in the matrix A.
  *
+ * @param[in] m0
+ *         The number of rows above the diagonal tile A in the global matrix to
+ *         be factorized.
+ *
  * @param[in,out] A
  *          On entry, the matrix A where column h-1 needs to be factorized, and
  *          pivot for column h needs to be selected.
@@ -56,9 +60,11 @@ static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
  *          The leading dimension of the array A. lda >= max(1,m).
  *
  * @param[in,out] IPIV
- *          On entry, the pivot array of size min(m,n) with the first h-2 columns initialized.
- *          On exit, IPIV[h-1] is updated with the selected pivot for the previous column.
- *
+ *          On entry, the pivot array of size min(m,n) with the first h-2
+ *          columns initialized.
+ *          On exit, IPIV[h-1] is updated with the selected pivot for the
+ *          previous column.
+ *
  * @param[in,out] nextpiv
  *          On entry, the allocated and initialized CHAM_piv_t structure to
  *          store the information related to pivot at stage h.
@@ -169,6 +175,7 @@ CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
     /* Store current diagonal row (in full) into pivot structure */
     cblas_zcopy( n, A + h,            lda,
                     nextpiv->diagrow, 1 );
+
     return 0;
 }
 
diff --git a/include/chameleon.h b/include/chameleon.h
index cce6539fc5cc06e00d3cd887c18bd12f7d570ae1..253767cbb86d28adf7acc00a4b21d3edb935af39 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -18,7 +18,7 @@
  * @author Florent Pruvost
  * @author Philippe Virouleau
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-05
+ * @date 2023-08-22
  *
  */
 #ifndef _chameleon_h_
@@ -195,6 +195,13 @@ int  CHAMELEON_Recursive_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flt
                                       blkaddr_fct_t get_blkaddr, blkldd_fct_t get_blkldd,
                                       blkrankof_fct_t get_rankof, void* get_rankof_arg );
 
+int CHAMELEON_Ipiv_Create ( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void *data );
+int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr );
+int CHAMELEON_Ipiv_Flush  ( const CHAM_ipiv_t        *ipiv,
+                            const RUNTIME_sequence_t *sequence );
+int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc, int *ipiv, int root );
+void CHAMELEON_Ipiv_Print ( const CHAM_ipiv_t *ipiv );
+
 /**
  *
  * @ingroup Control
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index fa5f069e6057cb0f79eb47a7f0e2bbc38777a14b..2362d8b1a8ff0b1e8b4ff96b26a795d71f25761d 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -23,7 +23,7 @@
  * @author Florent Pruvost
  * @author Alycia Lisito
  * @author Matthieu Kuhn
- * @date 2023-07-06
+ * @date 2023-08-22
  * @precisions normal z -> c d s
  *
  */
@@ -135,7 +135,7 @@ int CHAMELEON_zgesvd_Tile(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, dou
 //int CHAMELEON_zgetrf_Tile(CHAM_desc_t *A, int *IPIV);
 int CHAMELEON_zgetrf_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV);
 int CHAMELEON_zgetrf_nopiv_Tile(CHAM_desc_t *A);
-int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV );
+int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV );
 //int CHAMELEON_zgetri_Tile(CHAM_desc_t *A, int *IPIV);
 //int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B);
 int CHAMELEON_zgetrs_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B);
@@ -216,7 +216,7 @@ int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *
 //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrs_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h
index fe50552f405fdef0e5e985d3ebcdcdd39e49b4d5..9dbc2678d2f9e609889a0590ecd92b96d6515e8a 100644
--- a/include/chameleon/constants.h
+++ b/include/chameleon/constants.h
@@ -18,7 +18,8 @@
  * @author Florent Pruvost
  * @author Alycia Lisito
  * @author Terry Cojean
- * @date 2023-07-04
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
  *
  */
 #ifndef _chameleon_constants_h_
@@ -268,6 +269,8 @@ typedef enum chameleon_gemm_e {
 typedef enum chameleon_getrf_e {
     ChamGetrfNoPiv,
     ChamGetrfNoPivPerColumn,
+    ChamGetrfPPiv,
+    ChamGetrfPPivPerColumn,
 } cham_getrf_t;
 
 #define ChameleonTrd            1001
diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h
index 82818ba75fd6884940601b5e022a40ee96024a5a..a8aaaef56a42b2dbaa25664d86022c51c2f4cd09 100644
--- a/include/chameleon/runtime.h
+++ b/include/chameleon/runtime.h
@@ -10,7 +10,7 @@
  ***
  *
  * @brief The common runtimes API
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Cedric Augonnet
  * @author Cedric Castagnede
@@ -18,7 +18,7 @@
  * @author Samuel Thibault
  * @author Philippe Swartvagher
  * @author Matthieu Kuhn
- * @date 2022-02-22
+ * @date 2023-08-22
  *
  */
 #ifndef _chameleon_runtime_h_
@@ -705,6 +705,32 @@ void RUNTIME_ddisplay_oneprofile (cham_tasktype_t task);
 void RUNTIME_sdisplay_allprofile ();
 void RUNTIME_sdisplay_oneprofile (cham_tasktype_t task);
 
+void RUNTIME_ipiv_create ( CHAM_ipiv_t *ipiv );
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv );
+void RUNTIME_ipiv_init   ( CHAM_ipiv_t *ipiv );
+void RUNTIME_ipiv_gather ( CHAM_ipiv_t *desc, int *ipiv, int node );
+
+void *RUNTIME_ipiv_getaddr   ( CHAM_ipiv_t *ipiv, int m );
+void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h );
+void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h );
+
+static inline void *
+RUNTIME_pivot_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) { /* Select the pivot address for index m from the parity of h */
+    if ( h%2 == 0 ) { /* Even h: use the nextpiv accessor */
+        return RUNTIME_nextpiv_getaddr( ipiv, m, -1 ); /* h itself is not forwarded: -1 is passed instead */
+    }
+    else { /* Odd h (including negative odd values, since h%2 != 0): use the prevpiv accessor */
+        return RUNTIME_prevpiv_getaddr( ipiv, m, -1 ); /* NOTE(review): meaning of -1 is defined by the runtime backend - confirm */
+    }
+}
+
+void RUNTIME_ipiv_flushk ( const RUNTIME_sequence_t *sequence,
+                           const CHAM_ipiv_t *ipiv, int m );
+void RUNTIME_ipiv_flush  ( const CHAM_ipiv_t *ipiv,
+                           const RUNTIME_sequence_t *sequence );
+void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options,
+                           CHAM_ipiv_t *ipiv, int k, int h ); /* parameter named ipiv, as in the runtime definitions */
+
 /**
  * @}
  */
diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h
index efa64a1c534b8472c4514cedb330919b31d4fa0c..d7dd07f48dc6fad1cff9359bf3410defdd657357 100644
--- a/include/chameleon/struct.h
+++ b/include/chameleon/struct.h
@@ -19,7 +19,7 @@
  * @author Samuel Thibault
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-05
+ * @date 2023-08-22
  *
  */
 #ifndef _chameleon_struct_h_
@@ -137,6 +137,25 @@ struct chameleon_desc_s {
     void *schedopt;   // scheduler (QUARK|StarPU) specific structure
 };
 
+/**
+ *  CHAMELEON structure to hold pivot information for the LU factorization with partial pivoting
+ */
+typedef struct chameleon_piv_s {
+    const CHAM_desc_t *desc;   /**< Reference descriptor to compute data mapping based on diagonal tiles,
+                              and get floating reference type                                        */
+    int    *data;        /**< Pointer to the data                                                    */
+    void   *ipiv;        /**< Opaque array of pointers for the runtimes to handle the ipiv array     */
+    void   *nextpiv;     /**< Opaque array of pointers for the runtimes to handle the pivot computation structure */
+    void   *prevpiv;     /**< Opaque array of pointers for the runtimes to handle the pivot computation structure */
+    int64_t mpitag_ipiv;    /**< Initial mpi tag values for the ipiv handles    */
+    int64_t mpitag_nextpiv; /**< Initial mpi tag values for the nextpiv handles */
+    int64_t mpitag_prevpiv; /**< Initial mpi tag values for the prevpiv handles */
+    int     i;              /**< Row index to the beginning of the submatrix    */
+    int     m;              /**< The number of rows in the vector ipiv          */
+    int     mb;             /**< The number of rows per block                   */
+    int     mt;             /**< The number of tiles                            */
+    int     n;              /**< The number of columns considered (must be updated for each panel) */
+} CHAM_ipiv_t;
 
 /**
  *  CHAMELEON request uniquely identifies each asynchronous function call.
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index b58895aa4346b597fbd7dece2606df7a6de35fd3..c5704884e1e11331008519bff1a2b955fd6e4321 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -24,7 +24,7 @@
  * @author Alycia Lisito
  * @author Romain Peressoni
  * @author Matthieu Kuhn
- * @date 2023-07-06
+ * @date 2023-08-22
  * @precisions normal z -> c d s
  *
  */
@@ -486,15 +486,25 @@ void RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
 /*
  * Tasks for LU factorization with partial pivoting
  */
-void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options,
-                                                 int m, int n, int k,
-                                                 const CHAM_desc_t *A, int Am, int An,
-                                                 const CHAM_desc_t *U, int Um, int Un,
-                                                 int iinfo );
+void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options,
+                                           int m, int n, int k,
+                                           const CHAM_desc_t *A, int Am, int An,
+                                           const CHAM_desc_t *U, int Um, int Un,
+                                           int iinfo );
 
-void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options,
-                                                 int m, int n, int k,
-                                                 const CHAM_desc_t *A, int Am, int An,
-                                                 const CHAM_desc_t *U, int Um, int Un );
+void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
+                                           int m, int n, int k,
+                                           const CHAM_desc_t *A, int Am, int An,
+                                           const CHAM_desc_t *U, int Um, int Un );
+
+void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
+                                     int h, int m0,
+                                     CHAM_desc_t *A, int Am, int An,
+                                     CHAM_ipiv_t *ipiv ); /* parameter named ipiv, as in the codelet definitions */
+
+void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
+                                        int h, int m0,
+                                        CHAM_desc_t *A, int Am, int An,
+                                        CHAM_ipiv_t *ipiv ); /* parameter named ipiv, as in the codelet definitions */
 
 #endif /* _chameleon_tasks_z_h_ */
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index fc1aac3355eb6fda653647270b5103280117c0d9..e63a4dd5e7203333b4890a2aa09c27f71fda66c4 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -17,14 +17,14 @@
 #     Univ. of California Berkeley,
 #     Univ. of Colorado Denver.
 #
-# @version 1.2.0
+# @version 1.3.0
 #  @author Cedric Castagnede
 #  @author Emmanuel Agullo
 #  @author Mathieu Faverge
 #  @author Florent Pruvost
 #  @author Philippe Virouleau
 #  @author Matthieu Kuhn
-#  @date 2023-02-21
+#  @date 2023-08-22
 #
 ###
 
@@ -66,7 +66,8 @@ set(CODELETS_ZSRC
     codelets/codelet_zgetrf.c
     codelets/codelet_zgetrf_incpiv.c
     codelets/codelet_zgetrf_nopiv.c
-    codelets/codelet_zpanel.c
+    codelets/codelet_zgetrf_nopiv_percol.c
+    codelets/codelet_zgetrf_percol.c
     codelets/codelet_zhe2ge.c
     codelets/codelet_zherfb.c
     codelets/codelet_zhessq.c
diff --git a/runtime/openmp/CMakeLists.txt b/runtime/openmp/CMakeLists.txt
index 36a5e18baed52702ea14b82b1c2209d6bb53b9f0..b9b712e5dde02765ed05fae4c8273abd369af2a8 100644
--- a/runtime/openmp/CMakeLists.txt
+++ b/runtime/openmp/CMakeLists.txt
@@ -76,6 +76,7 @@ set(RUNTIME_COMMON
   control/runtime_context.c
   control/runtime_control.c
   control/runtime_descriptor.c
+  control/runtime_descriptor_ipiv.c
   control/runtime_options.c
   control/runtime_profiling.c
   ${RUNTIME_COMMON_GENERATED}
diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv_percol.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv_percol.c
new file mode 100644
index 0000000000000000000000000000000000000000..589bce56f943e8e105fcbc4ed0f0ab704948b49b
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv_percol.c
@@ -0,0 +1,60 @@
+/**
+ *
+ * @file openmp/codelet_zgetrf_nopiv_percol.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_nopiv_percol OpenMP codelets
+ *
+ * @version 1.3.0
+ * @comment Codelets to perform per-column panel factorization without pivoting
+ *
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+
+void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options,
+                                           int m, int n, int k,
+                                           const CHAM_desc_t *A, int Am, int An,
+                                           const CHAM_desc_t *U, int Um, int Un,
+                                           int iinfo )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)n;
+    (void)k;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)iinfo;
+}
+
+void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
+                                           int m, int n, int k,
+                                           const CHAM_desc_t *A, int Am, int An,
+                                           const CHAM_desc_t *U, int Um, int Un )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)n;
+    (void)k;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)U;
+    (void)Um;
+    (void)Un;
+}
diff --git a/runtime/openmp/codelets/codelet_zgetrf_percol.c b/runtime/openmp/codelets/codelet_zgetrf_percol.c
new file mode 100644
index 0000000000000000000000000000000000000000..4a503f25f849e46b851e9279e1ca623ac68153ae
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgetrf_percol.c
@@ -0,0 +1,52 @@
+/**
+ *
+ * @file openmp/codelet_zgetrf_percol.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_percol OpenMP codelets
+ *
+ * @version 1.3.0
+ * @comment Codelets to perform panel factorization with partial pivoting
+ *
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+
+void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
+                                     int h, int m0,
+                                     CHAM_desc_t *A, int Am, int An,
+                                     CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)h;
+    (void)m0;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)ipiv;
+}
+
+void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
+                                        int h, int m0,
+                                        CHAM_desc_t *A, int Am, int An,
+                                        CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)h;
+    (void)m0;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)ipiv;
+}
diff --git a/runtime/openmp/codelets/codelet_zpanel.c b/runtime/openmp/codelets/codelet_zpanel.c
deleted file mode 100644
index 6c321a849d28cd46c80c15592bda618d917bd29c..0000000000000000000000000000000000000000
--- a/runtime/openmp/codelets/codelet_zpanel.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- *
- * @file openmp/codelet_zpanel.c
- *
- * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon zpanel OpenMP codelets
- *
- * @version 1.2.0
- * @comment Codelets to perform panel factorization with partial pivoting
- *
- * @author Mathieu Faverge
- * @date 2023-02-21
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_openmp.h"
-#include "chameleon/tasks_z.h"
-
-void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options,
-                                                 int m, int n, int k,
-                                                 const CHAM_desc_t *A, int Am, int An,
-                                                 const CHAM_desc_t *U, int Um, int Un,
-                                                 int iinfo )
-{
-    assert( 0 );
-    (void)options;
-    (void)m;
-    (void)n;
-    (void)k;
-    (void)A;
-    (void)Am;
-    (void)An;
-    (void)U;
-    (void)Um;
-    (void)Un;
-    (void)iinfo;
-}
-
-void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options,
-                                                 int m, int n, int k,
-                                                 const CHAM_desc_t *A, int Am, int An,
-                                                 const CHAM_desc_t *U, int Um, int Un )
-{
-    assert( 0 );
-    (void)options;
-    (void)m;
-    (void)n;
-    (void)k;
-    (void)A;
-    (void)Am;
-    (void)An;
-    (void)U;
-    (void)Um;
-    (void)Un;
-}
-
diff --git a/runtime/openmp/control/runtime_descriptor_ipiv.c b/runtime/openmp/control/runtime_descriptor_ipiv.c
new file mode 100644
index 0000000000000000000000000000000000000000..03886ca650340279207c8163bc30eac81f4a1054
--- /dev/null
+++ b/runtime/openmp/control/runtime_descriptor_ipiv.c
@@ -0,0 +1,97 @@
+/**
+ *
+ * @file openmp/runtime_descriptor_ipiv.c
+ *
+ * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP descriptor routines
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ *
+ */
+#include "chameleon_openmp.h"
+
+void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Cast only marks the parameter as used */
+}
+
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Cast only marks the parameter as used */
+}
+
+void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Casts only mark the parameters as used */
+    (void)m;
+    return NULL; /* Fallback value when assert() is compiled out */
+}
+
+void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)h;
+    return NULL; /* Fallback value when assert() is compiled out */
+}
+
+void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)h;
+    return NULL; /* Fallback value when assert() is compiled out */
+}
+
+void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)sequence; /* Casts only mark the parameters as used */
+    (void)ipiv;
+    (void)m;
+}
+
+void RUNTIME_ipiv_flush( const CHAM_ipiv_t        *ipiv,
+                         const RUNTIME_sequence_t *sequence )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Casts only mark the parameters as used */
+    (void)sequence;
+}
+
+void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options,
+                           CHAM_ipiv_t *ipiv, int k, int h )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)ipiv;
+    (void)k;
+    (void)h;
+}
+
+void RUNTIME_ipiv_init( CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Cast only marks the parameter as used */
+}
+
+void RUNTIME_ipiv_gather( CHAM_ipiv_t *desc, int *ipiv, int node )
+{
+    assert( 0 ); /* Stub: no OpenMP implementation; aborts unless NDEBUG disables assert() */
+    (void)desc; /* Casts only mark the parameters as used */
+    (void)ipiv;
+    (void)node;
+}
diff --git a/runtime/parsec/CMakeLists.txt b/runtime/parsec/CMakeLists.txt
index 74c4d0f928a8da8041e30e918ed9aae67057b70b..573d31d7956d9e7a29724a4770e77ce78f8533d9 100644
--- a/runtime/parsec/CMakeLists.txt
+++ b/runtime/parsec/CMakeLists.txt
@@ -88,6 +88,7 @@ set(RUNTIME_COMMON
   control/runtime_context.c
   control/runtime_control.c
   control/runtime_descriptor.c
+  control/runtime_descriptor_ipiv.c
   control/runtime_options.c
   control/runtime_profiling.c
   ${RUNTIME_COMMON_GENERATED}
diff --git a/runtime/parsec/codelets/codelet_zgetrf_nopiv_percol.c b/runtime/parsec/codelets/codelet_zgetrf_nopiv_percol.c
new file mode 100644
index 0000000000000000000000000000000000000000..e8cf820534e15f6bcceab0004405892175aa7570
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zgetrf_nopiv_percol.c
@@ -0,0 +1,60 @@
+/**
+ *
+ * @file parsec/codelet_zgetrf_nopiv_percol.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_nopiv_percol Parsec codelets
+ *
+ * @version 1.3.0
+ * @comment Codelets to perform per-column panel factorization without pivoting
+ *
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_parsec.h"
+#include "chameleon/tasks_z.h"
+
+void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options,
+                                           int m, int n, int k,
+                                           const CHAM_desc_t *A, int Am, int An,
+                                           const CHAM_desc_t *U, int Um, int Un,
+                                           int iinfo )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)n;
+    (void)k;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)iinfo;
+}
+
+void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
+                                           int m, int n, int k,
+                                           const CHAM_desc_t *A, int Am, int An,
+                                           const CHAM_desc_t *U, int Um, int Un )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)n;
+    (void)k;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)U;
+    (void)Um;
+    (void)Un;
+}
diff --git a/runtime/parsec/codelets/codelet_zgetrf_percol.c b/runtime/parsec/codelets/codelet_zgetrf_percol.c
new file mode 100644
index 0000000000000000000000000000000000000000..f94717696c1d93378ac515ee77d3b6d3b6170b92
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zgetrf_percol.c
@@ -0,0 +1,52 @@
+/**
+ *
+ * @file parsec/codelet_zgetrf_percol.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_percol Parsec codelets
+ *
+ * @version 1.3.0
+ * @comment Codelets to perform panel factorization with partial pivoting
+ *
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_parsec.h"
+#include "chameleon/tasks_z.h"
+
+void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
+                                     int h, int m0,
+                                     CHAM_desc_t *A, int Am, int An,
+                                     CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)h;
+    (void)m0;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)ipiv;
+}
+
+void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
+                                        int h, int m0,
+                                        CHAM_desc_t *A, int Am, int An,
+                                        CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)h;
+    (void)m0;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)ipiv;
+}
diff --git a/runtime/parsec/codelets/codelet_zpanel.c b/runtime/parsec/codelets/codelet_zpanel.c
deleted file mode 100644
index 41e9e2b5f0c6600805dac3596e64b10c3ac3b86d..0000000000000000000000000000000000000000
--- a/runtime/parsec/codelets/codelet_zpanel.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- *
- * @file parsec/codelet_zpanel.c
- *
- * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon zpanel Parsec codelets
- *
- * @version 1.2.0
- * @comment Codelets to perform panel factorization with partial pivoting
- *
- * @author Mathieu Faverge
- * @date 2023-02-21
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_parsec.h"
-#include "chameleon/tasks_z.h"
-
-void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options,
-                                                 int m, int n, int k,
-                                                 const CHAM_desc_t *A, int Am, int An,
-                                                 const CHAM_desc_t *U, int Um, int Un,
-                                                 int iinfo )
-{
-    assert( 0 );
-    (void)options;
-    (void)m;
-    (void)n;
-    (void)k;
-    (void)A;
-    (void)Am;
-    (void)An;
-    (void)U;
-    (void)Um;
-    (void)Un;
-    (void)iinfo;
-}
-
-void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options,
-                                                 int m, int n, int k,
-                                                 const CHAM_desc_t *A, int Am, int An,
-                                                 const CHAM_desc_t *U, int Um, int Un )
-{
-    assert( 0 );
-    (void)options;
-    (void)m;
-    (void)n;
-    (void)k;
-    (void)A;
-    (void)Am;
-    (void)An;
-    (void)U;
-    (void)Um;
-    (void)Un;
-}
-
diff --git a/runtime/parsec/control/runtime_descriptor_ipiv.c b/runtime/parsec/control/runtime_descriptor_ipiv.c
new file mode 100644
index 0000000000000000000000000000000000000000..04a0b791139d5c6a247b25630e126d4a3eb467bf
--- /dev/null
+++ b/runtime/parsec/control/runtime_descriptor_ipiv.c
@@ -0,0 +1,97 @@
+/**
+ *
+ * @file parsec/runtime_descriptor_ipiv.c
+ *
+ * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon PaRSEC descriptor routines
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ *
+ */
+#include "chameleon_parsec.h"
+
+void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Cast only marks the parameter as used */
+}
+
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Cast only marks the parameter as used */
+}
+
+void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Casts only mark the parameters as used */
+    (void)m;
+    return NULL; /* Fallback value when assert() is compiled out */
+}
+
+void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)h;
+    return NULL; /* Fallback value when assert() is compiled out */
+}
+
+void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)h;
+    return NULL; /* Fallback value when assert() is compiled out */
+}
+
+void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)sequence; /* Casts only mark the parameters as used */
+    (void)ipiv;
+    (void)m;
+}
+
+void RUNTIME_ipiv_flush( const CHAM_ipiv_t        *ipiv,
+                         const RUNTIME_sequence_t *sequence )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Casts only mark the parameters as used */
+    (void)sequence;
+}
+
+void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options,
+                           CHAM_ipiv_t *ipiv, int k, int h )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)ipiv;
+    (void)k;
+    (void)h;
+}
+
+void RUNTIME_ipiv_init( CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)ipiv; /* Cast only marks the parameter as used */
+}
+
+void RUNTIME_ipiv_gather( CHAM_ipiv_t *desc, int *ipiv, int node )
+{
+    assert( 0 ); /* Stub: no PaRSEC implementation; aborts unless NDEBUG disables assert() */
+    (void)desc; /* Casts only mark the parameters as used */
+    (void)ipiv;
+    (void)node;
+}
diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt
index 12a673a12606310bae05f6f90d13ccd816d17e9f..e4acef7127cb671ed670195ea62e74ca209ec311 100644
--- a/runtime/quark/CMakeLists.txt
+++ b/runtime/quark/CMakeLists.txt
@@ -90,6 +90,7 @@ set(RUNTIME_COMMON
   control/runtime_context.c
   control/runtime_control.c
   control/runtime_descriptor.c
+  control/runtime_descriptor_ipiv.c
   control/runtime_options.c
   control/runtime_profiling.c
   ${RUNTIME_COMMON_GENERATED}
diff --git a/runtime/quark/codelets/codelet_zgetrf_nopiv_percol.c b/runtime/quark/codelets/codelet_zgetrf_nopiv_percol.c
new file mode 100644
index 0000000000000000000000000000000000000000..dc1d00d0a7d98248c3e4b0b0e3ccb8091eccf383
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zgetrf_nopiv_percol.c
@@ -0,0 +1,60 @@
+/**
+ *
+ * @file quark/codelet_zgetrf_nopiv_percol.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_nopiv_percol Quark codelets
+ *
+ * @version 1.3.0
+ * @comment Codelets to perform per-column panel factorization without pivoting
+ *
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_quark.h"
+#include "chameleon/tasks_z.h"
+
+void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options,
+                                           int m, int n, int k,
+                                           const CHAM_desc_t *A, int Am, int An,
+                                           const CHAM_desc_t *U, int Um, int Un,
+                                           int iinfo )
+{
+    assert( 0 ); /* Stub: no Quark implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)n;
+    (void)k;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)iinfo;
+}
+
+void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
+                                           int m, int n, int k,
+                                           const CHAM_desc_t *A, int Am, int An,
+                                           const CHAM_desc_t *U, int Um, int Un )
+{
+    assert( 0 ); /* Stub: no Quark implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)m;
+    (void)n;
+    (void)k;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)U;
+    (void)Um;
+    (void)Un;
+}
diff --git a/runtime/quark/codelets/codelet_zgetrf_percol.c b/runtime/quark/codelets/codelet_zgetrf_percol.c
new file mode 100644
index 0000000000000000000000000000000000000000..baea4553d52827ea63db58f487b00d8d119bee9a
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zgetrf_percol.c
@@ -0,0 +1,52 @@
+/**
+ *
+ * @file quark/codelet_zgetrf_percol.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_percol Quark codelets
+ *
+ * @version 1.3.0
+ * @comment Codelets to perform panel factorization with partial pivoting
+ *
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_quark.h"
+#include "chameleon/tasks_z.h"
+
+void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
+                                     int h, int m0,
+                                     CHAM_desc_t *A, int Am, int An,
+                                     CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no Quark implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)h;
+    (void)m0;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)ipiv;
+}
+
+void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
+                                        int h, int m0,
+                                        CHAM_desc_t *A, int Am, int An,
+                                        CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no Quark implementation; aborts unless NDEBUG disables assert() */
+    (void)options; /* Casts only mark the parameters as used */
+    (void)h;
+    (void)m0;
+    (void)A;
+    (void)Am;
+    (void)An;
+    (void)ipiv;
+}
diff --git a/runtime/quark/codelets/codelet_zpanel.c b/runtime/quark/codelets/codelet_zpanel.c
deleted file mode 100644
index 015ea31c720b42fb3fe965d84f291f2b45e30649..0000000000000000000000000000000000000000
--- a/runtime/quark/codelets/codelet_zpanel.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- *
- * @file quark/codelet_zpanel.c
- *
- * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon zpanel Quark codelets
- *
- * @version 1.2.0
- * @comment Codelets to perform panel factorization with partial pivoting
- *
- * @author Mathieu Faverge
- * @date 2023-02-21
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_quark.h"
-#include "chameleon/tasks_z.h"
-
-void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options,
-                                                 int m, int n, int k,
-                                                 const CHAM_desc_t *A, int Am, int An,
-                                                 const CHAM_desc_t *U, int Um, int Un,
-                                                 int iinfo )
-{
-    assert( 0 );
-    (void)options;
-    (void)m;
-    (void)n;
-    (void)k;
-    (void)A;
-    (void)Am;
-    (void)An;
-    (void)U;
-    (void)Um;
-    (void)Un;
-    (void)iinfo;
-}
-
-void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options,
-                                                 int m, int n, int k,
-                                                 const CHAM_desc_t *A, int Am, int An,
-                                                 const CHAM_desc_t *U, int Um, int Un )
-{
-    assert( 0 );
-    (void)options;
-    (void)m;
-    (void)n;
-    (void)k;
-    (void)A;
-    (void)Am;
-    (void)An;
-    (void)U;
-    (void)Um;
-    (void)Un;
-}
-
diff --git a/runtime/quark/control/runtime_descriptor_ipiv.c b/runtime/quark/control/runtime_descriptor_ipiv.c
new file mode 100644
index 0000000000000000000000000000000000000000..34706a55518f95f0e4b229a772534e3f062d05d2
--- /dev/null
+++ b/runtime/quark/control/runtime_descriptor_ipiv.c
@@ -0,0 +1,97 @@
+/**
+ *
+ * @file quark/runtime_descriptor_ipiv.c
+ *
+ * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon Quark descriptor routines
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ *
+ */
+#include "chameleon_quark.h"
+
+void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) /* Quark stub: no implementation in this file */
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)ipiv; /* parameter intentionally unused */
+}
+
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) /* Quark stub: nothing is released here */
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)ipiv; /* parameter intentionally unused */
+}
+
+void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) /* Quark stub: never returns a usable address */
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)ipiv;
+    (void)m;
+    return NULL; /* only reached when assertions are disabled */
+}
+
+void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) /* Quark stub: never returns a usable address */
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)ipiv;
+    (void)m;
+    (void)h;
+    return NULL; /* only reached when assertions are disabled */
+}
+
+void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) /* Quark stub: never returns a usable address */
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)ipiv;
+    (void)m;
+    (void)h;
+    return NULL; /* only reached when assertions are disabled */
+}
+
+void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, /* Quark stub: no flush is performed */
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)sequence;
+    (void)ipiv;
+    (void)m;
+}
+
+void RUNTIME_ipiv_flush( const CHAM_ipiv_t        *ipiv, /* Quark stub: no flush is performed */
+                         const RUNTIME_sequence_t *sequence )
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)ipiv;
+    (void)sequence;
+}
+
+void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, /* Quark stub: no reduction is performed */
+                           CHAM_ipiv_t *ipiv, int k, int h )
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)options;
+    (void)ipiv;
+    (void)k;
+    (void)h;
+}
+
+void RUNTIME_ipiv_init( CHAM_ipiv_t *ipiv ) /* Quark stub: nothing is initialized */
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)ipiv; /* parameter intentionally unused */
+}
+
+void RUNTIME_ipiv_gather( CHAM_ipiv_t *desc, int *ipiv, int node ) /* Quark stub: the output array is left untouched */
+{
+    assert( 0 ); /* fails unconditionally unless assertions are compiled out */
+    (void)desc;
+    (void)ipiv;
+    (void)node;
+}
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index 30ea76045131884c610a0f4ee430393f732f6360..2107134e1125e2ef2e9dcb55cf5e84cc7bd8b1cf 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -26,7 +26,7 @@
 #  @author Matthieu Kuhn
 #  @author Loris Lucido
 #  @author Terry Cojean
-#  @date 2023-07-06
+#  @date 2023-08-22
 #
 ###
 cmake_minimum_required(VERSION 3.1)
@@ -222,11 +222,13 @@ set(RUNTIME_COMMON
   control/runtime_context.c
   control/runtime_control.c
   control/runtime_descriptor.c
+  control/runtime_descriptor_ipiv.c
   control/runtime_tags.c
   control/runtime_options.c
   control/runtime_profiling.c
   control/runtime_workspace.c
   interface/cham_tile_interface.c
+  interface/cppi_interface.c
   ${RUNTIME_COMMON_GENERATED}
   )
 
diff --git a/runtime/starpu/codelets/codelet_zpanel.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c
similarity index 79%
rename from runtime/starpu/codelets/codelet_zpanel.c
rename to runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c
index 7e450986c7448fbc1bd6c2398a7e21df9c63bd89..554735a47e654cadb16c77b209f1e6ed2f28d398 100644
--- a/runtime/starpu/codelets/codelet_zpanel.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c
@@ -1,20 +1,19 @@
 /**
  *
- * @file starpu/codelet_zpanel.c
+ * @file starpu/codelet_zgetrf_nopiv_percol.c
  *
  * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zpanel StarPU codelets
+ * @brief Chameleon getrf codelets to factorize the panel with no pivoting
  *
  * @version 1.3.0
- * @comment Codelets to perform panel factorization with partial pivoting
  *
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
- * @date 2023-07-06
+ * @date 2023-08-22
  * @precisions normal z -> c d s
  *
  */
@@ -22,11 +21,14 @@
 #include "runtime_codelet_z.h"
 #include <coreblas/cblas_wrapper.h>
 
+CHAMELEON_CL_CB( zgetrf_nopiv_percol_diag, cti_handle_get_m(task->handles[0]), 0, 0, M );
+CHAMELEON_CL_CB( zgetrf_nopiv_percol_trsm, cti_handle_get_m(task->handles[0]), 0, 0, M );
+
 static const CHAMELEON_Complex64_t zone  = (CHAMELEON_Complex64_t) 1.0;
 static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
 
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zgetrf_panel_nopiv_percol_diag_cpu_func( void *descr[], void *cl_arg )
+static void cl_zgetrf_nopiv_percol_diag_cpu_func( void *descr[], void *cl_arg )
 {
     CHAM_tile_t           *tileA, *tileU;
     int                    m, n, k, lda, iinfo;
@@ -71,17 +73,16 @@ static void cl_zgetrf_panel_nopiv_percol_diag_cpu_func( void *descr[], void *cl_
 /*
  * Codelet definition
  */
-CODELETS_CPU( zgetrf_panel_nopiv_percol_diag, cl_zgetrf_panel_nopiv_percol_diag_cpu_func );
+CODELETS_CPU( zgetrf_nopiv_percol_diag, cl_zgetrf_nopiv_percol_diag_cpu_func );
 
-void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options,
+void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options,
                                                  int m, int n, int k,
                                                  const CHAM_desc_t *A, int Am, int An,
                                                  const CHAM_desc_t *U, int Um, int Un,
                                                  int iinfo )
 {
-    struct starpu_codelet *codelet = &cl_zgetrf_panel_nopiv_percol_diag;
-    // void (*callback)(void*) = options->profiling ? cl_zgetrf_panel_nopiv_percol_diag_callback : NULL;
-    void (*callback)(void*) = NULL;
+    struct starpu_codelet *codelet = &cl_zgetrf_nopiv_percol_diag;
+    void (*callback)(void*) = options->profiling ? cl_zgetrf_nopiv_percol_diag_callback : NULL;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_RW( A, Am, An );
@@ -102,7 +103,7 @@ void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options
         STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME,              "zgetrf_panel_nopiv_percol_diag",
+        STARPU_NAME,              "zgetrf_nopiv_percol_diag",
 #endif
         0);
 }
@@ -111,7 +112,7 @@ void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options
  * Update column blocs
  */
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zgetrf_panel_nopiv_percol_trsm_cpu_func( void *descr[], void *cl_arg )
+static void cl_zgetrf_nopiv_percol_trsm_cpu_func( void *descr[], void *cl_arg )
 {
     CHAM_tile_t           *tileA, *tileU;
     int                    m, n, k, lda;
@@ -144,16 +145,15 @@ static void cl_zgetrf_panel_nopiv_percol_trsm_cpu_func( void *descr[], void *cl_
 /*
  * Codelet definition
  */
-CODELETS_CPU( zgetrf_panel_nopiv_percol_trsm, cl_zgetrf_panel_nopiv_percol_trsm_cpu_func );
+CODELETS_CPU( zgetrf_nopiv_percol_trsm, cl_zgetrf_nopiv_percol_trsm_cpu_func );
 
-void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options,
+void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
                                                  int m, int n, int k,
                                                  const CHAM_desc_t *A, int Am, int An,
                                                  const CHAM_desc_t *U, int Um, int Un )
 {
-    struct starpu_codelet *codelet = &cl_zgetrf_panel_nopiv_percol_trsm;
-    // void (*callback)(void*) = options->profiling ? cl_zgetrf_panel_nopiv_percol_trsm_callback : NULL;
-    void (*callback)(void*) = NULL;
+    struct starpu_codelet *codelet = &cl_zgetrf_nopiv_percol_trsm;
+    void (*callback)(void*) = options->profiling ? cl_zgetrf_nopiv_percol_trsm_callback : NULL;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_RW(A, Am, An);
@@ -171,8 +171,7 @@ void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options
         STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zgetrf_panel_nopiv_percol_trsm",
+        STARPU_NAME, "zgetrf_nopiv_percol_trsm",
 #endif
         0);
 }
-
diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
new file mode 100644
index 0000000000000000000000000000000000000000..e5887b02b7faee4ccd67b3e989b040f87b142f82
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -0,0 +1,164 @@
+/**
+ *
+ * @file starpu/codelet_zgetrf_percol.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_percol StarPU codelets
+ *
+ * @version 1.3.0
+ * @comment Codelets to perform panel factorization with partial pivoting
+ *
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+#include <coreblas/cblas_wrapper.h>
+
+CHAMELEON_CL_CB( zgetrf_percol_diag,    cti_handle_get_m(task->handles[0]), 0, 0, M );
+CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M );
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg) /* CPU kernel of the zgetrf_percol_diag codelet */
+{
+    int                 h, m0;
+    RUNTIME_sequence_t *sequence; /* unpacked below but not used in this function */
+    RUNTIME_request_t  *request; /* unpacked below but not used in this function */
+    CHAM_tile_t        *tileA;
+    int                *ipiv;
+    cppi_interface_t   *nextpiv;
+    cppi_interface_t   *prevpiv;
+
+    starpu_codelet_unpack_args( cl_arg, &h, &m0, /* same order as the STARPU_VALUE arguments of INSERT_TASK_zgetrf_percol_diag */
+                                &sequence, &request );
+
+    tileA   = cti_interface_get(descr[0]);
+    ipiv    = (int *)STARPU_VECTOR_GET_PTR(descr[1]);
+    nextpiv = (cppi_interface_t*) descr[2];
+    prevpiv = (cppi_interface_t*) descr[3]; /* NOTE(review): submitted with STARPU_NONE when h == 0 - confirm descr[3] is valid in that case */
+
+    if ( h > 0 ) {
+        cppi_display_dbg( prevpiv, stderr, "Prevpiv before call: " ); /* debug trace only */
+    }
+    if ( h < tileA->n ) {
+        cppi_display_dbg( nextpiv, stderr, "Nextpiv before call: " ); /* debug trace only */
+    }
+
+    /*
+     * Make sure the nextpiv interface stores the right information about the
+     * column and diagonal row for the reduction.
+     */
+    nextpiv->h        = h; /* NOTE(review): nextpiv is submitted as STARPU_R when h == ipiv->n, yet it is written here - confirm */
+    nextpiv->has_diag = 1;
+
+    CORE_zgetrf_panel_diag( tileA->m, tileA->n, h, m0,
+                            CHAM_tile_get_ptr( tileA ), tileA->ld,
+                            ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
+
+    if ( h > 0 ) {
+        cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " ); /* debug trace only */
+    }
+    if ( h < tileA->n ) {
+        cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " ); /* debug trace only */
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func );
+
+void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, /* Submit the zgetrf_percol_diag task on tile A(Am, An) for column h */
+                                     int h, int m0,
+                                     CHAM_desc_t *A, int Am, int An,
+                                     CHAM_ipiv_t *ipiv )
+{
+    struct starpu_codelet *codelet = &cl_zgetrf_percol_diag;
+    void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_diag_callback : NULL; /* profiling callback only when enabled */
+
+    int access_ipiv = ( h == 0 )       ? STARPU_W    : STARPU_RW; /* h == 0: ipiv is only written */
+    int access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX; /* h == ipiv->n: nextpiv is only read, otherwise reduced */
+    int access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R; /* NOTE(review): if STARPU_NONE is 0 it may end the variadic list below when h == 0 - confirm */
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE,             &h,                   sizeof(int),
+        STARPU_VALUE,             &m0,                  sizeof(int),
+        STARPU_VALUE,             &(options->sequence), sizeof(RUNTIME_sequence_t*),
+        STARPU_VALUE,             &(options->request),  sizeof(RUNTIME_request_t*),
+        STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), /* descr[0] in the CPU kernel */
+        access_ipiv,              RUNTIME_ipiv_getaddr( ipiv, An ), /* descr[1] */
+        access_npiv,              RUNTIME_pivot_getaddr( ipiv, An, h   ), /* descr[2] */
+        access_ppiv,              RUNTIME_pivot_getaddr( ipiv, An, h-1 ), /* descr[3]; looked up with h-1 == -1 when h == 0 */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zgetrf_percol_diag",
+#endif
+        0);
+}
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg) /* CPU kernel of the zgetrf_percol_offdiag codelet */
+{
+    int                 h, m0;
+    RUNTIME_sequence_t *sequence; /* unpacked below but not used in this function */
+    RUNTIME_request_t  *request; /* unpacked below but not used in this function */
+    CHAM_tile_t        *tileA;
+    cppi_interface_t   *nextpiv;
+    cppi_interface_t   *prevpiv;
+
+    starpu_codelet_unpack_args( cl_arg, &h, &m0, &sequence, &request ); /* same order as the STARPU_VALUE arguments of the insert function */
+
+    tileA   = cti_interface_get(descr[0]);
+    nextpiv = (cppi_interface_t*) descr[1]; /* submitted as STARPU_REDUX */
+    prevpiv = (cppi_interface_t*) descr[2]; /* submitted as STARPU_R */
+
+    nextpiv->h = h; /* Initialize in case it uses a copy */
+
+    CORE_zgetrf_panel_offdiag( tileA->m, tileA->n, h, m0,
+                               CHAM_tile_get_ptr(tileA), tileA->ld,
+                               &(nextpiv->pivot), &(prevpiv->pivot) );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func)
+
+void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, /* Submit the zgetrf_percol_offdiag task on tile A(Am, An) for column h */
+                                        int h, int m0,
+                                        CHAM_desc_t *A, int Am, int An,
+                                        CHAM_ipiv_t *ipiv )
+{
+    struct starpu_codelet *codelet = &cl_zgetrf_percol_offdiag;
+
+    void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL; /* profiling callback only when enabled */
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE,    &h,                   sizeof(int),
+        STARPU_VALUE,    &m0,                  sizeof(int),
+        STARPU_VALUE,    &(options->sequence), sizeof(RUNTIME_sequence_t *),
+        STARPU_VALUE,    &(options->request),  sizeof(RUNTIME_request_t *),
+        STARPU_RW,       RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), /* descr[0] in the CPU kernel */
+        STARPU_REDUX,    RUNTIME_pivot_getaddr( ipiv, An, h   ), /* descr[1] */
+        STARPU_R,        RUNTIME_pivot_getaddr( ipiv, An, h-1 ), /* descr[2]; NOTE(review): unlike the diag variant, read even when h == 0 (h-1 == -1) - confirm */
+        STARPU_PRIORITY, options->priority,
+        STARPU_CALLBACK, callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zgetrf_percol_offdiag",
+#endif
+        0);
+}
diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c
index 450fecf315ff69f85e62c2b790eb6fee6cfe30a4..62c89a00f70c160a8c0ceef38ad78a086189923e 100644
--- a/runtime/starpu/control/runtime_control.c
+++ b/runtime/starpu/control/runtime_control.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon StarPU control routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Cedric Augonnet
  * @author Cedric Castagnede
@@ -21,7 +21,7 @@
  * @author Matthieu Kuhn
  * @author Loris Lucido
  * @author Terry Cojean
- * @date 2023-01-30
+ * @date 2023-08-22
  *
  */
 #include "chameleon_starpu.h"
@@ -232,6 +232,7 @@ int RUNTIME_init( CHAM_context_t *chamctxt,
 #endif
 
     starpu_cham_tile_interface_init();
+    cppi_interface_init();
 
     chameleon_starpu_parallel_worker_init( sched_opt );
     return hres;
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index ee4817fe4cc35abf5a73baffad8b224fcf79452a..2ed4ba05a14db73dc624e812b14ad36e6769d70d 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -20,26 +20,11 @@
  * @author Raphael Boucherie
  * @author Samuel Thibault
  * @author Loris Lucido
- * @date 2023-07-06
+ * @date 2023-08-22
  *
  */
 #include "chameleon_starpu.h"
 
-/**
- *  Set the tag sizes
- */
-#if defined(CHAMELEON_USE_MPI)
-
-#ifndef HAVE_STARPU_MPI_DATA_REGISTER
-#define starpu_mpi_data_register( handle_, tag_, owner_ )       \
-    do {                                                        \
-        starpu_data_set_rank( (handle_), (owner_) );            \
-        starpu_data_set_tag( (handle_), (tag_) );               \
-    } while(0)
-#endif
-
-#endif
-
 /**
  *  Malloc/Free of the data
  */
@@ -289,42 +274,6 @@ void RUNTIME_flush()
 #endif
 }
 
-/**
- * Different implementations of the flush call based on StarPU version
- */
-#if defined(HAVE_STARPU_DATA_WONT_USE)
-
-static inline void
-chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) {
-    starpu_data_wont_use( handle );
-}
-
-#elif defined(HAVE_STARPU_IDLE_PREFETCH)
-
-static inline void
-chameleon_starpu_data_flush( void *_handle)
-{
-    starpu_data_handle_t handle = (starpu_data_handle_t)_handle;
-    starpu_data_idle_prefetch_on_node(handle, STARPU_MAIN_RAM, 1);
-    starpu_data_release_on_node(handle, -1);
-}
-
-static inline void
-chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) {
-    starpu_data_acquire_on_node_cb( handle, -1, STARPU_R,
-                                    chameleon_starpu_data_flush, handle );
-}
-
-#else
-
-static inline void
-chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) {
-    starpu_data_acquire_cb( handle, STARPU_R,
-                            (void (*)(void*))&starpu_data_release, handle );
-}
-
-#endif
-
 void RUNTIME_desc_flush( const CHAM_desc_t        *desc,
                          const RUNTIME_sequence_t *sequence )
 {
diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c
new file mode 100644
index 0000000000000000000000000000000000000000..4131f7d6c79858624ed0b324f6785aebfb195d7e
--- /dev/null
+++ b/runtime/starpu/control/runtime_descriptor_ipiv.c
@@ -0,0 +1,306 @@
+/**
+ *
+ * @file starpu/runtime_descriptor_ipiv.c
+ *
+ * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon StarPU descriptor routines
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ *
+ */
+#include "chameleon_starpu.h"
+
+/**
+ *  Allocate the three mt-entry handle arrays (ipiv, nextpiv, prevpiv) and, with MPI, book their tags
+ */
+void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
+{
+    assert( ipiv );
+
+    ipiv->ipiv    = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) ); /* zeroed: a NULL entry means "not registered yet"; NOTE(review): result not checked */
+    ipiv->nextpiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) );
+    ipiv->prevpiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) );
+#if defined(CHAMELEON_USE_MPI)
+    /*
+     * Book 3 * mt tags in one range: mt consecutive tags per handle
+     * array, in the order ipiv, nextpiv, prevpiv
+     */
+    {
+        chameleon_starpu_tag_init();
+        ipiv->mpitag_ipiv = chameleon_starpu_tag_book( (int64_t)(ipiv->mt) * 3 );
+        if ( ipiv->mpitag_ipiv == -1 ) {
+            chameleon_fatal_error("RUNTIME_ipiv_create", "Can't pursue computation since no more tags are available for ipiv structure");
+            return; /* NOTE(review): the three arrays allocated above stay allocated on this path */
+        }
+        ipiv->mpitag_nextpiv = ipiv->mpitag_ipiv    + ipiv->mt;
+        ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + ipiv->mt;
+    }
+#endif
+}
+
+/**
+ *  Unregister every registered handle of the three arrays, free the arrays and release the tags
+ */
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
+{
+    int                   i;
+    starpu_data_handle_t *ipiv_handle    = (starpu_data_handle_t*)(ipiv->ipiv);
+    starpu_data_handle_t *nextpiv_handle = (starpu_data_handle_t*)(ipiv->nextpiv);
+    starpu_data_handle_t *prevpiv_handle = (starpu_data_handle_t*)(ipiv->prevpiv);
+
+    for(i=0; i<ipiv->mt; i++) { /* the three cursors advance together, one entry per iteration */
+        if ( *ipiv_handle != NULL ) { /* NULL entries were never registered */
+            starpu_data_unregister( *ipiv_handle );
+            *ipiv_handle = NULL;
+        }
+        ipiv_handle++;
+
+        if ( *nextpiv_handle != NULL ) {
+            starpu_data_unregister( *nextpiv_handle );
+            *nextpiv_handle = NULL;
+        }
+        nextpiv_handle++;
+
+        if ( *prevpiv_handle != NULL ) {
+            starpu_data_unregister( *prevpiv_handle );
+            *prevpiv_handle = NULL;
+        }
+        prevpiv_handle++;
+    }
+
+    free( ipiv->ipiv    );
+    free( ipiv->nextpiv );
+    free( ipiv->prevpiv );
+    chameleon_starpu_tag_release( ipiv->mpitag_ipiv ); /* NOTE(review): the tag is only booked under CHAMELEON_USE_MPI in RUNTIME_ipiv_create - confirm this call is safe without MPI */
+}
+
+void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) /* Return the ipiv vector handle of block m, registering it on first use */
+{
+    starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv);
+    int64_t mm = m + (ipiv->i / ipiv->mb); /* block index shifted by the block offset ipiv->i / ipiv->mb */
+
+    handle += mm;
+    assert( handle ); /* checks the slot address, not the handle stored in it */
+
+    if ( *handle != NULL ) { /* already registered: reuse */
+        return *handle;
+    }
+
+    const CHAM_desc_t *A = ipiv->desc;
+    int owner = A->get_rankof( A, m, m ); /* only used in the MPI section below */
+    int ncols = (mm == (ipiv->mt-1)) ? ipiv->m - mm * ipiv->mb : ipiv->mb; /* last block may be shorter than mb */
+
+    starpu_vector_data_register( handle, -1, (uintptr_t)NULL, ncols, sizeof(int) ); /* vector of ncols ints, home node -1, no user buffer */
+
+#if defined(CHAMELEON_USE_MPI)
+    {
+        int64_t tag = ipiv->mpitag_ipiv + mm; /* tag = base of the ipiv range + block index */
+        starpu_mpi_data_register( *handle, tag, owner );
+    }
+#endif /* defined(CHAMELEON_USE_MPI) */
+
+    assert( *handle );
+    return *handle;
+}
+
+void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) /* Return the nextpiv handle of block m, registering it on first use; h is not used here */
+{
+    starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(ipiv->nextpiv);
+    int64_t mm = m + (ipiv->i / ipiv->mb); /* block index shifted by the block offset ipiv->i / ipiv->mb */
+
+    nextpiv += mm;
+    assert( nextpiv ); /* checks the slot address, not the handle stored in it */
+
+    if ( *nextpiv != NULL ) { /* already registered: reuse */
+        return *nextpiv;
+    }
+
+    const CHAM_desc_t *A = ipiv->desc;
+    int     owner = A->get_rankof( A, m, m );
+    int     ncols = (mm == (ipiv->mt-1)) ? ipiv->m - mm * ipiv->mb : ipiv->mb; /* last block may be shorter than mb */
+    int64_t tag   = ipiv->mpitag_nextpiv + mm; /* NOTE(review): mpitag_nextpiv is only set under CHAMELEON_USE_MPI in RUNTIME_ipiv_create - confirm */
+
+    cppi_register( nextpiv, A->dtyp, ncols, tag, owner );
+
+    assert( *nextpiv );
+    return *nextpiv;
+}
+
+void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) /* Return the prevpiv handle of block m, registering it on first use; h is not used here */
+{
+    starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(ipiv->prevpiv);
+    int64_t mm = m + (ipiv->i / ipiv->mb); /* block index shifted by the block offset ipiv->i / ipiv->mb */
+
+    prevpiv += mm;
+    assert( prevpiv ); /* checks the slot address, not the handle stored in it */
+
+    if ( *prevpiv != NULL ) { /* already registered: reuse */
+        return *prevpiv;
+    }
+
+    const CHAM_desc_t *A = ipiv->desc;
+    int     owner = A->get_rankof( A, m, m );
+    int     ncols = (mm == (ipiv->mt-1)) ? ipiv->m - mm * ipiv->mb : ipiv->mb; /* last block may be shorter than mb */
+    int64_t tag   = ipiv->mpitag_prevpiv + mm; /* NOTE(review): mpitag_prevpiv is only set under CHAMELEON_USE_MPI in RUNTIME_ipiv_create - confirm */
+
+    cppi_register( prevpiv, A->dtyp, ncols, tag, owner );
+
+    assert( *prevpiv );
+    return *prevpiv;
+}
+
+void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, /* Flush the nextpiv and prevpiv handles of block m; the ipiv vector handle is not touched */
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    starpu_data_handle_t *handle;
+    const CHAM_desc_t *A = ipiv->desc; /* only read in the MPI sections below */
+    int64_t mm = m + ( ipiv->i / ipiv->mb ); /* block index shifted by the block offset ipiv->i / ipiv->mb */
+
+    handle = (starpu_data_handle_t*)(ipiv->nextpiv);
+    handle += mm;
+
+    if ( *handle != NULL ) { /* skip handles that were never registered */
+#if defined(CHAMELEON_USE_MPI)
+        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
+#endif
+        {
+            chameleon_starpu_data_wont_use( *handle ); /* with MPI, only on the rank owning the handle */
+        }
+    }
+
+    handle = (starpu_data_handle_t*)(ipiv->prevpiv);
+    handle += mm;
+
+    if ( *handle != NULL ) { /* same treatment for the prevpiv handle */
+#if defined(CHAMELEON_USE_MPI)
+        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
+#endif
+        {
+            chameleon_starpu_data_wont_use( *handle );
+        }
+    }
+
+    (void)sequence; /* sequence is not used by this implementation */
+    (void)ipiv;
+    (void)m;
+}
+
+void RUNTIME_ipiv_flush( const CHAM_ipiv_t        *ipiv, /* Call RUNTIME_ipiv_flushk() on every block index 0 .. mt-1 */
+                         const RUNTIME_sequence_t *sequence )
+{
+    int m;
+
+    for (m = 0; m < ipiv->mt; m++)
+    {
+        RUNTIME_ipiv_flushk( sequence, ipiv, m ); /* note the swapped argument order compared to this function */
+    }
+}
+
+void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, /* Submit the MPI reduction of the pivot of column h (when enabled) and invalidate the one of column h-1 */
+                           CHAM_ipiv_t *ipiv, int k, int h )
+{
+    starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, k, h   ); /* only used in the MPI redux section below */
+    starpu_data_handle_t prevpiv = RUNTIME_pivot_getaddr( ipiv, k, h-1 ); /* NOTE(review): looked up with h-1 == -1 when h == 0 - confirm the helper accepts it */
+
+    if ( h < ipiv->n ) {
+#if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI)
+#if !defined(HAVE_STARPU_MPI_REDUX_WRAPUP)
+        starpu_mpi_redux_data_prio_tree( MPI_COMM_WORLD, nextpiv,
+                                         options->priority, 2 /* Binary tree */ );
+#endif
+#endif
+    }
+
+    /* Invalidate the previous pivot structure for correct initialization in later reuse */
+    if ( h > 0 ) {
+        starpu_data_invalidate_submit( prevpiv );
+    }
+
+    (void)options; /* options is unused when the redux call above is compiled out */
+}
+
+static void cl_ipiv_init_cpu_func(void *descr[], void *cl_arg) /* CPU kernel: fill one ipiv block with ipiv[i] = m0 + i + 1 */
+{
+    int *ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[0]); /* unused when CHAMELEON_SIMULATION is defined */
+
+#if !defined(CHAMELEON_SIMULATION)
+    {
+        int i, m0, n;
+        starpu_codelet_unpack_args( cl_arg, &m0, &n ); /* m0: first index of the block, n: number of entries */
+
+        for( i=0; i<n; i++ ) {
+            ipiv[i] = m0 + i + 1; /* 1-based index of entry i of this block */
+        }
+    }
+#endif
+}
+
+struct starpu_codelet cl_ipiv_init = { /* Codelet running cl_ipiv_init_cpu_func; external linkage (not static) */
+    .where     = STARPU_CPU, /* CPU implementation only */
+    .cpu_func  = cl_ipiv_init_cpu_func,
+    .nbuffers  = 1, /* the single ipiv vector handle; NOTE(review): .modes is not set here - confirm */
+};
+
+void RUNTIME_ipiv_init( CHAM_ipiv_t *ipiv ) /* Submit one cl_ipiv_init task per ipiv block */
+{
+    int64_t mt = ipiv->mt;
+    int64_t mb = ipiv->mb;
+    int     m;
+
+    for (m = 0; m < mt; m++) {
+        starpu_data_handle_t ipiv_src = RUNTIME_ipiv_getaddr( ipiv, m ); /* registers the handle on first use */
+        int m0 = m * mb; /* first index of block m (int64_t product stored in an int) */
+        int n  = (m == (mt-1)) ? ipiv->m - m0 : mb; /* last block may be shorter than mb */
+
+        rt_starpu_insert_task(
+            &cl_ipiv_init,
+            STARPU_VALUE, &m0, sizeof(int),
+            STARPU_VALUE, &n,  sizeof(int),
+            STARPU_W, ipiv_src, /* write-only: previous content is not needed */
+            0);
+    }
+}
+
+void RUNTIME_ipiv_gather( CHAM_ipiv_t *desc, int *ipiv, int node ) /* Copy every ipiv block of desc into the user array ipiv held by rank node */
+{
+    int64_t mt   = desc->mt;
+    int64_t mb   = desc->mb;
+    int64_t tag  = chameleon_starpu_tag_book( (int64_t)(desc->mt) ); /* NOTE(review): this tag range is never released in this function - confirm */
+    int     rank = CHAMELEON_Comm_rank();
+    int     m;
+
+    for (m = 0; m < mt; m++, ipiv += mb) { /* the output pointer advances by mb entries per block */
+        starpu_data_handle_t ipiv_src = RUNTIME_ipiv_getaddr( desc, m );
+
+#if defined(CHAMELEON_USE_MPI)
+        if ( (rank == node) ||
+             (rank == starpu_mpi_data_get_rank(ipiv_src)) ) /* only the destination rank and the owner of the block take part */
+#endif
+        {
+            starpu_data_handle_t ipiv_dst;
+            int       ncols     = (m == (mt-1)) ? desc->m - m * mb : mb; /* last block may be shorter than mb */
+            uintptr_t ipivptr   = (rank == node) ? (uintptr_t)ipiv : 0; /* the user buffer is attached on the destination rank only */
+            int       home_node = (rank == node) ? STARPU_MAIN_RAM : -1;
+
+            starpu_vector_data_register( &ipiv_dst, home_node, ipivptr, ncols, sizeof(int) ); /* temporary handle wrapping the output slice */
+
+#if defined(CHAMELEON_USE_MPI)
+            starpu_mpi_data_register( ipiv_dst, tag + m, 0 ); /* NOTE(review): owner is hard-coded to 0 rather than node - confirm */
+#endif /* defined(CHAMELEON_USE_MPI) */
+
+            assert( ipiv_dst );
+
+            starpu_data_cpy( ipiv_dst, ipiv_src, 0, NULL, NULL );
+            starpu_data_unregister( ipiv_dst ); /* the temporary handle is dropped right after the copy */
+        }
+    }
+}
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index 8d421ddbb5234d073ba9ea71b3c97ff3046ff9fc..4d21fe0d6e03be17be9d04bf5a3b680ee00cfc19 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -19,7 +19,8 @@
  * @author Samuel Thibault
  * @author Loris Lucido
  * @author Terry Cojean
- * @date 2023-07-06
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
  *
  */
 #ifndef _chameleon_starpu_h_
@@ -95,6 +96,7 @@
 #include "runtime_codelet_profile.h"
 #include "runtime_workspace.h"
 #include "cham_tile_interface.h"
+#include "cppi_interface.h"
 
 typedef struct starpu_schedopt_s
 {
@@ -166,6 +168,9 @@ void    chameleon_starpu_tag_release( int64_t min );
 
 void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, cham_flttype_t dtyp);
 
+#include "runtime_mpi.h"
+#include "runtime_wontuse.h"
+
 #if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
 static inline int
 chameleon_starpu_data_iscached(const CHAM_desc_t *A, int m, int n)
diff --git a/runtime/starpu/include/cppi_interface.h b/runtime/starpu/include/cppi_interface.h
new file mode 100644
index 0000000000000000000000000000000000000000..537bc9cd807c9e27f0cf550d6611e2bc974255d3
--- /dev/null
+++ b/runtime/starpu/include/cppi_interface.h
@@ -0,0 +1,90 @@
+/**
+ *
+ * @file starpu/cppi_interface.h
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Header to describe the Chameleon pivot panel interface in StarPU
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ *
+ */
+#ifndef _cppi_interface_h_
+#define _cppi_interface_h_
+
+#include "chameleon_starpu.h"
+#include <starpu_data.h>
+
+extern struct starpu_data_interface_ops cppi_ops;
+#define CPPI_INTERFACE_ID cppi_ops.interfaceid
+
+struct cppi_interface_s;
+typedef struct cppi_interface_s cppi_interface_t;
+
+/**
+ * Chameleon pivot interface: a CHAM_pivot_t plus the metadata describing it
+ */
+struct cppi_interface_s
+{
+    CHAM_pivot_t                  pivot;     /**< Copy of the pivot structure                  */
+    size_t                        arraysize; /**< Allocated size (NOTE(review): unit not visible here - confirm) */
+    cham_flttype_t                flttype;   /**< Type of the elements of the matrix           */
+    int                           has_diag;  /**< Bool to determine if pivot corresponds to diagonal block of current panel */
+    int                           h;         /**< Index of the current column being factorized */
+    int                           n;         /**< Number of elements in each row               */
+    enum starpu_data_interface_id id;        /**< Identifier of the interface                  */
+};
+
+void cppi_interface_init();
+void cppi_interface_fini();
+
+void cppi_register( starpu_data_handle_t *handleptr,
+                    cham_flttype_t        flttype,
+                    int                   n,
+                    int64_t               data_tag,
+                    int                   data_rank );
+
+void cl_cppi_redux_cpu_func( void *descr[], void *cl_arg );
+
+#if defined(CHAMELEON_DEBUG_STARPU_CPPI_INTERFACE)
+static inline void /* Debug helper: print the header fields, then the diagonal and pivot rows, to stream f */
+cppi_display_dbg( cppi_interface_t *cppi_interface, FILE *f, const char *title )
+{
+    int i;
+    double *diagrow, *pivrow;
+    diagrow = cppi_interface->pivot.diagrow; /* NOTE(review): rows are printed as double whatever flttype says - confirm */
+    pivrow  = cppi_interface->pivot.pivrow;
+
+    fprintf( f, "%sn=%2d, h=%2d, has_diag=%2d, m0=%2d, idx=%2d\n",
+             title,
+             cppi_interface->n,
+             cppi_interface->h,
+             cppi_interface->has_diag,
+             cppi_interface->pivot.blkm0,
+             cppi_interface->pivot.blkidx );
+
+    fprintf( f, "Diagonal row: " ); /* every line goes to f so the whole dump lands in one stream */
+    for( i=0; i<cppi_interface->n; i++) {
+        fprintf( f, "%e ", diagrow[i] );
+    }
+    fprintf( f, "\n" );
+    fprintf( f, "Piv      row: " );
+    for( i=0; i<cppi_interface->n; i++) {
+        fprintf( f, "%e ", pivrow[i] );
+    }
+    fprintf( f, "\n" );
+}
+#else
+static inline void /* No-op variant used when CHAMELEON_DEBUG_STARPU_CPPI_INTERFACE is not defined */
+cppi_display_dbg( cppi_interface_t *cppi_interface, FILE *f, const char *title )
+{
+    (void)cppi_interface; (void)f; (void)title; /* parameters are named because C before C23 forbids unnamed ones in a definition */
+}
+#endif
+#endif /* _cppi_interface_h_ */
diff --git a/runtime/starpu/include/runtime_mpi.h b/runtime/starpu/include/runtime_mpi.h
new file mode 100644
index 0000000000000000000000000000000000000000..6d307bc6ae597ec075caf05c7dcbd382a16c4043
--- /dev/null
+++ b/runtime/starpu/include/runtime_mpi.h
@@ -0,0 +1,41 @@
+/**
+ *
+ * @file starpu/runtime_mpi.h
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon StarPU mpi function implementation
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-08-22
+ *
+ */
+#ifndef _runtime_mpi_h_
+#define _runtime_mpi_h_
+
+/**
+ *  Provide starpu_mpi_data_register() when StarPU-MPI lacks it or MPI is disabled
+ */
+#if defined(CHAMELEON_USE_MPI)
+
+#if !defined(HAVE_STARPU_MPI_DATA_REGISTER)
+static inline void starpu_mpi_data_register( starpu_data_handle_t handle, int64_t tag, int owner ) /* fallback when StarPU does not provide it */
+{
+    starpu_data_set_rank( handle, owner ); /* owner rank and tag are set separately */
+    starpu_data_set_tag( handle, tag );
+}
+#endif
+
+#else
+
+static inline void /* Without MPI there is nothing to register: no-op */
+starpu_mpi_data_register( starpu_data_handle_t handle, int64_t tag, int owner )
+{ (void)handle; (void)tag; (void)owner; }
+
+#endif
+
+#endif /* _runtime_mpi_h_ */
diff --git a/runtime/starpu/include/runtime_wontuse.h b/runtime/starpu/include/runtime_wontuse.h
new file mode 100644
index 0000000000000000000000000000000000000000..c5b1526d84156cbb004aa39fe114a8e395f6b32d
--- /dev/null
+++ b/runtime/starpu/include/runtime_wontuse.h
@@ -0,0 +1,57 @@
+/**
+ *
+ * @file starpu/runtime_wontuse.h
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon StarPU wont use implementations to flush pieces of data
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-08-22
+ *
+ */
+#ifndef _runtime_wontuse_h_
+#define _runtime_wontuse_h_
+
+#include "chameleon_starpu.h"
+
+/**
+ * Different implementations of the flush call based on StarPU version
+ */
+#if defined(HAVE_STARPU_DATA_WONT_USE)
+
+static inline void
+chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) {
+    starpu_data_wont_use( handle ); /* HAVE_STARPU_DATA_WONT_USE variant: forward straight to StarPU */
+}
+
+#elif defined(HAVE_STARPU_IDLE_PREFETCH)
+
+static inline void
+chameleon_starpu_data_flush( void *_handle)
+{
+    starpu_data_handle_t handle = (starpu_data_handle_t)_handle; /* the callback argument is the handle itself */
+    starpu_data_idle_prefetch_on_node( handle, STARPU_MAIN_RAM, 1 ); /* ask for an idle prefetch targeting STARPU_MAIN_RAM */
+    starpu_data_release_on_node( handle, -1 ); /* matches the acquisition on node -1 made by chameleon_starpu_data_wont_use() */
+}
+
+static inline void
+chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) {
+    starpu_data_acquire_on_node_cb( handle, -1, STARPU_R, /* HAVE_STARPU_IDLE_PREFETCH variant: read acquisition on node -1 */
+                                    chameleon_starpu_data_flush, handle ); /* callback and its argument (the handle) */
+}
+
+#else
+
+static inline void
+chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) {
+    starpu_data_acquire_cb( handle, STARPU_R, /* last variant: read acquisition ... */
+                            (void (*)(void*))&starpu_data_release, handle ); /* ... whose callback is starpu_data_release, cast to the callback type */
+}
+
+#endif
+#endif /* _runtime_wontuse_h_ */
diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c
index 1e837e1b0ca4c4a5e8418e9acf86dcfb71029851..1a048178623ea2dea461e01218ad4dfd93bfb86f 100644
--- a/runtime/starpu/interface/cham_tile_interface.c
+++ b/runtime/starpu/interface/cham_tile_interface.c
@@ -13,7 +13,7 @@
  * @author Mathieu Faverge
  * @author Gwenole Lucas
  * @author Samuel Thibault
- * @date 2023-07-06
+ * @date 2023-08-22
  *
  */
 #include "chameleon_starpu.h"
@@ -115,7 +115,7 @@ cti_init( void *data_interface )
 
 static void
 cti_register_data_handle( starpu_data_handle_t  handle,
-                          unsigned int          home_node,
+                          int                   home_node,
                           void                 *data_interface )
 {
     starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) data_interface;
diff --git a/runtime/starpu/interface/cppi_interface.c b/runtime/starpu/interface/cppi_interface.c
new file mode 100644
index 0000000000000000000000000000000000000000..2249f88b135e7152b751d26d4510fdd8e95c41a7
--- /dev/null
+++ b/runtime/starpu/interface/cppi_interface.c
@@ -0,0 +1,529 @@
+/**
+ *
+ * @file starpu/cppi_interface.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon pivot panel interface for StarPU
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-22
+ *
+ */
+#include "chameleon_starpu.h"
+#undef HAVE_STARPU_REUSE_DATA_ON_NODE
+
+static inline CHAM_pivot_t *
+cppi_handle_get( starpu_data_handle_t handle )
+{
+    /* Return the pivot embedded in the STARPU_MAIN_RAM copy of the interface */
+    cppi_interface_t *iface = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+
+#if defined(STARPU_DEBUG)
+    STARPU_ASSERT_MSG( iface->id == CPPI_INTERFACE_ID,
+                       "Error. The given data is not a CHAM_pivot interface." );
+#endif
+
+    return &iface->pivot;
+}
+
+static void
+cppi_init( void *data_interface )
+{
+    cppi_interface_t *iface = data_interface;
+    iface->has_diag = 0;  /* no diagonal row recorded */
+    iface->h        = -1; /* -1 is the value cl_cppi_redux_cpu_func() treats as "not set" */
+    iface->id       = CPPI_INTERFACE_ID;
+}
+
+static void
+cppi_register_data_handle( starpu_data_handle_t  handle,
+                           int                   home_node,
+                           void                 *data_interface )
+{
+    const cppi_interface_t *registered = data_interface;
+    int                     memnode;
+
+    /*
+     * Every memory node receives a copy of the registered descriptor, but only
+     * the home node keeps the row pointers: the other replicas start without
+     * any buffer attached.
+     */
+    for ( memnode = 0; memnode < STARPU_MAXNODES; memnode++ ) {
+        cppi_interface_t *replica = starpu_data_get_interface_on_node( handle, memnode );
+
+        memcpy( replica, registered, sizeof( cppi_interface_t ) );
+        if ( memnode == home_node ) {
+            continue;
+        }
+        replica->pivot.pivrow  = NULL;
+        replica->pivot.diagrow = NULL;
+    }
+}
+
+static starpu_ssize_t
+cppi_allocate_data_on_node( void *data_interface, unsigned node )
+{
+    cppi_interface_t *iface  = data_interface;
+    starpu_ssize_t    nbytes = iface->arraysize * 2;
+    char             *rows   = (char *)starpu_malloc_on_node( node, nbytes );
+
+    if ( rows == NULL ) {
+        return -ENOMEM;
+    }
+
+    /*
+     * One buffer holds both rows: the pivot row first, the diagonal row right after it.
+     * WARNING: the memset below should not be a memset if the node is a GPU.
+     */
+    memset( rows, 0, iface->arraysize * 2 );
+
+    iface->h             = -1;
+    iface->has_diag      = 0;
+    iface->pivot.pivrow  = rows;
+    iface->pivot.diagrow = rows + iface->arraysize;
+
+    return nbytes;
+}
+
+static void
+cppi_free_data_on_node( void *data_interface, unsigned node )
+{
+    cppi_interface_t *iface  = data_interface;
+    starpu_ssize_t    nbytes = iface->arraysize * 2;
+
+    /* pivrow is the base address of the buffer set up by cppi_allocate_data_on_node() */
+    starpu_free_on_node( node, (uintptr_t)iface->pivot.pivrow, nbytes );
+    iface->pivot.diagrow = NULL;
+    iface->pivot.pivrow  = NULL;
+}
+
+#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE)
+static void
+cppi_reuse_data_on_node( void *dst_data_interface, const void *cached_interface, unsigned node )
+{
+    cppi_interface_t       *fresh  = dst_data_interface;
+    const cppi_interface_t *cached = cached_interface;
+
+    /* Take over the cached pivot and n, but restart with has_diag cleared and h unset (-1) */
+    fresh->pivot    = cached->pivot;
+    fresh->n        = cached->n;
+    fresh->h        = -1;
+    fresh->has_diag = 0;
+    (void)node;
+}
+#endif
+
+static size_t
+cppi_get_size(starpu_data_handle_t handle)
+{
+    const cppi_interface_t *iface =
+        starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+
+#if defined(STARPU_DEBUG)
+    STARPU_ASSERT_MSG( iface->id == CPPI_INTERFACE_ID,
+                       "Error. The given data is not a pivot interface." );
+#endif
+
+    /* Two rows of arraysize bytes each, plus the four int fields that
+     * cppi_pack_data() writes in front of them */
+    return iface->arraysize * 2 + 4 * sizeof(int);
+}
+
+#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE)
+static size_t
+cppi_get_alloc_size(starpu_data_handle_t handle)
+{
+    const cppi_interface_t *iface = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+
+#if defined(STARPU_DEBUG)
+    STARPU_ASSERT_MSG( iface->id == CPPI_INTERFACE_ID,
+                       "Error. The given data is not a pivot interface." );
+#endif
+
+    /* Same byte count as requested by cppi_allocate_data_on_node(): the two rows only */
+    return iface->arraysize * 2;
+}
+#endif
+
+static uint32_t
+cppi_footprint( starpu_data_handle_t handle )
+{
+    const cppi_interface_t *iface =
+        starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+
+    return starpu_hash_crc32c_be( 2., iface->n ); /* n is the only interface field entering the hash */
+}
+
+#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE)
+static uint32_t
+cppi_alloc_footprint( starpu_data_handle_t handle )
+{
+    return starpu_hash_crc32c_be( cppi_get_alloc_size( handle ), 0 ); /* hash this interface's own allocation size, not the tile interface's */
+}
+#endif
+
+static int
+cppi_compare( void *data_interface_a, void *data_interface_b )
+{
+    const cppi_interface_t *lhs = data_interface_a;
+    const cppi_interface_t *rhs = data_interface_b;
+
+    /* Two interfaces are compatible when they agree on both
+     * the size n and the flttype */
+    return ( lhs->n == rhs->n ) && ( lhs->flttype == rhs->flttype );
+}
+
+#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE)
+static int
+cppi_alloc_compare(void *data_interface_a, void *data_interface_b)
+{
+    const cppi_interface_t *lhs = data_interface_a;
+    const cppi_interface_t *rhs = data_interface_b;
+
+    /* Allocations are interchangeable when their arraysize matches */
+    return ( rhs->arraysize == lhs->arraysize );
+}
+#endif
+
+static void
+cppi_display( starpu_data_handle_t handle, FILE *f )
+{
+    /* The handle is opaque: fetch its STARPU_MAIN_RAM interface instead of casting the handle itself */
+    cppi_interface_t *cppi_interface = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+
+    fprintf( f, "%d\t%d\t%d\t%d\n",
+             cppi_interface->n, cppi_interface->h,
+             cppi_interface->pivot.blkm0,
+             cppi_interface->pivot.blkidx );
+}
+
+static int
+cppi_pack_data( starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count )
+{
+    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+    cppi_interface_t *iface = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+    int              *header;
+
+    /* The packed size is reported even when the caller passes no output pointer */
+    *count = cppi_get_size( handle );
+    if ( ptr == NULL ) {
+        return 0;
+    }
+
+    *ptr   = (void *)starpu_malloc_on_node_flags( node, *count, 0 );
+    header = (int *)(*ptr);
+
+    /* Header layout: has_diag, h, blkm0, blkidx */
+    header[0] = iface->has_diag;
+    header[1] = iface->h;
+    header[2] = iface->pivot.blkm0;
+    header[3] = iface->pivot.blkidx;
+
+    /* Both rows follow the header, copied in one go starting at pivrow */
+    memcpy( header + 4, iface->pivot.pivrow, iface->arraysize * 2 );
+
+    return 0;
+}
+
+static int
+cppi_peek_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t count )
+{
+    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+    cppi_interface_t *cppi_interface = (cppi_interface_t *)
+        starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+    int   *tmp  = ptr;
+    size_t size = cppi_interface->arraysize * 2; /* byte count of the two rows copied below */
+
+#if defined(CHAMELEON_USE_MPI_DATATYPES) && 0 /* "&& 0": this branch is never compiled */
+    /*
+     * We may end up here if an early reception occurred before the handle of the
+     * received data has been registered. Thus, the datatype did not exist and we
+     * need to unpack the data ourselves
+     */
+    STARPU_ASSERT( count == (size + 4 * sizeof(int)) );
+
+#else
+    /* Mirror of cppi_pack_data(): four int fields first, then the rows; count is not read here */
+    cppi_interface->has_diag     = tmp[0];
+    cppi_interface->h            = tmp[1];
+    cppi_interface->pivot.blkm0  = tmp[2];
+    cppi_interface->pivot.blkidx = tmp[3];
+    tmp += 4;
+
+    memcpy( cppi_interface->pivot.pivrow, tmp, size );
+
+#endif
+    return 0;
+}
+
+static int
+cppi_unpack_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t count )
+{
+    cppi_peek_data( handle, node, ptr, count ); /* copy the packed content into the interface first */
+
+    /* Free the received information */
+    starpu_free_on_node_flags( node, (uintptr_t)ptr, count, 0 );
+
+    return 0;
+}
+
+static starpu_ssize_t
+cppi_describe( void *data_interface, char *buf, size_t size )
+{
+    cppi_interface_t *cppi_interface = (cppi_interface_t *) data_interface;
+    /* Formats a one-line summary into buf (bounded by size); the snprintf() result is returned as is */
+    return snprintf( buf, size, "Pivot structure, n %d, blkm0 %d, blkidx %d",
+                     cppi_interface->n,
+                     cppi_interface->pivot.blkm0,
+                     cppi_interface->pivot.blkidx );
+}
+
+static int
+cppi_copy_any_to_any( void *src_interface, unsigned src_node,
+                      void *dst_interface, unsigned dst_node, void *async_data )
+{
+    cppi_interface_t *cppi_interface_src = (cppi_interface_t *) src_interface;
+    cppi_interface_t *cppi_interface_dst = (cppi_interface_t *) dst_interface;
+    size_t size = cppi_interface_src->arraysize * 2; /* pivot row + diagonal row, copied as one chunk from pivrow */
+    int ret = 0;
+
+    STARPU_ASSERT( cppi_interface_src->n       == cppi_interface_dst->n       );
+    STARPU_ASSERT( cppi_interface_src->flttype == cppi_interface_dst->flttype );
+
+    /* Metadata travels with the rows: has_diag must follow the diagonal row, as it does in pack/peek */
+    cppi_interface_dst->h            = cppi_interface_src->h;
+    cppi_interface_dst->has_diag     = cppi_interface_src->has_diag;
+    cppi_interface_dst->pivot.blkm0  = cppi_interface_src->pivot.blkm0;
+    cppi_interface_dst->pivot.blkidx = cppi_interface_src->pivot.blkidx;
+
+    void *src_mat = cppi_interface_src->pivot.pivrow;
+    void *dst_mat = cppi_interface_dst->pivot.pivrow;
+
+    if ( starpu_interface_copy( (uintptr_t) src_mat, 0, src_node,
+                                (uintptr_t) dst_mat, 0, dst_node,
+                                size, async_data ) )
+    {
+        ret = -EAGAIN;
+    }
+
+    starpu_interface_data_copy( src_node, dst_node, size );
+
+    return ret;
+}
+
+static const struct starpu_data_copy_methods cppi_copy_methods =
+{
+    .any_to_any = cppi_copy_any_to_any, /* the only copy method provided by this interface */
+};
+
+struct starpu_data_interface_ops cppi_ops =
+{
+    .init                  = cppi_init,
+    .register_data_handle  = cppi_register_data_handle,
+    .allocate_data_on_node = cppi_allocate_data_on_node,
+    .free_data_on_node     = cppi_free_data_on_node,
+#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE) /* undefined at the top of this file, so these three entries are left out */
+    .reuse_data_on_node    = cppi_reuse_data_on_node,
+    .alloc_compare         = cppi_alloc_compare,
+    .alloc_footprint       = cppi_alloc_footprint,
+#endif
+    .get_size              = cppi_get_size,
+    .footprint             = cppi_footprint,
+    .compare               = cppi_compare,
+    .display               = cppi_display,
+    .pack_data             = cppi_pack_data,
+#if defined(HAVE_STARPU_DATA_PEEK)
+    .peek_data             = cppi_peek_data,
+#endif
+    .unpack_data           = cppi_unpack_data,
+    .describe              = cppi_describe,
+    .copy_methods          =&cppi_copy_methods,
+    .interfaceid           = STARPU_UNKNOWN_INTERFACE_ID, /* placeholder, replaced in cppi_interface_init() */
+    .interface_size        = sizeof(cppi_interface_t),
+    .name                  = "CPPI_INTERFACE"
+};
+
+
+static int compare_pivots( cham_flttype_t type, int h, void * redux, void * input ){
+    /* Non-zero when entry h of input is strictly larger in modulus than entry h of redux */
+    switch ( type ) {
+    case ChamRealFloat: {
+        const float *current   = redux;
+        const float *candidate = input;
+        return fabsf( current[h] ) < fabsf( candidate[h] );
+    }
+    case ChamRealDouble: {
+        const double *current   = redux;
+        const double *candidate = input;
+        return fabs( current[h] ) < fabs( candidate[h] );
+    }
+    case ChamComplexFloat: {
+        const CHAMELEON_Complex32_t *current   = redux;
+        const CHAMELEON_Complex32_t *candidate = input;
+        return cabsf( current[h] ) < cabsf( candidate[h] );
+    }
+    case ChamComplexDouble: {
+        const CHAMELEON_Complex64_t *current   = redux;
+        const CHAMELEON_Complex64_t *candidate = input;
+        return cabs( current[h] ) < cabs( candidate[h] );
+    }
+    default:
+        return 0;
+    }
+}
+
+void
+cl_cppi_redux_cpu_func(void *descr[], void *cl_arg)
+{
+    cppi_interface_t *cppi_redux = ((cppi_interface_t *) descr[0]); /* first buffer: updated in place */
+    cppi_interface_t *cppi_input = ((cppi_interface_t *) descr[1]); /* second buffer: merged into the first */
+
+    STARPU_ASSERT( cppi_redux->n         == cppi_input->n         );
+    STARPU_ASSERT( cppi_redux->flttype   == cppi_input->flttype   );
+    STARPU_ASSERT( cppi_redux->arraysize == cppi_input->arraysize );
+
+    cppi_display_dbg( cppi_input, stderr, "BRed Input: ");
+    cppi_display_dbg( cppi_redux, stderr, "BRed Inout: ");
+
+    /* Whichever side still has h == -1 inherits the h of the other side */
+    if ( cppi_input->h == -1 ) {
+        cppi_input->h = cppi_redux->h; /* NOTE(review): writes into the buffer declared STARPU_R in cppi_redux_init() - confirm this is intended */
+    }
+    if ( cppi_redux->h == -1 ) {
+        cppi_redux->h = cppi_input->h;
+    }
+    assert( cppi_redux->h == cppi_input->h );
+
+    /* Take the diagonal row from the input when it carries one (the redux side must not already have it) */
+    if ( cppi_input->has_diag ) {
+        assert( cppi_redux->has_diag == 0 );
+
+        memcpy( cppi_redux->pivot.diagrow,
+                cppi_input->pivot.diagrow,
+                cppi_input->arraysize );
+        cppi_redux->has_diag = 1;
+    }
+
+    /*
+     * Let's now select the pivot: compare entry h of both pivot rows and keep
+     * the input row (with its blkm0/blkidx) when compare_pivots() returns non-zero.
+     */
+    {
+        int   h            = cppi_redux->h;
+        void *pivrow_redux = cppi_redux->pivot.pivrow;
+        void *pivrow_input = cppi_input->pivot.pivrow;
+
+        if( compare_pivots( cppi_redux->flttype, h, pivrow_redux, pivrow_input ) )
+        {
+            cppi_redux->pivot.blkm0  = cppi_input->pivot.blkm0;
+            cppi_redux->pivot.blkidx = cppi_input->pivot.blkidx;
+            memcpy( pivrow_redux,
+                    pivrow_input,
+                    cppi_input->arraysize );
+        }
+    }
+
+    cppi_display_dbg( cppi_input, stderr, "ARed Input: ");
+    cppi_display_dbg( cppi_redux, stderr, "ARed Inout: ");
+
+    return;
+}
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(cppi_redux, cl_cppi_redux_cpu_func)
+
+static void
+cl_cppi_init_redux_cpu_func( void *descr[], void *cl_arg )
+{
+    cppi_interface_t *fresh = descr[0];
+
+    /* At initialization the redux pivot has no diagonal row and h is unset (-1) */
+    fresh->h        = -1;
+    fresh->has_diag = 0;
+
+    /* The rows need no zeroing, as copies will be made to initialize them:
+     * they are only cleared when CHAMELEON_DEBUG_STARPU is defined. */
+#if defined(CHAMELEON_DEBUG_STARPU)
+    memset( fresh->pivot.pivrow,  0, fresh->arraysize );
+    memset( fresh->pivot.diagrow, 0, fresh->arraysize );
+#endif
+
+    (void)cl_arg;
+}
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU( cppi_init_redux, cl_cppi_init_redux_cpu_func );
+
+static void cppi_redux_init( void ) __attribute__( ( constructor ) ); /* constructor attribute; also called explicitly from cppi_interface_init() */
+static void cppi_redux_init( void )
+{
+    cl_cppi_init_redux.nbuffers = 1;
+    cl_cppi_init_redux.modes[0] = STARPU_W;
+    cl_cppi_init_redux.name = "CPPI ALLREDUX INIT";
+
+    cl_cppi_redux.nbuffers = 2;
+    cl_cppi_redux.modes[0] = STARPU_RW | STARPU_COMMUTE;
+    cl_cppi_redux.modes[1] = STARPU_R;
+    /* Should be RW | COMMUTE to be an allreduce */
+    //cl_cppi_redux.modes[1] = STARPU_RW | STARPU_COMMUTE;
+    cl_cppi_redux.name = "CPPI ALLREDUX TASK";
+}
+
+static void
+cppi_set_reduction_methods( starpu_data_handle_t handle)
+{
+    starpu_data_set_reduction_methods( handle ,
+                                       &cl_cppi_redux,        /* codelet built on cl_cppi_redux_cpu_func() */
+                                       &cl_cppi_init_redux ); /* codelet built on cl_cppi_init_redux_cpu_func() */
+}
+
+void
+cppi_register( starpu_data_handle_t *handleptr,
+               cham_flttype_t        flttype,
+               int                   n,
+               int64_t               data_tag,
+               int                   data_rank )
+{
+    cppi_interface_t cppi_interface =
+        {
+            .id = CPPI_INTERFACE_ID,
+            .arraysize = n * CHAMELEON_Element_Size( flttype ), /* presumably the byte size of one row of n elements - confirm against CHAMELEON_Element_Size() */
+            .flttype = flttype,
+            .has_diag = 0,
+            .h  = -1,
+            .n  = n,
+        };
+    starpu_data_register( handleptr, -1, &cppi_interface, &cppi_ops ); /* home node -1, and no row pointers set in the descriptor above */
+
+#if defined(CHAMELEON_USE_MPI)
+    starpu_mpi_data_register( *handleptr, data_tag, data_rank );
+#endif /* defined(CHAMELEON_USE_MPI) */
+
+    cppi_set_reduction_methods( *handleptr ); /* attach the redux / init codelets to the new handle */
+}
+
+void
+cppi_interface_init( void )
+{
+    if ( cppi_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID ) /* request an id only while none has been assigned */
+    {
+        cppi_ops.interfaceid = starpu_data_interface_get_next_id();
+    }
+    cppi_redux_init();
+}
+
+void
+cppi_interface_fini( void )
+{ /* intentionally empty */ }
diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c
index 978017c413f879736b8f1321b081618608c45622..9ee58764c33ef8c6f4c53660ed1dc5be50af599b 100644
--- a/testing/testing_zgetrf.c
+++ b/testing/testing_zgetrf.c
@@ -15,7 +15,7 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-05
+ * @date 2023-08-22
  * @precisions normal z -> c d s
  *
  */
@@ -39,19 +39,19 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
     int         M     = run_arg_get_int( args, "M", N );
     int         LDA   = run_arg_get_int( args, "LDA", M );
     int         seedA = run_arg_get_int( args, "seedA", testing_ialea() );
-    cham_diag_t diag  = run_arg_get_diag( args, "diag", ChamUnit );
+    cham_diag_t diag  = run_arg_get_diag( args, "diag", ChamNonUnit );
     int         minMN = chameleon_min( M, N );
 
     /* Descriptors */
-    CHAM_desc_t *descA, *descIPIV;
+    CHAM_desc_t *descA;
+    CHAM_ipiv_t *descIPIV;
     void        *ws = NULL;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
     /* Creates the matrices */
     parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N );
-    CHAMELEON_Desc_Create(
-        &descIPIV, CHAMELEON_MAT_ALLOC_TILE, ChamInteger, nb, 1, nb, minMN, 1, 0, 0, minMN, 1, CHAMELEON_Comm_size(), 1 );
+    CHAMELEON_Ipiv_Create( &descIPIV, descA, NULL );
 
     /* Fills the matrix with random values */
     if ( diag == ChamUnit ) {
@@ -71,7 +71,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
     if ( async ) {
         hres = CHAMELEON_zgetrf_Tile_Async( descA, descIPIV, ws, test_data.sequence, &test_data.request );
         CHAMELEON_Desc_Flush( descA, test_data.sequence );
-        CHAMELEON_Desc_Flush( descIPIV, test_data.sequence );
+        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
     }
     else {
         hres = CHAMELEON_zgetrf_Tile( descA, descIPIV );
@@ -82,19 +82,14 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
     /* Checks the factorization and residual */
 #if !defined(CHAMELEON_SIMULATION)
     if ( check ) {
-        CHAM_desc_t *descA0c, *descIPIVc;
+        CHAM_desc_t *descA0c;
         CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE );
-        int         *ipiv;
 
         /* Create A0c as local to rank 0 on all nodes to gather the matrix */
         CHAMELEON_Desc_Create_User(
             &descA0c, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble,
             nb, nb, nb*nb, M, N, 0, 0, M, N, 1, 1,
             chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL, NULL );
-        CHAMELEON_Desc_Create_User(
-            &descIPIVc, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamInteger,
-            nb, 1, nb, M, 1, 0, 0, M, 1, 1, 1,
-            chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL, NULL );
 
         if ( diag == ChamUnit ) {
             CHAMELEON_zplgtr_Tile( 0,     ChamUpper, descA0c, seedA   );
@@ -104,18 +99,21 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
             CHAMELEON_zplrnt_Tile( descA0c, seedA );
         }
 
-        /* Cheat code: float (s) is the same size as int */
-        CHAMELEON_slacpy_Tile( ChamUpperLower, descIPIV, descIPIVc );
-        ipiv = descIPIVc->mat;
-
         /* Compute the permutation of A0: P * A0 */
         if ( CHAMELEON_Comm_rank() == 0 ) {
-            LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, 1, M, ipiv, 1 );
+            int *ipiv;
+
+            ipiv = malloc( minMN * sizeof(int) );
+            CHAMELEON_Ipiv_Gather( descIPIV, ipiv, 0 );
+            LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, 1, minMN, ipiv, 1 );
+            free( ipiv );
+        }
+        else {
+            CHAMELEON_Ipiv_Gather( descIPIV, NULL, 0 );
         }
 
         CHAMELEON_zlacpy_Tile( ChamUpperLower, descA0c, descA0 );
         CHAMELEON_Desc_Destroy( &descA0c );
-        CHAMELEON_Desc_Destroy( &descIPIVc );
 
         hres += check_zxxtrf( args, ChamGeneral, ChamUpperLower,
                               descA0, descA );
@@ -129,7 +127,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
     }
 
     parameters_desc_destroy( &descA );
-    CHAMELEON_Desc_Destroy( &descIPIV );
+    CHAMELEON_Ipiv_Destroy( &descIPIV );
 
     return hres;
 }