diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 368a3d46f26124035b35fa9be1deba42619ca0a8..1060432565ee5d581cd82bc74da7c53df0829383 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -24,50 +24,6 @@
 
 #define A(m,n)  A,        m, n
 #define U(m,n)  &(ws->U), m, n
-#define IPIV(m) IPIV,     m, 1
-
-/*
- * Static variable to know how to handle the data within the kernel
- * This assumes that only one runtime is enabled at a time.
- */
-static RUNTIME_id_t zgetrf_runtime_id = RUNTIME_SCHED_STARPU;
-
-static inline int
-zgetrf_ipiv_init( const CHAM_desc_t *descIPIV,
-                  cham_uplo_t uplo, int m, int n,
-                  CHAM_tile_t *tileIPIV, void *op_args )
-{
-    int *IPIV;
-    (void)op_args;
-
-    if ( zgetrf_runtime_id == RUNTIME_SCHED_PARSEC ) {
-        IPIV = (int*)tileIPIV;
-    }
-    else {
-        IPIV = CHAM_tile_get_ptr( tileIPIV );
-    }
-
-#if !defined(CHAMELEON_SIMULATION)
-    {
-        int tempmm = m == descIPIV->mt-1 ? descIPIV->m - m * descIPIV->mb : descIPIV->mb;
-        int i;
-
-        for( i=0; i<tempmm; i++ ) {
-            IPIV[i] = m * descIPIV->mb + i + 1;
-        }
-    }
-#endif
-
-    return 0;
-}
-
-static inline void
-chameleon_pzgetrf_ipiv_init( CHAM_desc_t        *IPIV,
-                             RUNTIME_sequence_t *sequence,
-                             RUNTIME_request_t  *request )
-{
-    chameleon_pmap( ChamW, ChamUpperLower, IPIV, zgetrf_ipiv_init, NULL, sequence, request );
-}
 
 /*
  * All the functions below are panel factorization variant.
@@ -79,10 +35,10 @@ chameleon_pzgetrf_ipiv_init( CHAM_desc_t        *IPIV,
  *   @param[inout] A
  *      The descriptor of the full matrix A (not just the panel)
  *
- *   @param[in] k
- *      The index of the column to factorize
+ *   @param[inout] ipiv
+ *      The descriptor of the pivot array associated to A.
  *
- *   @param[in] ib
+ *   @param[in] k
  *      The index of the column to factorize
  *
  *   @param[inout] options
@@ -91,6 +47,7 @@ chameleon_pzgetrf_ipiv_init( CHAM_desc_t        *IPIV,
 static inline void
 chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws,
                                      CHAM_desc_t                *A,
+                                     CHAM_ipiv_t                *ipiv,
                                      int                         k,
                                      RUNTIME_option_t           *options )
 {
@@ -122,6 +79,7 @@ chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws,
 static inline void
 chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws,
                                             CHAM_desc_t                *A,
+                                            CHAM_ipiv_t                *ipiv,
                                             int                         k,
                                             RUNTIME_option_t           *options )
 {
@@ -151,18 +109,79 @@ chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws,
     RUNTIME_data_flush( options->sequence, U(k, k) );
 }
 
+static inline void
+chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
+                                      CHAM_desc_t                *A,
+                                      CHAM_ipiv_t                *ipiv,
+                                      int                         k,
+                                      RUNTIME_option_t           *options )
+{
+    int m, h;
+    int tempkm, tempkn, minmn;
+
+    tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+    tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+    minmn  = chameleon_min( tempkm, tempkn );
+
+    /* Update the number of column */
+    ipiv->n = minmn;
+
+    /*
+     * Algorithm per column with pivoting
+     */
+    for (h=0; h<=minmn; h++){
+
+        INSERT_TASK_zgetrf_percol_diag(
+            options,
+            h, k * A->mb,
+            A(k, k),
+            ipiv );
+
+        for (m = k+1; m < A->mt; m++) {
+            //tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+
+            INSERT_TASK_zgetrf_percol_offdiag(
+                options,
+                h, m * A->mb,
+                A(m, k),
+                ipiv );
+        }
+
+        if ( h < minmn ) {
+            /* Reduce globally (between MPI processes) */
+            RUNTIME_ipiv_reducek( options, ipiv, k, h );
+        }
+    }
+
+    /* Flush temporary data used for the pivoting */
+    RUNTIME_ipiv_flushk( options->sequence, ipiv, k );
+}
+
 static inline void
 chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
                                CHAM_desc_t                *A,
+                               CHAM_ipiv_t                *ipiv,
                                int                         k,
                                RUNTIME_option_t           *options )
 {
     /* TODO: Should be replaced by a function pointer */
-    if ( ws->alg == ChamGetrfNoPivPerColumn ) {
-        chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
-    }
-    else {
-        chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
+    switch( ws->alg ) {
+    case ChamGetrfNoPivPerColumn:
+        chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, ipiv, k, options );
+        break;
+
+    case ChamGetrfPPivPerColumn:
+        chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options );
+        break;
+
+    case ChamGetrfPPiv:
+        chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options );
+        break;
+
+    case ChamGetrfNoPiv:
+        chameleon_attr_fallthrough;
+    default:
+        chameleon_pzgetrf_panel_facto_nopiv( ws, A, ipiv, k, options );
     }
 }
 
@@ -227,7 +246,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
  */
 void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
                         CHAM_desc_t                *A,
-                        CHAM_desc_t                *IPIV,
+                        CHAM_ipiv_t                *IPIV,
                         RUNTIME_sequence_t         *sequence,
                         RUNTIME_request_t          *request )
 {
@@ -243,14 +262,11 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
     }
     RUNTIME_options_init( &options, chamctxt, sequence, request );
 
-    /* Initialize IPIV */
-    chameleon_pzgetrf_ipiv_init( IPIV, sequence, request );
-
     for (k = 0; k < min_mnt; k++) {
         RUNTIME_iteration_push( chamctxt, k );
 
         options.priority = A->nt;
-        chameleon_pzgetrf_panel_facto( ws, A, k, &options );
+        chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options );
 
         for (n = k+1; n < A->nt; n++) {
             options.priority = A->nt-n;
@@ -272,5 +288,12 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
         }
     }
 
+    /* Initialize IPIV */
+    if ( (ws->alg == ChamGetrfNoPivPerColumn) ||
+         (ws->alg == ChamGetrfNoPiv ) )
+    {
+        RUNTIME_ipiv_init( IPIV );
+    }
+
     RUNTIME_options_finalize( &options, chamctxt );
 }
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index 9d1b19806343d54840eb2da435cdeb7bb5939cb5..73c810be2c1f3294583b6599aff49e726f8f049d 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -68,15 +68,21 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
     {
         char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" );
 
-        if ( algostr ) {
-            if ( strcasecmp( algostr, "nopiv" ) ) {
+        if ( algostr != NULL ) {
+            if ( strcasecmp( algostr, "nopiv" ) == 0 ) {
                 ws->alg = ChamGetrfNoPiv;
             }
             else if ( strcasecmp( algostr, "nopivpercolumn" ) == 0  ) {
                 ws->alg = ChamGetrfNoPivPerColumn;
             }
+            else if ( strcasecmp( algostr, "ppiv" )  == 0 ) {
+                ws->alg = ChamGetrfPPiv;
+            }
+            else if ( strcasecmp( algostr, "ppivpercolumn" ) == 0  ) {
+                ws->alg = ChamGetrfPPivPerColumn;
+            }
             else {
-                fprintf( stderr, "ERROR: CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn => Switch back to NoPiv\n" );
+                chameleon_error( "CHAMELEON_zgetrf_WS_Alloc", "CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn, PPiv, PPivPerColumn => Switch back to NoPiv\n" );
             }
         }
         chameleon_cleanenv( algostr );
@@ -90,6 +96,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
                              NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
     }
 
+    /* Set ib to 1 if per column algorithm */
+    if ( ( ws->alg == ChamGetrfNoPivPerColumn ) ||
+         ( ws->alg == ChamGetrfPPivPerColumn  ) )
+    {
+        ws->ib = 1;
+    }
+
     return ws;
 }
 
@@ -123,7 +136,6 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
     free( ws );
 }
 
-#if defined(NOT_AVAILABLE_YET)
 /**
  ********************************************************************************
  *
@@ -149,6 +161,11 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
  * @param[in] LDA
  *          The leading dimension of the array A. LDA >= max(1,M).
  *
+ * @param[out] IPIV
+ *          Integer array of dimension min(M,N).
+ *          The pivot indices; for 1 <= i <= min(M,N), row i of the
+ *          matrix was interchanged with row IPIV(i).
+ *
  *******************************************************************************
  *
  * @retval CHAMELEON_SUCCESS successful exit
@@ -173,10 +190,11 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
     int                 NB;
     int                 status;
     CHAM_desc_t         descAl, descAt;
+    CHAM_ipiv_t         descIPIV;
     CHAM_context_t     *chamctxt;
     RUNTIME_sequence_t *sequence = NULL;
     RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
-    void               *ws;
+    struct chameleon_pzgetrf_s *ws;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
@@ -219,25 +237,35 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
     /* Allocate workspace for partial pivoting */
     ws = CHAMELEON_zgetrf_WS_Alloc( &descAt );
 
+    if ( ws->alg == ChamGetrfPPivPerColumn ) {
+        chameleon_ipiv_init( &descIPIV, &descAt, IPIV );
+    }
+
     /* Call the tile interface */
-    CHAMELEON_zgetrf_Tile_Async( &descAt, ws, sequence, &request );
+    CHAMELEON_zgetrf_Tile_Async( &descAt, &descIPIV, ws, sequence, &request );
 
     /* Submit the matrix conversion back */
     chameleon_ztile2lap( chamctxt, &descAl, &descAt,
                          ChamDescInout, ChamUpperLower, sequence, &request );
 
+    if ( ws->alg == ChamGetrfPPivPerColumn ) {
+        RUNTIME_ipiv_gather( &descIPIV, IPIV, 0 );
+    }
     chameleon_sequence_wait( chamctxt, sequence );
 
     /* Cleanup the temporary data */
     CHAMELEON_zgetrf_WS_Free( ws );
     chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
 
+    if ( ws->alg == ChamGetrfPPivPerColumn ) {
+        chameleon_ipiv_destroy( &descIPIV );
+    }
+
     status = sequence->status;
     chameleon_sequence_destroy( chamctxt, sequence );
 
     return status;
 }
-#endif
 
 /**
  ********************************************************************************
@@ -255,12 +283,19 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
  *          On entry, the M-by-N matrix to be factored.
  *          On exit, the tile factors L and U from the factorization.
  *
+ * @param[in,out] IPIV
+ *          On entry, ipiv descriptor associated to A and created with
+ *          CHAMELEON_Ipiv_Create().
+ *          On exit, it contains the pivot indices associated to the PLU
+ *          factorization of A.
+ *
  *******************************************************************************
  *
  * @retval CHAMELEON_SUCCESS successful exit
- * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
- *               but the factor U is exactly singular, and division by zero will occur
- *               if it is used to solve a system of equations.
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been
+ *               completed, but the factor U is exactly singular, and division
+ *               by zero will occur if it is used to solve a system of
+ *               equations.
  *
  *******************************************************************************
  *
@@ -273,7 +308,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
  *
  */
 int
-CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
+CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV )
 {
     CHAM_context_t     *chamctxt;
     RUNTIME_sequence_t *sequence = NULL;
@@ -317,9 +352,10 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
  *          On exit, the tile factors L and U from the factorization.
  *
  * @param[in,out] IPIV
- *          On entry, the descriptor of an min(M, N)-by-1 matrix that may not
- *          have been initialized.
- *          On exit, the pivot vector generated during the factorization.
+ *          On entry, ipiv descriptor associated to A and created with
+ *          CHAMELEON_Ipiv_Create().
+ *          On exit, it contains the pivot indices associated to the PLU
+ *          factorization of A.
  *
  * @param[in,out] user_ws
  *          The opaque pointer to pre-allocated getrf workspace through
@@ -346,7 +382,7 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
  */
 int
 CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
-                             CHAM_desc_t        *IPIV,
+                             CHAM_ipiv_t        *IPIV,
                              void               *user_ws,
                              RUNTIME_sequence_t *sequence,
                              RUNTIME_request_t  *request )
@@ -384,10 +420,6 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
         chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid first descriptor" );
         return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
     }
-    if ( chameleon_desc_check( IPIV ) != CHAMELEON_SUCCESS ) {
-        chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid second descriptor" );
-        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
-    }
 
     /* Check input arguments */
     if ( A->nb != A->mb ) {
@@ -398,10 +430,6 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
         chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must have the number of rows as tiles of A" );
         return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
     }
-    if ( IPIV->nb != 1 ) {
-        chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must be vectore with only one column per tile" );
-        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
-    }
 
     if ( user_ws == NULL ) {
         ws = CHAMELEON_zgetrf_WS_Alloc( A );
diff --git a/control/compute_z.h b/control/compute_z.h
index 06eae17b508c012918bbd011bad9cbb25a7bb7d4..9032c20f24666d4e751b1e422e9afd07b41d047f 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -22,7 +22,7 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-06
+ * @date 2023-08-22
  * @precisions normal z -> c d s
  *
  */
@@ -94,7 +94,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc
 void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_desc_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index fa5f069e6057cb0f79eb47a7f0e2bbc38777a14b..2362d8b1a8ff0b1e8b4ff96b26a795d71f25761d 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -23,7 +23,7 @@
  * @author Florent Pruvost
  * @author Alycia Lisito
  * @author Matthieu Kuhn
- * @date 2023-07-06
+ * @date 2023-08-22
  * @precisions normal z -> c d s
  *
  */
@@ -135,7 +135,7 @@ int CHAMELEON_zgesvd_Tile(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, dou
 //int CHAMELEON_zgetrf_Tile(CHAM_desc_t *A, int *IPIV);
 int CHAMELEON_zgetrf_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV);
 int CHAMELEON_zgetrf_nopiv_Tile(CHAM_desc_t *A);
-int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV );
+int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV );
 //int CHAMELEON_zgetri_Tile(CHAM_desc_t *A, int *IPIV);
 //int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B);
 int CHAMELEON_zgetrs_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B);
@@ -216,7 +216,7 @@ int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *
 //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrs_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);