diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h
index a8aaaef56a42b2dbaa25664d86022c51c2f4cd09..2655ef34669642404a79a566db7f0020323f9c65 100644
--- a/include/chameleon/runtime.h
+++ b/include/chameleon/runtime.h
@@ -18,7 +18,7 @@
  * @author Samuel Thibault
  * @author Philippe Swartvagher
  * @author Matthieu Kuhn
- * @date 2023-08-22
+ * @date 2023-08-31
  *
  */
 #ifndef _chameleon_runtime_h_
@@ -710,9 +710,11 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv );
 void RUNTIME_ipiv_init   ( CHAM_ipiv_t *ipiv );
 void RUNTIME_ipiv_gather ( CHAM_ipiv_t *desc, int *ipiv, int node );
 
-void *RUNTIME_ipiv_getaddr   ( CHAM_ipiv_t *ipiv, int m );
-void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h );
-void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h );
+void *RUNTIME_ipiv_getaddr   ( const CHAM_ipiv_t *ipiv, int m );
+void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h );
+void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h );
+void *RUNTIME_perm_getaddr   ( const CHAM_ipiv_t *ipiv, int m );
+void *RUNTIME_invp_getaddr   ( const CHAM_ipiv_t *ipiv, int m );
 
 static inline void *
 RUNTIME_pivot_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) {
@@ -730,6 +732,8 @@ void RUNTIME_ipiv_flush  ( const CHAM_ipiv_t *ipiv,
                            const RUNTIME_sequence_t *sequence );
 void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options,
                            CHAM_ipiv_t *ws, int k, int h );
+void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m );
 
 /**
  * @}
diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h
index d7dd07f48dc6fad1cff9359bf3410defdd657357..e995dcbe18321d0b9bfef2c375318cee8c336b1b 100644
--- a/include/chameleon/struct.h
+++ b/include/chameleon/struct.h
@@ -19,7 +19,7 @@
  * @author Samuel Thibault
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2023-08-22
+ * @date 2023-08-31
  *
  */
 #ifndef _chameleon_struct_h_
@@ -143,13 +143,17 @@ struct chameleon_desc_s {
 typedef struct chameleon_piv_s {
     const CHAM_desc_t *desc;   /**> Reference descriptor to compute data mapping based on diagonal tiles,
                               and get floating reference type                                        */
-    int    *data;        /**> Pointer to the data                                                    */
-    void   *ipiv;        /**> Opaque array of pointers for the runtimes to handle the ipiv array     */
-    void   *nextpiv;     /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */
-    void   *prevpiv;     /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */
+    int    *data;    /**> Pointer to the data                                                    */
+    void   *ipiv;    /**> Opaque array of pointers for the runtimes to handle the ipiv array     */
+    void   *nextpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */
+    void   *prevpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */
+    void   *perm;    /**> Opaque array of pointers for the runtimes to handle the temporary permutation array */
+    void   *invp;    /**> Opaque array of pointers for the runtimes to handle the temporary inverse permutation array */
     int64_t mpitag_ipiv;    /**> Initial mpi tag values for the ipiv handles    */
     int64_t mpitag_nextpiv; /**> Initial mpi tag values for the nextpiv handles */
     int64_t mpitag_prevpiv; /**> Initial mpi tag values for the prevpiv handles */
+    int64_t mpitag_perm;    /**> Initial mpi tag values for the perm handles    */
+    int64_t mpitag_invp;    /**> Initial mpi tag values for the invp handles    */
     int     i;              /**> row index to the beginning of the submatrix    */
     int     m;              /**> The number of row in the vector ipiv           */
     int     mb;             /**> The number of row per block                    */
diff --git a/runtime/openmp/control/runtime_descriptor_ipiv.c b/runtime/openmp/control/runtime_descriptor_ipiv.c
index 03886ca650340279207c8163bc30eac81f4a1054..f10c4156d83f3e50d4b523f3942b0757475b913f 100644
--- a/runtime/openmp/control/runtime_descriptor_ipiv.c
+++ b/runtime/openmp/control/runtime_descriptor_ipiv.c
@@ -12,7 +12,7 @@
  * @version 1.3.0
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
- * @date 2023-08-22
+ * @date 2023-08-31
  *
  */
 #include "chameleon_openmp.h"
@@ -29,7 +29,7 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
     (void)ipiv;
 }
 
-void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
+void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
 {
     assert( 0 );
     (void)ipiv;
@@ -37,7 +37,7 @@ void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
     return NULL;
 }
 
-void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h )
 {
     assert( 0 );
     (void)ipiv;
@@ -46,7 +46,7 @@ void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
     return NULL;
 }
 
-void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h )
 {
     assert( 0 );
     (void)ipiv;
@@ -55,6 +55,22 @@ void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
     return NULL;
 }
 
+void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int k )
+{
+    assert( 0 );
+    (void)ipiv;
+    (void)k;
+    return NULL;
+}
+
+void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k )
+{
+    assert( 0 );
+    (void)ipiv;
+    (void)k;
+    return NULL;
+}
+
 void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m )
 {
@@ -72,6 +88,15 @@ void RUNTIME_ipiv_flush( const CHAM_ipiv_t        *ipiv,
     (void)sequence;
 }
 
+void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 );
+    (void)sequence;
+    (void)ipiv;
+    (void)m;
+}
+
 void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options,
                            CHAM_ipiv_t *ipiv, int k, int h )
 {
diff --git a/runtime/parsec/control/runtime_descriptor_ipiv.c b/runtime/parsec/control/runtime_descriptor_ipiv.c
index 04a0b791139d5c6a247b25630e126d4a3eb467bf..fefb42abf9aaa65f98e2959bf09ca24779c95a7d 100644
--- a/runtime/parsec/control/runtime_descriptor_ipiv.c
+++ b/runtime/parsec/control/runtime_descriptor_ipiv.c
@@ -12,7 +12,7 @@
  * @version 1.3.0
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
- * @date 2023-08-22
+ * @date 2023-08-31
  *
  */
 #include "chameleon_parsec.h"
@@ -29,7 +29,7 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
     (void)ipiv;
 }
 
-void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
+void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
 {
     assert( 0 );
     (void)ipiv;
@@ -37,7 +37,7 @@ void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
     return NULL;
 }
 
-void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h )
 {
     assert( 0 );
     (void)ipiv;
@@ -46,7 +46,7 @@ void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
     return NULL;
 }
 
-void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h )
 {
     assert( 0 );
     (void)ipiv;
@@ -55,6 +55,22 @@ void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
     return NULL;
 }
 
+void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int k )
+{
+    assert( 0 );
+    (void)ipiv;
+    (void)k;
+    return NULL;
+}
+
+void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k )
+{
+    assert( 0 );
+    (void)ipiv;
+    (void)k;
+    return NULL;
+}
+
 void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m )
 {
@@ -72,6 +88,15 @@ void RUNTIME_ipiv_flush( const CHAM_ipiv_t        *ipiv,
     (void)sequence;
 }
 
+void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 );
+    (void)sequence;
+    (void)ipiv;
+    (void)m;
+}
+
 void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options,
                            CHAM_ipiv_t *ipiv, int k, int h )
 {
diff --git a/runtime/quark/control/runtime_descriptor_ipiv.c b/runtime/quark/control/runtime_descriptor_ipiv.c
index 34706a55518f95f0e4b229a772534e3f062d05d2..88e8f886e8578f99e066868e6dfb2880fc4035d0 100644
--- a/runtime/quark/control/runtime_descriptor_ipiv.c
+++ b/runtime/quark/control/runtime_descriptor_ipiv.c
@@ -12,7 +12,7 @@
  * @version 1.3.0
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
- * @date 2023-08-22
+ * @date 2023-08-31
  *
  */
 #include "chameleon_quark.h"
@@ -29,7 +29,7 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
     (void)ipiv;
 }
 
-void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
+void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
 {
     assert( 0 );
     (void)ipiv;
@@ -37,7 +37,7 @@ void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
     return NULL;
 }
 
-void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h )
 {
     assert( 0 );
     (void)ipiv;
@@ -46,7 +46,7 @@ void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
     return NULL;
 }
 
-void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h )
 {
     assert( 0 );
     (void)ipiv;
@@ -55,6 +55,22 @@ void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
     return NULL;
 }
 
+void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int k )
+{
+    assert( 0 );
+    (void)ipiv;
+    (void)k;
+    return NULL;
+}
+
+void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k )
+{
+    assert( 0 );
+    (void)ipiv;
+    (void)k;
+    return NULL;
+}
+
 void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m )
 {
@@ -72,6 +88,15 @@ void RUNTIME_ipiv_flush( const CHAM_ipiv_t        *ipiv,
     (void)sequence;
 }
 
+void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 );
+    (void)sequence;
+    (void)ipiv;
+    (void)m;
+}
+
 void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options,
                            CHAM_ipiv_t *ipiv, int k, int h )
 {
diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c
index ffe3d1e47a24dbfa3e9986a477df07a95c2cdf32..69f09cb0ae291365ee9aafae8e579662c9ffd387 100644
--- a/runtime/starpu/control/runtime_descriptor_ipiv.c
+++ b/runtime/starpu/control/runtime_descriptor_ipiv.c
@@ -12,7 +12,7 @@
  * @version 1.3.0
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
- * @date 2023-08-22
+ * @date 2023-08-31
  *
  */
 #include "chameleon_starpu.h"
@@ -23,10 +23,16 @@
 void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
 {
     assert( ipiv );
-
-    ipiv->ipiv    = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) );
-    ipiv->nextpiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) );
-    ipiv->prevpiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) );
+    starpu_data_handle_t *handles = calloc( 5 * ipiv->mt, sizeof(starpu_data_handle_t) );
+    ipiv->ipiv    = handles;
+    handles += ipiv->mt;
+    ipiv->nextpiv = handles;
+    handles += ipiv->mt;
+    ipiv->prevpiv = handles;
+    handles += ipiv->mt;
+    ipiv->perm    = handles;
+    handles += ipiv->mt;
+    ipiv->invp    = handles;
 #if defined(CHAMELEON_USE_MPI)
     /*
      * Book the number of tags required to describe pivot structure
@@ -34,13 +40,15 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
      */
     {
         chameleon_starpu_tag_init();
-        ipiv->mpitag_ipiv = chameleon_starpu_tag_book( (int64_t)(ipiv->mt) * 3 );
+        ipiv->mpitag_ipiv = chameleon_starpu_tag_book( (int64_t)(ipiv->mt) * 5 );
         if ( ipiv->mpitag_ipiv == -1 ) {
             chameleon_fatal_error("RUNTIME_ipiv_create", "Can't pursue computation since no more tags are available for ipiv structure");
             return;
         }
         ipiv->mpitag_nextpiv = ipiv->mpitag_ipiv    + ipiv->mt;
         ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + ipiv->mt;
+        ipiv->mpitag_perm    = ipiv->mpitag_prevpiv + ipiv->mt;
+        ipiv->mpitag_invp    = ipiv->mpitag_perm    + ipiv->mt;
     }
 #endif
 }
@@ -51,37 +59,26 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
 void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
 {
     int                   i;
-    starpu_data_handle_t *ipiv_handle    = (starpu_data_handle_t*)(ipiv->ipiv);
-    starpu_data_handle_t *nextpiv_handle = (starpu_data_handle_t*)(ipiv->nextpiv);
-    starpu_data_handle_t *prevpiv_handle = (starpu_data_handle_t*)(ipiv->prevpiv);
-
-    for(i=0; i<ipiv->mt; i++) {
-        if ( *ipiv_handle != NULL ) {
-            starpu_data_unregister( *ipiv_handle );
-            *ipiv_handle = NULL;
-        }
-        ipiv_handle++;
-
-        if ( *nextpiv_handle != NULL ) {
-            starpu_data_unregister( *nextpiv_handle );
-            *nextpiv_handle = NULL;
-        }
-        nextpiv_handle++;
+    starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv);
 
-        if ( *prevpiv_handle != NULL ) {
-            starpu_data_unregister( *prevpiv_handle );
-            *prevpiv_handle = NULL;
+    for(i=0; i<(5 * ipiv->mt); i++) {
+        if ( *handle != NULL ) {
+            starpu_data_unregister( *handle );
+            *handle = NULL;
         }
-        prevpiv_handle++;
+        handle++;
     }
 
     free( ipiv->ipiv    );
-    free( ipiv->nextpiv );
-    free( ipiv->prevpiv );
+    ipiv->ipiv    = NULL;
+    ipiv->nextpiv = NULL;
+    ipiv->prevpiv = NULL;
+    ipiv->perm    = NULL;
+    ipiv->invp    = NULL;
     chameleon_starpu_tag_release( ipiv->mpitag_ipiv );
 }
 
-void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
+void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
 {
     starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv);
     int64_t mm = m + (ipiv->i / ipiv->mb);
@@ -110,7 +107,7 @@ void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m )
     return *handle;
 }
 
-void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h )
 {
     starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(ipiv->nextpiv);
     int64_t mm = m + (ipiv->i / ipiv->mb);
@@ -133,7 +130,7 @@ void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
     return *nextpiv;
 }
 
-void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
+void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h )
 {
     starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(ipiv->prevpiv);
     int64_t mm = m + (ipiv->i / ipiv->mb);
@@ -156,6 +153,64 @@ void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h )
     return *prevpiv;
 }
 
+void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int m )
+{
+    starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->perm);
+    int64_t mm = m + (ipiv->i / ipiv->mb);
+
+    handle += mm;
+    assert( handle );
+
+    if ( *handle != NULL ) {
+        return *handle;
+    }
+
+    const CHAM_desc_t *A = ipiv->desc;
+    int owner = A->get_rankof( A, m, m );
+    int ncols = ipiv->mb;
+
+    starpu_vector_data_register( handle, -1, (uintptr_t)NULL, ncols, sizeof(int) );
+
+#if defined(CHAMELEON_USE_MPI)
+    {
+        int64_t tag = ipiv->mpitag_perm + mm;
+        starpu_mpi_data_register( *handle, tag, owner );
+    }
+#endif /* defined(CHAMELEON_USE_MPI) */
+
+    assert( *handle );
+    return *handle;
+}
+
+void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int m )
+{
+    starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->invp);
+    int64_t mm = m + (ipiv->i / ipiv->mb);
+
+    handle += mm;
+    assert( handle );
+
+    if ( *handle != NULL ) {
+        return *handle;
+    }
+
+    const CHAM_desc_t *A = ipiv->desc;
+    int owner = A->get_rankof( A, m, m );
+    int ncols = ipiv->mb;
+
+    starpu_vector_data_register( handle, -1, (uintptr_t)NULL, ncols, sizeof(int) );
+
+#if defined(CHAMELEON_USE_MPI)
+    {
+        int64_t tag = ipiv->mpitag_invp + mm;
+        starpu_mpi_data_register( *handle, tag, owner );
+    }
+#endif /* defined(CHAMELEON_USE_MPI) */
+
+    assert( *handle );
+    return *handle;
+}
+
 void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m )
 {
@@ -205,6 +260,44 @@ void RUNTIME_ipiv_flush( const CHAM_ipiv_t        *ipiv,
     }
 }
 
+void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    starpu_data_handle_t *handle;
+    const CHAM_desc_t *A = ipiv->desc;
+    int64_t mm = m + ( ipiv->i / ipiv->mb );
+
+    handle = (starpu_data_handle_t*)(ipiv->perm);
+    handle += mm;
+
+    if ( *handle != NULL ) {
+#if defined(CHAMELEON_USE_MPI)
+        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
+#endif
+        {
+            chameleon_starpu_data_wont_use( *handle );
+        }
+    }
+
+    handle = (starpu_data_handle_t*)(ipiv->invp);
+    handle += mm;
+
+    if ( *handle != NULL ) {
+#if defined(CHAMELEON_USE_MPI)
+        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
+#endif
+        {
+            chameleon_starpu_data_wont_use( *handle );
+        }
+    }
+
+    (void)sequence;
+    (void)ipiv;
+    (void)m;
+}
+
 void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options,
                            CHAM_ipiv_t *ipiv, int k, int h )
 {