diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 240d0a49886efebacc91ec7c62e57644ef465e4e..ee148020a6833c8ad44a19f4e9f51c708d4af1d2 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -24,11 +24,11 @@
  */
 #include "control/common.h"
 
-#define A(m,n)   A,         m, n
-#define U(m,n)   &(ws->U),  m, n
-#define Up(m,n)  &(ws->Up), m, n
-#define Wu(m,n)  &(ws->Wu), m, n
-#define Wl(m,n)  &(ws->Wl), m, n
+#define A(m,n)   A,               m, n
+#define U(m,n)   &(ws->U),        m, n
+#define Up(m,n)  &(ws->Up),       m, n
+#define Wu(m,n)  &(ws->laswp->W), m, n
+#define Wl(m,n)  &(ws->Wl),       m, n
 
 /*
  * All the functions below are panel factorization variant.
@@ -118,6 +118,7 @@ static inline void
 chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
                                       CHAM_desc_t                *A,
                                       CHAM_ipiv_t                *ipiv,
+                                      CHAM_desc_pivot_t          *pivot,
                                       int                         k,
                                       RUNTIME_option_t           *options )
 {
@@ -129,7 +130,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
     minmn  = chameleon_min( tempkm, tempkn );
 
     /* Update the number of column */
-    ipiv->n = minmn;
+    pivot->n = minmn;
 
     /*
      * Algorithm per column with pivoting
@@ -139,7 +140,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
             options,
             tempkm, tempkn, h, k * A->mb,
             A(k, k),
-            ipiv );
+            ipiv, pivot );
 
         for (m = k+1; m < A->mt; m++) {
             tempmm = A->get_blkdim( A, m, DIM_m, A->m );
@@ -147,16 +148,16 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
                 options,
                 tempmm, tempkn, h, m * A->mb,
                 A(m, k),
-                ipiv );
+                pivot );
         }
 
         /* Reduce globally (between MPI processes) */
-        INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, h, tempkn, ws );
+        INSERT_TASK_zipiv_allreduce( options, A, pivot, k, h, tempkn, ws->laswp );
     }
 
     /* Flush temporary data used for the pivoting */
     INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k );
-    RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank );
+    RUNTIME_pivot_flushk( options->sequence, pivot, A->myrank );
 }
 
 /*
@@ -166,6 +167,7 @@ static inline void
 chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws,
                                               CHAM_desc_t                *A,
                                               CHAM_ipiv_t                *ipiv,
+                                              CHAM_desc_pivot_t          *pivot,
                                               int                         k,
                                               RUNTIME_option_t           *options )
 {
@@ -179,7 +181,7 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws,
     minmn  = chameleon_min( tempkm, tempkn );
 
     /* Update the number of column */
-    ipiv->n = minmn;
+    pivot->n = minmn;
 
     /*
      * Algorithm per column with pivoting (no recursion)
@@ -188,29 +190,30 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws,
     /* Since index h scales column h-1, we need to iterate up to minmn (included) */
     for ( h = 0; h <= minmn; h++ ) {
 
-        INSERT_TASK_zgetrf_percol_diag( options, tempkm, tempkn, h, k * A->mb, A(k, k), ipiv );
+        INSERT_TASK_zgetrf_percol_diag( options, tempkm, tempkn, h, k * A->mb, A(k, k), ipiv, pivot );
 
         for ( m = k+1; m < A->mt; m++ ) {
             tempmm = A->get_blkdim( A, m, DIM_m, A->m );
             INSERT_TASK_zgetrf_panel_offdiag_batched( options, tempmm, tempkn, h, m * A->mb,
-                                                      (void *)ws, A(m, k), clargs, ipiv );
+                                                      (void *)ws, A(m, k), clargs, pivot );
         }
-        INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, k, clargs, ipiv );
+        INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, k, clargs, pivot );
 
-        INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, h, tempkn, ws );
+        INSERT_TASK_zipiv_allreduce( options, A, pivot, k, h, tempkn, ws->laswp );
     }
 
     free( clargs );
 
     /* Flush temporary data used for the pivoting */
     INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k );
-    RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank );
+    RUNTIME_pivot_flushk( options->sequence, pivot, A->myrank );
 }
 
 static inline void
 chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
                                        CHAM_desc_t                *A,
                                        CHAM_ipiv_t                *ipiv,
+                                       CHAM_desc_pivot_t          *pivot,
                                        int                         k,
                                        RUNTIME_option_t           *options )
 {
@@ -222,7 +225,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
     minmn  = chameleon_min( tempkm, tempkn );
 
     /* Update the number of column */
-    ipiv->n = minmn;
+    pivot->n = minmn;
     nbblock = chameleon_ceil( minmn, ws->ib );
 
     /*
@@ -238,7 +241,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
                 options,
                 tempkm, tempkn, j, k * A->mb, ws->ib,
                 A(k, k), Up(k, k),
-                ipiv );
+                ipiv, pivot );
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = A->get_blkdim( A, m, DIM_m, A->m );
@@ -246,19 +249,19 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
                     options,
                     tempmm, tempkn, j, m * A->mb, ws->ib,
                     A(m, k), Up(k, k),
-                    ipiv );
+                    pivot );
             }
 
             assert( j <= minmn );
             /* Reduce globally (between MPI processes) */
-            INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, j, tempkn, ws );
+            INSERT_TASK_zipiv_allreduce( options, A, pivot, k, j, tempkn, ws->laswp );
 
             if ( ( b < (nbblock-1) ) && ( h == hmax-1 ) ) {
                 INSERT_TASK_zgetrf_blocked_trsm(
                     options,
                     ws->ib, tempkn, j+1, ws->ib,
                     Up(k, k),
-                    ipiv );
+                    pivot );
             }
         }
     }
@@ -266,7 +269,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
 
     /* Flush temporary data used for the pivoting */
     INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k );
-    RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank );
+    RUNTIME_pivot_flushk( options->sequence, pivot, A->myrank );
 }
 
 /*
@@ -276,6 +279,7 @@ static inline void
 chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws,
                                                CHAM_desc_t                *A,
                                                CHAM_ipiv_t                *ipiv,
+                                               CHAM_desc_pivot_t          *pivot,
                                                int                         k,
                                                RUNTIME_option_t           *options )
 {
@@ -289,7 +293,7 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws,
     minmn  = chameleon_min( tempkm, tempkn );
 
     /* Update the number of column */
-    ipiv->n = minmn;
+    pivot->n = minmn;
     nbblock = chameleon_ceil( minmn, ws->ib );
 
     /*
@@ -306,21 +310,21 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws,
             for ( m = k; m < A->mt; m++ ) {
                 tempmm = A->get_blkdim( A, m, DIM_m, A->m );
                 INSERT_TASK_zgetrf_panel_blocked_batched( options, tempmm, tempkn, j, m * A->mb,
-                                                          (void *)ws, A(m, k), Up(k, k), clargs, ipiv );
+                                                          (void *)ws, A(m, k), Up(k, k), clargs, ipiv, pivot );
             }
             INSERT_TASK_zgetrf_panel_blocked_batched_flush( options, A, k,
-                                                            Up(k, k), clargs, ipiv );
+                                                            Up(k, k), clargs, ipiv, pivot );
 
             assert( j <= minmn );
             /* Reduce globally (between MPI processes) */
-            INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, j, tempkn, ws );
+            INSERT_TASK_zipiv_allreduce( options, A, pivot, k, j, tempkn, ws->laswp );
 
             if ( (b < (nbblock-1)) && (h == hmax-1) ) {
                 INSERT_TASK_zgetrf_blocked_trsm(
                     options,
                     ws->ib, tempkn, b * ws->ib + hmax, ws->ib,
                     Up(k, k),
-                    ipiv );
+                    pivot );
             }
         }
     }
@@ -329,19 +333,21 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws,
 
     /* Flush temporary data used for the pivoting */
     INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k );
-    RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank );
+    RUNTIME_pivot_flushk( options->sequence, pivot, A->myrank );
 }
 
 static inline void
 chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
                                CHAM_desc_t                *A,
                                CHAM_ipiv_t                *ipiv,
+                               CHAM_desc_pivot_t          *pivot,
                                int                         k,
                                RUNTIME_option_t           *options )
 {
 #if defined(CHAMELEON_USE_MPI)
-    chameleon_get_proc_involved_in_panelk_2dbc( A, k, k, ws );
-    if ( !ws->involved ) {
+    CHAM_reduce_t *reduce = &(ws->laswp->reduce);
+    chameleon_get_proc_involved_in_panelk_2dbc( A, k, k, reduce );
+    if ( !reduce->involved ) {
         return;
     }
 #endif
@@ -354,19 +360,19 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
 
     case ChamGetrfPPivPerColumn:
         if ( ws->batch_size_blas2 > 0 ) {
-            chameleon_pzgetrf_panel_facto_percol_batched( ws, A, ipiv, k, options );
+            chameleon_pzgetrf_panel_facto_percol_batched( ws, A, ipiv, pivot, k, options );
         }
         else {
-            chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options );
+            chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, pivot, k, options );
         }
         break;
 
     case ChamGetrfPPiv:
         if ( ws->batch_size_blas2 > 0 ) {
-            chameleon_pzgetrf_panel_facto_blocked_batched( ws, A, ipiv, k, options );
+            chameleon_pzgetrf_panel_facto_blocked_batched( ws, A, ipiv, pivot, k, options );
         }
         else {
-            chameleon_pzgetrf_panel_facto_blocked( ws, A, ipiv, k, options );
+            chameleon_pzgetrf_panel_facto_blocked( ws, A, ipiv, pivot, k, options );
         }
         break;
 
@@ -423,7 +429,7 @@ chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws,
                                     ipiv, k, A(k, n), A(m, n) );
         }
 
-        INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws );
+        INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws->laswp );
     }
     break;
     default:
@@ -475,7 +481,7 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws,
         }
         INSERT_TASK_zlaswp_batched_flush( options, ipiv, k, A(k, n), Wu(A->myrank, n), clargs );
 
-        INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws );
+        INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws->laswp );
 
         free( clargs );
     }
@@ -494,16 +500,17 @@ chameleon_pzgetrf_panel_permute_forward( struct chameleon_pzgetrf_s *ws,
                                          RUNTIME_option_t           *options )
 {
 #if defined(CHAMELEON_USE_MPI)
-    chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
+    CHAM_reduce_t *reduce = &(ws->laswp->reduce);
+    chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, reduce );
     if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
-        INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved );
         INSERT_TASK_zperm_allreduce_send_invp_row( options, ChamDirForward, ipiv, k, A, k, n );
     }
     if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
-        INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, reduce->np_involved, reduce->proc_involved );
     }
 
-    if ( !ws->involved ) {
+    if ( !reduce->involved ) {
         return;
     }
 #endif
@@ -525,19 +532,20 @@ chameleon_pzgetrf_panel_permute_backward( struct chameleon_pzgetrf_s *ws,
                                           RUNTIME_option_t           *options,
                                           RUNTIME_sequence_t         *sequence )
 {
-    int tempkm, tempnn;
+    CHAM_reduce_t *reduce = &(ws->laswp->reduce);
+    int            tempkm, tempnn;
 
 #if defined(CHAMELEON_USE_MPI)
-    chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
+    chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, reduce );
     if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
-        INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved );
         INSERT_TASK_zperm_allreduce_send_invp_row( options, ChamDirForward, ipiv, k, A, k, n );
     }
     if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
-        INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, reduce->np_involved, reduce->proc_involved );
     }
 
-    if ( !ws->involved ) {
+    if ( !reduce->involved ) {
         return;
     }
 #endif
@@ -557,6 +565,7 @@ chameleon_pzgetrf_panel_permute_backward( struct chameleon_pzgetrf_s *ws,
                             Wu(A->myrank, n), A(k, n) );
         RUNTIME_data_flush( sequence, A(k, n) );
     }
+    (void)reduce;
 }
 
 static inline void
@@ -666,7 +675,8 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
 {
     const CHAMELEON_Complex64_t zone  = (CHAMELEON_Complex64_t) 1.0;
     const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
-    CHAM_context_t  *chamctxt = chameleon_context_self();
+    CHAM_context_t             *chamctxt = chameleon_context_self();
+    CHAM_reduce_t              *reduce   = &(ws->laswp->reduce);
 
     int m, tempkm, tempmm, tempnn, rankAmn;
 
@@ -680,7 +690,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
     chameleon_pzgetrf_panel_permute_forward( ws, A, ipiv, k, n, options );
 
 #if defined(CHAMELEON_USE_MPI)
-    if ( ws->involved )
+    if ( reduce->involved )
 #endif
     {
         INSERT_TASK_ztrsm(
@@ -713,6 +723,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
 
     RUNTIME_data_flush( options->sequence, Wu(A->myrank, n) );
     RUNTIME_data_flush( options->sequence, A(k, n) );
+    (void)reduce;
 }
 
 /**
@@ -724,8 +735,9 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
                         RUNTIME_sequence_t         *sequence,
                         RUNTIME_request_t          *request )
 {
-    CHAM_context_t  *chamctxt;
-    RUNTIME_option_t options;
+    CHAM_context_t    *chamctxt;
+    RUNTIME_option_t   options;
+    CHAM_desc_pivot_t  pivot;
 
     int k, m, n;
     int min_mnt = chameleon_min( A->mt, A->nt );
@@ -736,6 +748,8 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
     }
     RUNTIME_options_init( &options, chamctxt, sequence, request );
 
+    chameleon_pivot_init( &pivot, A );
+
     for (k = 0; k < min_mnt; k++) {
         RUNTIME_iteration_push( chamctxt, k );
 
@@ -746,7 +760,7 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
          */
         options.forcesub = chameleon_involved_in_panelk_2dbc( A, k );
         if ( chameleon_involved_in_panelk_2dbc( A, k ) ) {
-            chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options );
+            chameleon_pzgetrf_panel_facto( ws, A, IPIV, &pivot, k, &options );
         }
         options.forcesub = 0;
 
@@ -770,6 +784,8 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
         RUNTIME_iteration_pop( chamctxt );
     }
     CHAMELEON_Desc_Flush( &(ws->Wl), sequence );
+    CHAMELEON_Ipiv_Flush( IPIV, sequence );
+    chameleon_pivot_destroy( &pivot );
 
     /* Backward pivoting */
     for (k = 1; k < min_mnt; k++) {
@@ -783,7 +799,7 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
         }
         RUNTIME_perm_flushk( sequence, IPIV, k );
     }
-    CHAMELEON_Desc_Flush( &(ws->Wu), sequence );
+    CHAMELEON_Desc_Flush( &(ws->laswp->W), sequence );
 
     /* Initialize IPIV with default values if needed */
     if ( (ws->alg == ChamGetrfNoPivPerColumn) ||
diff --git a/compute/pzlaswp.c b/compute/pzlaswp.c
index 85f384fecd37b8528b400602d7549c7b906f6332..2afedfd5a0bf6f0bf1fb38567f4043fcfdc7b5ea 100644
--- a/compute/pzlaswp.c
+++ b/compute/pzlaswp.c
@@ -20,14 +20,14 @@
  */
 #include "control/common.h"
 
-#define A(m,n)   A,         m, n
-#define Wu(m,n)  &(ws->Wu), m, n
+#define A(m,n)   A,        m, n
+#define Wu(m,n)  &(ws->W), m, n
 
 /**
  *  Permutation of the panel n at step k
  */
 static inline void
-chameleon_pzlaswp_panel_permute( struct chameleon_pzgetrf_s *ws,
+chameleon_pzlaswp_panel_permute( struct chameleon_pzlaswp_s *ws,
                                  cham_dir_t                  dir,
                                  CHAM_desc_t                *A,
                                  CHAM_ipiv_t                *ipiv,
@@ -35,9 +35,9 @@ chameleon_pzlaswp_panel_permute( struct chameleon_pzgetrf_s *ws,
                                  int                         n,
                                  RUNTIME_option_t           *options )
 {
-    int m;
-    int tempkm, tempnn;
-    int withlacpy;
+    int                        m;
+    int                        tempkm, tempnn;
+    int                        withlacpy;
 
     tempkm = A->get_blkdim( A, k, DIM_m, A->m );
     tempnn = A->get_blkdim( A, n, DIM_n, A->n );
@@ -65,7 +65,7 @@ chameleon_pzlaswp_panel_permute( struct chameleon_pzgetrf_s *ws,
 }
 
 static inline void
-chameleon_pzlaswp_panel( struct chameleon_pzgetrf_s *ws,
+chameleon_pzlaswp_panel( struct chameleon_pzlaswp_s *ws,
                          cham_dir_t                  dir,
                          CHAM_desc_t                *A,
                          CHAM_ipiv_t                *ipiv,
@@ -74,19 +74,20 @@ chameleon_pzlaswp_panel( struct chameleon_pzgetrf_s *ws,
                          RUNTIME_option_t           *options,
                          RUNTIME_sequence_t         *sequence )
 {
-    int tempkm, tempnn;
+    CHAM_reduce_t *reduce = &(ws->reduce);
+    int            tempkm, tempnn;
 
 #if defined(CHAMELEON_USE_MPI)
-    chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
-    if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
-        INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
+    chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, reduce );
+    if ( A->myrank == ipiv->get_rankof( ipiv, k, k ) ) {
+        INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved );
         INSERT_TASK_zperm_allreduce_send_invp_row( options, dir, ipiv, k, A, k, n );
     }
     if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
-        INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, reduce->np_involved, reduce->proc_involved );
     }
 
-    if ( !ws->involved ) {
+    if ( !reduce->involved ) {
         return;
     }
 #endif
@@ -101,10 +102,11 @@ chameleon_pzlaswp_panel( struct chameleon_pzgetrf_s *ws,
                             Wu(A->myrank, n), A(k, n) );
         RUNTIME_data_flush( sequence, A(k, n) );
     }
+    (void)reduce;
 }
 
 void
-chameleon_pzlaswp( struct chameleon_pzgetrf_s *ws,
+chameleon_pzlaswp( struct chameleon_pzlaswp_s *ws,
                    cham_dir_t                  dir,
                    CHAM_desc_t                *A,
                    CHAM_ipiv_t                *IPIV,
diff --git a/compute/pzlaswpc.c b/compute/pzlaswpc.c
index 3ba7c37a4240d1f3f164f8647ff9bcf3c9336726..a3328070bdf6717ed4614f42f9493f8ffdcb36db 100644
--- a/compute/pzlaswpc.c
+++ b/compute/pzlaswpc.c
@@ -18,14 +18,14 @@
  */
 #include "control/common.h"
 
-#define A(m,n)   A,         m, n
-#define Wc(m,n)  &(ws->Wc), m, n
+#define A(m,n)   A,        m, n
+#define Wc(m,n)  &(ws->W), m, n
 
 /**
  *  Permutation of the panel n at step k
  */
 static inline void
-chameleon_pzlaswpc_panel_permute( struct chameleon_pzgetrf_s *ws,
+chameleon_pzlaswpc_panel_permute( struct chameleon_pzlaswp_s *ws,
                                   cham_dir_t                  dir,
                                   CHAM_desc_t                *A,
                                   CHAM_ipiv_t                *ipiv,
@@ -33,9 +33,9 @@ chameleon_pzlaswpc_panel_permute( struct chameleon_pzgetrf_s *ws,
                                   int                         k,
                                   RUNTIME_option_t           *options )
 {
-    int n;
-    int tempkn, tempmm;
-    int withlacpy;
+    int                        n;
+    int                        tempkn, tempmm;
+    int                        withlacpy;
 
     tempkn = A->get_blkdim( A, k, DIM_n, A->n );
     tempmm = A->get_blkdim( A, m, DIM_m, A->m );
@@ -63,7 +63,7 @@ chameleon_pzlaswpc_panel_permute( struct chameleon_pzgetrf_s *ws,
 }
 
 static inline void
-chameleon_pzlaswpc_panel( struct chameleon_pzgetrf_s *ws,
+chameleon_pzlaswpc_panel( struct chameleon_pzlaswp_s *ws,
                           cham_dir_t                  dir,
                           CHAM_desc_t                *A,
                           CHAM_ipiv_t                *ipiv,
@@ -72,19 +72,20 @@ chameleon_pzlaswpc_panel( struct chameleon_pzgetrf_s *ws,
                           RUNTIME_option_t           *options,
                           RUNTIME_sequence_t         *sequence )
 {
-    int tempkn, tempmm;
+    CHAM_reduce_t *reduce = &(ws->reduce);
+    int            tempkn, tempmm;
 
 #if defined(CHAMELEON_USE_MPI)
-    chameleon_get_proc_involved_in_rowpanelk_2dbc( A, m, k, ws );
-    if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
-        INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
+    chameleon_get_proc_involved_in_rowpanelk_2dbc( A, m, k, reduce );
+    if ( A->myrank == ipiv->get_rankof( ipiv, k, k ) ) {
+        INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved );
         INSERT_TASK_zperm_allreduce_send_invp_col( options, dir, ipiv, k, A, m, k );
     }
     if ( A->myrank == chameleon_getrankof_2d( A, m, k ) ) {
-        INSERT_TASK_zperm_allreduce_send_A( options, A, m, k, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_A( options, A, m, k, A->myrank, reduce->np_involved, reduce->proc_involved );
     }
 
-    if ( !ws->involved ) {
+    if ( !reduce->involved ) {
         return;
     }
 #endif
@@ -99,10 +100,11 @@ chameleon_pzlaswpc_panel( struct chameleon_pzgetrf_s *ws,
                             Wc(m, A->myrank), A(m, k) );
         RUNTIME_data_flush( sequence, A(m, k) );
     }
+    (void)reduce;
 }
 
 void
-chameleon_pzlaswpc( struct chameleon_pzgetrf_s *ws,
+chameleon_pzlaswpc( struct chameleon_pzlaswp_s *ws,
                     cham_dir_t                  dir,
                     CHAM_desc_t                *A,
                     CHAM_ipiv_t                *IPIV,
@@ -121,7 +123,7 @@ chameleon_pzlaswpc( struct chameleon_pzgetrf_s *ws,
     RUNTIME_options_init( &options, chamctxt, sequence, request );
 
     if ( dir == ChamDirForward ) {
-        for ( k = 0; k < A->nt; k++ ) {
+        for ( k = 0; k < IPIV->mt; k++ ) {
             for ( m = 0; m < A->mt; m++ ) {
                 options.priority = A->mt-m;
 
@@ -131,7 +133,7 @@ chameleon_pzlaswpc( struct chameleon_pzgetrf_s *ws,
         }
     }
     else {
-        for ( k = A->nt - 1; k > -1; k-- ) {
+        for ( k = IPIV->mt - 1; k > -1; k-- ) {
             for ( m = 0; m < A->mt; m++ ) {
                 options.priority = A->mt-m;
                 chameleon_pzlaswpc_panel( ws, dir, A, IPIV, m, k, &options, sequence );
diff --git a/compute/zgesv.c b/compute/zgesv.c
index c412cd3211b11aeae696faee3b2226501c89e05a..72aa2e99573272f3f80cec5dbbfed0d80d6212e8 100644
--- a/compute/zgesv.c
+++ b/compute/zgesv.c
@@ -84,6 +84,7 @@ int CHAMELEON_zgesv( int N, int NRHS,
     CHAM_desc_t                 descAl, descAt;
     CHAM_desc_t                 descBl, descBt;
     struct chameleon_pzgetrf_s *wsA,   *wsB;
+    int                         P,      Q;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
@@ -130,6 +131,9 @@ int CHAMELEON_zgesv( int N, int NRHS,
     chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
                          B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );
 
+    P = chameleon_desc_datadist_get_iparam( &descAt, 0 );
+    Q = chameleon_desc_datadist_get_iparam( &descAt, 1 );
+
     /* Allocate workspace for partial pivoting */
     wsA = CHAMELEON_zgetrf_WS_Alloc( &descAt );
     wsB = CHAMELEON_zgetrf_WS_Alloc( &descBt );
@@ -137,7 +141,7 @@ int CHAMELEON_zgesv( int N, int NRHS,
     if ( ( wsA->alg == ChamGetrfPPivPerColumn ) ||
          ( wsA->alg == ChamGetrfPPiv ) )
     {
-        chameleon_ipiv_init( &descIPIV, &descAt, N, IPIV );
+        chameleon_ipiv_init( &descIPIV, ChamLeft, descAt.mb, N, P, P*Q, IPIV, chameleon_getrankof_ipiv_2d_diag );
     }
 
     /* Call the tile interface */
@@ -161,7 +165,7 @@ int CHAMELEON_zgesv( int N, int NRHS,
     if ( ( wsA->alg == ChamGetrfPPivPerColumn ) ||
          ( wsA->alg == ChamGetrfPPiv ) )
     {
-        chameleon_ipiv_destroy( &descIPIV, &descAt );
+        chameleon_ipiv_destroy( &descIPIV );
     }
 
     /* Cleanup the temporary data */
@@ -240,7 +244,6 @@ int CHAMELEON_zgesv_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B )
     CHAMELEON_zgesv_Tile_Async( A, IPIV, B, wsA, wsB, sequence, &request );
 
     CHAMELEON_Desc_Flush( A, sequence );
-    CHAMELEON_Ipiv_Flush( IPIV, sequence );
     CHAMELEON_Desc_Flush( B, sequence );
 
     chameleon_sequence_wait( chamctxt, sequence );
@@ -369,6 +372,8 @@ int CHAMELEON_zgesv_Tile_Async( CHAM_desc_t        *A,
         wsB = user_wsB;
     }
 
+    IPIV->get_rankof = chameleon_getrankof_ipiv_2d_diag;
+
     chameleon_pzgetrf( wsA, A, IPIV, sequence, request );
 
     CHAMELEON_zgetrs_Tile_Async( ChamNoTrans, A, IPIV, B, wsB, sequence, request );
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index 4470c33f06a4f4477757a10c4d478d6b48f91709..ea1d50bc8c7251ffeb37a0b8c9e0668cfe31792f 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -59,7 +59,9 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
 {
     CHAM_context_t             *chamctxt;
     struct chameleon_pzgetrf_s *ws;
-    int lookahead, batch_size;
+    int                         lookahead, batch_size;
+    int                         P = chameleon_desc_datadist_get_iparam( A, 0 );
+    int                         Q = chameleon_desc_datadist_get_iparam( A, 1 );
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
@@ -70,11 +72,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
     ws->alg = ChamGetrfPPiv;
     ws->ib  = CHAMELEON_IB;
 
-#if defined (CHAMELEON_USE_MPI)
-    ws->proc_involved = malloc( sizeof( int ) * chameleon_desc_datadist_get_iparam(A, 0) );
-    ws->involved      = 0;
-    ws->np_involved   = 0;
-#endif
+    ws->laswp = CHAMELEON_zlaswp_WS_Alloc( ChamLeft, A );
 
     {
         char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" );
@@ -99,21 +97,6 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
         chameleon_cleanenv( algostr );
     }
 
-    {
-        char *allreduce = chameleon_getenv( "CHAMELEON_GETRF_ALL_REDUCE" );
-
-        if ( allreduce != NULL ) {
-            if ( strcasecmp( allreduce, "cham_spu_tasks" ) == 0 ) {
-                ws->alg_allreduce = ChamStarPUTasks;
-            }
-            else {
-                chameleon_error( "CHAMELEON_zgetrf_WS_Alloc", "CHAMELEON_GETRF_ALL_REDUCE is not one of chameleon_starpu_tasks, chameleon_starpu, chameleon_starpu_mpi, chameleon_mpi => Switch back to chameleon_starpu_tasks\n" );
-                ws->alg_allreduce = ChamStarPUTasks;
-            }
-        }
-        chameleon_cleanenv( allreduce );
-    }
-
     batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", 0 );
     if ( batch_size > CHAMELEON_BATCH_SIZE ) {
         chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE must be smaller than CHAMELEON_BATCH_SIZE, please recompile with the right CHAMELEON_BATCH_SIZE, or reduce the CHAMELEON_GETRF_BATCH_SIZE value\n" );
@@ -132,9 +115,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
         chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
                              ChamComplexDouble, 1, A->nb, A->nb,
                              A->mt, A->nt * A->nb, 0, 0,
-                             A->mt, A->nt * A->nb,
-                             chameleon_desc_datadist_get_iparam(A, 0),
-                             chameleon_desc_datadist_get_iparam(A, 1),
+                             A->mt, A->nt * A->nb, P, Q,
                              NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
     }
     else if ( ( ws->alg == ChamGetrfPPiv )          ||
@@ -143,25 +124,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
         chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
                              ChamComplexDouble, A->mb, A->nb, A->mb*A->nb,
                              A->m, A->n, 0, 0,
-                             A->m, A->n,
-                             chameleon_desc_datadist_get_iparam(A, 0),
-                             chameleon_desc_datadist_get_iparam(A, 1),
+                             A->m, A->n, P, Q,
                              NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
-        chameleon_desc_init( &(ws->Wu), CHAMELEON_MAT_ALLOC_TILE,
-                             ChamComplexDouble, A->mb, A->nb, A->mb*A->nb,
-                             A->mb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), A->n, 0, 0,
-                             A->mb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), A->n, chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), 1,
-                             NULL, NULL, NULL, A->get_rankof_init_arg );
-        chameleon_desc_init( &(ws->Wc), CHAMELEON_MAT_ALLOC_TILE,
-                            ChamComplexDouble, A->mb, A->nb, A->mb*A->nb,
-                            A->m, A->nb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), 0, 0,
-                            A->m, A->nb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), 1, chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1),
-                            NULL, NULL, NULL, A->get_rankof_init_arg );
         lookahead = chamctxt->lookahead;
         chameleon_desc_init( &(ws->Wl), CHAMELEON_MAT_ALLOC_TILE,
                              ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb),
-                             A->mt * A->mb, A->nb * chameleon_desc_datadist_get_iparam(A, 1) * lookahead, 0, 0,
-                             A->mt * A->mb, A->nb * chameleon_desc_datadist_get_iparam(A, 1) * lookahead, chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1),
+                             A->mt * A->mb, A->nb * Q * lookahead, 0, 0,
+                             A->mt * A->mb, A->nb * Q * lookahead, P, Q,
                              NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
     }
 
@@ -180,9 +149,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
         chameleon_desc_init( &(ws->Up), CHAMELEON_MAT_ALLOC_TILE,
                              ChamComplexDouble, ws->ib, A->nb, ws->ib * A->nb,
                              A->mt * ws->ib, A->nt * A->nb, 0, 0,
-                             A->mt * ws->ib, A->nt * A->nb,
-                             chameleon_desc_datadist_get_iparam(A, 0),
-                             chameleon_desc_datadist_get_iparam(A, 1),
+                             A->mt * ws->ib, A->nt * A->nb, P, Q,
                              NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
     }
 
@@ -213,10 +180,6 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
 {
     struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws;
 
-#if defined (CHAMELEON_USE_MPI)
-    free( ws->proc_involved );
-#endif
-
     if ( ( ws->alg == ChamGetrfNoPivPerColumn ) ||
          ( ws->alg == ChamGetrfPPiv           ) ||
          ( ws->alg == ChamGetrfPPivPerColumn  ) )
@@ -230,8 +193,6 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
     if ( ( ws->alg == ChamGetrfPPiv           ) ||
          ( ws->alg == ChamGetrfPPivPerColumn  ) )
     {
-        chameleon_desc_destroy( &(ws->Wu) );
-        chameleon_desc_destroy( &(ws->Wc) );
         chameleon_desc_destroy( &(ws->Wl) );
     }
     free( ws );
@@ -289,13 +250,14 @@ int
 CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
 {
     int                 NB;
-    int                 status;
-    CHAM_desc_t         descAl, descAt;
-    CHAM_ipiv_t         descIPIV;
-    CHAM_context_t     *chamctxt;
-    RUNTIME_sequence_t *sequence = NULL;
-    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    int                         status;
+    CHAM_desc_t                 descAl, descAt;
+    CHAM_ipiv_t                 descIPIV;
+    CHAM_context_t             *chamctxt;
+    RUNTIME_sequence_t         *sequence = NULL;
+    RUNTIME_request_t           request  = RUNTIME_REQUEST_INITIALIZER;
     struct chameleon_pzgetrf_s *ws;
+    int                         P, Q;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
@@ -335,13 +297,16 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
     chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
                          A, NB, NB, LDA, N, M, N, sequence, &request );
 
+    P = chameleon_desc_datadist_get_iparam( &descAt, 0 );
+    Q = chameleon_desc_datadist_get_iparam( &descAt, 1 );
+
     /* Allocate workspace for partial pivoting */
     ws = CHAMELEON_zgetrf_WS_Alloc( &descAt );
 
     if ( ( ws->alg == ChamGetrfPPivPerColumn ) ||
          ( ws->alg == ChamGetrfPPiv ) )
     {
-        chameleon_ipiv_init( &descIPIV, &descAt, chameleon_min( M, N ), IPIV );
+        chameleon_ipiv_init( &descIPIV, ChamLeft, descAt.mb, chameleon_min( M, N ), P, P*Q, IPIV, chameleon_getrankof_ipiv_2d_diag);
     }
 
     /* Call the tile interface */
@@ -362,7 +327,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
     if ( ( ws->alg == ChamGetrfPPivPerColumn ) ||
          ( ws->alg == ChamGetrfPPiv ) )
     {
-        chameleon_ipiv_destroy( &descIPIV, &descAt );
+        chameleon_ipiv_destroy( &descIPIV );
     }
     CHAMELEON_zgetrf_WS_Free( ws );
     chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
@@ -432,7 +397,6 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV )
     ws = CHAMELEON_zgetrf_WS_Alloc( A );
     CHAMELEON_zgetrf_Tile_Async( A, IPIV, ws, sequence, &request );
     CHAMELEON_Desc_Flush( A, sequence );
-    CHAMELEON_Ipiv_Flush( IPIV, sequence );
 
     chameleon_sequence_wait( chamctxt, sequence );
     CHAMELEON_zgetrf_WS_Free( ws );
@@ -545,6 +509,8 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
         ws = user_ws;
     }
 
+    IPIV->get_rankof = chameleon_getrankof_ipiv_2d_diag;
+
     chameleon_pzgetrf( ws, A, IPIV, sequence, request );
 
     if ( user_ws == NULL ) {
diff --git a/compute/zgetrs.c b/compute/zgetrs.c
index 7e8fd22a4c72ad092bf8899f8417dbb9b7a26b2a..46850bd6faa89ce35fea05c257b139de59403adb 100644
--- a/compute/zgetrs.c
+++ b/compute/zgetrs.c
@@ -89,6 +89,7 @@ int CHAMELEON_zgetrs( cham_trans_t trans, int N, int NRHS,
     CHAM_desc_t                 descAl, descAt;
     CHAM_desc_t                 descBl, descBt;
     struct chameleon_pzgetrf_s *ws;
+    int                         P, Q;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
@@ -138,9 +139,12 @@ int CHAMELEON_zgetrs( cham_trans_t trans, int N, int NRHS,
     chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
                          B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );
 
+    P = chameleon_desc_datadist_get_iparam( &descAt, 0 );
+    Q = chameleon_desc_datadist_get_iparam( &descAt, 1 );
+
     ws = CHAMELEON_zgetrf_WS_Alloc( &descBt );
-    CHAMELEON_Ipiv_Create( &descIPIV, &descAt, N, IPIV );
-    CHAMELEON_Ipiv_Init( &descAt, descIPIV );
+    CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descAt.mb, N, P, P*Q, IPIV );
+    CHAMELEON_Ipiv_Init( descIPIV );
 
     /* Call the tile interface */
     CHAMELEON_zgetrs_Tile_Async( trans, &descAt, descIPIV, &descBt, ws, sequence, &request );
@@ -154,7 +158,7 @@ int CHAMELEON_zgetrs( cham_trans_t trans, int N, int NRHS,
     chameleon_sequence_wait( chamctxt, sequence );
 
     /* Cleanup the temporary data */
-    CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt );
+    CHAMELEON_Ipiv_Destroy( &descIPIV );
     CHAMELEON_zgetrf_WS_Free( ws );
     chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
     chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
@@ -377,7 +381,7 @@ int CHAMELEON_zgetrs_Tile_Async( cham_trans_t        trans,
     }
 
     if ( trans == ChamNoTrans ) {
-        chameleon_pzlaswp( ws, ChamDirForward, B, IPIV, sequence, request );
+        chameleon_pzlaswp( ws->laswp, ChamDirForward, B, IPIV, sequence, request );
 
         chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
 
@@ -388,7 +392,7 @@ int CHAMELEON_zgetrs_Tile_Async( cham_trans_t        trans,
 
         chameleon_pztrsm( ChamLeft, ChamLower, ChamTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
 
-        chameleon_pzlaswp( ws, ChamDirBackward, B, IPIV, sequence, request );
+        chameleon_pzlaswp( ws->laswp, ChamDirBackward, B, IPIV, sequence, request );
     }
 
     if ( user_ws == NULL ) {
diff --git a/compute/zlaswp.c b/compute/zlaswp.c
index a4c6635c20a38a215e9e585663ed32593e0d63ae..f00140efedbd5cc829d73cb581820720bfa0217c 100644
--- a/compute/zlaswp.c
+++ b/compute/zlaswp.c
@@ -18,6 +18,124 @@
  */
 #include "control/common.h"
 
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  CHAMELEON_zlaswp_WS_Alloc - Allocate the required workspaces for laswp
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T
+ *
+ * @param[in] A
+ *          The descriptor of the matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval An opaque workspace for chameleon_pzlaswp()/chameleon_pzlaswpc(), to be released
+ *         with CHAMELEON_zlaswp_WS_Free(); NULL if CHAMELEON is not initialized or on allocation failure.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp_Tile_Async
+ * @sa CHAMELEON_zlaswp_WS_Free
+ *
+ */
+void *
+CHAMELEON_zlaswp_WS_Alloc( cham_side_t side, const CHAM_desc_t *A )
+{
+    struct chameleon_pzlaswp_s *ws;
+    CHAM_reduce_t              *reduce;
+    int                         P = chameleon_desc_datadist_get_iparam( A, 0 );
+    int                         Q = chameleon_desc_datadist_get_iparam( A, 1 );
+
+    if ( chameleon_context_self() == NULL ) {
+        return NULL;
+    }
+    ws = calloc( 1, sizeof(struct chameleon_pzlaswp_s) );
+    if ( ws == NULL ) {
+        return NULL;
+    }
+    reduce = &(ws->reduce);
+
+    /* The panel helpers write up to P ranks (column panel) or Q ranks (row panel): size for both */
+#if defined (CHAMELEON_USE_MPI)
+    reduce->proc_involved = malloc( sizeof( int ) * ( ( P > Q ) ? P : Q ) );
+    reduce->involved      = 0;
+    reduce->np_involved   = 0;
+#endif
+
+    {
+        char *allreduce = chameleon_getenv( "CHAMELEON_ALLREDUCE" );
+
+        if ( allreduce != NULL ) {
+            if ( strcasecmp( allreduce, "cham_spu_tasks" ) == 0 ) {
+                reduce->alg_allreduce = ChamStarPUTasks;
+            }
+            else {
+                chameleon_error( "CHAMELEON_zlaswp_WS_Alloc", "CHAMELEON_ALLREDUCE is not one of chameleon_starpu_tasks, chameleon_starpu, chameleon_starpu_mpi, chameleon_mpi => Switch back to chameleon_starpu_tasks\n" );
+                reduce->alg_allreduce = ChamStarPUTasks;
+            }
+        }
+        chameleon_cleanenv( allreduce );
+    }
+
+    if ( side == ChamLeft ) {
+        chameleon_desc_init( &(ws->W), CHAMELEON_MAT_ALLOC_TILE,
+                            ChamComplexDouble, A->mb, A->nb, A->mb*A->nb,
+                            A->mb * P * Q, A->n, 0, 0,
+                            A->mb * P * Q, A->n, P * Q, 1,
+                            NULL, NULL, NULL, A->get_rankof_init_arg );
+    }
+    else {
+        chameleon_desc_init( &(ws->W), CHAMELEON_MAT_ALLOC_TILE,
+                            ChamComplexDouble, A->mb, A->nb, A->mb*A->nb,
+                            A->m, A->nb * P * Q, 0, 0,
+                            A->m, A->nb * P * Q, 1, P * Q,
+                            NULL, NULL, NULL, A->get_rankof_init_arg );
+    }
+
+    return ws;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Free the allocated workspaces for asynchronous laswp
+ *
+ *******************************************************************************
+ *
+ * @param[in,out] user_ws
+ *          Pointer returned by CHAMELEON_zlaswp_WS_Alloc(), or NULL (no-op). On exit, all data are freed.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp_Tile_Async
+ * @sa CHAMELEON_zlaswp_WS_Alloc
+ *
+ */
+void
+CHAMELEON_zlaswp_WS_Free( void *user_ws )
+{
+    struct chameleon_pzlaswp_s *ws = (struct chameleon_pzlaswp_s *)user_ws;
+
+    if ( ws == NULL ) {
+        return;
+    }
+#if defined (CHAMELEON_USE_MPI)
+    free( ws->reduce.proc_involved );
+#endif
+    chameleon_desc_destroy( &(ws->W) );
+    free( ws );
+}
+
 /**
  ********************************************************************************
  *
@@ -88,6 +206,7 @@ int CHAMELEON_zlaswp( cham_side_t            side,
     CHAM_desc_t         descAl, descAt;
     CHAM_ipiv_t        *descIPIV;
     int                 K = ( side == ChamLeft ) ? M : N;
+    int                 P, Q;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
@@ -136,9 +255,13 @@ int CHAMELEON_zlaswp( cham_side_t            side,
     /* Submit the matrix conversion */
     chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
                          A, NB, NB, LDA, N, M, N, sequence, &request );
-    CHAMELEON_Ipiv_Create( &descIPIV, &descAt, K, IPIV );
 
-    CHAMELEON_Ipiv_Init( &descAt, descIPIV );
+    P = chameleon_desc_datadist_get_iparam( &descAt, 0 );
+    Q = chameleon_desc_datadist_get_iparam( &descAt, 1 );
+
+    CHAMELEON_Ipiv_Create( &descIPIV, side, descAt.mb, K, P, P*Q, IPIV );
+
+    CHAMELEON_Ipiv_Init( descIPIV );
 
     /* Call the tile interface */
     CHAMELEON_zlaswp_Tile_Async( side, dir, &descAt, K1, K2, descIPIV, sequence, &request );
@@ -150,7 +273,7 @@ int CHAMELEON_zlaswp( cham_side_t            side,
     chameleon_sequence_wait( chamctxt, sequence );
 
     /* Cleanup the temporary data */
-    CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt );
+    CHAMELEON_Ipiv_Destroy( &descIPIV );
     chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
 
     chameleon_sequence_destroy( chamctxt, sequence );
@@ -218,17 +341,18 @@ int CHAMELEON_zlaswp_Tile( cham_side_t  side,
     RUNTIME_sequence_t *sequence = NULL;
     RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
     int                 status;
+    int                 K = ( side == ChamLeft ) ? A->m : A->n;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
         chameleon_fatal_error("CHAMELEON_zlaswp_Tile", "CHAMELEON not initialized");
         return CHAMELEON_ERR_NOT_INITIALIZED;
     }
-    if ( ( K1 < 1 ) || ( K1 > A->m ) ) {
+    if ( ( K1 < 1 ) || ( K1 > K ) ) {
         chameleon_error("CHAMELEON_zlaswp", "illegal value of K1");
         return CHAMELEON_ERR_ILLEGAL_VALUE;
     }
-    if ( ( K2 < 1 ) || ( K2 > A->m ) ) {
+    if ( ( K2 < 1 ) || ( K2 > K ) ) {
         chameleon_error("CHAMELEON_zlaswp", "illegal value of K2");
         return CHAMELEON_ERR_ILLEGAL_VALUE;
     }
@@ -237,7 +361,6 @@ int CHAMELEON_zlaswp_Tile( cham_side_t  side,
     CHAMELEON_zlaswp_Tile_Async( side, dir, A, K1, K2, IPIV, sequence, &request );
 
     CHAMELEON_Desc_Flush( A, sequence );
-    CHAMELEON_Ipiv_Flush( IPIV, sequence );
 
     chameleon_sequence_wait( chamctxt, sequence );
     status = sequence->status;
@@ -307,7 +430,7 @@ int CHAMELEON_zlaswp_Tile_Async( cham_side_t         side,
                                  RUNTIME_request_t  *request )
 {
     CHAM_context_t             *chamctxt;
-    struct chameleon_pzgetrf_s *ws;
+    struct chameleon_pzlaswp_s *ws;
     RUNTIME_option_t            options;
     int                         k;
     int                         K = ( side == ChamLeft ) ? A->m : A->n;
@@ -367,6 +490,7 @@ int CHAMELEON_zlaswp_Tile_Async( cham_side_t         side,
                 m0 = k * A->mb;
                 INSERT_TASK_ipiv_to_perm( &options, m0, tempkm, tempkm, K1 - 1, K2 - 1,
                                                IPIV, k );
+                RUNTIME_ipiv_flushk( sequence, IPIV, k);
             }
         }
         else {
@@ -376,13 +500,14 @@ int CHAMELEON_zlaswp_Tile_Async( cham_side_t         side,
                 tempkn = A->get_blkdim( A, k, DIM_n, A->n );
                 n0 = k * A->nb;
                 INSERT_TASK_ipiv_to_perm( &options, n0, tempkn, tempkn, K1 - 1, K2 - 1,
-                                               IPIV, k );
+                                           IPIV, k );
+                RUNTIME_ipiv_flushk( sequence, IPIV, k);
             }
         }
         chameleon_sequence_wait( chamctxt, sequence );
     }
 
-    ws = CHAMELEON_zgetrf_WS_Alloc( A );
+    ws = CHAMELEON_zlaswp_WS_Alloc( side, A );
 
     if ( side == ChamLeft ) {
         chameleon_pzlaswp( ws, dir, A, IPIV, sequence, request );
diff --git a/control/common.h b/control/common.h
index aa1b39ca5b6a696412eeff04cb69d5ace70baa56..bf43d3c58e40a38f4492ea32ade9850f8cae2287 100644
--- a/control/common.h
+++ b/control/common.h
@@ -92,6 +92,17 @@
 #define ChamIPT_Panel   1
 #define ChamIPT_All     2
 
+/**
+ * State of a reduction operation: allreduce algorithm and set of participating processes
+ */
+struct chameleon_reduce_s {
+    cham_getrf_allreduce_t  alg_allreduce; /**< Specifies the algorithm used for the allreduce                          */
+    int                    *proc_involved; /**< Ranks of the processes involved (np_involved valid entries)             */
+    unsigned int            involved;      /**< Set to 1 when the current process is involved, 0 otherwise              */
+    int                     np_involved;   /**< Specifies the number of involved processes in the reduction operation   */
+};
+typedef struct chameleon_reduce_s CHAM_reduce_t;
+
 /**
  *  Global array of LAPACK constants
  */
diff --git a/control/compute_z.h b/control/compute_z.h
index 3d86c666ab5655cce9d95decb7a75a50f3948be9..e747c2ac2203c63266acf4c928ad5a28495e0a86 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -40,25 +40,28 @@ struct chameleon_pzgemm_s {
     CHAM_desc_t WB;
 };
 
+/**
+ * @brief Data structure to handle the LASWP workspaces (see CHAMELEON_zlaswp_WS_Alloc)
+ */
+struct chameleon_pzlaswp_s {
+    CHAM_desc_t   W;      /**< Workspace used for the row/column permutation. */
+    CHAM_reduce_t reduce; /**< Reduction state: algorithm and processes used  */
+};
+
 /**
  * @brief Data structure to handle the GETRF workspaces with partial pivoting
  */
 struct chameleon_pzgetrf_s {
-    cham_getrf_t            alg;
-    cham_getrf_allreduce_t  alg_allreduce;
-    int                     ib;         /**< Internal blocking parameter */
-    int                     batch_size_blas2; /**< Batch size for the blas 2 operations of the panel factorization */
-    int                     batch_size_blas3; /**< Batch size for the blas 3 operations of the panel factorization */
-    int                     batch_size_swap;  /**< Batch size for the permutation */
-    int                     ringswitch; /**< Define when to switch to ring bcast           */
-    CHAM_desc_t             U;
-    CHAM_desc_t             Up; /**< Workspace used for the panel factorization    */
-    CHAM_desc_t             Wu; /**< Workspace used for the permutation and update */
-    CHAM_desc_t             Wc; /**< Workspace used for the column permutation. */
-    CHAM_desc_t             Wl; /**< Workspace used the update                     */
-    int                    *proc_involved;
-    unsigned int            involved;
-    int                     np_involved;
+    struct chameleon_pzlaswp_s *laswp;            /**< Structure containing the permutation workspace and the reduce data   */
+    cham_getrf_t                alg;              /**< Define the algorithm used to compute the getrf                       */
+    int                         ib;               /**< Internal blocking parameter                                          */
+    int                         batch_size_blas2; /**< Batch size for the blas 2 operations of the panel factorization      */
+    int                         batch_size_blas3; /**< Batch size for the blas 3 operations of the panel factorization      */
+    int                         batch_size_swap;  /**< Batch size for the permutation                                       */
+    int                         ringswitch;       /**< Define when to switch to ring bcast                                  */
+    CHAM_desc_t                 U;                /**< Workspaces used for the panels permutation in getrf without pivoting */
+    CHAM_desc_t                 Up;               /**< Workspace used for the panel factorization                           */
+    CHAM_desc_t                 Wl;               /**< Workspace used for the update                                        */
 };
 
 /**
@@ -173,8 +176,8 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
 void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzlaset( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha,                          CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzlaswp( struct chameleon_pzgetrf_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
-void chameleon_pzlaswpc( struct chameleon_pzgetrf_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+void chameleon_pzlaswp( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+void chameleon_pzlaswpc( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzplghe(double bump, cham_uplo_t uplo, CHAM_desc_t *A, int bigM, int m0, int n0, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
diff --git a/control/descriptor.h b/control/descriptor.h
index 02c77e502cf1620cded3d6137ef7b6c8072ac249..8dc99c8383feed99cfc8806c0879325f787615fd 100644
--- a/control/descriptor.h
+++ b/control/descriptor.h
@@ -54,7 +54,6 @@ int chameleon_desc_init_internal( CHAM_desc_t *desc, const char *name, void *mat
                                   int   (*get_rankof) ( const CHAM_desc_t*, int, int ),
                                   void* get_rankof_arg );
 
-
 static inline int chameleon_desc_init( CHAM_desc_t *desc, void *mat,
                                        cham_flttype_t dtyp, int mb, int nb, int bsiz,
                                        int lm, int ln, int i, int j,
@@ -78,8 +77,13 @@ CHAM_desc_t* chameleon_desc_submatrix( CHAM_desc_t *descA, int i, int j, int m,
 void         chameleon_desc_destroy  ( CHAM_desc_t *desc );
 int          chameleon_desc_check    ( const CHAM_desc_t *desc );
 
-int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, int m, void *data );
-void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc );
+int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, cham_side_t side, int mb, int m,
+                         int p, int np, void *data,
+                         blkrankof_ipiv_fct_t get_rankof );
+void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv );
+
+int  chameleon_pivot_init   ( CHAM_desc_pivot_t *pivot, const CHAM_desc_t *desc );
+void chameleon_pivot_destroy( CHAM_desc_pivot_t *pivot );
 
 /**
  *  Internal function to return address of block (m,n) with m,n = block indices
diff --git a/control/descriptor_helpers.c b/control/descriptor_helpers.c
index 6db15fa1e37dffa3e3dd5d8182a62c2a05de63a7..259f3eaf21f36108725b1cd8aa7d49ea445bb039 100644
--- a/control/descriptor_helpers.c
+++ b/control/descriptor_helpers.c
@@ -84,6 +84,73 @@ int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n )
     return (mm % chameleon_desc_datadist_get_iparam(A,0)) * chameleon_desc_datadist_get_iparam(A,1) + (mm % chameleon_desc_datadist_get_iparam(A,1));
 }
 
+/**
+ * @brief Rank of the process in charge of the row permutation of the tile (m, n)
+ * for a PxQ 2D block-cyclic distribution, with P = IPIV->P and Q = IPIV->NP / IPIV->P.
+ *
+ * @param[in] IPIV
+ *        The ipiv descriptor; its P and NP fields are read.
+ *
+ * @param[in] m
+ *        The row index of the tile.
+ *
+ * @param[in] n
+ *        The column index of the tile. Unused.
+ *
+ * @return The rank computed as ( m % P ) * Q.
+ *
+ */
+int chameleon_getrankof_ipiv_2d_row( const CHAM_ipiv_t *IPIV, int m, int n )
+{
+    (void)n;
+    return ( m % IPIV->P ) * ( IPIV->NP / IPIV->P );
+}
+
+/**
+ * @brief Rank of the process in charge of the column permutation of the tile (m, n)
+ * for a PxQ 2D block-cyclic distribution, with P = IPIV->P and Q = IPIV->NP / IPIV->P.
+ *
+ * @param[in] IPIV
+ *        The ipiv descriptor; its P and NP fields are read.
+ *
+ * @param[in] m
+ *        The row index of the tile. Unused.
+ *
+ * @param[in] n
+ *        The column index of the tile.
+ *
+ * @return The rank computed as n % Q.
+ *
+ */
+int chameleon_getrankof_ipiv_2d_col( const CHAM_ipiv_t *IPIV, int m, int n )
+{
+    (void)m;
+    return n % ( IPIV->NP / IPIV->P );
+}
+
+/**
+ * @brief Rank of the process in charge of the permutation of the diagonal tile (m, m),
+ * as used by getrf, for a PxQ 2D block-cyclic distribution (Q = IPIV->NP / IPIV->P).
+ *
+ * @param[in] IPIV
+ *        The ipiv descriptor; its P and NP fields are read.
+ *
+ * @param[in] m
+ *        The row and column index of the tile.
+ *
+ * @param[in] n
+ *        Unused
+ *
+ * @return The rank computed as ( m % P ) * Q + ( m % Q ).
+ *
+ */
+int chameleon_getrankof_ipiv_2d_diag( const CHAM_ipiv_t *IPIV, int m, int n )
+{
+    const int npcol = IPIV->NP / IPIV->P;
+
+    (void)n;
+    return ( m % IPIV->P ) * npcol + ( m % npcol );
+}
+
 /**
  * @brief Test if the current MPI process is involved in the panel k for 2DBC distributions.
  *
@@ -134,37 +201,37 @@ int chameleon_p_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k, int p ) {
  * @param[in] n
  *        The index of the panel to test.
  *
- * @param[inout] ws_getrf
+ * @param[inout] ws_reduce
  *        The i.
  *
  */
 void chameleon_get_proc_involved_in_panelk_2dbc( const CHAM_desc_t *A,
                                                  int                k,
                                                  int                n,
-                                                 void              *ws_getrf )
+                                                 void              *ws_reduce )
 {
 #if defined (CHAMELEON_USE_MPI)
-    struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)ws_getrf;
-    int *proc_involved = ws->proc_involved;
-    int  b, rank, np;
+    CHAM_reduce_t *reduce        = (CHAM_reduce_t*) ws_reduce;
+    int           *proc_involved = reduce->proc_involved;
+    int            b, rank, np;
 
     np = 0;
-    ws->involved = 0;
+    reduce->involved = 0;
     for ( b = k; (b < A->mt) && ((b-k) < chameleon_desc_datadist_get_iparam(A, 0)); b ++ ) {
         rank = chameleon_getrankof_2d( A, b, n );
         proc_involved[ b-k ] = rank;
         np ++;
         if ( rank == A->myrank ) {
-            ws->involved = 1;
+            reduce->involved = 1;
         }
     }
-    ws->proc_involved = proc_involved;
-    ws->np_involved   = np;
+    reduce->proc_involved = proc_involved;
+    reduce->np_involved   = np;
 #else
     (void)A;
     (void)k;
     (void)n;
-    (void)ws_getrf;
+    (void)ws_reduce;
 #endif
 }
 
@@ -180,37 +247,37 @@ void chameleon_get_proc_involved_in_panelk_2dbc( const CHAM_desc_t *A,
  * @param[in] k
  *        The index of the panel to test.
  *
- * @param[inout] ws_getrf
+ * @param[inout] ws_reduce
  *        The i.
  *
  */
 void chameleon_get_proc_involved_in_rowpanelk_2dbc( const CHAM_desc_t *A,
                                                     int                m,
                                                     int                k,
-                                                    void              *ws_getrf )
+                                                    void              *ws_reduce )
 {
 #if defined (CHAMELEON_USE_MPI)
-    struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)ws_getrf;
-    int *proc_involved = ws->proc_involved;
-    int  b, rank, np;
+    CHAM_reduce_t *reduce = (CHAM_reduce_t*) ws_reduce;
+    int           *proc_involved = reduce->proc_involved;
+    int            b, rank, np;
 
     np = 0;
-    ws->involved = 0;
+    reduce->involved = 0;
     for ( b = k; (b < A->nt) && ((b-k) < chameleon_desc_datadist_get_iparam(A, 1)); b ++ ) {
         rank = chameleon_getrankof_2d( A, m, b );
         proc_involved[ b-k ] = rank;
         np ++;
         if ( rank == A->myrank ) {
-            ws->involved = 1;
+            reduce->involved = 1;
         }
     }
-    ws->proc_involved = proc_involved;
-    ws->np_involved   = np;
+    reduce->proc_involved = proc_involved;
+    reduce->np_involved   = np;
 #else
     (void)A;
     (void)k;
     (void)m;
-    (void)ws_getrf;
+    (void)ws_reduce;
 #endif
 }
 
diff --git a/control/descriptor_ipiv.c b/control/descriptor_ipiv.c
index bc58b4cc4448cac8e781e10995065bbb99c69af6..eafcc3dedf53774508db78bf20b003bd36068502 100644
--- a/control/descriptor_ipiv.c
+++ b/control/descriptor_ipiv.c
@@ -43,21 +43,35 @@
  * @param[in,out] ipiv
  *          The pointer to the ipiv descriptor to initialize.
  *
- * @param[in] desc
- *          The tile descriptor for which an associated ipiv descriptor must be generated.
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows (ChamLeft) or on the columns.
+ *
+ * @param[in] mb
+ *          The tile size of the pivot array (number of pivot entries per tile).
  *
  * @param[in] m
  *          The size of the pivot array.
  *
+ * @param[in] p
+ *          Number of processes rows for the 2D block-cyclic distribution.
+ *
+ * @param[in] np
+ *          The total number of processes.
+ *
  * @param[in] data
  *          The pointer to the original vector where to store the pivot values.
  *
+ * @param[in] get_rankof
+ *          The function used to determine which process is responsible for the permutation
+ *          of a tile. If NULL, a default is selected according to side.
  ******************************************************************************
  *
  * @return CHAMELEON_SUCCESS on success, CHAMELEON_ERR_NOT_INITIALIZED otherwise.
  *
  */
-int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, int m, void *data )
+int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, cham_side_t side, int mb, int m,
+                         int p, int np, void *data,
+                         blkrankof_ipiv_fct_t get_rankof )
 {
     CHAM_context_t *chamctxt;
     int rc = CHAMELEON_SUCCESS;
@@ -70,15 +84,72 @@ int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, int m, void
         return CHAMELEON_ERR_NOT_INITIALIZED;
     }
 
-    ipiv->desc = desc;
-    ipiv->data = data;
-    ipiv->i    = 0;
-    ipiv->m    = m;
-    ipiv->mb   = desc->mb;
-    ipiv->mt   = chameleon_ceil( ipiv->m, ipiv->mb );
+    if ( get_rankof ) {
+        ipiv->get_rankof = get_rankof;
+    }
+    else {
+        ipiv->get_rankof = ( side == ChamLeft ) ? chameleon_getrankof_ipiv_2d_row :
+                                                  chameleon_getrankof_ipiv_2d_col;
+    }
+
+    ipiv->get_blkdim = chameleon_getblkdim_ipiv;
+
+    ipiv->data   = data;
+    ipiv->myrank = RUNTIME_comm_rank( chamctxt );
+    ipiv->i      = 0;
+    ipiv->m      = m;
+    ipiv->mb     = mb;
+    ipiv->mt     = chameleon_ceil( ipiv->m, ipiv->mb );
+    ipiv->P      = p;
+    ipiv->NP     = np;
+
+    /* Create runtime specific structure like registering data */
+    RUNTIME_ipiv_create( ipiv );
+
+    return rc;
+}
+
+/**
+ ******************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Internal function to create tiled descriptor associated to a pivot.
+ *
+ ******************************************************************************
+ *
+ * @param[in,out] pivot
+ *          The pointer to the pivot descriptor to initialize.
+ *
+ * @param[in] desc
+ *          The tile descriptor for which an associated pivot descriptor must be generated.
+ *
+ ******************************************************************************
+ *
+ * @return CHAMELEON_SUCCESS on success, CHAMELEON_ERR_NOT_INITIALIZED otherwise.
+ *
+ */
+int chameleon_pivot_init( CHAM_desc_pivot_t *pivot, const CHAM_desc_t *desc )
+{
+    CHAM_context_t *chamctxt;
+    int rc = CHAMELEON_SUCCESS;
+
+    memset( pivot, 0, sizeof(CHAM_desc_pivot_t) );
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_error("CHAMELEON_Desc_Create", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+
+    pivot->P       = chameleon_desc_datadist_get_iparam( desc, 0 );
+    pivot->Q       = chameleon_desc_datadist_get_iparam( desc, 1 );
+    pivot->n       = chameleon_min(desc->mb, desc->nb);
+    pivot->nb      = desc->mb;
+    pivot->dtyp    = desc->dtyp;
 
     /* Create runtime specific structure like registering data */
-    RUNTIME_ipiv_create( ipiv, desc );
+    RUNTIME_pivot_create( pivot );
 
     return rc;
 }
@@ -96,10 +167,30 @@ int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, int m, void
  *          The pointer to the ipiv descriptor to destroy.
  *
  */
-void chameleon_ipiv_destroy( CHAM_ipiv_t       *ipiv,
-                             const CHAM_desc_t *desc )
+void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv )
+{
+    RUNTIME_ipiv_destroy( ipiv );
+}
+
+/**
+ ******************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Internal function to destroy a tiled descriptor associated to a pivot array.
+ *
+ ******************************************************************************
+ *
+ * @param[in,out] pivot
+ *          The pointer to the pivot descriptor to destroy.
+ *
+ * @param[in] desc
+ *          The tile descriptor for which an associated pivot descriptor must be generated.
+ *
+ */
+void chameleon_pivot_destroy( CHAM_desc_pivot_t *pivot )
 {
-    RUNTIME_ipiv_destroy( ipiv, desc );
+    RUNTIME_pivot_destroy( pivot );
 }
 
 /**
@@ -114,6 +205,9 @@ void chameleon_ipiv_destroy( CHAM_ipiv_t       *ipiv,
  * @param[in,out] ipiv
  *          The pointer to the ipiv descriptor to initialize.
  *
+ * @param[in] side
+ *          Specifies whether the permutation is applied to the rows or to the columns.
+ *
  * @param[in] desc
  *          The tile descriptor for which an associated ipiv descriptor must be generated.
  *
@@ -130,7 +224,8 @@ void chameleon_ipiv_destroy( CHAM_ipiv_t       *ipiv,
  * @retval CHAMELEON_ERR_OUT_OF_RESOURCES if failed to allocated some ressources.
  *
  */
-int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, int m, void *data )
+int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, cham_side_t side, int mb, int m,
+                           int p, int np, void *data )
 {
     CHAM_context_t *chamctxt;
     CHAM_ipiv_t *ipiv;
@@ -148,7 +243,7 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, int m
         return CHAMELEON_ERR_OUT_OF_RESOURCES;
     }
 
-    chameleon_ipiv_init( ipiv, desc, m, data );
+    chameleon_ipiv_init( ipiv, side, mb, m, p, np, data, NULL );
 
     *ipivptr = ipiv;
     return CHAMELEON_SUCCESS;
@@ -163,9 +258,6 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, int m
  *
  *******************************************************************************
  *
- * @param[in] descA
- *          Descriptor of the matrix A.
- *
  * @param[in,out] descIPIV
  *          Descriptor of the pivot array. Should be initialized using
  *          CHAMELEON_Ipiv_Create() with data filled with the vector of pivot.
@@ -174,8 +266,7 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, int m
  *
  *
  */
-void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA,
-                          CHAM_ipiv_t       *descIPIV )
+void CHAMELEON_Ipiv_Init( CHAM_ipiv_t *descIPIV )
 {
 
     RUNTIME_option_t    options;
@@ -210,8 +301,7 @@ void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA,
  * @retval CHAMELEON_SUCCESS successful exit
  *
  */
-int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr,
-                            const CHAM_desc_t *desc )
+int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr )
 {
     CHAM_context_t *chamctxt;
     CHAM_ipiv_t *ipiv;
@@ -228,7 +318,7 @@ int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr,
     }
 
     ipiv = *ipivptr;
-    chameleon_ipiv_destroy( ipiv, desc );
+    chameleon_ipiv_destroy( ipiv );
     free(ipiv);
     *ipivptr = NULL;
     return CHAMELEON_SUCCESS;
@@ -246,10 +336,10 @@ int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr,
  ******************************************************************************
  *
  * @param[in] ipiv
- *          ipiv vector descriptor.
+ *          ipiv descriptor.
  *
  * @param[in] sequence
- *          The seqeunce in which to submit the calls to flush the data.
+ *          The sequence in which to submit the calls to flush the data.
  *
  ******************************************************************************
  *
diff --git a/include/chameleon.h b/include/chameleon.h
index eb159547dc2594418ab41f27e7e6efd43e6f96d8..60320c596ca81a9407e41756844cb4eb37e65fbe 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -215,14 +215,16 @@ int  CHAMELEON_Recursive_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flt
                                       blkaddr_fct_t get_blkaddr, blkldd_fct_t get_blkldd,
                                       blkrankof_fct_t get_rankof, void* get_rankof_arg );
 
-int CHAMELEON_Ipiv_Create ( CHAM_ipiv_t       **ipivptr,
-                            const CHAM_desc_t  *desc,
-                            int                 m,
-                            void               *data );
-int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t       **ipivptr,
-                            const CHAM_desc_t  *desc );
-int CHAMELEON_Ipiv_Flush  ( const CHAM_ipiv_t        *ipiv,
-                            const RUNTIME_sequence_t *sequence );
+int CHAMELEON_Ipiv_Create ( CHAM_ipiv_t        **ipivptr,
+                            cham_side_t          side,
+                            int                  mb,
+                            int                  m,
+                            int                  p,
+                            int                  np,
+                            void                *data );
+int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr );
+int CHAMELEON_Ipiv_Flush( const CHAM_ipiv_t  *ipiv,
+                          const RUNTIME_sequence_t *sequence );
 int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc,
                            int         *ipiv,
                            int          root );
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index d9540d5e8472ed15af9ead33296f9ac9cd76d6ab..279cf70dc8a999b91f80e24763f8ca13e49b42d5 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -333,6 +333,8 @@ void *CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A );
 void  CHAMELEON_zgetrf_WS_Free( void *ws );
 void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A );
 void  CHAMELEON_zgetrf_nopiv_WS_Free( void *ws );
+void *CHAMELEON_zlaswp_WS_Alloc( cham_side_t side, const CHAM_desc_t *A );
+void  CHAMELEON_zlaswp_WS_Free( void *ws );
 
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv(        int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
 int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
@@ -369,7 +371,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t
 int CHAMELEON_zTile_to_Lapack( CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA ) __attribute__((deprecated("Please refer to CHAMELEON_zDesc2Lap() instead")));
 int CHAMELEON_zLap2Desc( cham_uplo_t uplo, CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t *A );
 int CHAMELEON_zDesc2Lap( cham_uplo_t uplo, CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA );
-void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA, CHAM_ipiv_t *descIPIV );
+void CHAMELEON_Ipiv_Init( CHAM_ipiv_t *descIPIV );
 
 /**
  *  User Builder function prototypes
diff --git a/include/chameleon/descriptor_helpers.h b/include/chameleon/descriptor_helpers.h
index 3c607885ae07f8410b85a90b4c70181ef9ba4491..208e588b17b9d41e53b556dfe74c6664ba8f1b7a 100644
--- a/include/chameleon/descriptor_helpers.h
+++ b/include/chameleon/descriptor_helpers.h
@@ -45,8 +45,11 @@ extern "C" {
  * @name Mapping functions
  * @{
  */
-int chameleon_getrankof_2d     ( const CHAM_desc_t *A, int m, int n );
-int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n );
+int chameleon_getrankof_2d          ( const CHAM_desc_t *A,    int m, int n );
+int chameleon_getrankof_2d_diag     ( const CHAM_desc_t *A,    int m, int n );
+int chameleon_getrankof_ipiv_2d_row ( const CHAM_ipiv_t *ipiv, int m, int n );
+int chameleon_getrankof_ipiv_2d_col ( const CHAM_ipiv_t *ipiv, int m, int n );
+int chameleon_getrankof_ipiv_2d_diag( const CHAM_ipiv_t *ipiv, int m, int n );
 
 typedef struct custom_dist_s{
     int *blocks_dist;         // Matrix of size dist_m times dist_n with values from 1 to number of process MPI
@@ -69,12 +72,12 @@ int chameleon_p_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k, int p );
 void chameleon_get_proc_involved_in_panelk_2dbc( const CHAM_desc_t *A,
                                                  int                k,
                                                  int                n,
-                                                 void              *ws_getrf );
+                                                 void              *ws_reduce );
 
 void chameleon_get_proc_involved_in_rowpanelk_2dbc( const CHAM_desc_t *A,
                                                     int                m,
                                                     int                k,
-                                                    void              *ws_getrf );
+                                                    void              *ws_reduce );
 
 /**
  * @}
@@ -178,6 +181,28 @@ chameleon_getblkdim( const CHAM_desc_t *A, int m, cham_dim_t dim, int lm )
         return chameleon_getblkdim_n( A, m, lm );
     }
 }
+
+/**
+ *
+ * @ingroup Descriptor
+ *
+ * @brief Return the length of a tile of the pivot vector with regular tile sizes.
+ *
+ * @param[in] IPIV
+ *          The ipiv descriptor.
+ *
+ * @param[in] m
+ *          The index of the tile.
+ *
+ * @retval The length of the tile.
+ *
+ */
+static inline int
+chameleon_getblkdim_ipiv( const CHAM_ipiv_t *IPIV, int m )
+{
+    return ( ( m + 1 ) * IPIV->mb > IPIV->m ) ? IPIV->m - m * IPIV->mb : IPIV->mb;
+}
+
 /**
  * @}
  */
diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h
index 2d7125621127ea5d63df0bf97d133444ece6a3ac..66b601c83454be2b1f5e081a7826101fe2456886 100644
--- a/include/chameleon/runtime.h
+++ b/include/chameleon/runtime.h
@@ -718,33 +718,37 @@ void RUNTIME_ddisplay_oneprofile (cham_tasktype_t task);
 void RUNTIME_sdisplay_allprofile ();
 void RUNTIME_sdisplay_oneprofile (cham_tasktype_t task);
 
-void RUNTIME_ipiv_create ( CHAM_ipiv_t *ipiv,
-                          const CHAM_desc_t *desc );
-void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv,
-                           const CHAM_desc_t *desc );
+void RUNTIME_ipiv_create ( CHAM_ipiv_t *ipiv );
+void RUNTIME_pivot_create ( CHAM_desc_pivot_t *pivot );
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv );
+void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot );
 void RUNTIME_ipiv_gather ( const RUNTIME_sequence_t *sequence,
                            CHAM_ipiv_t *desc, int *ipiv, int node );
 
+void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence,
+                           const CHAM_desc_pivot_t *pivot, int m );
+void RUNTIME_pivot_flush ( const RUNTIME_sequence_t *sequence,
+                           const CHAM_desc_pivot_t  *pivot );
 void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m );
 void RUNTIME_ipiv_flush ( const RUNTIME_sequence_t *sequence,
-                          const CHAM_ipiv_t *ipiv );
+                          const CHAM_ipiv_t  *ipiv );
 void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m );
 
 void *RUNTIME_ipiv_getaddr   ( const CHAM_ipiv_t *ipiv, int m );
-void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h );
-void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h );
 void *RUNTIME_perm_getaddr   ( const CHAM_ipiv_t *ipiv, int m );
 void *RUNTIME_invp_getaddr   ( const CHAM_ipiv_t *ipiv, int m );
+void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h );
+void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h );
 
 static inline void *
-RUNTIME_pivot_getaddr( CHAM_ipiv_t *ipiv, int rank, int k, int h ) {
+RUNTIME_pivot_getaddr( CHAM_desc_pivot_t *pivot, int rank, int k, int h ) {
     if ( h%2 == 0 ) {
-        return RUNTIME_nextpiv_getaddr( ipiv, rank, k, h );
+        return RUNTIME_nextpiv_getaddr( pivot, rank, k, h );
     }
     else {
-        return RUNTIME_prevpiv_getaddr( ipiv, rank, k, h );
+        return RUNTIME_prevpiv_getaddr( pivot, rank, k, h );
     }
 }
 
diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h
index c1d3d3a105a861f0083f7a6046bf8afbd321501d..d1f7dc73841230b5626d09556369d2a8e1b00fde 100644
--- a/include/chameleon/struct.h
+++ b/include/chameleon/struct.h
@@ -173,29 +173,49 @@ struct chameleon_desc_s {
     void *schedopt;   /**> scheduler (QUARK|StarPU) specific structure                        */
 };
 
+typedef struct chameleon_ipiv_s CHAM_ipiv_t;
+
+typedef int (*blkdim_ipiv_fct_t)    ( const CHAM_ipiv_t*, int );
+typedef int (*blkrankof_ipiv_fct_t) ( const CHAM_ipiv_t*, int, int );
+
+/**
+ *  CHAMELEON structure to hold pivot information for the LU factorization with partial pivoting
+ */
+struct chameleon_ipiv_s {
+    blkdim_ipiv_fct_t    get_blkdim; /**> function to get chameleon tiles dimension within algorithms                                 */
+    blkrankof_ipiv_fct_t get_rankof; /**> function to get chameleon tiles MPI rank                                                    */
+
+    int         *data;               /**> Pointer to the data                                                                         */
+    void        *ipiv;               /**> Opaque array of pointers for the runtimes to handle the ipiv array                          */
+    void        *perm;               /**> Opaque array of pointers for the runtimes to handle the temporary permutation array         */
+    void        *invp;               /**> Opaque array of pointers for the runtimes to handle the temporary inverse permutation array */
+    int64_t      mpitag_ipiv;        /**> Initial mpi tag values for the ipiv handles                                                 */
+    int64_t      mpitag_perm;        /**> Initial mpi tag values for the perm handles                                                 */
+    int64_t      mpitag_invp;        /**> Initial mpi tag values for the invp handles                                                 */
+
+    int          myrank;             /**> MPI rank of the descriptor */
+    int          i;                  /**> row index to the beginning of the submatrix                                                 */
+    int          m;                  /**> The number of row in the vector ipiv                                                        */
+    int          mb;                 /**> The number of row per block                                                                 */
+    int          mt;                 /**> The number of tiles                                                                         */
+    int          P;                  /**> The number of processes per column on a tiled matrix                                        */
+    int          NP;                 /**> The total number of processes                                                               */
+};
+
 /**
  *  CHAMELEON structure to hold pivot informations for the LU factorization with partial pivoting
  */
-typedef struct chameleon_piv_s {
-    const CHAM_desc_t *desc;   /**> Reference descriptor to compute data mapping based on diagonal tiles,
-                              and get floating reference type                                        */
-    int    *data;    /**> Pointer to the data                                                    */
-    void   *ipiv;    /**> Opaque array of pointers for the runtimes to handle the ipiv array     */
-    void   *nextpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */
-    void   *prevpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */
-    void   *perm;    /**> Opaque array of pointers for the runtimes to handle the temporary permutation array */
-    void   *invp;    /**> Opaque array of pointers for the runtimes to handle the temporary inverse permutation array */
-    int64_t mpitag_ipiv;    /**> Initial mpi tag values for the ipiv handles    */
-    int64_t mpitag_nextpiv; /**> Initial mpi tag values for the nextpiv handles */
-    int64_t mpitag_prevpiv; /**> Initial mpi tag values for the prevpiv handles */
-    int64_t mpitag_perm;    /**> Initial mpi tag values for the nextpiv handles */
-    int64_t mpitag_invp;    /**> Initial mpi tag values for the prevpiv handles */
-    int     i;              /**> row index to the beginning of the submatrix    */
-    int     m;              /**> The number of row in the vector ipiv           */
-    int     mb;             /**> The number of row per block                    */
-    int     mt;             /**> The number of tiles                            */
-    int     n;              /**> The number of column considered (must be updated for each panel) */
-} CHAM_ipiv_t;
+typedef struct chameleon_desc_pivot_s {
+    void          *nextpiv;        /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */
+    void          *prevpiv;        /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */
+    int64_t        mpitag_nextpiv; /**> Initial mpi tag values for the nextpiv handles                                      */
+    int64_t        mpitag_prevpiv; /**> Initial mpi tag values for the prevpiv handles                                      */
+    int            P;              /**> The number of processes per column of the tiled matrix                              */
+    int            Q;              /**> The number of processes per line of the tiled matrix                                */
+    int            nb;             /**> The number of row per block                                                         */
+    int            n;              /**> The number of column considered (must be updated for each panel) */
+    cham_flttype_t dtyp;           /**> Arithmetic used to store the rows/columns to swap                                   */
+} CHAM_desc_pivot_t;
 
 static inline void *
 CHAM_tile_get_ptr( const CHAM_tile_t *tile )
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index d6e8e698c2b8d6a4edd9d516a16b0317638f2851..ee36352d4e2350c2793dde2b6847a20d53c572e0 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -174,7 +174,7 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
 void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
                                  CHAM_ipiv_t *ipiv );
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
-                               CHAM_ipiv_t *ws, int k, int h, int rank );
+                               CHAM_desc_pivot_t *pivot, int k, int h, int rank );
 void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
                                int m0, int m, int k, int K1, int K2,
                                const CHAM_ipiv_t *ipivdesc, int ipivk );
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index 8ff0ae0485e4427a01480ad8cfcfdedcceda76c2..abe3a2c543dfe1e196163f9ae84e8812e97b0bed 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -535,36 +535,38 @@ void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
-                                     CHAM_ipiv_t *ws );
+                                     CHAM_ipiv_t       *ipiv,
+                                     CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
-                                        CHAM_ipiv_t *ws );
+                                        CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
                                       int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ws );
+                                      CHAM_ipiv_t *ipiv,
+                                      CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
                                          int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
-                                         CHAM_ipiv_t *ws );
+                                         CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
                                                int m, int n, int h, int m0,
                                                void *ws,
                                                CHAM_desc_t *A, int Am, int An,
                                                void **clargs_ptr,
-                                               CHAM_ipiv_t *ipiv );
+                                               CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
                                                      CHAM_desc_t *A, int An,
                                                      void **clargs_ptr,
-                                                     CHAM_ipiv_t *ipiv );
+                                                     CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
                                                int m, int n, int h, int m0,
@@ -572,22 +574,24 @@ void INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
                                                CHAM_desc_t *A, int Am, int An,
                                                CHAM_desc_t *U, int Um, int Un,
                                                void **clargs_ptr,
-                                               CHAM_ipiv_t *ipiv );
+                                               CHAM_ipiv_t *ipiv,
+                                               CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
                                                      CHAM_desc_t *A, int An,
                                                      CHAM_desc_t *U, int Um, int Un,
                                                      void **clargs_ptr,
-                                                     CHAM_ipiv_t *ipiv );
+                                                     CHAM_ipiv_t *ipiv,
+                                                     CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
                                       int m, int n, int h, int ib,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ws );
+                                      CHAM_desc_pivot_t *pivot );
 
 void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
                                   CHAM_desc_t            *A,
-                                  CHAM_ipiv_t            *ipiv,
+                                  CHAM_desc_pivot_t      *pivot,
                                   int                     k,
                                   int                     h,
                                   int                     n,
diff --git a/runtime/openmp/codelets/codelet_ipiv.c b/runtime/openmp/codelets/codelet_ipiv.c
index ccc7e8f46ea496f30d00d81dd0f418ba07fdd175..6cede3e07f3c8a47df8c766bd3056855352eb210 100644
--- a/runtime/openmp/codelets/codelet_ipiv.c
+++ b/runtime/openmp/codelets/codelet_ipiv.c
@@ -30,7 +30,7 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
-                                 CHAM_ipiv_t *ipiv )
+                                 CHAM_ipiv_t            *ipiv )
 {
     assert( 0 );
     (void)options;
@@ -38,11 +38,11 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
-                               CHAM_ipiv_t *ipiv, int k, int h, int rank )
+                               CHAM_desc_pivot_t *pivot, int k, int h, int rank )
 {
     assert( 0 );
     (void)options;
-    (void)ipiv;
+    (void)pivot;
     (void)k;
     (void)h;
     (void)rank;
diff --git a/runtime/openmp/codelets/codelet_zgetrf_batched.c b/runtime/openmp/codelets/codelet_zgetrf_batched.c
index 707cf59c58e37e9e8fdb3e01aed538c8d8f0dc1b..2f2b7c99785ffa738de81a54cdf5e964fa6c69a3 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_batched.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_batched.c
@@ -28,7 +28,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
                                           void *ws,
                                           CHAM_desc_t *A, int Am, int An,
                                           void **clargs_ptr,
-                                          CHAM_ipiv_t *ipiv )
+                                          CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -40,21 +40,21 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
     (void)Am;
     (void)An;
     (void)clargs_ptr;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void
 INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
     (void)A;
     (void)An;
     (void)clargs_ptr;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void
@@ -64,7 +64,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
                                           CHAM_desc_t *A, int Am, int An,
                                           CHAM_desc_t *U, int Um, int Un,
                                           void **clargs_ptr,
-                                          CHAM_ipiv_t *ipiv )
+                                          CHAM_ipiv_t *ipiv,
+                                          CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -81,6 +82,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
     (void)Un;
     (void)clargs_ptr;
     (void)ipiv;
+    (void)pivot;
 }
 
 void
@@ -88,7 +90,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 CHAM_desc_t *U, int Um, int Un,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_ipiv_t *ipiv,
+                                                CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -99,4 +102,5 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
     (void)Un;
     (void)clargs_ptr;
     (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf_blocked.c b/runtime/openmp/codelets/codelet_zgetrf_blocked.c
index 399a557a4d41f806aa51ab983b6412710c732d1b..a071bec0db476e523d4743c9575e4fea81d8ccf0 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_blocked.c
@@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
                                       int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_ipiv_t *ipiv,
+                                      CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -41,13 +42,14 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
     (void)Um;
     (void)Un;
     (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
                                          int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
-                                         CHAM_ipiv_t *ipiv )
+                                         CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -62,13 +64,13 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
     (void)U;
     (void)Um;
     (void)Un;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
                                       int m, int n, int h, int ib,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -79,5 +81,5 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
     (void)U;
     (void)Um;
     (void)Un;
-    (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf_percol.c b/runtime/openmp/codelets/codelet_zgetrf_percol.c
index 6a1d2f6634fa8d6cd818b8ee87f9038abf2f0041..77fcca1ab9fbbe7f23973e22a04198c0df320134 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_percol.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_percol.c
@@ -25,7 +25,8 @@
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
-                                     CHAM_ipiv_t *ipiv )
+                                     CHAM_ipiv_t       *ipiv,
+                                     CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -37,12 +38,13 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
     (void)Am;
     (void)An;
     (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
-                                        CHAM_ipiv_t *ipiv )
+                                        CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -53,5 +55,5 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
     (void)A;
     (void)Am;
     (void)An;
-    (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/openmp/codelets/codelet_zipiv_allreduce.c b/runtime/openmp/codelets/codelet_zipiv_allreduce.c
index f96857000c70933cc7f3a7ce3a6885f9c9ccb913..088ff53ae34f0fbd7830c1d8b77820212546d437 100644
--- a/runtime/openmp/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/openmp/codelets/codelet_zipiv_allreduce.c
@@ -19,7 +19,7 @@
 
 void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
                                   CHAM_desc_t            *A,
-                                  CHAM_ipiv_t            *ipiv,
+                                  CHAM_desc_pivot_t      *pivot,
                                   int                     k,
                                   int                     h,
                                   int                     n,
@@ -27,7 +27,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
 {
     (void)options;
     (void)A;
-    (void)ipiv;
+    (void)pivot;
     (void)k;
     (void)h;
     (void)n;
diff --git a/runtime/openmp/control/runtime_descriptor_ipiv.c b/runtime/openmp/control/runtime_descriptor_ipiv.c
index 97b1fbdeccf87b670a87ce98e941afd44fe1d0b0..2cb34db52346e8f02e33a8ba564603fc09105765 100644
--- a/runtime/openmp/control/runtime_descriptor_ipiv.c
+++ b/runtime/openmp/control/runtime_descriptor_ipiv.c
@@ -19,20 +19,28 @@
  */
 #include "chameleon_openmp.h"
 
-void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv,
-                          const CHAM_desc_t *desc )
+void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)ipiv;
-    (void)desc;
 }
 
-void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv,
-                           const CHAM_desc_t *desc )
+void RUNTIME_pivot_create( CHAM_desc_pivot_t *pivot )
+{
+    assert( 0 ); /* Stub: creates nothing; the assertion fails on any call unless NDEBUG is defined. NOTE(review): presumably the pivot descriptor is unsupported with this runtime - confirm. */
+    (void)pivot; /* The parameter is deliberately left unused. */
+}
+
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)ipiv;
-    (void)desc;
+}
+
+void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot )
+{
+    assert( 0 ); /* Stub: releases nothing; the assertion fails on any call unless NDEBUG is defined. */
+    (void)pivot; /* The parameter is deliberately left unused. */
 }
 
 void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
@@ -43,22 +51,22 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
     return NULL;
 }
 
-void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h )
+void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h )
 {
     assert( 0 );
-    (void)ipiv;
+    (void)pivot;
     (void)rank;
-    (void)m;
+    (void)k;
     (void)h;
     return NULL;
 }
 
-void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h )
+void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h )
 {
     assert( 0 );
-    (void)ipiv;
+    (void)pivot;
     (void)rank;
-    (void)m;
+    (void)k;
     (void)h;
     return NULL;
 }
@@ -79,21 +87,38 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k )
     return NULL;
 }
 
-void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
-                          const CHAM_ipiv_t *ipiv, int m )
+void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence,
+                           const CHAM_desc_pivot_t *pivot, int rank )
 {
     assert( 0 );
     (void)sequence;
-    (void)ipiv;
-    (void)m;
+    (void)pivot;
+    (void)rank;
+}
+
+void RUNTIME_pivot_flush( const RUNTIME_sequence_t *sequence,
+                          const CHAM_desc_pivot_t  *pivot )
+{
+    assert( 0 ); /* Stub: flushes nothing; the assertion fails on any call unless NDEBUG is defined. */
+    (void)pivot; /* Both parameters are deliberately left unused. */
+    (void)sequence;
 }
 
 void RUNTIME_ipiv_flush( const RUNTIME_sequence_t *sequence,
                          const CHAM_ipiv_t        *ipiv )
 {
     assert( 0 );
+    (void)sequence;
     (void)ipiv;
+}
+
+void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    assert( 0 ); /* Stub: flushes nothing; the assertion fails on any call unless NDEBUG is defined. */
     (void)sequence;
+    (void)ipiv; /* All three parameters are deliberately left unused. */
+    (void)m;
 }
 
 void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
diff --git a/runtime/parsec/codelets/codelet_ipiv.c b/runtime/parsec/codelets/codelet_ipiv.c
index 2145e00b3575d7de659f28422064616815acd22a..ded4d960ca5917342d7cef6235a68f69caafe075 100644
--- a/runtime/parsec/codelets/codelet_ipiv.c
+++ b/runtime/parsec/codelets/codelet_ipiv.c
@@ -38,11 +38,11 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
-                               CHAM_ipiv_t *ipiv, int k, int h, int rank )
+                               CHAM_desc_pivot_t *pivot, int k, int h, int rank )
 {
     assert( 0 );
     (void)options;
-    (void)ipiv;
+    (void)pivot;
     (void)k;
     (void)h;
     (void)rank;
diff --git a/runtime/parsec/codelets/codelet_zgetrf_batched.c b/runtime/parsec/codelets/codelet_zgetrf_batched.c
index 366255b3d54a3e9176dca321416b08e40426929c..50cb396c299d642bc906b42d18f368c62c6b738a 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_batched.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_batched.c
@@ -28,7 +28,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
                                           void *ws,
                                           CHAM_desc_t *A, int Am, int An,
                                           void **clargs_ptr,
-                                          CHAM_ipiv_t *ipiv )
+                                          CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -40,21 +40,21 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
     (void)Am;
     (void)An;
     (void)clargs_ptr;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void
 INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
     (void)A;
     (void)An;
     (void)clargs_ptr;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void
@@ -64,7 +64,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
                                           CHAM_desc_t *A, int Am, int An,
                                           CHAM_desc_t *U, int Um, int Un,
                                           void **clargs_ptr,
-                                          CHAM_ipiv_t *ipiv )
+                                          CHAM_ipiv_t *ipiv,
+                                          CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -81,6 +82,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
     (void)Un;
     (void)clargs_ptr;
     (void)ipiv;
+    (void)pivot;
 }
 
 void
@@ -88,7 +90,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 CHAM_desc_t *U, int Um, int Un,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_ipiv_t *ipiv,
+                                                CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -99,4 +102,5 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
     (void)Un;
     (void)clargs_ptr;
     (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/parsec/codelets/codelet_zgetrf_blocked.c b/runtime/parsec/codelets/codelet_zgetrf_blocked.c
index 812ab095e0bf27c6c0858db4c8d261ff3ac5c234..81a7df7a5acc01515e68a1960f36dc7e22a10065 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_blocked.c
@@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
                                       int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_ipiv_t       *ipiv,
+                                      CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -41,13 +42,14 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
     (void)Um;
     (void)Un;
     (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
                                          int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
-                                         CHAM_ipiv_t *ipiv )
+                                         CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -62,13 +64,13 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
     (void)U;
     (void)Um;
     (void)Un;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
                                       int m, int n, int h, int ib,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -79,5 +81,5 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
     (void)U;
     (void)Um;
     (void)Un;
-    (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/parsec/codelets/codelet_zgetrf_percol.c b/runtime/parsec/codelets/codelet_zgetrf_percol.c
index f7f5d0205a13e53d7224d0ea5bc25921888751fd..c18466a6c8990247d249b1596d19ad71da1a588f 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_percol.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_percol.c
@@ -25,7 +25,8 @@
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
-                                     CHAM_ipiv_t *ipiv )
+                                     CHAM_ipiv_t       *ipiv,
+                                     CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -37,12 +38,13 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
     (void)Am;
     (void)An;
     (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
-                                        CHAM_ipiv_t *ipiv )
+                                        CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -53,5 +55,5 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
     (void)A;
     (void)Am;
     (void)An;
-    (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/parsec/codelets/codelet_zipiv_allreduce.c b/runtime/parsec/codelets/codelet_zipiv_allreduce.c
index 4b9cacd70fb879357d4850b0839a3ce7a112b35a..d7bb5a1823cdbfef9fcc5275d9b6592b72a742b6 100644
--- a/runtime/parsec/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/parsec/codelets/codelet_zipiv_allreduce.c
@@ -19,7 +19,7 @@
 
 void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
                                   CHAM_desc_t            *A,
-                                  CHAM_ipiv_t            *ipiv,
+                                  CHAM_desc_pivot_t      *pivot,
                                   int                     k,
                                   int                     h,
                                   int                     n,
@@ -27,7 +27,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
 {
     (void)options;
     (void)A;
-    (void)ipiv;
+    (void)pivot;
     (void)k;
     (void)h;
     (void)n;
diff --git a/runtime/parsec/control/runtime_descriptor_ipiv.c b/runtime/parsec/control/runtime_descriptor_ipiv.c
index 4b34eef933a12b295a1927c5264a40f0edc5f586..cda5e9ad0ee881ea31a50fe759b727c50ee95b1d 100644
--- a/runtime/parsec/control/runtime_descriptor_ipiv.c
+++ b/runtime/parsec/control/runtime_descriptor_ipiv.c
@@ -19,20 +19,28 @@
  */
 #include "chameleon_parsec.h"
 
-void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv,
-                          const CHAM_desc_t *desc )
+void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)ipiv;
-    (void)desc;
 }
 
-void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv,
-                           const CHAM_desc_t *desc )
+void RUNTIME_pivot_create( CHAM_desc_pivot_t *pivot )
+{
+    assert( 0 ); /* Stub: creates nothing; the assertion fails on any call unless NDEBUG is defined. NOTE(review): presumably the pivot descriptor is unsupported with this runtime - confirm. */
+    (void)pivot; /* The parameter is deliberately left unused. */
+}
+
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)ipiv;
-    (void)desc;
+}
+
+void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot )
+{
+    assert( 0 ); /* Stub: releases nothing; the assertion fails on any call unless NDEBUG is defined. */
+    (void)pivot; /* The parameter is deliberately left unused. */
 }
 
 void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
@@ -43,22 +51,22 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
     return NULL;
 }
 
-void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h )
+void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h )
 {
     assert( 0 );
-    (void)ipiv;
+    (void)pivot;
     (void)rank;
-    (void)m;
+    (void)k;
     (void)h;
     return NULL;
 }
 
-void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h )
+void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h )
 {
     assert( 0 );
-    (void)ipiv;
+    (void)pivot;
     (void)rank;
-    (void)m;
+    (void)k;
     (void)h;
     return NULL;
 }
@@ -79,6 +87,23 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k )
     return NULL;
 }
 
+void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence,
+                           const CHAM_desc_pivot_t *pivot, int rank )
+{
+    assert( 0 ); /* Stub: flushes nothing; the assertion fails on any call unless NDEBUG is defined. */
+    (void)sequence; /* All three parameters are deliberately left unused. */
+    (void)pivot;
+    (void)rank;
+}
+
+void RUNTIME_pivot_flush( const RUNTIME_sequence_t *sequence,
+                          const CHAM_desc_pivot_t  *pivot )
+{
+    assert( 0 ); /* Stub: flushes nothing; the assertion fails on any call unless NDEBUG is defined. */
+    (void)pivot; /* Both parameters are deliberately left unused. */
+    (void)sequence;
+}
+
 void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m )
 {
diff --git a/runtime/quark/codelets/codelet_ipiv.c b/runtime/quark/codelets/codelet_ipiv.c
index bf0846d3dfe9d6043162827a4d0a3eab9414caed..77dc6f6d475df9808cd0dd8ceb4985bd6ad9ac5d 100644
--- a/runtime/quark/codelets/codelet_ipiv.c
+++ b/runtime/quark/codelets/codelet_ipiv.c
@@ -38,11 +38,11 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
-                               CHAM_ipiv_t *ipiv, int k, int h, int rank )
+                               CHAM_desc_pivot_t *pivot, int k, int h, int rank )
 {
     assert( 0 );
     (void)options;
-    (void)ipiv;
+    (void)pivot;
     (void)k;
     (void)h;
     (void)rank;
diff --git a/runtime/quark/codelets/codelet_zgetrf_batched.c b/runtime/quark/codelets/codelet_zgetrf_batched.c
index a3a21329086b657c26ca726f985f31c771818f0e..a41f230ff85d504e9325f5807bee88cb25dbe2bf 100644
--- a/runtime/quark/codelets/codelet_zgetrf_batched.c
+++ b/runtime/quark/codelets/codelet_zgetrf_batched.c
@@ -28,7 +28,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
                                           void *ws,
                                           CHAM_desc_t *A, int Am, int An,
                                           void **clargs_ptr,
-                                          CHAM_ipiv_t *ipiv )
+                                          CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -40,21 +40,21 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
     (void)Am;
     (void)An;
     (void)clargs_ptr;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void
 INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
     (void)A;
     (void)An;
     (void)clargs_ptr;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void
@@ -64,7 +64,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
                                           CHAM_desc_t *A, int Am, int An,
                                           CHAM_desc_t *U, int Um, int Un,
                                           void **clargs_ptr,
-                                          CHAM_ipiv_t *ipiv )
+                                          CHAM_ipiv_t *ipiv,
+                                          CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -81,6 +82,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
     (void)Un;
     (void)clargs_ptr;
     (void)ipiv;
+    (void)pivot;
 }
 
 void
@@ -88,7 +90,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 CHAM_desc_t *U, int Um, int Un,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_ipiv_t *ipiv,
+                                                CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -99,4 +102,5 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
     (void)Un;
     (void)clargs_ptr;
     (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/quark/codelets/codelet_zgetrf_blocked.c b/runtime/quark/codelets/codelet_zgetrf_blocked.c
index d3e1029709c256286c0615ff97714b6295efb011..674b898fd03d0919d881ad6418428a2278f56075 100644
--- a/runtime/quark/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/quark/codelets/codelet_zgetrf_blocked.c
@@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
                                       int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_ipiv_t *ipiv,
+                                      CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -41,13 +42,14 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
     (void)Um;
     (void)Un;
     (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
                                          int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
-                                         CHAM_ipiv_t *ipiv )
+                                         CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -62,13 +64,13 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
     (void)U;
     (void)Um;
     (void)Un;
-    (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
                                       int m, int n, int h, int ib,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -79,5 +81,5 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
     (void)U;
     (void)Um;
     (void)Un;
-    (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/quark/codelets/codelet_zgetrf_percol.c b/runtime/quark/codelets/codelet_zgetrf_percol.c
index cda25ec30442422a1347899ffd83b2643637e4ac..a6041fc88a55e2a24c4e8e656b7c27b7b2a76bd8 100644
--- a/runtime/quark/codelets/codelet_zgetrf_percol.c
+++ b/runtime/quark/codelets/codelet_zgetrf_percol.c
@@ -25,7 +25,8 @@
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
-                                     CHAM_ipiv_t *ipiv )
+                                     CHAM_ipiv_t       *ipiv,
+                                     CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -37,12 +38,13 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
     (void)Am;
     (void)An;
     (void)ipiv;
+    (void)pivot;
 }
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
-                                        CHAM_ipiv_t *ipiv )
+                                        CHAM_desc_pivot_t *pivot )
 {
     assert( 0 );
     (void)options;
@@ -53,5 +55,5 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
     (void)A;
     (void)Am;
     (void)An;
-    (void)ipiv;
+    (void)pivot;
 }
diff --git a/runtime/quark/codelets/codelet_zipiv_allreduce.c b/runtime/quark/codelets/codelet_zipiv_allreduce.c
index fe169b65d847f3d74a9d8967dbb1223e15e7cca2..9d93d2cd38fd0aefeb50ed15c4aeb61d98a0de6c 100644
--- a/runtime/quark/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/quark/codelets/codelet_zipiv_allreduce.c
@@ -19,7 +19,7 @@
 
 void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
                                   CHAM_desc_t            *A,
-                                  CHAM_ipiv_t            *ipiv,
+                                  CHAM_desc_pivot_t      *pivot,
                                   int                     k,
                                   int                     h,
                                   int                     n,
@@ -27,7 +27,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
 {
     (void)options;
     (void)A;
-    (void)ipiv;
+    (void)pivot;
     (void)k;
     (void)h;
     (void)n;
diff --git a/runtime/quark/control/runtime_descriptor_ipiv.c b/runtime/quark/control/runtime_descriptor_ipiv.c
index 16c83c381b4ebd88a5fd44ed00a5ece48ce89bb4..3f16bfe4cdd03398ce01406a0d40050846669a70 100644
--- a/runtime/quark/control/runtime_descriptor_ipiv.c
+++ b/runtime/quark/control/runtime_descriptor_ipiv.c
@@ -19,20 +19,28 @@
  */
 #include "chameleon_quark.h"
 
-void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv,
-                          const CHAM_desc_t *desc )
+void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)ipiv;
-    (void)desc;
 }
 
-void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv,
-                           const CHAM_desc_t *desc )
+void RUNTIME_pivot_create( CHAM_desc_pivot_t *pivot )
+{
+    assert( 0 ); /* Stub: creates nothing; the assertion fails on any call unless NDEBUG is defined. NOTE(review): presumably the pivot descriptor is unsupported with this runtime - confirm. */
+    (void)pivot; /* The parameter is deliberately left unused. */
+}
+
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)ipiv;
-    (void)desc;
+}
+
+void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot )
+{
+    assert( 0 ); /* Stub: releases nothing; the assertion fails on any call unless NDEBUG is defined. */
+    (void)pivot; /* The parameter is deliberately left unused. */
 }
 
 void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
@@ -43,22 +51,22 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
     return NULL;
 }
 
-void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h )
+void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h )
 {
     assert( 0 );
-    (void)ipiv;
+    (void)pivot;
     (void)rank;
-    (void)m;
+    (void)k;
     (void)h;
     return NULL;
 }
 
-void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h )
+void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h )
 {
     assert( 0 );
-    (void)ipiv;
+    (void)pivot;
     (void)rank;
-    (void)m;
+    (void)k;
     (void)h;
     return NULL;
 }
@@ -79,6 +87,23 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k )
     return NULL;
 }
 
+void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence,
+                           const CHAM_desc_pivot_t *pivot, int rank )
+{
+    assert( 0 ); /* Stub: flushes nothing; the assertion fails on any call unless NDEBUG is defined. */
+    (void)sequence; /* All three parameters are deliberately left unused. */
+    (void)pivot;
+    (void)rank;
+}
+
+void RUNTIME_pivot_flush( const RUNTIME_sequence_t *sequence,
+                          const CHAM_desc_pivot_t  *pivot )
+{
+    assert( 0 ); /* Stub: flushes nothing; the assertion fails on any call unless NDEBUG is defined. */
+    (void)pivot; /* Both parameters are deliberately left unused. */
+    (void)sequence;
+}
+
 void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m )
 {
diff --git a/runtime/starpu/codelets/codelet_ipiv.c b/runtime/starpu/codelets/codelet_ipiv.c
index 5a16c6e2dda5d2e411415bf368f214bbbc8ec71b..97e228eab636ce9c310b84a5c692a1c5fefee670 100644
--- a/runtime/starpu/codelets/codelet_ipiv.c
+++ b/runtime/starpu/codelets/codelet_ipiv.c
@@ -111,7 +111,7 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
         cl_args     = malloc( sizeof(struct cl_laswp_args_s) );
         cl_args->m0 = m0;
         cl_args->n  = n;
-        cl_args->m  = ipiv->desc->m;
+        cl_args->m  = ipiv->m;
 
         cl_args->data = ipiv->data + m0;
 
@@ -124,14 +124,14 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
-                               CHAM_ipiv_t *ipiv, int k, int h, int rank )
+                               CHAM_desc_pivot_t *pivot, int k, int h, int rank )
 {
-    starpu_data_handle_t prevpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h-1 );
+    starpu_data_handle_t prevpiv = RUNTIME_pivot_getaddr( pivot, rank, k, h-1 );
 
 #if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI)
 #if !defined(HAVE_STARPU_MPI_REDUX_WRAPUP)
-    starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h );
-    if ( h < ipiv->n ) {
+    starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( pivot, rank, k, h );
+    if ( h < pivot->n ) {
         starpu_mpi_redux_data_prio_tree( options->sequence->comm, nextpiv,
                                          options->priority, 2 /* Binary tree */ );
     }
diff --git a/runtime/starpu/codelets/codelet_zgetrf_batched.c b/runtime/starpu/codelets/codelet_zgetrf_batched.c
index 0ff4ed9854228109928e30ae4b34013338a32a5c..79aae3a122819a32cbd1c2969feb1154627efa2b 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_batched.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_batched.c
@@ -94,7 +94,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
                                           void *ws,
                                           CHAM_desc_t *A, int Am, int An,
                                           void **clargs_ptr,
-                                          CHAM_ipiv_t *ipiv )
+                                          CHAM_desc_pivot_t *pivot )
 {
 #if !defined(HAVE_STARPU_NONE_NONZERO)
     /* STARPU_NONE can't be equal to 0 */
@@ -136,7 +136,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options,
                                               A->get_blktile( A, Am, An ) );
 
     if ( clargs->tasks_nbr == batch_size ) {
-        INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, An, clargs_ptr, ipiv );
+        INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, An, clargs_ptr, pivot );
     }
 }
 
@@ -146,7 +146,7 @@ void
 INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_desc_pivot_t *pivot )
 {
 #if !defined(HAVE_STARPU_NONE_NONZERO)
     /* STARPU_NONE can't be equal to 0 */
@@ -160,7 +160,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
     if ( clargs == NULL ) {
         return;
     }
-    int access_npiv = ( clargs->h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
+    int access_npiv = ( clargs->h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
     int access_ppiv = ( clargs->h == 0 )       ? STARPU_NONE : STARPU_R;
 
     rt_starpu_insert_task(
@@ -168,8 +168,8 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
         /* Task codelet arguments */
         STARPU_CL_ARGS,           clargs, sizeof(struct cl_zgetrf_batched_args_s),
         STARPU_DATA_MODE_ARRAY,   clargs->handle_mode, clargs->tasks_nbr,
-        access_npiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h   ),
-        access_ppiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h-1 ),
+        access_npiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, clargs->h   ),
+        access_ppiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, clargs->h-1 ),
         STARPU_PRIORITY,          options->priority,
         STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
@@ -186,7 +186,7 @@ void
 INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_desc_pivot_t *pivot )
 {
     struct cl_zgetrf_batched_args_s *myclargs = *clargs_ptr;
     int rankA = A->myrank;
@@ -199,8 +199,8 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
 
     INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_panel_percol_offdiag_batched, zgetrf_panel_offdiag_batched, zgetrf_batched, myclargs->tasks_nbr + 2 );
 
-    access_npiv = ( myclargs->h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
-    access_ppiv = ( myclargs->h == 0 )       ? STARPU_NONE : STARPU_R;
+    access_npiv = ( myclargs->h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
+    access_ppiv = ( myclargs->h == 0 )        ? STARPU_NONE : STARPU_R;
 
     /*
      * Register the data handles, no exchange needed
@@ -209,8 +209,8 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options,
     for ( k = 0; k < myclargs->tasks_nbr; k++ ) {
         starpu_cham_register_descr( &nbdata, descrs, myclargs->handle_mode[ k ].handle, STARPU_RW );
     }
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, myclargs->h ),   access_npiv );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, myclargs->h-1 ), access_ppiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, myclargs->h ),   access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, myclargs->h-1 ), access_ppiv );
 
     task = starpu_task_create();
     task->cl = cl;
@@ -311,7 +311,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
                                           CHAM_desc_t *A, int Am, int An,
                                           CHAM_desc_t *U, int Um, int Un,
                                           void **clargs_ptr,
-                                          CHAM_ipiv_t *ipiv )
+                                          CHAM_ipiv_t *ipiv,
+                                          CHAM_desc_pivot_t *pivot )
 {
 #if !defined(HAVE_STARPU_NONE_NONZERO)
     /* STARPU_NONE can't be equal to 0 */
@@ -370,7 +371,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options,
                                               A->get_blktile( A, Am, An ) );
 
     if ( clargs->tasks_nbr == batch_size ) {
-        INSERT_TASK_zgetrf_panel_blocked_batched_flush( options, A, An, U, Um, Un, clargs_ptr, ipiv );
+        INSERT_TASK_zgetrf_panel_blocked_batched_flush( options, A, An, U, Um, Un, clargs_ptr, ipiv, pivot );
     }
 }
 
@@ -381,7 +382,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 CHAM_desc_t *U, int Um, int Un,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_ipiv_t *ipiv,
+                                                CHAM_desc_pivot_t *pivot )
 {
 #if !defined(HAVE_STARPU_NONE_NONZERO)
     /* STARPU_NONE can't be equal to 0 */
@@ -397,7 +399,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
         return;
     }
 
-    access_npiv = ( clargs->h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
+    access_npiv = ( clargs->h == pivot->n ) ? STARPU_R : STARPU_REDUX;
     access_ipiv = STARPU_RW;
     access_ppiv = STARPU_R;
     accessU     = STARPU_RW;
@@ -423,8 +425,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
         /* Task codelet arguments */
         STARPU_CL_ARGS,           clargs, sizeof(struct cl_zgetrf_batched_args_s),
         STARPU_DATA_MODE_ARRAY,   clargs->handle_mode, clargs->tasks_nbr,
-        access_npiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h ),
-        access_ppiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h - 1 ),
+        access_npiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, clargs->h ),
+        access_ppiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, clargs->h - 1 ),
         access_ipiv,              RUNTIME_ipiv_getaddr( ipiv, An ),
         accessU,                  RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un ),
         STARPU_PRIORITY,          options->priority,
@@ -444,7 +446,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
                                                 CHAM_desc_t *A, int An,
                                                 CHAM_desc_t *U, int Um, int Un,
                                                 void **clargs_ptr,
-                                                CHAM_ipiv_t *ipiv )
+                                                CHAM_ipiv_t *ipiv,
+                                                CHAM_desc_pivot_t *pivot )
 {
     struct cl_zgetrf_batched_args_s *myclargs = *clargs_ptr;
     int rankA = A->myrank;
@@ -458,7 +461,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
 
     INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_panel_blocked_batched, zgetrf_panel_blocked_batched, zgetrf_batched, myclargs->tasks_nbr + 4 );
 
-    access_npiv = ( myclargs->h == ipiv->n ) ? STARPU_R : STARPU_REDUX;
+    access_npiv = ( myclargs->h == pivot->n ) ? STARPU_R : STARPU_REDUX;
     access_ipiv = STARPU_RW;
     access_ppiv = STARPU_R;
     accessU     = STARPU_RW;
@@ -486,8 +489,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options,
     for ( k = 0; k < myclargs->tasks_nbr; k++ ) {
         starpu_cham_register_descr( &nbdata, descrs, myclargs->handle_mode[ k ].handle, STARPU_RW );
     }
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, myclargs->h ),   access_npiv );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, myclargs->h-1 ), access_ppiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, myclargs->h ),   access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, myclargs->h-1 ), access_ppiv );
     starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An),                       access_ipiv );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
                                                   RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
index f1df48f3cc7c3b6d460f859bc064e841bd4f5dc7..b2c3b1c5801a9fd9c6c877191553265d455387e5 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
@@ -98,7 +98,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
                                       int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_ipiv_t *ipiv,
+                                      CHAM_desc_pivot_t *pivot )
 {
 #if !defined(HAVE_STARPU_NONE_NONZERO)
     /* STARPU_NONE can't be equal to 0 */
@@ -130,9 +131,9 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
     clargs->sequence = options->sequence;
     clargs->request  = options->request;
 
-    int access_ipiv = ( h == 0 )       ? STARPU_W    : STARPU_RW;
-    int access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
-    int access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
+    int access_ipiv = ( h == 0 )        ? STARPU_W    : STARPU_RW;
+    int access_npiv = ( h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
+    int access_ppiv = ( h == 0 )        ? STARPU_NONE : STARPU_R;
     int accessU     = STARPU_RW;
 
     if ( h == 0 ) {
@@ -163,8 +164,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
         /* Task handles */
         STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         access_ipiv,              RUNTIME_ipiv_getaddr( ipiv, An ),
-        access_npiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),
-        access_ppiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
+        access_npiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, h ),
+        access_ppiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ),
         accessU,                  RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
 
         /* Common task arguments */
@@ -181,7 +182,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
                                       int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_ipiv_t *ipiv,
+                                      CHAM_desc_pivot_t *pivot )
 {
     int ret, access_ipiv, access_npiv, access_ppiv, accessU;
     struct starpu_task *task;
@@ -199,9 +201,9 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 
     INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_blocked_diag, zgetrf_blocked_diag, zgetrf_blocked, 5 );
 
-    access_ipiv = ( h == 0 )       ? STARPU_W    : STARPU_RW;
-    access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
-    access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
+    access_ipiv = ( h == 0 )        ? STARPU_W    : STARPU_RW;
+    access_npiv = ( h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
+    access_ppiv = ( h == 0 )        ? STARPU_NONE : STARPU_R;
     accessU     = STARPU_RW;
     if ( h == 0 ) {
         accessU = STARPU_NONE;
@@ -220,8 +222,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
     starpu_cham_exchange_init_params( options, &params, rankA );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ),     STARPU_RW );
     starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An),               access_ipiv );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),   access_npiv );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h ),   access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), access_ppiv );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ),     accessU );
 
     task = starpu_task_create();
@@ -318,15 +320,15 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
                                          int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
-                                         CHAM_ipiv_t *ipiv )
+                                         CHAM_desc_pivot_t *pivot )
 {
 #if !defined(HAVE_STARPU_NONE_NONZERO)
     /* STARPU_NONE can't be equal to 0 */
     fprintf( stderr, "INSERT_TASK_zgetrf_blocked_diag: STARPU_NONE can not be equal to 0\n" );
     assert( 0 );
 #endif
-    int access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
-    int access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
+    int access_npiv = ( h == pivot->n )        ? STARPU_R    : STARPU_REDUX;
+    int access_ppiv = ( h == 0 )               ? STARPU_NONE : STARPU_R;
     int accessU     = ((h%ib == 0) && (h > 0)) ? STARPU_R : STARPU_NONE;
     int rankA       = A->get_rankof(A, Am, An);
 
@@ -376,8 +378,8 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
 
         /* Task handles */
         STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        access_npiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),
-        access_ppiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
+        access_npiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, h ),
+        access_ppiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ),
         accessU,                  RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
 
         /* Common task arguments */
@@ -394,12 +396,12 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
                                          int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
-                                         CHAM_ipiv_t *ipiv )
+                                         CHAM_desc_pivot_t *pivot )
 {
     int ret;
     struct starpu_task *task;
     int rankA       = A->get_rankof(A, Am, An);
-    int access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
+    int access_npiv = ( h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
     int access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
     int accessU     = ((h%ib == 0) && (h > 0)) ? STARPU_R : STARPU_NONE;
 
@@ -423,8 +425,8 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
      */
     starpu_cham_exchange_init_params( options, &params, rankA );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ),     STARPU_RW );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),   access_npiv );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h ),   access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), access_ppiv );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
                                                   RTBLKADDR( U, ChamComplexDouble, Um, Un ),
                                                   accessU );
@@ -510,7 +512,7 @@ CODELETS_CPU(zgetrf_blocked_trsm, cl_zgetrf_blocked_trsm_cpu_func)
 void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
                                       int m, int n, int h, int ib,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_desc_pivot_t *pivot )
 {
     void (*callback)(void*) = options->profiling ? cl_zgetrf_blocked_trsm_callback : NULL;
     const char *cl_name = "zgetrf_blocked_trsm";
@@ -544,7 +546,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
 
         /* Task handles */
         STARPU_RW,                RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
-        STARPU_R,                 RUNTIME_pivot_getaddr( ipiv, rankU, Un, h-1 ),
+        STARPU_R,                 RUNTIME_pivot_getaddr( pivot, rankU, Un, h-1 ),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
@@ -559,7 +561,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
 void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
                                       int m, int n, int h, int ib,
                                       CHAM_desc_t *U, int Um, int Un,
-                                      CHAM_ipiv_t *ipiv )
+                                      CHAM_desc_pivot_t *pivot )
 {
     int ret;
     struct starpu_task *task;
@@ -576,7 +578,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
      */
     starpu_cham_exchange_init_params( options, &params, rankU );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ),     STARPU_RW );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankU, Un, h-1 ), STARPU_R  );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankU, Un, h-1 ), STARPU_R  );
 
     task = starpu_task_create();
     task->cl = cl;
diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
index 9a0ec048b78b68569974267edb5c62aa97ce65d2..95fb7ea88c785d65dbdf095563db664b5656d8ff 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -86,7 +86,8 @@ CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func )
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
-                                     CHAM_ipiv_t *ipiv )
+                                     CHAM_ipiv_t *ipiv,
+                                     CHAM_desc_pivot_t *pivot )
 {
     void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_diag_callback : NULL;
     const char *cl_name = "zgetrf_percol_diag";
@@ -102,9 +103,9 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
         return;
     }
 
-    int access_ipiv = ( h == 0 )       ? STARPU_W    : STARPU_RW;
-    int access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
-    int access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
+    int access_ipiv = ( h == 0 )        ? STARPU_W    : STARPU_RW;
+    int access_npiv = ( h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
+    int access_ppiv = ( h == 0 )        ? STARPU_NONE : STARPU_R;
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
@@ -132,8 +133,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
         /* Task handles */
         STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         access_ipiv,              RUNTIME_ipiv_getaddr( ipiv, An ),
-        access_npiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h   ),
-        access_ppiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
+        access_npiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, h   ),
+        access_ppiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
@@ -148,7 +149,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
                                      int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
-                                     CHAM_ipiv_t *ipiv )
+                                     CHAM_ipiv_t *ipiv,
+                                     CHAM_desc_pivot_t *pivot )
 {
     int ret, access_ipiv, access_npiv, access_ppiv;
     struct starpu_task *task;
@@ -160,9 +162,9 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 
     INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_percol_diag, zgetrf_percol_diag, zgetrf_percol, 4 );
 
-    access_ipiv = ( h == 0 )       ? STARPU_W    : STARPU_RW;
-    access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
-    access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
+    access_ipiv = ( h == 0 )        ? STARPU_W    : STARPU_RW;
+    access_npiv = ( h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
+    access_ppiv = ( h == 0 )        ? STARPU_NONE : STARPU_R;
 
     /*
      * Register the data handles, no exchange needed
@@ -170,8 +172,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
     starpu_cham_exchange_init_params( options, &params, rankA );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ),     STARPU_RW );
     starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An),               access_ipiv );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),   access_npiv );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h ),   access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), access_ppiv );
 
     task = starpu_task_create();
     task->cl = cl;
@@ -242,11 +244,11 @@ CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func)
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
-                                        CHAM_ipiv_t *ipiv )
+                                        CHAM_desc_pivot_t *pivot )
 {
     void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL;
     const char *cl_name = "zgetrf_percol_offdiag";
-    int access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
+    int access_npiv = ( h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
     int access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
     int rankA       = A->get_rankof(A, Am, An);
 #if !defined(HAVE_STARPU_NONE_NONZERO)
@@ -284,8 +286,8 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
 
         /* Task handles */
         STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        access_npiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h   ),
-        access_ppiv,              RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ),
+        access_npiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, h   ),
+        access_ppiv,              RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
@@ -300,7 +302,7 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
                                         int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
-                                        CHAM_ipiv_t *ipiv )
+                                        CHAM_desc_pivot_t *pivot )
 {
     int ret, access_npiv, access_ppiv;
     struct starpu_task *task;
@@ -312,16 +314,16 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
 
     INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_percol_offdiag, zgetrf_percol_offdiag, zgetrf_percol, 3 );
 
-    access_npiv = ( h == ipiv->n ) ? STARPU_R    : STARPU_REDUX;
-    access_ppiv = ( h == 0 )       ? STARPU_NONE : STARPU_R;
+    access_npiv = ( h == pivot->n ) ? STARPU_R    : STARPU_REDUX;
+    access_ppiv = ( h == 0 )        ? STARPU_NONE : STARPU_R;
 
     /*
      * Register the data handles, no exchange needed
      */
     starpu_cham_exchange_init_params( options, &params, rankA );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ),     STARPU_RW );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ),   access_npiv );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h ),   access_npiv );
+    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), access_ppiv );
 
     task = starpu_task_create();
     task->cl = cl;
diff --git a/runtime/starpu/codelets/codelet_zipiv_allreduce.c b/runtime/starpu/codelets/codelet_zipiv_allreduce.c
index e79a1841d491524d67ffd3c398bba8efe5706690..51680cd9d4a99469541856b530f227a26a8682e3 100644
--- a/runtime/starpu/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/starpu/codelets/codelet_zipiv_allreduce.c
@@ -81,7 +81,7 @@ CODELETS_CPU( zipiv_allreduce, cl_zipiv_allreduce_cpu_func )
 
 static void
 INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options,
-                                  CHAM_ipiv_t            *ipiv,
+                                  CHAM_desc_pivot_t      *pivot,
                                   int                     me,
                                   int                     dst,
                                   int                     k,
@@ -90,14 +90,14 @@ INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options,
     rt_starpu_insert_task(
         NULL,
         STARPU_EXECUTE_ON_NODE, dst,
-        STARPU_R,               RUNTIME_pivot_getaddr( ipiv, me, k, h ),
+        STARPU_R,               RUNTIME_pivot_getaddr( pivot, me, k, h ),
         STARPU_PRIORITY,        options->priority,
         0 );
 }
 
 static void
 INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options,
-                                  CHAM_ipiv_t            *ipiv,
+                                  CHAM_desc_pivot_t      *pivot,
                                   int                     me,
                                   int                     src,
                                   int                     k,
@@ -112,20 +112,20 @@ INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options,
     rt_starpu_insert_task(
         &cl_zipiv_allreduce,
         STARPU_CL_ARGS,           clargs, sizeof(struct cl_redux_args_s),
-        STARPU_RW,                RUNTIME_pivot_getaddr( ipiv, me,  k, h ),
-        STARPU_R,                 RUNTIME_pivot_getaddr( ipiv, src, k, h ),
+        STARPU_RW,                RUNTIME_pivot_getaddr( pivot, me,  k, h ),
+        STARPU_R,                 RUNTIME_pivot_getaddr( pivot, src, k, h ),
         STARPU_EXECUTE_ON_NODE,   me,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
         STARPU_PRIORITY,          options->priority,
         0 );
-    starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( ipiv, src, k, h ) );
+    starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( pivot, src, k, h ) );
 }
 
 #else /* defined(CHAMELEON_STARPU_USE_INSERT) */
 
 static void
 INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options,
-                                  CHAM_ipiv_t            *ipiv,
+                                  CHAM_desc_pivot_t      *pivot,
                                   int                     me,
                                   int                     dst,
                                   int                     k,
@@ -135,7 +135,7 @@ INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options,
 
     starpu_cham_exchange_init_params( options, &params, dst );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
-                                                  RUNTIME_pivot_getaddr( ipiv, me, k, h ),
+                                                  RUNTIME_pivot_getaddr( pivot, me, k, h ),
                                                   STARPU_R );
     starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
     (void)cl;
@@ -144,7 +144,7 @@ INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options,
 
 static void
 INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options,
-                                  CHAM_ipiv_t            *ipiv,
+                                  CHAM_desc_pivot_t      *pivot,
                                   int                     me,
                                   int                     src,
                                   int                     k,
@@ -157,10 +157,10 @@ INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options,
 
     starpu_cham_exchange_init_params( options, &params, me );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
-                                                  RUNTIME_pivot_getaddr( ipiv, me,  k, h ),
+                                                  RUNTIME_pivot_getaddr( pivot, me,  k, h ),
                                                   STARPU_RW );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
-                                                  RUNTIME_pivot_getaddr( ipiv, src, k, h ),
+                                                  RUNTIME_pivot_getaddr( pivot, src, k, h ),
                                                   STARPU_R );
 
     task = starpu_task_create();
@@ -193,7 +193,7 @@ INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options,
     }
 
     starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
-    starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( ipiv, src, k, h ) );
+    starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( pivot, src, k, h ) );
 }
 
 #endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
@@ -201,7 +201,7 @@ INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options,
 static void
 zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options,
                                        CHAM_desc_t            *A,
-                                       CHAM_ipiv_t            *ipiv,
+                                       CHAM_desc_pivot_t      *pivot,
                                        int                    *proc_involved,
                                        int                     k,
                                        int                     h,
@@ -213,9 +213,9 @@ zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options,
     int shift = 1;
 
     if ( h > 0 ) {
-        starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( ipiv, A->myrank, k, h-1 ) );
+        starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( pivot, A->myrank, k, h-1 ) );
     }
-    if ( h >= ipiv->n ) {
+    if ( h >= pivot->n ) {
         return;
     }
 
@@ -233,8 +233,8 @@ zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options,
             p_send = proc_involved[ ( me + shift               ) % np_involved ];
             p_recv = proc_involved[ ( me - shift + np_involved ) % np_involved ];
 
-            INSERT_TASK_zipiv_allreduce_send( options, ipiv, A->myrank, p_send, k, h    );
-            INSERT_TASK_zipiv_allreduce_recv( options, ipiv, A->myrank, p_recv, k, h, n );
+            INSERT_TASK_zipiv_allreduce_send( options, pivot, A->myrank, p_send, k, h    );
+            INSERT_TASK_zipiv_allreduce_recv( options, pivot, A->myrank, p_recv, k, h, n );
 
             shift   = shift << 1;
             np_iter = chameleon_ceil( np_iter, 2 );
@@ -245,32 +245,32 @@ zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options,
 void
 INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
                              CHAM_desc_t            *A,
-                             CHAM_ipiv_t            *ipiv,
+                             CHAM_desc_pivot_t      *pivot,
                              int                     k,
                              int                     h,
                              int                     n,
                              void                   *ws )
 {
-    struct chameleon_pzgetrf_s *tmp = (struct chameleon_pzgetrf_s *)ws;
-    cham_getrf_allreduce_t alg = tmp->alg_allreduce;
+    struct chameleon_pzlaswp_s *tmp = (struct chameleon_pzlaswp_s *)ws;
+    cham_getrf_allreduce_t      alg = tmp->reduce.alg_allreduce;
     switch( alg ) {
     case ChamStarPUTasks:
     default:
-        zipiv_allreduce_chameleon_starpu_task( options, A, ipiv, tmp->proc_involved, k, h, n );
+        zipiv_allreduce_chameleon_starpu_task( options, A, pivot, tmp->reduce.proc_involved, k, h, n );
     }
 }
 #else
 void
 INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
                              CHAM_desc_t            *A,
-                             CHAM_ipiv_t            *ipiv,
+                             CHAM_desc_pivot_t      *pivot,
                              int                     k,
                              int                     h,
                              int                     n,
                              void                   *ws )
 {
     if ( h > 0 ) {
-        starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( ipiv, A->myrank, k, h-1 ) );
+        starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( pivot, A->myrank, k, h-1 ) );
     }
 
     (void)options;
diff --git a/runtime/starpu/codelets/codelet_zperm_allreduce.c b/runtime/starpu/codelets/codelet_zperm_allreduce.c
index 6e6bf8a0c3408f6064fa84f1674336c861938411..eccba676db24837421da8e9cc6d760e5feaee149 100644
--- a/runtime/starpu/codelets/codelet_zperm_allreduce.c
+++ b/runtime/starpu/codelets/codelet_zperm_allreduce.c
@@ -250,9 +250,9 @@ zperm_allreduce_chameleon_starpu_task( const RUNTIME_option_t     *options,
                                        int                         ipivk,
                                        int                         k,
                                        int                         n,
-                                       struct chameleon_pzgetrf_s *ws )
+                                       CHAM_reduce_t              *reduce )
 {
-    int *proc_involved = ws->proc_involved;
+    int *proc_involved = reduce->proc_involved;
     int  np_involved   = chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt - k );
     int  np_iter       = np_involved;
     int  p_recv, p_send, me, p_first;
@@ -299,12 +299,12 @@ INSERT_TASK_zperm_allreduce_row( const RUNTIME_option_t *options,
                                  int                     n,
                                  void                   *ws )
 {
-    struct chameleon_pzgetrf_s *tmp = (struct chameleon_pzgetrf_s *)ws;
-    cham_getrf_allreduce_t alg = tmp->alg_allreduce;
+    struct chameleon_pzlaswp_s *tmp = (struct chameleon_pzlaswp_s *)ws;
+    cham_getrf_allreduce_t      alg = tmp->reduce.alg_allreduce;
     switch( alg ) {
     case ChamStarPUTasks:
     default:
-        zperm_allreduce_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, k, n, tmp );
+        zperm_allreduce_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, k, n, &(tmp->reduce) );
     }
 }
 
diff --git a/runtime/starpu/codelets/codelet_zperm_allreduce_col.c b/runtime/starpu/codelets/codelet_zperm_allreduce_col.c
index 192b977632c54780d36e0086a909e1edca30a3c2..f1ad9ff9bcc1217410373d77162eedac87a3f578 100644
--- a/runtime/starpu/codelets/codelet_zperm_allreduce_col.c
+++ b/runtime/starpu/codelets/codelet_zperm_allreduce_col.c
@@ -250,9 +250,9 @@ zperm_allreduce_col_chameleon_starpu_task( const RUNTIME_option_t     *options,
                                            int                         ipivk,
                                            int                         m,
                                            int                         k,
-                                           struct chameleon_pzgetrf_s *ws )
+                                           CHAM_reduce_t              *reduce )
 {
-    int *proc_involved = ws->proc_involved;
+    int *proc_involved = reduce->proc_involved;
     int  np_involved   = chameleon_min( chameleon_desc_datadist_get_iparam(A, 1), A->nt - k );
     int  np_iter       = np_involved;
     int  p_recv, p_send, me, p_first;
@@ -299,12 +299,12 @@ INSERT_TASK_zperm_allreduce_col( const RUNTIME_option_t *options,
                                  int                     k,
                                  void                   *ws )
 {
-    struct chameleon_pzgetrf_s *tmp = (struct chameleon_pzgetrf_s *)ws;
-    cham_getrf_allreduce_t alg = tmp->alg_allreduce;
+    struct chameleon_pzlaswp_s *tmp = (struct chameleon_pzlaswp_s *)ws;
+    cham_getrf_allreduce_t      alg = tmp->reduce.alg_allreduce;
     switch( alg ) {
     case ChamStarPUTasks:
     default:
-        zperm_allreduce_col_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, m, k, tmp );
+        zperm_allreduce_col_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, m, k, &(tmp->reduce) );
     }
 }
 
diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c
index b7d07b171ac8303984afa29e2556be5559eeab3e..ada48a52cb15af180cb870f876fdf5e01f0442db 100644
--- a/runtime/starpu/control/runtime_descriptor_ipiv.c
+++ b/runtime/starpu/control/runtime_descriptor_ipiv.c
@@ -23,19 +23,13 @@
 /**
  *  Create ws_pivot runtime structures
  */
-void RUNTIME_ipiv_create( CHAM_ipiv_t       *ipiv,
-                          const CHAM_desc_t *desc )
+void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv )
 {
     assert( ipiv );
-    size_t                P         = chameleon_desc_datadist_get_iparam(desc, 0);
-    size_t                nbhandles = 3 * ipiv->mt + 2 * P;
+    size_t                nbhandles = 3 * ipiv->mt;
     starpu_data_handle_t *handles   = calloc( nbhandles, sizeof(starpu_data_handle_t) );
     ipiv->ipiv    = handles;
     handles += ipiv->mt;
-    ipiv->nextpiv = handles;
-    handles += P;
-    ipiv->prevpiv = handles;
-    handles += P;
     ipiv->perm    = handles;
     handles += ipiv->mt;
     ipiv->invp    = handles;
@@ -51,10 +45,36 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t       *ipiv,
             chameleon_fatal_error("RUNTIME_ipiv_create", "Can't pursue computation since no more tags are available for ipiv structure");
             return;
         }
-        ipiv->mpitag_nextpiv = ipiv->mpitag_ipiv    + ipiv->mt;
-        ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + P;
-        ipiv->mpitag_perm    = ipiv->mpitag_prevpiv + P;
-        ipiv->mpitag_invp    = ipiv->mpitag_perm    + ipiv->mt;
+        ipiv->mpitag_perm    = ipiv->mpitag_ipiv + ipiv->mt;
+        ipiv->mpitag_invp    = ipiv->mpitag_perm + ipiv->mt;
+    }
+#endif
+}
+
+/**
+ *  Create the runtime structures of a pivot descriptor: the nextpiv and prevpiv handle arrays (P entries each)
+ */
+void RUNTIME_pivot_create( CHAM_desc_pivot_t *pivot )
+{
+    assert( pivot );
+    size_t                nbhandles = 2 * pivot->P;
+    starpu_data_handle_t *handles   = calloc( nbhandles, sizeof(starpu_data_handle_t) ); /* NOTE(review): result is not checked */
+    pivot->nextpiv = handles; /* first P slots */
+    handles += pivot->P;
+    pivot->prevpiv = handles; /* last P slots of the same allocation */
+#if defined(CHAMELEON_USE_MPI)
+    /*
+     * Book one MPI tag per handle (2 * P in total):
+     * the first P for nextpiv, the following P for prevpiv
+     */
+    {
+        chameleon_starpu_tag_init();
+        pivot->mpitag_nextpiv = chameleon_starpu_tag_book( nbhandles );
+        if ( pivot->mpitag_nextpiv == -1 ) {
+            chameleon_fatal_error("RUNTIME_pivot_create", "Can't pursue computation since no more tags are available for pivot structure");
+            return;
+        }
+        pivot->mpitag_prevpiv = pivot->mpitag_nextpiv + pivot->P;
     }
 #endif
 }
@@ -62,12 +82,11 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t       *ipiv,
 /**
  *  Destroy ws_pivot runtime structures
  */
-void RUNTIME_ipiv_destroy( CHAM_ipiv_t       *ipiv,
-                           const CHAM_desc_t *desc )
+void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv )
 {
     int                   i;
     starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv);
-    size_t                nbhandles = 3 * ipiv->mt + 2 * chameleon_desc_datadist_get_iparam(desc, 0);
+    size_t                nbhandles = 3 * ipiv->mt;
 
     for(i=0; i<nbhandles; i++) {
         if ( *handle != NULL ) {
@@ -79,13 +98,34 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t       *ipiv,
 
     free( ipiv->ipiv    );
     ipiv->ipiv    = NULL;
-    ipiv->nextpiv = NULL;
-    ipiv->prevpiv = NULL;
     ipiv->perm    = NULL;
     ipiv->invp    = NULL;
     chameleon_starpu_tag_release( ipiv->mpitag_ipiv );
 }
 
+/**
+ *  Destroy the runtime structures of a pivot descriptor: unregister the nextpiv/prevpiv handles and free them
+ */
+void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot )
+{
+    int                   i;
+    starpu_data_handle_t *handle = (starpu_data_handle_t*)(pivot->nextpiv);
+    size_t                nbhandles = 2 * pivot->P; /* nextpiv (P) followed by prevpiv (P) */
+
+    for(i=0; i<nbhandles; i++) {
+        if ( *handle != NULL ) { /* only the slots that were actually registered */
+            starpu_data_unregister_submit( *handle );
+            *handle = NULL;
+        }
+        handle++;
+    }
+
+    free( pivot->nextpiv ); /* single allocation: prevpiv points inside it (see RUNTIME_pivot_create) */
+    pivot->nextpiv = NULL;
+    pivot->prevpiv = NULL;
+    chameleon_starpu_tag_release( pivot->mpitag_nextpiv );
+}
+
 void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
 {
     starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv);
@@ -104,8 +144,7 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
 
 #if defined(CHAMELEON_USE_MPI)
     {
-        const CHAM_desc_t *A     = ipiv->desc;
-        int                owner = A->get_rankof( A, m, m );
+        int                owner = ipiv->get_rankof( ipiv, m, m );
         int64_t            tag   = ipiv->mpitag_ipiv + mm;
         starpu_mpi_data_register( *handle, tag, owner );
     }
@@ -115,48 +154,45 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m )
     return (void*)(*handle);
 }
 
-void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h )
+void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h )
 {
-    starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(ipiv->nextpiv);
-    const CHAM_desc_t *A = ipiv->desc;
+    starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(pivot->nextpiv);
+    int                   Q       = pivot->Q;
 
-    nextpiv += rank/chameleon_desc_datadist_get_iparam(A, 1);
+    nextpiv += rank/Q;
     assert( nextpiv );
 
     if ( *nextpiv != NULL ) {
         return (void*)(*nextpiv);
     }
-
-    int64_t kk    = k + (ipiv->i / ipiv->mb);
     int     owner = rank;
-    int     ncols = (kk == (A->nt-1)) ? A->n - kk * A->nb : A->nb;
-    int64_t tag   = ipiv->mpitag_nextpiv + owner/chameleon_desc_datadist_get_iparam(A, 1);
+    int     ncols = pivot->nb;
+    int64_t tag   = pivot->mpitag_nextpiv + owner/Q;
 
-    cppi_register( nextpiv, A->dtyp, ncols, tag, owner );
+    cppi_register( nextpiv, pivot->dtyp, ncols, tag, owner );
 
     assert( *nextpiv );
     (void)h;
     return (void*)(*nextpiv);
 }
 
-void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h )
+void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h )
 {
-    starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(ipiv->prevpiv);
-    const CHAM_desc_t *A = ipiv->desc;
+    starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(pivot->prevpiv);
+    int                   Q       = pivot->Q;
 
-    prevpiv += rank/chameleon_desc_datadist_get_iparam(A, 1);
+    prevpiv += rank/Q;
     assert( prevpiv );
 
     if ( *prevpiv != NULL ) {
         return (void*)(*prevpiv);
     }
 
-    int64_t kk    = k + (ipiv->i / ipiv->mb);
     int     owner = rank;
-    int     ncols = (kk == (A->nt-1)) ? A->n - kk * A->nb : A->nb;
-    int64_t tag   = ipiv->mpitag_prevpiv + owner/chameleon_desc_datadist_get_iparam(A, 1);
+    int     ncols = pivot->nb;
+    int64_t tag   = pivot->mpitag_prevpiv + owner/Q;
 
-    cppi_register( prevpiv, A->dtyp, ncols, tag, owner );
+    cppi_register( prevpiv, pivot->dtyp, ncols, tag, owner );
 
     assert( *prevpiv );
     (void)h;
@@ -181,8 +217,7 @@ void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int m )
 
 #if defined(CHAMELEON_USE_MPI)
     {
-        const CHAM_desc_t *A     = ipiv->desc;
-        int                owner = A->get_rankof( A, m, m );
+        int                owner = ipiv->get_rankof( ipiv, m, m );
         int64_t            tag   = ipiv->mpitag_perm + mm;
         starpu_mpi_data_register( *handle, tag, owner );
     }
@@ -210,8 +245,7 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int m )
 
 #if defined(CHAMELEON_USE_MPI)
     {
-        const CHAM_desc_t *A     = ipiv->desc;
-        int                owner = A->get_rankof( A, m, m );
+        int                owner = ipiv->get_rankof( ipiv, m, m );
         int64_t            tag   = ipiv->mpitag_invp + mm;
         starpu_mpi_data_register( *handle, tag, owner );
     }
@@ -221,14 +255,14 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int m )
     return (void*)(*handle);
 }
 
-void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
-                          const CHAM_ipiv_t *ipiv, int rank )
+void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence,
+                           const CHAM_desc_pivot_t *pivot, int rank )
 {
     starpu_data_handle_t *handle;
-    const CHAM_desc_t *A = ipiv->desc;
+    int                   Q = pivot->Q;
 
-    handle = (starpu_data_handle_t*)(ipiv->nextpiv);
-    handle += rank/chameleon_desc_datadist_get_iparam(A, 1);
+    handle = (starpu_data_handle_t*)(pivot->nextpiv);
+    handle += rank/Q;
 
     if ( *handle != NULL ) {
 #if defined(CHAMELEON_USE_MPI)
@@ -240,8 +274,8 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
         }
     }
 
-    handle = (starpu_data_handle_t*)(ipiv->prevpiv);
-    handle += rank/chameleon_desc_datadist_get_iparam(A, 1);
+    handle = (starpu_data_handle_t*)(pivot->prevpiv);
+    handle += rank/Q;
 
     if ( *handle != NULL ) {
 #if defined(CHAMELEON_USE_MPI)
@@ -254,10 +288,21 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
     }
 
     (void)sequence;
-    (void)ipiv;
+    (void)pivot;
     (void)rank;
 }
 
+void RUNTIME_pivot_flush( const RUNTIME_sequence_t *sequence,
+                          const CHAM_desc_pivot_t  *pivot )
+{
+    int p;
+
+    /* Flush all P nextpiv/prevpiv slots: flushk indexes them with rank/Q, so slot p is reached with rank p*Q */
+    for (p = 0; p < pivot->P; p++) {
+        RUNTIME_pivot_flushk( sequence, pivot, p * pivot->Q );
+    }
+}
+
 void RUNTIME_ipiv_flush( const RUNTIME_sequence_t *sequence,
                          const CHAM_ipiv_t        *ipiv )
 {
@@ -269,11 +314,34 @@ void RUNTIME_ipiv_flush( const RUNTIME_sequence_t *sequence,
     }
 }
 
+void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
+                          const CHAM_ipiv_t *ipiv, int m )
+{
+    starpu_data_handle_t *handle;
+    int64_t mm = m + ( ipiv->i / ipiv->mb ); /* m shifted by the tile offset i / mb */
+    /* Flush the ipiv handle of tile m, if that handle has been registered */
+    handle = (starpu_data_handle_t*)(ipiv->ipiv);
+    handle += mm;
+    /* A NULL slot means the handle was never registered: nothing to flush */
+    if ( *handle != NULL ) {
+#if defined(CHAMELEON_USE_MPI)
+        starpu_mpi_cache_flush( sequence->comm, *handle );
+        if ( starpu_mpi_data_get_rank( *handle ) == ipiv->myrank ) /* with MPI, only the rank the handle is registered on */
+#endif
+        {
+            chameleon_starpu_data_wont_use( *handle );
+        }
+    }
+    /* Silence unused-parameter warnings (sequence is only read in the MPI branch) */
+    (void)sequence;
+    (void)ipiv;
+    (void)m;
+}
+
 void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
                           const CHAM_ipiv_t *ipiv, int m )
 {
     starpu_data_handle_t *handle;
-    const CHAM_desc_t *A = ipiv->desc;
     int64_t mm = m + ( ipiv->i / ipiv->mb );
 
     handle = (starpu_data_handle_t*)(ipiv->perm);
@@ -282,7 +350,7 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
     if ( *handle != NULL ) {
 #if defined(CHAMELEON_USE_MPI)
         starpu_mpi_cache_flush( sequence->comm, *handle );
-        if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
+        if ( starpu_mpi_data_get_rank( *handle ) == ipiv->myrank )
 #endif
         {
             chameleon_starpu_data_wont_use( *handle );
@@ -295,7 +363,7 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
     if ( *handle != NULL ) {
 #if defined(CHAMELEON_USE_MPI)
         starpu_mpi_cache_flush( sequence->comm, *handle );
-        if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
+        if ( starpu_mpi_data_get_rank( *handle ) == ipiv->myrank )
 #endif
         {
             chameleon_starpu_data_wont_use( *handle );
@@ -305,7 +373,6 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
     (void)sequence;
     (void)ipiv;
     (void)m;
-    (void)A;
 }
 
 void RUNTIME_ipiv_gather( const RUNTIME_sequence_t *sequence,
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index 297d2628a1d114bc9ec2472b1caf2ad381fa442c..e04ed9503b376caa599e5d92fa905b53b74a7f2d 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -119,7 +119,7 @@ if (NOT CHAMELEON_SIMULATION)
                     add_test( test_${cat}_${prec}gesv_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/gesv.in )
                     add_test( test_${cat}_${prec}getrf_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrf.in )
                     set_tests_properties( test_${cat}_${prec}getrf_ppiv_comm_with_task
-                                          PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_GETRF_ALL_REDUCE=cham_spu_tasks" )
+                                          PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_ALLREDUCE=cham_spu_tasks" )
                 endif()
             endif()
         endif()
diff --git a/testing/testing_zgesv.c b/testing/testing_zgesv.c
index 0b5161d55d5134255f674f921a3654965690a0d6..1f3872b9fc844a447f4c014a3bef9b9e7d975f30 100644
--- a/testing/testing_zgesv.c
+++ b/testing/testing_zgesv.c
@@ -38,6 +38,7 @@ testing_zgesv_desc( run_arg_list_t *args, int check )
 {
     testdata_t test_data = { .args = args };
     int        hres      = 0;
+    int        P, Q;
 
     /* Read arguments */
     int         async = parameters_getvalue_int( "async" );
@@ -62,7 +63,11 @@ testing_zgesv_desc( run_arg_list_t *args, int check )
     /* Creates the matrices */
     parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, N, N );
     parameters_desc_create( "X", &descX, ChamComplexDouble, nb, nb, LDB, NRHS, N, NRHS );
-    CHAMELEON_Ipiv_Create( &descIPIV, descA, N, NULL );
+
+    P = chameleon_desc_datadist_get_iparam( descA, 0 );
+    Q = chameleon_desc_datadist_get_iparam( descA, 1 );
+
+    CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descA->mb, N, P, P*Q, NULL );
 
     /* Fills the matrix with random values */
     CHAMELEON_zplrnt_Tile( descA, seedA );
@@ -80,7 +85,6 @@ testing_zgesv_desc( run_arg_list_t *args, int check )
                                            test_data.sequence, &test_data.request );
         CHAMELEON_Desc_Flush( descA, test_data.sequence );
         CHAMELEON_Desc_Flush( descX, test_data.sequence );
-        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
     }
     else {
         hres = CHAMELEON_zgesv_Tile( descA, descIPIV, descX );
@@ -107,7 +111,7 @@ testing_zgesv_desc( run_arg_list_t *args, int check )
 
         if ( hres ) {
             CHAMELEON_Desc_Destroy( &descA0 );
-            CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+            CHAMELEON_Ipiv_Destroy( &descIPIV );
             parameters_desc_destroy( &descA );
             parameters_desc_destroy( &descX );
             return hres;
@@ -124,7 +128,7 @@ testing_zgesv_desc( run_arg_list_t *args, int check )
         CHAMELEON_Desc_Destroy( &descB );
     }
 
-    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    CHAMELEON_Ipiv_Destroy( &descIPIV );
     parameters_desc_destroy( &descA );
     parameters_desc_destroy( &descX );
 
diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c
index bf7d52ab70fecddc68c046f71563330ba2971187..65d9defcc2b5624cebf2eb6fd8cd020af13694ab 100644
--- a/testing/testing_zgetrf.c
+++ b/testing/testing_zgetrf.c
@@ -39,6 +39,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
 {
     testdata_t test_data = { .args = args };
     int        hres      = 0;
+    int        P, Q;
 
     /* Read arguments */
     int         async = parameters_getvalue_int( "async" );
@@ -78,7 +79,11 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
 
     /* Creates the matrices */
     parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N );
-    CHAMELEON_Ipiv_Create( &descIPIV, descA, minMN, NULL );
+
+    P = chameleon_desc_datadist_get_iparam( descA, 0 );
+    Q = chameleon_desc_datadist_get_iparam( descA, 1 );
+    /* Keep the pivot vector at min(M,N) entries, as before the API change (N over-sizes it when M < N) */
+    CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descA->mb, minMN, P, P*Q, NULL );
 
     /* Fills the matrix with random values */
     if ( diag == ChamUnit ) {
@@ -98,7 +103,6 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
     if ( async ) {
         hres = CHAMELEON_zgetrf_Tile_Async( descA, descIPIV, ws, test_data.sequence, &test_data.request );
         CHAMELEON_Desc_Flush( descA, test_data.sequence );
-        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
     }
     else {
         hres = CHAMELEON_zgetrf_Tile( descA, descIPIV );
@@ -130,7 +134,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
         CHAMELEON_zgetrf_WS_Free( ws );
     }
 
-    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    CHAMELEON_Ipiv_Destroy( &descIPIV );
     parameters_desc_destroy( &descA );
 
     return hres;
diff --git a/testing/testing_zgetrs.c b/testing/testing_zgetrs.c
index 6a7fcfc50aa252ace498086f8abf19352599888a..4f9cb6505b0259bc8aedfa0f5a82169782457a94 100644
--- a/testing/testing_zgetrs.c
+++ b/testing/testing_zgetrs.c
@@ -33,6 +33,7 @@ testing_zgetrs_desc( run_arg_list_t *args, int check )
 {
     testdata_t test_data = { .args = args };
     int        hres      = 0;
+    int        P, Q;
 
     /* Read arguments */
     int           async = parameters_getvalue_int( "async" );
@@ -57,7 +58,11 @@ testing_zgetrs_desc( run_arg_list_t *args, int check )
     /* Creates the matrices */
     parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, N, N );
     parameters_desc_create( "X", &descX, ChamComplexDouble, nb, nb, LDB, NRHS, N, NRHS );
-    CHAMELEON_Ipiv_Create( &descIPIV, descA, N, NULL );
+
+    P = chameleon_desc_datadist_get_iparam( descA, 0 );
+    Q = chameleon_desc_datadist_get_iparam( descA, 1 );
+
+    CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descA->mb, N, P, P*Q, NULL );
 
     CHAMELEON_zplrnt_Tile( descA, seedA );
     CHAMELEON_zplrnt_Tile( descX, seedB );
@@ -73,7 +78,6 @@ testing_zgetrs_desc( run_arg_list_t *args, int check )
     if ( async ) {
         hres = CHAMELEON_zgetrs_Tile_Async( trans, descA, descIPIV, descX, ws, test_data.sequence, &test_data.request );
         CHAMELEON_Desc_Flush( descA, test_data.sequence );
-        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
     }
     else {
         hres = CHAMELEON_zgetrs_Tile( trans, descA, descIPIV, descX );
@@ -103,7 +107,7 @@ testing_zgetrs_desc( run_arg_list_t *args, int check )
         CHAMELEON_zgetrf_WS_Free( ws );
     }
 
-    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    CHAMELEON_Ipiv_Destroy( &descIPIV );
     parameters_desc_destroy( &descA );
     parameters_desc_destroy( &descX );
 
diff --git a/testing/testing_zlaswp.c b/testing/testing_zlaswp.c
index 0c6e04d39469a995bdb078a8f29b01c44e6c2743..8fca88da027cd5f1fe6f902ae0b81fa3274a0ab4 100644
--- a/testing/testing_zlaswp.c
+++ b/testing/testing_zlaswp.c
@@ -40,6 +40,7 @@ testing_zlaswp_desc( run_arg_list_t *args, int check )
 {
     testdata_t test_data = { .args = args };
     int        hres      = 0;
+    int        P, Q;
 
     /* Read arguments */
     int         async   = parameters_getvalue_int( "async" );
@@ -55,28 +56,31 @@ testing_zlaswp_desc( run_arg_list_t *args, int check )
 
     int  K        = ( side == ChamLeft ) ? M : N;
     int *IPIV     = malloc( sizeof(int) * K );
+    int  kb;
 
     /* Descriptors */
-    CHAM_desc_t *descA, *descInit;
+    CHAM_desc_t *descA;
     CHAM_ipiv_t *descIPIV;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
     /* Creates the matrices */
-    parameters_desc_create( "Init", &descInit, ChamComplexDouble, nb, nb, K, K, K, K );
     parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N );
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
+    P  = chameleon_desc_datadist_get_iparam( descA, 0 );
+    Q  = chameleon_desc_datadist_get_iparam( descA, 1 );
+    kb = ( side == ChamLeft ) ? descA->mb : descA->nb; /* tile size along the permuted dimension (rows for ChamLeft, K = M) */
+
     testing_zlaswp_ipiv_gen( IPIV, K );
-    CHAMELEON_Ipiv_Create( &descIPIV, descInit, K, IPIV );
-    CHAMELEON_Ipiv_Init( descInit, descIPIV );
+    CHAMELEON_Ipiv_Create( &descIPIV, side, kb, K, P, P*Q, IPIV );
+    CHAMELEON_Ipiv_Init( descIPIV );
 
     /* Calculates the solution */
     testing_start( &test_data );
     if ( async ) {
         hres = CHAMELEON_zlaswp_Tile_Async( side, dir, descA, K1, K2, descIPIV, test_data.sequence, &test_data.request );
         CHAMELEON_Desc_Flush( descA, test_data.sequence );
-        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
     }
     else {
         hres = CHAMELEON_zlaswp_Tile( side, dir, descA, K1, K2, descIPIV );
@@ -116,7 +120,7 @@ testing_zlaswp_desc( run_arg_list_t *args, int check )
     }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
-    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    CHAMELEON_Ipiv_Destroy( &descIPIV );
     parameters_desc_destroy( &descA );
     free( IPIV );