diff --git a/ChangeLog b/ChangeLog
index 545761d09776fcc7b48035e90992eff90d4b6227..4d53148cf1e02349cbb1c7b81ae9eb16c7e9705e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,7 @@
 chameleon-1.4.0
 ------------------------------------------------------------------------
+ - Add the laswp driver and its testing. Be careful: the interface deliberately does not follow the LAPACK API, in order to offer the op(P) \times A and A \times op(P) operations, with op(P) equal to P or P^{-1}
+ - Add the gesv driver to perform LU factorization and solve with its associated testing.
  - StarPU: Update the minimum requirement from 1.3 to 1.4
  - StarPU: When using starpu > 1.4.8, use the new distributed submit interface in the codelets instead of the classical insert task interface.
  - ci: use -Werror to prevent from adding warning to the code
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 5626c253c38ebd7d6b2e9357f565174879be2ba9..573921e49b789ddb19e9379e845f4168b718d9f1 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -28,7 +28,8 @@
 #  @author Loris Lucido
 #  @author Matthieu Kuhn
 #  @author Ana Hourcau
-#  @date 2024-09-18
+#  @author Matteo Marcos
+#  @date 2025-03-24
 #
 ###
 
@@ -150,7 +151,7 @@ set(ZSRC
     zgepdf_qr.c
     zgeqrs.c
     zgeqrs_param.c
-    #zgesv.c
+    zgesv.c
     zgesv_incpiv.c
     zgesv_nopiv.c
     #zgetrf.c
@@ -159,6 +160,7 @@ set(ZSRC
     zgetrf.c
     zgetrs_incpiv.c
     zgetrs_nopiv.c
+    zgetrs.c
     zlacpy.c
     zlange.c
     zlanhe.c
@@ -219,7 +221,7 @@ set(ZSRC
     #pzhetrd_hb2ht.c
     pzhetrd_he2hb.c
     #pzlarft_blgtrd.c
-    #pzlaswp.c
+    pzlaswp.c
     #pzlaswpc.c
     #pztrsmrv.c
     #pzunmqr_blgtrd.c
@@ -237,7 +239,7 @@ set(ZSRC
     #zhegv.c
     #zhegvd.c
     zhetrd.c
-    #zlaswp.c
+    zlaswp.c
     #zlaswpc.c
     #ztrsmrv.c
     ##################
diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 9a6ba171ac622a59b6fadfb9f3726ae57ddc3254..448bb4309a50a8eb7d7c0b5d46bed5569688fab2 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -17,7 +17,8 @@
  * @author Emmanuel Agullo
  * @author Matthieu Kuhn
  * @author Alycia Lisito
- * @date 2025-01-24
+ * @author Matteo Marcos
+ * @date 2025-03-24
  * @precisions normal z -> s d c
  *
  */
@@ -154,7 +155,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
     }
 
     /* Flush temporary data used for the pivoting */
-    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k );
+    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k );
     RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank );
 }
 
@@ -202,7 +203,7 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws,
     free( clargs );
 
     /* Flush temporary data used for the pivoting */
-    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k );
+    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k );
     RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank );
 }
 
@@ -264,7 +265,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
     RUNTIME_data_flush( options->sequence, Up(k, k) );
 
     /* Flush temporary data used for the pivoting */
-    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k );
+    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k );
     RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank );
 }
 
@@ -327,7 +328,7 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws,
     free( clargs );
 
     /* Flush temporary data used for the pivoting */
-    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k );
+    INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k );
     RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank );
 }
 
@@ -410,19 +411,19 @@ chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws,
          * perm array is made of size tempkm for the first row especially.
          * Otherwise, the final copy back to the tile may copy only a partial tile
          */
-        INSERT_TASK_zlaswp_get( options, k*A->mb, tempkm,
+        INSERT_TASK_zlaswp_get( options, ChamDirForward, k*A->mb, tempkm,
                                 ipiv, k, A(k, n), Wu(A->myrank, n) );
 
         for(m=k+1; m<A->mt; m++){
             /* Extract selected rows into A(k, n) */
-            INSERT_TASK_zlaswp_get( options, m*A->mb, minmn,
+            INSERT_TASK_zlaswp_get( options, ChamDirForward, m*A->mb, minmn,
                                     ipiv, k, A(m, n), Wu(A->myrank, n) );
             /* Copy rows from A(k,n) into their final position */
-            INSERT_TASK_zlaswp_set( options, m*A->mb, minmn,
+            INSERT_TASK_zlaswp_set( options, ChamDirForward, m*A->mb, minmn,
                                     ipiv, k, A(k, n), A(m, n) );
         }
 
-        INSERT_TASK_zperm_allreduce( options, A, Wu(A->myrank, n), ipiv, k, k, n, ws );
+        INSERT_TASK_zperm_allreduce( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws );
     }
     break;
     default:
@@ -465,7 +466,7 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws,
          * perm array is made of size tempkm for the first row especially.
          * Otherwise, the final copy back to the tile may copy only a partial tile
          */
-        INSERT_TASK_zlaswp_get( options, k*A->mb, tempkm,
+        INSERT_TASK_zlaswp_get( options, ChamDirForward, k*A->mb, tempkm,
                                 ipiv, k, A(k, n), Wu(A->myrank, n) );
 
         for(m=k+1; m<A->mt; m++){
@@ -474,7 +475,7 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws,
         }
         INSERT_TASK_zlaswp_batched_flush( options, ipiv, k, A(k, n), Wu(A->myrank, n), clargs );
 
-        INSERT_TASK_zperm_allreduce( options, A, Wu(A->myrank, n), ipiv, k, k, n, ws );
+        INSERT_TASK_zperm_allreduce( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws );
 
         free( clargs );
     }
@@ -495,8 +496,8 @@ chameleon_pzgetrf_panel_permute_forward( struct chameleon_pzgetrf_s *ws,
 #if defined(CHAMELEON_USE_MPI)
     chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
     if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
-        INSERT_TASK_zperm_allreduce_send_perm( options, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
-        INSERT_TASK_zperm_allreduce_send_invp( options, ipiv, k, A, k, n );
+        INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_invp( options, ChamDirForward, ipiv, k, A, k, n );
     }
     if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
         INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
@@ -529,8 +530,8 @@ chameleon_pzgetrf_panel_permute_backward( struct chameleon_pzgetrf_s *ws,
 #if defined(CHAMELEON_USE_MPI)
     chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
     if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
-        INSERT_TASK_zperm_allreduce_send_perm( options, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
-        INSERT_TASK_zperm_allreduce_send_invp( options, ipiv, k, A, k, n );
+        INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_invp( options, ChamDirForward, ipiv, k, A, k, n );
     }
     if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
         INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
diff --git a/compute/pzlaswp.c b/compute/pzlaswp.c
new file mode 100644
index 0000000000000000000000000000000000000000..0d4aa8694654dad2b79b8108b52061a696b70ce9
--- /dev/null
+++ b/compute/pzlaswp.c
@@ -0,0 +1,146 @@
+/**
+ *
+ * @file pzlaswp.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlaswp parallel algorithm
+ *
+ * @version 1.3.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 0.9.2
+ * @author Alycia Lisito
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+#define A(m,n)   A,         m, n
+#define Wu(m,n)  &(ws->Wu), m, n
+
+/**
+ *  Submits the tasks that permute the rows of panel n at step k, in direction dir
+ */
+static inline void
+chameleon_pzlaswp_panel_permute( struct chameleon_pzgetrf_s *ws,
+                                 cham_dir_t                  dir,
+                                 CHAM_desc_t                *A,
+                                 CHAM_ipiv_t                *ipiv,
+                                 int                         k,
+                                 int                         n,
+                                 RUNTIME_option_t           *options )
+{
+    int m;
+    int tempkm, tempnn;
+    int withlacpy;
+
+    tempkm = A->get_blkdim( A, k, DIM_m, A->m );
+    tempnn = A->get_blkdim( A, n, DIM_n, A->n );
+
+    /* Copy A(k, n) into the workspace Wu, with withlacpy forced to 1 for this task only */
+    withlacpy = options->withlacpy;
+    options->withlacpy = 1;
+    INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
+                       A(k, n), Wu(A->myrank, n) );
+    options->withlacpy = withlacpy;
+
+    INSERT_TASK_zlaswp_get( options, dir, k*A->mb, tempkm,
+                           ipiv, k, A(k, n), Wu(A->myrank, n) );
+
+    for ( m = k + 1; m < A->mt; m++ ) {
+        /* Extract the selected rows of A(m, n) into Wu (NOTE(review): inferred from the arguments) */
+        INSERT_TASK_zlaswp_get( options, dir, m*A->mb, tempkm,
+                               ipiv, k, A(m, n), Wu(A->myrank, n) );
+        /* Copy rows from A(k, n) into their final position in A(m, n) */
+        INSERT_TASK_zlaswp_set( options, dir, m*A->mb, tempkm,
+                               ipiv, k, A(k, n), A(m, n) );
+    }
+
+    INSERT_TASK_zperm_allreduce( options, dir, A, Wu(A->myrank, n), ipiv, k, k, n, ws );
+}
+/* Applies the step-k permutation to panel n and copies the result from Wu back into A(k, n) */
+static inline void
+chameleon_pzlaswp_panel( struct chameleon_pzgetrf_s *ws,
+                         cham_dir_t                  dir,
+                         CHAM_desc_t                *A,
+                         CHAM_ipiv_t                *ipiv,
+                         int                         k,
+                         int                         n,
+                         RUNTIME_option_t           *options,
+                         RUNTIME_sequence_t         *sequence )
+{
+    int tempkm, tempnn;
+
+#if defined(CHAMELEON_USE_MPI)
+    chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws );
+    if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) {
+        INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved );
+        INSERT_TASK_zperm_allreduce_send_invp( options, dir, ipiv, k, A, k, n );
+    }
+    if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
+        INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved );
+    }
+    /* Nothing more to submit when ws->involved is not set (presumably by the call above) */
+    if ( !ws->involved ) {
+        return;
+    }
+#endif
+
+    chameleon_pzlaswp_panel_permute( ws, dir, A, ipiv, k, n, options );
+
+    if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
+        /* Only on the rank returned for tile (k, n): copy Wu into A(k, n), then flush that tile */
+        tempkm = A->get_blkdim( A, k, DIM_m, A->m );
+        tempnn = A->get_blkdim( A, n, DIM_n, A->n );
+        INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
+                            Wu(A->myrank, n), A(k, n) );
+        RUNTIME_data_flush( sequence, A(k, n) );
+    }
+}
+
+void
+chameleon_pzlaswp( struct chameleon_pzgetrf_s *ws,
+                   cham_dir_t                  dir,
+                   CHAM_desc_t                *A,
+                   CHAM_ipiv_t                *IPIV,
+                   RUNTIME_sequence_t         *sequence,
+                   RUNTIME_request_t          *request )
+{
+    /*
+     * Submit the permutation of every panel of A, one row-tile step k at a
+     * time: steps go from 0 to mt-1 when dir is ChamDirForward, and from
+     * mt-1 down to 0 otherwise. Nothing is submitted when the sequence is
+     * not in the CHAMELEON_SUCCESS state.
+     */
+    CHAM_context_t   *chamctxt = chameleon_context_self();
+    RUNTIME_option_t  options;
+    int               forward, step, col, k;
+
+    if ( sequence->status != CHAMELEON_SUCCESS ) {
+        return;
+    }
+    RUNTIME_options_init( &options, chamctxt, sequence, request );
+
+    forward = ( dir == ChamDirForward );
+
+    for ( step = 0; step < A->mt; step++ ) {
+        /* Walk the row-tile index upward or downward depending on dir */
+        k = forward ? step : ( A->mt - 1 - step );
+
+        for ( col = 0; col < A->nt; col++ ) {
+            /* The priority value decreases from the first to the last panel */
+            options.priority = A->nt - col;
+            chameleon_pzlaswp_panel( ws, dir, A, IPIV, k, col, &options, sequence );
+        }
+
+        /* Flush the permutation data of step k once all its panels are submitted */
+        RUNTIME_perm_flushk( sequence, IPIV, k );
+    }
+    RUNTIME_options_finalize( &options, chamctxt );
+}
+
diff --git a/compute/zgesv.c b/compute/zgesv.c
new file mode 100644
index 0000000000000000000000000000000000000000..1b657bfac45d684e260f0096fc4192256d65bdba
--- /dev/null
+++ b/compute/zgesv.c
@@ -0,0 +1,387 @@
+/**
+ *
+ * @file zgesv.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgesv wrappers
+ *
+ * @version 1.3.0
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  @brief Computes the solution to a system of linear equations A * X = B,
+ *  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+ *
+ *  The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A.
+ *  The factored form of A is then used to solve the system of equations A * X = B.
+ *
+ *******************************************************************************
+ *
+ * @param[in] N
+ *          The number of linear equations, i.e., the order of the matrix A. N >= 0.
+ *
+ * @param[in] NRHS
+ *          The number of right hand sides, i.e., the number of columns of the matrix B.
+ *          NRHS >= 0.
+ *
+ * @param[in,out] A
+ *          On entry, the N-by-N coefficient matrix A.
+ *          On exit, the tile L and U factors from the factorization (not equivalent to LAPACK).
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,N).
+ *
+ * @param[out] IPIV
+ *          On exit, the pivot indices that define the permutations (not equivalent to LAPACK).
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, if return value = 0, the N-by-NRHS solution matrix X.
+ *
+ * @param[in] LDB
+ *          The leading dimension of the array B. LDB >= max(1,N).
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ *               but the factor U is exactly singular, so the solution could not be computed.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgesv_Tile
+ * @sa CHAMELEON_zgesv_Tile_Async
+ * @sa CHAMELEON_cgesv
+ * @sa CHAMELEON_dgesv
+ * @sa CHAMELEON_sgesv
+ *
+ */
+int CHAMELEON_zgesv( int N, int NRHS,
+                     CHAMELEON_Complex64_t *A, int LDA,
+                     int *IPIV,
+                     CHAMELEON_Complex64_t *B, int LDB )
+{
+    int                         NB;
+    int                         status;
+    CHAM_context_t             *chamctxt;
+    CHAM_ipiv_t                 descIPIV;
+    RUNTIME_sequence_t         *sequence = NULL;
+    RUNTIME_request_t           request  = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t                 descAl, descAt;
+    CHAM_desc_t                 descBl, descBt;
+    struct chameleon_pzgetrf_s *wsA,   *wsB;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_error( "CHAMELEON_zgesv", "CHAMELEON not initialized" );
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments: -i is returned when the i-th argument is illegal */
+    if ( N < 0 ) {
+        chameleon_error( "CHAMELEON_zgesv", "illegal value of N" );
+        return -1;
+    }
+    if ( NRHS < 0 ) {
+        chameleon_error( "CHAMELEON_zgesv", "illegal value of NRHS" );
+        return -2;
+    }
+    if ( LDA < chameleon_max( 1, N ) ) {
+        chameleon_error( "CHAMELEON_zgesv", "illegal value of LDA" );
+        return -4;
+    }
+    if ( LDB < chameleon_max( 1, N ) ) {
+        chameleon_error( "CHAMELEON_zgesv", "illegal value of LDB" );
+        return -7; /* LDB is the 7th argument of this interface */
+    }
+    /* Quick return */
+    if ( chameleon_min( N, NRHS ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    /* Tune NB & IB depending on N & NRHS */
+    status = chameleon_tune( CHAMELEON_FUNC_ZGESV, N, N, NRHS );
+    if ( status != CHAMELEON_SUCCESS ) {
+        chameleon_error( "CHAMELEON_zgesv", "chameleon_tune() failed" );
+        return status;
+    }
+
+    /* Tile size used for the tiled copies of A and B */
+    NB = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
+                         A, NB, NB, LDA, N, N, N, sequence, &request );
+    chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
+                         B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );
+
+    /* Allocate workspace for partial pivoting */
+    wsA = CHAMELEON_zgetrf_WS_Alloc( &descAt );
+    wsB = CHAMELEON_zgetrf_WS_Alloc( &descBt );
+
+    if ( ( wsA->alg == ChamGetrfPPivPerColumn ) ||
+         ( wsA->alg == ChamGetrfPPiv ) )
+    {
+        chameleon_ipiv_init( &descIPIV, &descAt, IPIV );
+    }
+
+    /* Call the tile interface */
+    CHAMELEON_zgesv_Tile_Async( &descAt, &descIPIV, &descBt, wsA, wsB, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInout, ChamUpperLower, sequence, &request );
+    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
+                         ChamDescInout, ChamUpperLower, sequence, &request );
+
+    if ( ( wsA->alg == ChamGetrfPPivPerColumn ) ||
+         ( wsA->alg == ChamGetrfPPiv ) )
+    {
+        RUNTIME_ipiv_gather( sequence, &descIPIV, IPIV, 0 );
+    }
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Release the pivot descriptor, which is initialized only for these algorithms */
+    if ( ( wsA->alg == ChamGetrfPPivPerColumn ) ||
+         ( wsA->alg == ChamGetrfPPiv ) )
+    {
+        chameleon_ipiv_destroy( &descIPIV, &descAt );
+    }
+
+    /* Free the workspaces and the tiled copies of A and B */
+    CHAMELEON_zgetrf_WS_Free( wsA );
+    CHAMELEON_zgetrf_WS_Free( wsB );
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ *  @brief Solves a system of linear equations using the tile LU factorization.
+ *  Tile equivalent of CHAMELEON_zgesv().
+ *
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in,out] A
+ *          On entry, the N-by-N coefficient matrix A.
+ *          On exit, the tile L and U factors from the factorization (not equivalent to LAPACK).
+ *
+ * @param[in,out] IPIV
+ *          On entry, ipiv descriptor associated to A and created with
+ *          CHAMELEON_Ipiv_Create().
+ *          On exit, it contains the pivot indices associated to the PLU
+ *          factorization of A.
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, if return value = 0, the N-by-NRHS solution matrix X.
+ *
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ *               but the factor U is exactly singular, so the solution could not be computed.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgesv
+ * @sa CHAMELEON_zgesv_Tile_Async
+ * @sa CHAMELEON_cgesv_Tile
+ * @sa CHAMELEON_dgesv_Tile
+ * @sa CHAMELEON_sgesv_Tile
+ * @sa CHAMELEON_zcgesv_Tile
+ *
+ */
+int CHAMELEON_zgesv_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B )
+{
+    CHAM_context_t     *ctxt = chameleon_context_self();
+    RUNTIME_sequence_t *seq  = NULL;
+    RUNTIME_request_t   req  = RUNTIME_REQUEST_INITIALIZER;
+    void               *ws_facto, *ws_solve;
+    int                 rc;
+
+    if ( ctxt == NULL ) {
+        chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "CHAMELEON not initialized" );
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( ctxt, &seq );
+
+    /* One getrf workspace per descriptor, both handed to the asynchronous driver */
+    ws_facto = CHAMELEON_zgetrf_WS_Alloc( A );
+    ws_solve = CHAMELEON_zgetrf_WS_Alloc( B );
+    CHAMELEON_zgesv_Tile_Async( A, IPIV, B, ws_facto, ws_solve, seq, &req );
+
+    /* Flush A, IPIV and B, then block until the sequence has completed */
+    CHAMELEON_Desc_Flush( A, seq );
+    CHAMELEON_Ipiv_Flush( IPIV, seq );
+    CHAMELEON_Desc_Flush( B, seq );
+    chameleon_sequence_wait( ctxt, seq );
+
+    CHAMELEON_zgetrf_WS_Free( ws_facto );
+    CHAMELEON_zgetrf_WS_Free( ws_solve );
+    rc = seq->status;
+    chameleon_sequence_destroy( ctxt, seq );
+    return rc;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ *  @brief Solves a system of linear equations using the tile LU factorization.
+ *
+ *  Non-blocking equivalent of CHAMELEON_zgesv_Tile().
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in,out] A
+ *          On entry, the N-by-N coefficient matrix A to be factored.
+ *          On exit, the tile factors L and U from the factorization.
+ *
+ * @param[in,out] IPIV
+ *          On entry, ipiv descriptor associated to A and created with
+ *          CHAMELEON_Ipiv_Create().
+ *          On exit, it contains the pivot indices associated to the PLU
+ *          factorization of A.
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, the N-by-NRHS solution matrix X.
+ *
+ * @param[in,out] user_wsA
+ *          The opaque pointer to the getrf workspace pre-allocated through
+ *          CHAMELEON_zgetrf_WS_Alloc() for A. If user_wsA is NULL, it is automatically
+ *          allocated, but BE CAREFUL as it switches the call from asynchronous
+ *          to synchronous.
+ *
+ * @param[in,out] user_wsB
+ *          The opaque pointer to the getrf workspace pre-allocated through
+ *          CHAMELEON_zgetrf_WS_Alloc() for B. If user_wsB is NULL, it is automatically
+ *          allocated, but BE CAREFUL as it switches the call from asynchronous
+ *          to synchronous.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgesv
+ * @sa CHAMELEON_zgesv_Tile
+ * @sa CHAMELEON_cgesv_Tile_Async
+ * @sa CHAMELEON_dgesv_Tile_Async
+ * @sa CHAMELEON_sgesv_Tile_Async
+ * @sa CHAMELEON_zcgesv_Tile_Async
+ *
+ */
+int CHAMELEON_zgesv_Tile_Async( CHAM_desc_t        *A,
+                                CHAM_ipiv_t        *IPIV,
+                                CHAM_desc_t        *B,
+                                void               *user_wsA,
+                                void               *user_wsB,
+                                RUNTIME_sequence_t *sequence,
+                                RUNTIME_request_t  *request )
+{
+    CHAM_context_t *chamctxt;
+    struct chameleon_pzgetrf_s *wsA, *wsB;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "CHAMELEON not initialized" );
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( sequence == NULL ) {
+        chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "NULL sequence" );
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if ( request == NULL ) {
+        chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "NULL request" );
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if ( sequence->status == CHAMELEON_SUCCESS ) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED );
+    }
+
+    /* Check descriptors for correctness */
+    if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) {
+        chameleon_error( "CHAMELEON_zgesv_Tile", "invalid first descriptor" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+    if ( chameleon_desc_check( B ) != CHAMELEON_SUCCESS ) {
+        chameleon_error( "CHAMELEON_zgesv_Tile", "invalid third descriptor" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+    /* Check input arguments */
+    if ( A->nb != A->mb || B->nb != B->mb ) {
+        chameleon_error( "CHAMELEON_zgesv_Tile", "only square tiles supported" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+
+    /* Use the caller's workspaces when provided, otherwise allocate them here */
+    wsA = ( user_wsA != NULL ) ? user_wsA : CHAMELEON_zgetrf_WS_Alloc( A );
+    wsB = ( user_wsB != NULL ) ? user_wsB : CHAMELEON_zgetrf_WS_Alloc( B );
+
+    /* Submit the factorization of A, then the solve on B using A and IPIV */
+    chameleon_pzgetrf( wsA, A, IPIV, sequence, request );
+    CHAMELEON_zgetrs_Tile_Async( ChamNoTrans, A, IPIV, B, wsB, sequence, request );
+
+    /*
+     * Workspaces allocated by this call must outlive every task submitted
+     * above: as soon as one of them is local, flush A and B and wait for the
+     * sequence before releasing anything. Waiting only when wsA is local
+     * would let a local wsB be freed while tasks may still reference it.
+     */
+    if ( ( user_wsA == NULL ) || ( user_wsB == NULL ) ) {
+        CHAMELEON_Desc_Flush( A, sequence );
+        CHAMELEON_Desc_Flush( B, sequence );
+        chameleon_sequence_wait( chamctxt, sequence );
+    }
+
+    if ( user_wsA == NULL ) {
+        CHAMELEON_zgetrf_WS_Free( wsA );
+    }
+    if ( user_wsB == NULL ) {
+        CHAMELEON_zgetrf_WS_Free( wsB );
+    }
+
+    return CHAMELEON_SUCCESS;
+}
+
diff --git a/compute/zgetrs.c b/compute/zgetrs.c
new file mode 100644
index 0000000000000000000000000000000000000000..9a2e5bac6de8623a1425cd81f18e4963b3ad92b2
--- /dev/null
+++ b/compute/zgetrs.c
@@ -0,0 +1,399 @@
+/**
+ *
+ * @file zgetrs.c
+ *
+ * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrs wrappers
+ *
+ * @version 1.3.0
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  @brief Solves a system of linear equations A * X = B, with a general N-by-N matrix A
+ *  using the tile LU factorization with partial pivoting computed by CHAMELEON_zgetrf.
+ *
+ *******************************************************************************
+ *
+ * @param[in] trans
+ *          Specifies the form of the system of equations:
+ *          = ChamNoTrans:   A * X = B     (No transpose)
+ *          = ChamTrans:     A^T * X = B  (Transpose)
+ *          = ChamConjTrans: A^H * X = B  (Conjugate transpose)
+ *          Only ChamNoTrans and ChamTrans are supported.
+ *
+ * @param[in] N
+ *          The order of the matrix A.  N >= 0.
+ *
+ * @param[in] NRHS
+ *          The number of right hand sides, i.e., the number of columns of the matrix B.
+ *          NRHS >= 0.
+ *
+ * @param[in] A
+ *          The factors L and U from the factorization, computed by CHAMELEON_zgetrf.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,N).
+ *
+ * @param[in] IPIV
+ *          The pivot indices of the PLU factorization of A. The array is wrapped
+ *          in an ipiv descriptor with CHAMELEON_Ipiv_Create() for the tile call.
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, the solution matrix X.
+ *
+ * @param[in] LDB
+ *          The leading dimension of the array B. LDB >= max(1,N).
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_ILLEGAL_VALUE if trans is not ChamNoTrans or ChamTrans
+ * @return <0 if -i, the i-th argument had an illegal value
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrs_Tile
+ * @sa CHAMELEON_zgetrs_Tile_Async
+ * @sa CHAMELEON_cgetrs
+ * @sa CHAMELEON_dgetrs
+ * @sa CHAMELEON_sgetrs
+ * @sa CHAMELEON_zgetrf
+ *
+ */
+int CHAMELEON_zgetrs( cham_trans_t trans, int N, int NRHS,
+                      CHAMELEON_Complex64_t *A, int LDA,
+                      int *IPIV,
+                      CHAMELEON_Complex64_t *B, int LDB )
+{
+    int                         NB;
+    int                         status;
+    CHAM_context_t             *chamctxt;
+    CHAM_ipiv_t                *descIPIV;
+    RUNTIME_sequence_t         *sequence = NULL;
+    RUNTIME_request_t           request  = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t                 descAl, descAt;
+    CHAM_desc_t                 descBl, descBt;
+    struct chameleon_pzgetrf_s *ws;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments */
+    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
+        chameleon_error("CHAMELEON_zgetrs", "Only ChamTrans and ChamNoTrans are supported");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    if ( N < 0 ) {
+        chameleon_error("CHAMELEON_zgetrs", "illegal value of N");
+        return -2;
+    }
+    if ( NRHS < 0 ) {
+        chameleon_error("CHAMELEON_zgetrs", "illegal value of NRHS");
+        return -3;
+    }
+    if ( LDA < chameleon_max( 1, N ) ) {
+        chameleon_error("CHAMELEON_zgetrs", "illegal value of LDA");
+        return -5;
+    }
+    if ( LDB < chameleon_max( 1, N ) ) {
+        chameleon_error("CHAMELEON_zgetrs", "illegal value of LDB");
+        return -8; /* LDB is the 8th argument */
+    }
+    /* Quick return */
+    if ( chameleon_min( N, NRHS ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
+    status = chameleon_tune( CHAMELEON_FUNC_ZGESV, N, N, NRHS );
+    if ( status != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zgetrs", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NB */
+    NB    = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion: A is only read, B is read and overwritten */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
+                         A, NB, NB, LDA, N, N, N, sequence, &request );
+    chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
+                         B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );
+
+    ws = CHAMELEON_zgetrf_WS_Alloc( &descBt );
+    CHAMELEON_Ipiv_Create( &descIPIV, &descAt, IPIV );
+    CHAMELEON_Ipiv_Init( &descAt, descIPIV );
+
+    /* Call the tile interface */
+    CHAMELEON_zgetrs_Tile_Async( trans, &descAt, descIPIV, &descBt, ws, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInput, ChamUpperLower, sequence, &request );
+    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
+                         ChamDescInout, ChamUpperLower, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data, now that the sequence has completed */
+    CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt );
+    CHAMELEON_zgetrf_WS_Free( ws );
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ *  @brief Solves a system of linear equations using previously
+ *  computed LU factorization with partial pivoting.
+ *  Tile equivalent of CHAMELEON_zgetrs().
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] trans
+ *          Specifies the form of the system of equations:
+ *          = ChamNoTrans:   A * X = B     (No transpose)
+ *          = ChamTrans:     A^T * X = B  (Transpose)
+ *          = ChamConjTrans: A^H * X = B  (Conjugate transpose)
+ *          Only ChamNoTrans and ChamTrans are supported.
+ *
+ * @param[in] A
+ *          The tile factors L and U from the factorization, computed by CHAMELEON_zgetrf.
+ *
+ * @param[in] IPIV
+ *          The ipiv descriptor associated to A, created with
+ *          CHAMELEON_Ipiv_Create(). It is expected to hold the pivot indices
+ *          associated to the PLU factorization of A (presumably as computed
+ *          by CHAMELEON_zgetrf_Tile()).
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, the solution matrix X.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrs
+ * @sa CHAMELEON_zgetrs_Tile_Async
+ * @sa CHAMELEON_cgetrs_Tile
+ * @sa CHAMELEON_dgetrs_Tile
+ * @sa CHAMELEON_sgetrs_Tile
+ * @sa CHAMELEON_zgetrf_Tile
+ *
+ */
+int CHAMELEON_zgetrs_Tile( cham_trans_t trans,
+                           CHAM_desc_t *A,
+                           CHAM_ipiv_t *IPIV,
+                           CHAM_desc_t *B )
+{
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    int                 status;
+    void               *ws;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile", "Only ChamTrans and ChamNoTrans are supported");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    ws = CHAMELEON_zgetrf_WS_Alloc( B );
+
+    CHAMELEON_zgetrs_Tile_Async( trans, A, IPIV, B, ws, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+    CHAMELEON_Desc_Flush( B, sequence );
+
+    /* Release the workspace only once every task of the sequence has completed */
+    chameleon_sequence_wait( chamctxt, sequence );
+    CHAMELEON_zgetrf_WS_Free( ws );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ *  @brief Solves a system of linear equations using previously
+ *  computed LU factorization with partial pivoting.
+ *  Non-blocking equivalent of CHAMELEON_zgetrs_Tile().
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] trans
+ *          Specifies the form of the system of equations:
+ *          = ChamNoTrans:   A * X = B     (No transpose)
+ *          = ChamTrans:     A^T * X = B  (Transpose)
+ *          = ChamConjTrans: A^H * X = B  (Conjugate transpose)
+ *          Only ChamNoTrans and ChamTrans are supported.
+ *
+ * @param[in] A
+ *          The tile factors L and U from the factorization, computed by CHAMELEON_zgetrf.
+ *
+ * @param[in] IPIV
+ *          The ipiv descriptor associated to A, created with
+ *          CHAMELEON_Ipiv_Create(). It is expected to hold the pivot indices
+ *          associated to the PLU factorization of A.
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, the N-by-NRHS solution matrix X.
+ *
+ * @param[in,out] user_ws
+ *          The opaque pointer to pre-allocated getrf workspace through
+ *          CHAMELEON_zgetrf_WS_Alloc() for B. If user_ws is NULL, it is automatically
+ *          allocated, but BE CAREFUL as it switches the call from asynchronous
+ *          to synchronous call.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrs
+ * @sa CHAMELEON_zgetrs_Tile
+ * @sa CHAMELEON_cgetrs_Tile_Async
+ * @sa CHAMELEON_dgetrs_Tile_Async
+ * @sa CHAMELEON_sgetrs_Tile_Async
+ * @sa CHAMELEON_zgetrf_Tile_Async
+ *
+ */
+int CHAMELEON_zgetrs_Tile_Async( cham_trans_t        trans,
+                                 CHAM_desc_t        *A,
+                                 CHAM_ipiv_t        *IPIV,
+                                 CHAM_desc_t        *B,
+                                 void               *user_ws,
+                                 RUNTIME_sequence_t *sequence,
+                                 RUNTIME_request_t  *request )
+{
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;
+    RUNTIME_option_t            options;
+    int                         k, tempkm;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile_Async", "Only ChamTrans and ChamNoTrans are supported");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    if ( sequence == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if ( request == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if ( sequence->status == CHAMELEON_SUCCESS ) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED );
+    }
+
+    /* Check descriptors for correctness */
+    if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile_Async", "invalid first descriptor");
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+    if ( chameleon_desc_check( B ) != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile_Async", "invalid third descriptor");
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+    /* Check input arguments */
+    if ( ( A->nb != A->mb ) || ( B->nb != B->mb ) ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile_Async", "only square tiles supported");
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+
+    if ( user_ws == NULL ) {
+        ws = CHAMELEON_zgetrf_WS_Alloc( B );
+    }
+    else {
+        ws = user_ws;
+    }
+
+    if ( IPIV->data != NULL ) {
+        RUNTIME_options_init( &options, chamctxt, sequence, request );
+        for ( k = 0; k < A->mt; k++ ) {
+            tempkm = A->get_blkdim( A, k, DIM_m, A->m );
+            INSERT_TASK_ipiv_to_perm( &options, k * A->mb, tempkm, tempkm, 0, A->m,
+                                      IPIV, k );
+        }
+        chameleon_sequence_wait( chamctxt, sequence );
+    }
+
+    if ( trans == ChamNoTrans ) {
+        /* Apply the row interchanges to B, then solve with L and with U */
+        chameleon_pzlaswp( ws, ChamDirForward, B, IPIV, sequence, request );
+        chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
+        chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
+    }
+    else {
+        /* Solve with U^T and with L^T, then apply the interchanges backward */
+        chameleon_pztrsm( ChamLeft, ChamUpper, trans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
+        chameleon_pztrsm( ChamLeft, ChamLower, trans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
+        chameleon_pzlaswp( ws, ChamDirBackward, B, IPIV, sequence, request );
+    }
+
+    if ( user_ws == NULL ) {
+        /* The submitted tasks use ws: wait for them before releasing it */
+        CHAMELEON_Desc_Flush( A, sequence );
+        CHAMELEON_Desc_Flush( B, sequence );
+        chameleon_sequence_wait( chamctxt, sequence );
+        CHAMELEON_zgetrf_WS_Free( ws );
+    }
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/compute/zlaswp.c b/compute/zlaswp.c
new file mode 100644
index 0000000000000000000000000000000000000000..6d7955e78d8ce52e3ff423df2182c29b7005f9c4
--- /dev/null
+++ b/compute/zlaswp.c
@@ -0,0 +1,388 @@
+/**
+ *
+ * @file zlaswp.c
+ *
+ * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlaswp wrappers
+ *
+ * @version 1.3.0
+ * @author Alycia Lisito
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  @brief Computes the permutation P*op(A) or op(A)*P where P is the permutation
+ *         matrix generated from IPIV.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T (not implemented)
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[in] K1
+ *          First element of IPIV to interchange (Fortran numbering). 1 <= K1 <= M.
+ *
+ * @param[in] K2
+ *          Last element of IPIV to interchange (Fortran numbering). 1 <= K2 <= M.
+ *
+ * @param[in] IPIV
+ *          Vector of pivot indices.
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @return <0 if -i, the i-th argument had an illegal value
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp_Tile
+ * @sa CHAMELEON_zlaswp_Tile_Async
+ * @sa CHAMELEON_claswp
+ * @sa CHAMELEON_dlaswp
+ * @sa CHAMELEON_slaswp
+ *
+ */
+int CHAMELEON_zlaswp( cham_side_t            side,
+                      cham_dir_t             dir,
+                      int                    M,
+                      int                    N,
+                      CHAMELEON_Complex64_t *A,
+                      int                    LDA,
+                      int                    K1,
+                      int                    K2,
+                      int                   *IPIV )
+{
+    int                 status;
+    int                 NB;
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t         descAl, descAt;
+    CHAM_ipiv_t        *descIPIV;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    /* Check input arguments: -i reports the i-th argument (side is the 1st) */
+    if ( M < 0 ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of M");
+        return -3;
+    }
+    if ( N < 0 ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of N");
+        return -4;
+    }
+    if ( LDA < chameleon_max( 1, M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of LDA");
+        return -6;
+    }
+
+    /* Quick return: done before the K1/K2 range checks, which reject M == 0 */
+    if ( chameleon_min( N, M ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+    if ( ( K1 < 1 ) || ( K1 > M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of K1");
+        return -7;
+    }
+    if ( ( K2 < 1 ) || ( K2 > M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of K2");
+        return -8;
+    }
+
+    /* Tune NB depending on M, N & NRHS; Set NBNB */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if ( status != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zlaswp", "chameleon_tune() failed");
+        return status;
+    }
+
+    NB = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion: A is read and written, hence ChamDescInout */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+    CHAMELEON_Ipiv_Create( &descIPIV, &descAt, IPIV );
+    CHAMELEON_Ipiv_Init( &descAt, descIPIV );
+
+    /* Call the tile interface */
+    CHAMELEON_zlaswp_Tile_Async( side, dir, &descAt, K1, K2, descIPIV, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInout, ChamUpperLower, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt );
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ *  @brief Tile equivalent of CHAMELEON_zlaswp().
+ *
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T (not implemented)
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] K1
+ *          The first element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] K2
+ *          The last element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] IPIV
+ *          Descriptor of the vector of pivot indices.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp
+ * @sa CHAMELEON_zlaswp_Tile_Async
+ * @sa CHAMELEON_claswp_Tile
+ * @sa CHAMELEON_dlaswp_Tile
+ * @sa CHAMELEON_slaswp_Tile
+ *
+ */
+int CHAMELEON_zlaswp_Tile( cham_side_t  side,
+                           cham_dir_t   dir,
+                           CHAM_desc_t *A,
+                           int          K1,
+                           int          K2,
+                           CHAM_ipiv_t *IPIV )
+{
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    int                 status;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    if ( ( K1 < 1 ) || ( K1 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile", "illegal value of K1");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    if ( ( K2 < 1 ) || ( K2 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile", "illegal value of K2");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_zlaswp_Tile_Async( side, dir, A, K1, K2, IPIV, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+    CHAMELEON_Ipiv_Flush( IPIV, sequence );
+
+    /* Report the status of the whole sequence once it has completed */
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ *  @brief Non-blocking equivalent of CHAMELEON_zlaswp_Tile().
+ *
+ *  The tasks are submitted in the caller's sequence. The call waits for this
+ *  sequence before returning, as it releases its own temporary workspace.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T (not implemented)
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] K1
+ *          First element of IPIV to interchange (Fortran numbering). 1 <= K1 <= M.
+ *
+ * @param[in] K2
+ *          Last element of IPIV to interchange (Fortran numbering). 1 <= K2 <= M.
+ *
+ * @param[in] IPIV
+ *          Descriptor of the vector of pivot indices.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp
+ * @sa CHAMELEON_zlaswp_Tile
+ * @sa CHAMELEON_claswp_Tile_Async
+ * @sa CHAMELEON_dlaswp_Tile_Async
+ * @sa CHAMELEON_slaswp_Tile_Async
+ *
+ */
+int CHAMELEON_zlaswp_Tile_Async( cham_side_t         side,
+                                 cham_dir_t          dir,
+                                 CHAM_desc_t        *A,
+                                 int                 K1,
+                                 int                 K2,
+                                 CHAM_ipiv_t        *IPIV,
+                                 RUNTIME_sequence_t *sequence,
+                                 RUNTIME_request_t  *request )
+{
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;
+    RUNTIME_option_t            options;
+    int                         k, tempkm;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    if ( sequence == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if ( request == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if ( sequence->status == CHAMELEON_SUCCESS ) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if ( chameleon_desc_check(A) != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "invalid first descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments */
+    if ( A->mb != A->nb ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "only matching tile sizes supported");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return: an empty matrix has no valid K1/K2, so test it first */
+    if ( chameleon_min( A->m, A->n ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    if ( ( K1 < 1 ) || ( K1 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "illegal value of K1");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    if ( ( K2 < 1 ) || ( K2 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "illegal value of K2");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    if ( IPIV->data != NULL ) {
+        RUNTIME_options_init( &options, chamctxt, sequence, request );
+        for ( k = 0; k < A->mt; k++ ) {
+            tempkm = A->get_blkdim( A, k, DIM_m, A->m );
+            INSERT_TASK_ipiv_to_perm( &options, k * A->mb, tempkm, tempkm, K1 - 1, K2 - 1,
+                                      IPIV, k );
+        }
+        chameleon_sequence_wait( chamctxt, sequence );
+    }
+
+    ws = CHAMELEON_zgetrf_WS_Alloc( A );
+    chameleon_pzlaswp( ws, dir, A, IPIV, sequence, request );
+
+    /* The submitted tasks use ws: wait for them before releasing it */
+    chameleon_sequence_wait( chamctxt, sequence );
+    CHAMELEON_zgetrf_WS_Free( ws );
+
+    return CHAMELEON_SUCCESS;
+}
+
diff --git a/control/compute_z.h b/control/compute_z.h
index 812af3dce918e74926506810d38b2db8fc167e33..72f4504a4544e7098fa2335ea15deb4720dbc635 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -24,7 +24,7 @@
  * @author Lionel Eyraud-Dubois
  * @author Ana Hourcau
  * @author Pierre Esterie
- * @date 2024-12-09
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -172,7 +172,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
 void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzlaset( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha,                          CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzlaswp(CHAM_desc_t *B, int *IPIV, int inc, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+void chameleon_pzlaswp( struct chameleon_pzgetrf_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzlaswpc(CHAM_desc_t *B, int *IPIV, int inc, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/control/descriptor_ipiv.c b/control/descriptor_ipiv.c
index 84067cf5f987c425c7e5f1b989844fe7e1df7c64..d46269d32c670223b595842ff60569b65573e5b5 100644
--- a/control/descriptor_ipiv.c
+++ b/control/descriptor_ipiv.c
@@ -14,7 +14,7 @@
  * @author Matthieu Kuhn
  * @author Alycia Lisito
  * @author Florent Pruvost
- * @date 2024-08-29
+ * @date 2025-03-24
  *
  ***
  *
@@ -148,6 +148,45 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void
     return CHAMELEON_SUCCESS;
 }
 
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  @brief Initializes the data of an IPIV descriptor and waits for completion.
+ *
+ *******************************************************************************
+ *
+ * @param[in] descA
+ *          Descriptor of the matrix A. Not used by the current implementation.
+ *
+ * @param[in,out] descIPIV
+ *          Descriptor of the pivot array. Should be initialized using
+ *          CHAMELEON_Ipiv_Create() with data filled with the vector of pivot.
+ *
+ */
+void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA,
+                          CHAM_ipiv_t       *descIPIV )
+{
+    RUNTIME_option_t    options;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    RUNTIME_sequence_t *sequence = NULL;
+    CHAM_context_t     *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error( "CHAMELEON_Ipiv_Init", "CHAMELEON not initialized" );
+        return;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+    RUNTIME_options_init( &options, chamctxt, sequence, &request );
+
+    INSERT_TASK_ipiv_init_data( &options, descIPIV );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    chameleon_sequence_destroy( chamctxt, sequence );
+}
+
 /**
  *****************************************************************************
  *
diff --git a/coreblas/compute/core_ipiv_to_perm.c b/coreblas/compute/core_ipiv_to_perm.c
index 6c19272b3eaec960eb457fc40e9ba77f06b24075..9b2b53ceedec775158a9e5ce192a7abdd7120896 100644
--- a/coreblas/compute/core_ipiv_to_perm.c
+++ b/coreblas/compute/core_ipiv_to_perm.c
@@ -11,7 +11,8 @@
  *
  * @version 1.3.0
  * @author Mathieu Faverge
- * @date 2024-02-18
+ * @author Matteo Marcos
+ * @date 2025-03-24
  */
 #include "coreblas.h"
 
@@ -44,6 +45,14 @@
  * @param[in] k
  *          The number of elements in ipiv. k >= 0.
  *
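+ * @param[in] K1
+ *          The first element of ipiv for which an interchange will be done.
+ *          Entries i (0 <= i < k) with m0 + i < K1 are skipped.
+ *
+ * @param[in] K2
+ *          The last element of ipiv for which an interchange will be done.
+ *          Entries i (0 <= i < k) with m0 + i > K2 are skipped.
+ *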
  * @param[in] ipiv
  *          The pivot array of size n. This is a (m0+1)-based indices array to follow
  *          the Fortran standard.
@@ -55,7 +64,7 @@
  *          The permutation array of the origin row indices (m0-based) of the [1,n] set of rows.
  *
  */
-void CORE_ipiv_to_perm( int m0, int m, int k, int *ipiv, int *perm, int *invp )
+void CORE_ipiv_to_perm( int m0, int m, int k, int K1, int K2, int *ipiv, int *perm, int *invp )
 {
     int i, j, ip;
     int i_1, ip_1;
@@ -66,6 +75,9 @@ void CORE_ipiv_to_perm( int m0, int m, int k, int *ipiv, int *perm, int *invp )
     }
 
     for(i = 0; i < k; i++) {
+        if ( ( i + m0 < K1 ) || ( i + m0 > K2 ) ) {
+            continue;
+        }
         ip = ipiv[i]-1;
         assert( ip - m0 >= i );
 
diff --git a/coreblas/include/coreblas.h b/coreblas/include/coreblas.h
index c72530c108bf89bae0a3456b7365d7e43605deec..623dcd541b974e25477560fa03dfbfb2dedfcb8e 100644
--- a/coreblas/include/coreblas.h
+++ b/coreblas/include/coreblas.h
@@ -18,7 +18,8 @@
  * @author Guillaume Sylvand
  * @author Mathieu Faverge
  * @author Raphael Boucherie
- * @date 2024-03-14
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #ifndef _coreblas_h_
@@ -94,7 +95,7 @@ void __coreblas_kernel_trace( const char *func, ... );
 
 #endif
 
-void CORE_ipiv_to_perm( int m0, int m, int k, int *ipiv, int *perm, int *invp );
+void CORE_ipiv_to_perm( int m0, int m, int k, int K1, int K2, int *ipiv, int *perm, int *invp );
 
 END_C_DECLS
 
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index fcd8177a0a0cc56a6e21080463956c98918d1a46..d9540d5e8472ed15af9ead33296f9ac9cd76d6ab 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -24,7 +24,8 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Ana Hourcau
- * @date 2024-10-17
+ * @author Matteo Marcos
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -48,7 +49,7 @@ int CHAMELEON_zgemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int
 int CHAMELEON_zgepdf_qdwh( int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *H, int LDH, gepdf_info_t *info );
 int CHAMELEON_zgeqrf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT);
 int CHAMELEON_zgeqrs(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *B, int LDB);
-//int CHAMELEON_zgesv(int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB);
+int CHAMELEON_zgesv(int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zgesv_incpiv(int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zgesv_nopiv(int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zgesvd(cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_Complex64_t *A, int LDA, double *S, CHAM_desc_t *descT, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT);
@@ -57,7 +58,7 @@ int CHAMELEON_zgetrf_incpiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHA
 int CHAMELEON_zgetrf_nopiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV );
 //int CHAMELEON_zgetri(int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV);
-//int CHAMELEON_zgetrs(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB);
+int CHAMELEON_zgetrs(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zgetrs_incpiv(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zgetrs_nopiv(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zhemm(cham_side_t side, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *C, int LDC);
@@ -76,8 +77,7 @@ double CHAMELEON_zlansy(cham_normtype_t norm, cham_uplo_t uplo, int N, CHAMELEON
 double CHAMELEON_zlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, int M, int N, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zlascal(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zlaset(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *A, int LDA);
-//int CHAMELEON_zlaswp(int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX);
-//int CHAMELEON_zlaswpc(int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX);
+int CHAMELEON_zlaswp( cham_side_t side, cham_dir_t dir, int M, int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV );
 int CHAMELEON_zlatms( int M, int N, cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAMELEON_Complex64_t *A, int LDA );
 int CHAMELEON_zlauum(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed );
@@ -129,7 +129,7 @@ int CHAMELEON_zgepdf_qdwh_Tile( CHAM_desc_t *A, CHAM_desc_t *H, gepdf_info_t *in
 int CHAMELEON_zgepdf_qr_Tile( int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *Q2 );
 int CHAMELEON_zgeqrf_Tile(CHAM_desc_t *A, CHAM_desc_t *T);
 int CHAMELEON_zgeqrs_Tile(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B);
-//int CHAMELEON_zgesv_Tile(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B);
+int CHAMELEON_zgesv_Tile(CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B);
 int CHAMELEON_zgesv_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B);
 int CHAMELEON_zgesv_nopiv_Tile(CHAM_desc_t *A, CHAM_desc_t *B);
 int CHAMELEON_zgesvd_Tile(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, double *S, CHAM_desc_t *T, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT);
@@ -138,7 +138,7 @@ int CHAMELEON_zgetrf_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV);
 int CHAMELEON_zgetrf_nopiv_Tile(CHAM_desc_t *A);
 int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV );
 //int CHAMELEON_zgetri_Tile(CHAM_desc_t *A, int *IPIV);
-//int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B);
+int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B);
 int CHAMELEON_zgetrs_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B);
 int CHAMELEON_zgetrs_nopiv_Tile(CHAM_desc_t *A, CHAM_desc_t *B);
 int CHAMELEON_zhemm_Tile(cham_side_t side, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C);
@@ -157,8 +157,7 @@ double CHAMELEON_zlansy_Tile(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t
 double CHAMELEON_zlantr_Tile(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A);
 int CHAMELEON_zlascal_Tile(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A);
 int CHAMELEON_zlaset_Tile(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A);
-//int CHAMELEON_zlaswp_Tile(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX);
-//int CHAMELEON_zlaswpc_Tile(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX);
+int CHAMELEON_zlaswp_Tile( cham_side_t side, cham_dir_t dir, CHAM_desc_t *A, int K1, int K2, CHAM_ipiv_t *IPIV );
 int CHAMELEON_zlatms_Tile( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A );
 int CHAMELEON_zlauum_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
 int CHAMELEON_zplghe_Tile(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed );
@@ -209,7 +208,7 @@ int CHAMELEON_zgemm_Tile_Async(cham_trans_t transA, cham_trans_t transB, CHAMELE
 int CHAMELEON_zgepdf_qdwh_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *H, gepdf_info_t *info, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zgeqrf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgeqrs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-//int CHAMELEON_zgesv_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zgesv_Tile_Async(CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B, void *user_wsA, void *user_wsB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgesv_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, double *S, CHAM_desc_t *T, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
@@ -218,7 +217,7 @@ int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV
 int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-//int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B, void *user_ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrs_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrs_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zhemm_Tile_Async(cham_side_t side, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
@@ -237,8 +236,7 @@ int CHAMELEON_zlansy_Tile_Async(cham_normtype_t norm, cham_uplo_t uplo, CHAM_des
 int CHAMELEON_zlantr_Tile_Async(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, double *value, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zlascal_Tile_Async(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zlaset_Tile_Async(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-//int CHAMELEON_zlaswp_Tile_Async(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-//int CHAMELEON_zlaswpc_Tile_Async(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zlaswp_Tile_Async( cham_side_t side, cham_dir_t dir, CHAM_desc_t *A, int K1, int K2, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zlatms_Tile_Async( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zlauum_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zplghe_Tile_Async(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
@@ -371,6 +369,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t
 int CHAMELEON_zTile_to_Lapack( CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA ) __attribute__((deprecated("Please refer to CHAMELEON_zDesc2Lap() instead")));
 int CHAMELEON_zLap2Desc( cham_uplo_t uplo, CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t *A );
 int CHAMELEON_zDesc2Lap( cham_uplo_t uplo, CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA );
+void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA, CHAM_ipiv_t *descIPIV );
 
 /**
  *  User Builder function prototypes
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index 1e5e242b274612406036f7f63cd73e82365be8a7..b9cd9fcb4be875946d42537f3c75a1997b9a8826 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -17,7 +17,8 @@
  * @author Florent Pruvost
  * @author Matthieu Kuhn
  * @author Alycia Lisito
- * @date 2024-09-06
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #ifndef _chameleon_tasks_h_
@@ -167,12 +168,14 @@ void INSERT_TASK_hgemm( const RUNTIME_option_t *options,
                                                   const CHAM_desc_t *B, int Bm, int Bn,
                         CHAMELEON_Real16_t beta,  const CHAM_desc_t *C, int Cm, int Cn );
 
-void INSERT_TASK_ipiv_init   ( const RUNTIME_option_t *options,
-                               CHAM_ipiv_t *ipiv );
+void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
+                            CHAM_ipiv_t *ipiv );
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t *ipiv );
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ws, int k, int h, int rank );
 void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
-                               int m0, int m, int k,
+                               int m0, int m, int k, int K1, int K2,
                                const CHAM_ipiv_t *ipivdesc, int ipivk );
 
 #include "chameleon/tasks_z.h"
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index f444409b8279007d43d810f6b603595c414f9819..90b4578d47eadeab248d9d47cb45a4a93b74a1b2 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -25,7 +25,7 @@
  * @author Romain Peressoni
  * @author Matthieu Kuhn
  * @author Ana Hourcau
- * @date 2024-11-12
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -188,12 +188,12 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
 void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
                           const CHAM_desc_t *tileA, int tileAm, int tileAn );
-void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
+void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options, cham_dir_t dir,
                              int m0, int k,
                              const CHAM_ipiv_t *tIPIV, int tIPIVk,
                              const CHAM_desc_t *tileA, int tileAm, int tileAn,
                              const CHAM_desc_t *tileB, int tileBm, int tileBn );
-void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
+void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options, cham_dir_t dir,
                              int m0, int k,
                              const CHAM_ipiv_t *tIPIV, int tIPIVk,
                              const CHAM_desc_t *tileA, int tileAm, int tileAn,
@@ -588,15 +588,20 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
  *
  * @ingroup CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_zperm_allreduce - Perfoms an allreduce operation on the tile
- * U(Um, Un) according to the permutation ipiv. This task is used in the LU
- * factorization with partial pivoting.
+ *  @brief Performs an allreduce operation on the tile
+ *  U(Um, Un) according to the permutation ipiv. This task is used in the LU
+ *  factorization with partial pivoting.
  *
  *******************************************************************************
  *
  * @param[in] options
  *          The runtime options data structure to pass through all insert_task calls.
  *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order
+ *          = ChamDirBackward: Reverse order
+ *
  * @param[in] A
  *          The descriptor of the matrix A.
  *
@@ -630,6 +635,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
  *******************************************************************************
  */
 void INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
+                                  cham_dir_t              dir,
                                   const CHAM_desc_t      *A,
                                   CHAM_desc_t            *U,
                                   int                     Um,
@@ -645,9 +651,9 @@ void INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
  *
  * @ingroup CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_zperm_allreduce_send_A - Sends the tile A(Am, An) to the processus
- * involved in the permutation. This task is used in the LU factorization with
- * partial pivoting.
+ *  @brief Sends the tile A(Am, An) to the processes
+ *  involved in the permutation. This task is used in the LU factorization with
+ *  partial pivoting.
  *
  *******************************************************************************
  *
@@ -687,15 +693,20 @@ void INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
  *
  * @ingroup CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_zperm_allreduce_send_perm - Sends the permutation ipivk to the
- * processus involved in the permutation. This task is used in the LU
- * factorization with partial pivoting.
+ *  @brief Sends the permutation ipivk to the
+ *  processes involved in the permutation. This task is used in the LU
+ *  factorization with partial pivoting.
  *
  *******************************************************************************
  *
  * @param[in] options
  *          The runtime options data structure to pass through all insert_task calls.
  *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order
+ *          = ChamDirBackward: Reverse order
+ *
  * @param[in] ipiv
  *          The pivot structure that contains the informations for the LU
  *          factorization with partial pivoting.
@@ -715,6 +726,7 @@ void INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
  *******************************************************************************
  */
 void INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
+                                            cham_dir_t              dir,
                                             CHAM_ipiv_t            *ipiv,
                                             int                     ipivk,
                                             int                     myrank,
@@ -726,15 +738,20 @@ void INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
  *
  * @ingroup CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_zperm_allreduce_send_invp - Sends the inverse permutation ipivk
- * to the processus involved in the permutation. This task is used in the LU
- * factorization with partial pivoting.
+ *  @brief Sends the inverse permutation ipivk
+ *  to the processes involved in the permutation. This task is used in the LU
+ *  factorization with partial pivoting.
  *
  *******************************************************************************
  *
  * @param[in] options
  *          The runtime options data structure to pass through all insert_task calls.
  *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order
+ *          = ChamDirBackward: Reverse order
+ *
  * @param[in] ipiv
  *          The pivot structure that contains the informations for the LU
  *          factorization with partial pivoting.
@@ -754,6 +771,7 @@ void INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
  *******************************************************************************
  */
 void INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
+                                            cham_dir_t              dir,
                                             CHAM_ipiv_t            *ipiv,
                                             int                     ipivk,
                                             const CHAM_desc_t      *A,
@@ -761,3 +779,4 @@ void INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
                                             int                     n );
 
 #endif /* _chameleon_tasks_z_h_ */
+
diff --git a/runtime/openmp/codelets/codelet_ipiv.c b/runtime/openmp/codelets/codelet_ipiv.c
index c21d13280ea0d316f5cc5d1799345ae0e8a4bbb8..ccc7e8f46ea496f30d00d81dd0f418ba07fdd175 100644
--- a/runtime/openmp/codelets/codelet_ipiv.c
+++ b/runtime/openmp/codelets/codelet_ipiv.c
@@ -13,7 +13,8 @@
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
  * @author Alycia Lisito
- * @date 2024-08-29
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #include "chameleon_openmp.h"
@@ -28,6 +29,14 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     (void)ipiv;
 }
 
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no task is submitted here; fails the assertion unless NDEBUG is defined. */
+    (void)options;
+    (void)ipiv;
+}
+
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ipiv, int k, int h, int rank )
 {
@@ -40,7 +49,7 @@ void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
-                               int m0, int m, int k,
+                               int m0, int m, int k, int K1, int K2,
                                const CHAM_ipiv_t *ipivdesc, int ipivk )
 {
     int *ipiv = NULL; // get pointer from ipivdesc
@@ -49,9 +58,11 @@ void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
 
 #pragma omp task firstprivate( m0, m, k ) depend( in:ipiv[0] ) depend( inout:perm[0] ) depend( inout:invp[0] )
     {
-        CORE_ipiv_to_perm( m0, m, k, ipiv, perm, invp );
+        CORE_ipiv_to_perm( m0, m, k, 1, m, ipiv, perm, invp );
     }
 
     (void)options;
+    (void)K1;
+    (void)K2;
     (void)ipivk;
 }
diff --git a/runtime/openmp/codelets/codelet_zlaswp.c b/runtime/openmp/codelets/codelet_zlaswp.c
index bce58c771ef3052ce4d20d16232082cd9a746f66..93bf20aef11964fa548adb7739b000af575b04ba 100644
--- a/runtime/openmp/codelets/codelet_zlaswp.c
+++ b/runtime/openmp/codelets/codelet_zlaswp.c
@@ -11,7 +11,7 @@
  *
  * @version 1.3.0
  * @author Mathieu Faverge
- * @date 2024-02-18
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -20,7 +20,7 @@
 #include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *U, int Um, int Un )
@@ -38,10 +38,11 @@ void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
     }
 
     (void)options;
+    (void)dir;
 }
 
 void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *B, int Bm, int Bn )
@@ -59,4 +60,5 @@ void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
     }
 
     (void)options;
+    (void)dir;
 }
diff --git a/runtime/openmp/codelets/codelet_zperm_allreduce.c b/runtime/openmp/codelets/codelet_zperm_allreduce.c
index eac34fdfd1f8a0814c277f7acb8a9b85cb594ec7..8b20a60fd43332dac7373edcb2de40ee552d050a 100644
--- a/runtime/openmp/codelets/codelet_zperm_allreduce.c
+++ b/runtime/openmp/codelets/codelet_zperm_allreduce.c
@@ -11,7 +11,7 @@
  *
  * @version 1.3.0
  * @author Alycia Lisito
- * @date 2024-11-12
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -38,6 +38,7 @@ INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        int                     myrank,
@@ -45,6 +46,7 @@ INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
                                        int                    *proc_involved  )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)myrank;
@@ -54,6 +56,7 @@ INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        const CHAM_desc_t      *A,
@@ -61,6 +64,7 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
                                        int                     n )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)A;
@@ -70,6 +74,7 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
+                             cham_dir_t              dir,
                              const CHAM_desc_t      *A,
                              CHAM_desc_t            *U,
                              int                     Um,
@@ -81,6 +86,7 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
                              void                   *ws )
 {
     (void)options;
+    (void)dir;
     (void)A;
     (void)U;
     (void)Um;
diff --git a/runtime/parsec/codelets/codelet_ipiv.c b/runtime/parsec/codelets/codelet_ipiv.c
index b6d582e5ac8514525665adebd27b9459a9076005..2145e00b3575d7de659f28422064616815acd22a 100644
--- a/runtime/parsec/codelets/codelet_ipiv.c
+++ b/runtime/parsec/codelets/codelet_ipiv.c
@@ -13,7 +13,8 @@
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
  * @author Alycia Lisito
- * @date 2024-08-29
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #include "chameleon_parsec.h"
@@ -28,6 +29,14 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     (void)ipiv;
 }
 
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t *ipiv )
+{
+    assert( 0 ); /* Stub: no task is submitted here; fails the assertion unless NDEBUG is defined. */
+    (void)options;
+    (void)ipiv;
+}
+
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ipiv, int k, int h, int rank )
 {
@@ -49,14 +58,14 @@ CORE_ipiv_to_perm_parsec( parsec_execution_stream_t *context,
     parsec_dtd_unpack_args(
         this_task, &m0, &m, &k, &ipiv, &perm, &invp );
 
-    CORE_ipiv_to_perm( m0, m, k, ipiv, perm, invp );
+    CORE_ipiv_to_perm( m0, m, k, 1, m, ipiv, perm, invp );
 
     (void)context;
     return PARSEC_HOOK_RETURN_DONE;
 }
 
 void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
-                               int m0, int m, int k,
+                               int m0, int m, int k, int K1, int K2,
                                const CHAM_ipiv_t *ipivdesc, int ipivk )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
@@ -70,4 +79,7 @@ void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
         PASSED_BY_REF, RUNTIME_perm_getaddr( ipivdesc, ipivk ), chameleon_parsec_get_arena_index_perm( ipivdesc ) | OUTPUT,
         PASSED_BY_REF, RUNTIME_invp_getaddr( ipivdesc, ipivk ), chameleon_parsec_get_arena_index_invp( ipivdesc ) | OUTPUT,
         PARSEC_DTD_ARG_END );
+
+    (void)K1;
+    (void)K2;
 }
diff --git a/runtime/parsec/codelets/codelet_zlaswp.c b/runtime/parsec/codelets/codelet_zlaswp.c
index 12aaf7089ff41f4e4090e0fb6f18e518c9813fd3..65849c96d1aae96cc1000dd93e5efbebe481c7d9 100644
--- a/runtime/parsec/codelets/codelet_zlaswp.c
+++ b/runtime/parsec/codelets/codelet_zlaswp.c
@@ -11,7 +11,7 @@
  *
  * @version 1.3.0
  * @author Mathieu Faverge
- * @date 2024-02-18
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -33,7 +33,7 @@ CORE_zlaswp_get_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *U, int Um, int Un )
@@ -54,6 +54,8 @@ void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
         sizeof(int),         &(tileU->ld), VALUE,
         PASSED_BY_REF, RUNTIME_perm_getaddr( ipiv, ipivk ),     chameleon_parsec_get_arena_index_perm( ipiv ) | INPUT,
         PARSEC_DTD_ARG_END );
+
+    (void)dir;
 }
 
 static inline int
@@ -70,7 +72,7 @@ CORE_zlaswp_set_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *B, int Bm, int Bn )
@@ -91,4 +93,6 @@ void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
         sizeof(int),         &(tileB->ld), VALUE,
         PASSED_BY_REF, RUNTIME_invp_getaddr( ipiv, ipivk ),     chameleon_parsec_get_arena_index_invp( ipiv ) | INPUT,
         PARSEC_DTD_ARG_END );
+
+    (void)dir;
 }
diff --git a/runtime/parsec/codelets/codelet_zperm_allreduce.c b/runtime/parsec/codelets/codelet_zperm_allreduce.c
index 9ceb440c8a4e677630a68355daa7defda7f904fa..f68148e24b5e4c4e7d42d7248c8bf4a9948477c6 100644
--- a/runtime/parsec/codelets/codelet_zperm_allreduce.c
+++ b/runtime/parsec/codelets/codelet_zperm_allreduce.c
@@ -11,7 +11,7 @@
  *
  * @version 1.3.0
  * @author Alycia Lisito
- * @date 2024-11-12
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -38,6 +38,7 @@ INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        int                     myrank,
@@ -45,6 +46,7 @@ INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
                                        int                    *proc_involved  )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)myrank;
@@ -54,6 +56,7 @@ INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        const CHAM_desc_t      *A,
@@ -61,6 +64,7 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
                                        int                     n )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)A;
@@ -70,6 +74,7 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
+                             cham_dir_t              dir,
                              const CHAM_desc_t      *A,
                              CHAM_desc_t            *U,
                              int                     Um,
@@ -81,6 +86,7 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
                              void                   *ws )
 {
     (void)options;
+    (void)dir;
     (void)A;
     (void)U;
     (void)Um;
diff --git a/runtime/quark/codelets/codelet_ipiv.c b/runtime/quark/codelets/codelet_ipiv.c
index 8075d0f8a43fc8fe2498cafdb39ae034483aa5d2..bf0846d3dfe9d6043162827a4d0a3eab9414caed 100644
--- a/runtime/quark/codelets/codelet_ipiv.c
+++ b/runtime/quark/codelets/codelet_ipiv.c
@@ -13,7 +13,8 @@
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
  * @author Alycia Lisito
- * @date 2024-08-29
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #include "chameleon_quark.h"
@@ -28,6 +29,14 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     (void)ipiv;
 }
 
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t            *ipiv )
+{
+    (void)ipiv;    /* both arguments are intentionally unused by this stub */
+    (void)options;
+    assert( 0 );   /* no Quark implementation here: aborts unless NDEBUG is defined */
+}
+
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ipiv, int k, int h, int rank )
 {
@@ -47,11 +56,11 @@ CORE_ipiv_to_perm_quark( Quark *quark )
 
     quark_unpack_args_6( quark, m0, m, k, ipiv, perm, invp );
 
-    CORE_ipiv_to_perm( m0, m, k, ipiv, perm, invp );
+    CORE_ipiv_to_perm( m0, m, k, 1, m, ipiv, perm, invp ); /* NOTE(review): K1/K2 given to INSERT_TASK_ipiv_to_perm are cast to void there, so this kernel always gets the fixed range 1..m - confirm this is intended for Quark */
 }
 
 void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
-                               int m0, int m, int k,
+                               int m0, int m, int k, int K1, int K2,
                                const CHAM_ipiv_t *ipivdesc, int ipivk )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -65,4 +74,7 @@ void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
         sizeof(int*), RUNTIME_perm_getaddr( ipivdesc, ipivk ), OUTPUT,
         sizeof(int*), RUNTIME_invp_getaddr( ipivdesc, ipivk ), OUTPUT,
         0 );
+
+    (void)K1;
+    (void)K2;
 }
diff --git a/runtime/quark/codelets/codelet_zlaswp.c b/runtime/quark/codelets/codelet_zlaswp.c
index 176dd16916eb51e1b698ad0d17dbd0d37c1a1d61..8f5a1b57fd52bd2e401273171584ebcca1478e50 100644
--- a/runtime/quark/codelets/codelet_zlaswp.c
+++ b/runtime/quark/codelets/codelet_zlaswp.c
@@ -11,7 +11,7 @@
  *
  * @version 1.3.0
  * @author Mathieu Faverge
- * @date 2024-02-18
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -30,7 +30,7 @@ static void CORE_zlaswp_get_quark( Quark *quark )
 }
 
 void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *U, int Um, int Un )
@@ -46,6 +46,8 @@ void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
         sizeof(CHAM_tile_t*), RTBLKADDR(A, ChamComplexDouble, Am, An), INPUT,
         sizeof(CHAM_tile_t*), RTBLKADDR(U, ChamComplexDouble, Um, Un), INOUT,
         0 );
+
+    (void)dir;
 }
 
 static void CORE_zlaswp_set_quark( Quark *quark )
@@ -59,7 +61,7 @@ static void CORE_zlaswp_set_quark( Quark *quark )
 }
 
 void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *B, int Bm, int Bn )
@@ -75,4 +77,6 @@ void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
         sizeof(CHAM_tile_t*), RTBLKADDR(A, ChamComplexDouble, Am, An), INPUT,
         sizeof(CHAM_tile_t*), RTBLKADDR(B, ChamComplexDouble, Bm, Bn), INOUT,
         0 );
+
+    (void)dir;
 }
diff --git a/runtime/quark/codelets/codelet_zperm_allreduce.c b/runtime/quark/codelets/codelet_zperm_allreduce.c
index f297d343b33455ba6340f0b81c45e8d01d29600f..1a2a7089c8addc5715d074a6c04bc5e8732aed1b 100644
--- a/runtime/quark/codelets/codelet_zperm_allreduce.c
+++ b/runtime/quark/codelets/codelet_zperm_allreduce.c
@@ -11,7 +11,7 @@
  *
  * @version 1.3.0
  * @author Alycia Lisito
- * @date 2024-11-12
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -38,6 +38,7 @@ INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        int                     myrank,
@@ -45,6 +46,7 @@ INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
                                        int                    *proc_involved  )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)myrank;
@@ -54,6 +56,7 @@ INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        const CHAM_desc_t      *A,
@@ -61,6 +64,7 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
                                        int                     n )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)A;
@@ -70,6 +74,7 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
+                             cham_dir_t              dir,
                              const CHAM_desc_t      *A,
                              CHAM_desc_t            *U,
                              int                     Um,
@@ -81,6 +86,7 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
                              void                   *ws )
 {
     (void)options;
+    (void)dir;
     (void)A;
     (void)U;
     (void)Um;
diff --git a/runtime/starpu/codelets/codelet_ipiv.c b/runtime/starpu/codelets/codelet_ipiv.c
index 4498c63f3cbaba8655c740f98f7bdc4cc5fea974..5a16c6e2dda5d2e411415bf368f214bbbc8ec71b 100644
--- a/runtime/starpu/codelets/codelet_ipiv.c
+++ b/runtime/starpu/codelets/codelet_ipiv.c
@@ -13,21 +13,28 @@
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
  * @author Alycia Lisito
- * @date 2024-09-17
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #include "chameleon_starpu_internal.h"
-#include "runtime_codelets.h"
 
-static void cl_ipiv_init_cpu_func(void *descr[], void *cl_arg)
+struct cl_laswp_args_s { /* Arguments handed to the cl_ipiv_init_data codelet */
+    int   m0;   /* index of the first entry of the block (m * mb at submission) */
+    int   n;    /* number of entries the codelet copies */
+    int   m;    /* set from ipiv->desc->m at submission; not read by the codelet shown in this file */
+    int  *data; /* source values (ipiv->data + m0); the pointer is stored, the values are not copied */
+};
+
+static void cl_ipiv_init_cpu_func( void *descr[], void *cl_arg )
 {
 #if !defined(CHAMELEON_SIMULATION)
-    int *ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
+    int *ipiv = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
     int i, m0, n;
 
     starpu_codelet_unpack_args( cl_arg, &m0, &n );
 
-    for( i=0; i<n; i++ ) {
+    for( i = 0; i < n; i++ ) {
         ipiv[i] = m0 + i + 1;
     }
 #endif
@@ -46,10 +53,10 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     int64_t mb = ipiv->mb;
     int     m;
 
-    for (m = 0; m < mt; m++) {
+    for ( m = 0; m < mt; m++ ) {
         starpu_data_handle_t ipiv_src = RUNTIME_ipiv_getaddr( ipiv, m );
         int m0 = m * mb;
-        int n  = (m == (mt-1)) ? ipiv->m - m0 : mb;
+        int n  = ( m == ( mt - 1 ) ) ? ipiv->m - m0 : mb;
 
         rt_starpu_insert_task(
             &cl_ipiv_init,
@@ -60,6 +67,62 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     }
 }
 
+static void cl_ipiv_init_data_cpu_func( void *descr[], void *cl_arg )
+{
+#if !defined(CHAMELEON_SIMULATION)
+    /* Fill the vector handle with the n values referenced by the codelet arguments */
+    const struct cl_laswp_args_s *args  = (const struct cl_laswp_args_s *)cl_arg;
+    const int                    *src   = args->data;
+    int                          *dst   = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
+    const int                     count = args->n;
+    int                           idx;
+    for ( idx = 0; idx < count; idx++ ) {
+        dst[idx] = src[idx];
+    }
+#endif
+}
+
+struct starpu_codelet cl_ipiv_init_data = { /* Codelet copying caller-provided pivot values into an ipiv handle */
+    .where     = STARPU_CPU,                 /* CPU implementation only */
+    .cpu_func  = cl_ipiv_init_data_cpu_func,
+    .nbuffers  = 1,                          /* one buffer: the ipiv vector handle */
+};
+
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t            *ipiv )
+{
+    /* Submit one cl_ipiv_init_data task per block of ipiv to copy ipiv->data into the runtime handles */
+    int64_t mt   = ipiv->mt;
+    int64_t mb   = ipiv->mb;
+    int     m;
+
+    if ( ipiv->data == NULL ) { /* no source array: no task is submitted */
+        return;
+    }
+
+    for ( m = 0; m < mt; m++ ) {
+        starpu_data_handle_t    ipiv_src = RUNTIME_ipiv_getaddr( ipiv, m );
+        struct cl_laswp_args_s *cl_args;
+        int                     m0, n;
+
+        m0 = m * mb;
+        n = ( m == ( mt-1 ) ) ? ipiv->m - m0 : mb; /* the last block holds the remaining ipiv->m - m0 entries */
+
+        cl_args     = malloc( sizeof(struct cl_laswp_args_s) ); /* NOTE(review): result is not checked; presumably released by StarPU through STARPU_CL_ARGS - confirm */
+        cl_args->m0 = m0;
+        cl_args->n  = n;
+        cl_args->m  = ipiv->desc->m;
+        /* Only the pointer is stored: ipiv->data must remain valid until the codelet has read it */
+        cl_args->data = ipiv->data + m0;
+
+        rt_starpu_insert_task(
+            &cl_ipiv_init_data,
+            STARPU_CL_ARGS, cl_args, sizeof(struct cl_laswp_args_s),
+            STARPU_W,       ipiv_src,
+            0);
+    }
+}
+
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ipiv, int k, int h, int rank )
 {
@@ -67,7 +130,7 @@ void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
 
 #if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI)
 #if !defined(HAVE_STARPU_MPI_REDUX_WRAPUP)
-    starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h   );
+    starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h );
     if ( h < ipiv->n ) {
         starpu_mpi_redux_data_prio_tree( options->sequence->comm, nextpiv,
                                          options->priority, 2 /* Binary tree */ );
@@ -86,16 +149,16 @@ void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_ipiv_to_perm_cpu_func( void *descr[], void *cl_arg )
 {
-    int m0, m, k;
+    int  m0, m, k, K1, K2;
     int *ipiv, *perm, *invp;
 
-    starpu_codelet_unpack_args( cl_arg, &m0, &m, &k );
+    starpu_codelet_unpack_args( cl_arg, &m0, &m, &k, &K1, &K2 ); /* same order as the STARPU_VALUE list in INSERT_TASK_ipiv_to_perm */
 
     ipiv = (int*)STARPU_VECTOR_GET_PTR(descr[0]);
     perm = (int*)STARPU_VECTOR_GET_PTR(descr[1]);
     invp = (int*)STARPU_VECTOR_GET_PTR(descr[2]);
 
-    CORE_ipiv_to_perm( m0, m, k, ipiv, perm, invp );
+    CORE_ipiv_to_perm( m0, m, k, K1, K2, ipiv, perm, invp ); /* K1/K2 are forwarded unchanged; presumably a pivot index range - confirm against the kernel */
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -115,7 +178,7 @@ static struct starpu_codelet cl_ipiv_to_perm = {
 };
 
 void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
-                               int m0, int m, int k,
+                               int m0, int m, int k, int K1, int K2,
                                const CHAM_ipiv_t *ipivdesc, int ipivk )
 {
     struct starpu_codelet *codelet = &cl_ipiv_to_perm;
@@ -125,6 +188,8 @@ void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
         STARPU_VALUE,             &m0,  sizeof(int),
         STARPU_VALUE,             &m,   sizeof(int),
         STARPU_VALUE,             &k,   sizeof(int),
+        STARPU_VALUE,             &K1,  sizeof(int),
+        STARPU_VALUE,             &K2,  sizeof(int),
         STARPU_R,                 RUNTIME_ipiv_getaddr( ipivdesc, ipivk ),
         STARPU_W,                 RUNTIME_perm_getaddr( ipivdesc, ipivk ),
         STARPU_W,                 RUNTIME_invp_getaddr( ipivdesc, ipivk ),
@@ -132,3 +197,4 @@ void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
         0 );
 }
+
diff --git a/runtime/starpu/codelets/codelet_zlaswp.c b/runtime/starpu/codelets/codelet_zlaswp.c
index 81c28d92f05d6c23e85e743b8402b79db31815b1..3829763abd896ca9db917a9d0573ac4d9b9b5255 100644
--- a/runtime/starpu/codelets/codelet_zlaswp.c
+++ b/runtime/starpu/codelets/codelet_zlaswp.c
@@ -13,7 +13,7 @@
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
  * @author Alycia Lisito
- * @date 2024-11-12
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -48,11 +48,12 @@ CODELETS_CPU( zlaswp_get, cl_zlaswp_get_cpu_func )
 #if defined(CHAMELEON_STARPU_USE_INSERT)
 
 void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *U, int Um, int Un )
 {
+    void                  *ipiv_handle;
     struct starpu_codelet *codelet = &cl_zlaswp_get;
     if ( A->get_rankof( A, Am, An) != A->myrank ) {
         return;
@@ -63,12 +64,18 @@ void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
     clargs->m0 = m0;
     clargs->k  = k;
 
+    if ( dir == ChamDirForward ) {
+        ipiv_handle = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
     //void (*callback)(void*) = options->profiling ? cl_zlaswp_get_callback : NULL;
 
     rt_starpu_insert_task(
         codelet,
         STARPU_CL_ARGS,             clargs, sizeof(struct cl_zlaswp_args_s),
-        STARPU_R,                   RUNTIME_perm_getaddr( ipiv, ipivk ),
+        STARPU_R,                   ipiv_handle,
         STARPU_R,                   RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_RW | STARPU_COMMUTE, RTBLKADDR(U, ChamComplexDouble, Um, Un),
         STARPU_PRIORITY,            options->priority,
@@ -80,18 +87,26 @@ void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
 #else /* defined(CHAMELEON_STARPU_USE_INSERT) */
 
 void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *U, int Um, int Un )
 {
-    int ret;
+    int                 ret;
     struct starpu_task *task;
+    void               *ipiv_handle;
 
     if ( A->get_rankof( A, Am, An) != A->myrank ) {
         return;
     }
 
+    if ( dir == ChamDirForward ) {
+        ipiv_handle = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
+
     INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zlaswp_get, zlaswp_get, zlaswp, 3);
 
     /*
@@ -99,8 +114,7 @@ void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
      */
     starpu_cham_exchange_init_params( options, &params, U->get_rankof( U, Um, Un ) );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
-                                                  RUNTIME_perm_getaddr( ipiv, ipivk ),
-                                                  STARPU_R );
+                                                  ipiv_handle, STARPU_R );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_R );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ),
                                 STARPU_RW | STARPU_COMMUTE );
@@ -157,12 +171,14 @@ static void cl_zlaswp_set_cpu_func( void *descr[], void *cl_arg )
 CODELETS_CPU( zlaswp_set, cl_zlaswp_set_cpu_func )
 
 #if defined(CHAMELEON_STARPU_USE_INSERT)
+
 void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *B, int Bm, int Bn )
 {
+    void                  *ipiv_handle;
     struct starpu_codelet *codelet = &cl_zlaswp_set;
     if ( B->get_rankof( B, Bm, Bn) != A->myrank ) {
         return;
@@ -173,12 +189,19 @@ void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
     clargs->m0 = m0;
     clargs->k  = k;
 
+    if ( dir == ChamDirForward ) {
+        ipiv_handle = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+
     //void (*callback)(void*) = options->profiling ? cl_zlaswp_set_callback : NULL;
 
     rt_starpu_insert_task(
         codelet,
         STARPU_CL_ARGS,           clargs, sizeof(struct cl_zlaswp_args_s),
-        STARPU_R,                 RUNTIME_invp_getaddr( ipiv, ipivk ),
+        STARPU_R,                 ipiv_handle,
         STARPU_R,                 RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_RW,                RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         STARPU_PRIORITY,          options->priority,
@@ -186,20 +209,30 @@ void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
         0 );
 }
-#else
+
+#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
 void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
-                             int m0, int k,
+                             cham_dir_t dir, int m0, int k,
                              const CHAM_ipiv_t *ipiv, int ipivk,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *B, int Bm, int Bn )
 {
-    int ret;
+    int                 ret;
     struct starpu_task *task;
+    void               *ipiv_handle;
 
     if ( B->get_rankof( B, Bm, Bn) != A->myrank ) {
         return;
     }
 
+    if( dir == ChamDirForward ) {
+        ipiv_handle = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+
     INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zlaswp_set, zlaswp_set, zlaswp, 3);
 
     /*
@@ -207,8 +240,7 @@ void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
      */
     starpu_cham_exchange_init_params( options, &params, B->get_rankof( B, Bm, Bn ) );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
-                                                  RUNTIME_invp_getaddr( ipiv, ipivk ),
-                                                  STARPU_R );
+                                                  ipiv_handle, STARPU_R );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_R );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( B, ChamComplexDouble, Bm, Bn ), STARPU_RW );
 
@@ -242,4 +274,5 @@ void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
     }
     starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
 }
-#endif
+#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
diff --git a/runtime/starpu/codelets/codelet_zperm_allreduce.c b/runtime/starpu/codelets/codelet_zperm_allreduce.c
index e32b7ad9c46a2303eb1c4c6a18d442935fca6d3a..a479056c5f9321b75cd89a99349fd1ef1c3f3976 100644
--- a/runtime/starpu/codelets/codelet_zperm_allreduce.c
+++ b/runtime/starpu/codelets/codelet_zperm_allreduce.c
@@ -12,7 +12,7 @@
  * @version 1.3.0
  * @author Alycia Lisito
  * @author Pierre Esterie
- * @date 2024-11-14
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -21,6 +21,7 @@
 #include <coreblas/cblas_wrapper.h>
 
 #if defined(CHAMELEON_USE_MPI)
+
 struct cl_redux_args_s {
     int tempmm;
     int mb;
@@ -91,6 +92,7 @@ INSERT_TASK_zperm_allreduce_send( const RUNTIME_option_t *options,
 
 static void
 INSERT_TASK_zperm_allreduce_recv( const RUNTIME_option_t *options,
+                                  cham_dir_t              dir,
                                   CHAM_desc_t            *U,
                                   CHAM_ipiv_t            *ipiv,
                                   int                     ipivk,
@@ -105,6 +107,15 @@ INSERT_TASK_zperm_allreduce_recv( const RUNTIME_option_t *options,
                                   int                     p_first )
 {
     struct cl_redux_args_s *clargs;
+    void                   *ipiv_handle;
+
+    if ( dir == ChamDirForward ) {
+        ipiv_handle = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
+
     clargs = malloc( sizeof( struct cl_redux_args_s ) );
     clargs->tempmm  = tempmm;
     clargs->mb      = U->mb;
@@ -121,7 +132,7 @@ INSERT_TASK_zperm_allreduce_recv( const RUNTIME_option_t *options,
         STARPU_CL_ARGS,           clargs, sizeof(struct cl_redux_args_s),
         STARPU_RW,                RTBLKADDR(U, CHAMELEON_Complex64_t, me,  n),
         STARPU_R,                 RTBLKADDR(U, CHAMELEON_Complex64_t, src, n),
-        STARPU_R,                 RUNTIME_perm_getaddr( ipiv, ipivk ),
+        STARPU_R,                 ipiv_handle,
         STARPU_EXECUTE_ON_NODE,   me,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
         STARPU_PRIORITY,          options->priority,
@@ -151,6 +162,7 @@ INSERT_TASK_zperm_allreduce_send( const RUNTIME_option_t *options,
 
 static void
 INSERT_TASK_zperm_allreduce_recv( const RUNTIME_option_t *options,
+                                  cham_dir_t              dir,
                                   CHAM_desc_t            *U,
                                   CHAM_ipiv_t            *ipiv,
                                   int                     ipivk,
@@ -164,8 +176,16 @@ INSERT_TASK_zperm_allreduce_recv( const RUNTIME_option_t *options,
                                   int                     np,
                                   int                     p_first )
 {
-    int ret;
+    int                 ret;
     struct starpu_task *task;
+    void               *ipiv_handle; /* perm or invp handle of block ipivk, selected from dir just below */
+
+    if ( dir == ChamDirForward ) {
+        ipiv_handle = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
 
     INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zperm_allreduce_send, zperm_allreduce, redux, 3 );
 
@@ -176,7 +196,7 @@ INSERT_TASK_zperm_allreduce_recv( const RUNTIME_option_t *options,
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
                                                   RTBLKADDR( U, ChamComplexDouble, src, n ),
                                                   STARPU_R );
-    starpu_cham_register_descr( &nbdata, descrs, RUNTIME_perm_getaddr( ipiv, ipivk ), STARPU_R );
+    starpu_cham_register_descr( &nbdata, descrs, ipiv_handle, STARPU_R );
 
     task = starpu_task_create();
     task->cl = cl;
@@ -221,6 +241,7 @@ INSERT_TASK_zperm_allreduce_recv( const RUNTIME_option_t *options,
 
 static void
 zperm_allreduce_chameleon_starpu_task( const RUNTIME_option_t     *options,
+                                       cham_dir_t                  dir,
                                        const CHAM_desc_t          *A,
                                        CHAM_desc_t                *U,
                                        int                         Um,
@@ -229,10 +250,10 @@ zperm_allreduce_chameleon_starpu_task( const RUNTIME_option_t     *options,
                                        int                         ipivk,
                                        int                         k,
                                        int                         n,
-                                       struct chameleon_pzgetrf_s *ws)
+                                       struct chameleon_pzgetrf_s *ws )
 {
     int *proc_involved = ws->proc_involved;
-    int  np_involved   = chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt - k);
+    int  np_involved   = chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt - k );
     int  np_iter       = np_involved;
     int  p_recv, p_send, me, p_first;
     int  shift = 1;
@@ -253,9 +274,11 @@ zperm_allreduce_chameleon_starpu_task( const RUNTIME_option_t     *options,
             p_recv = proc_involved[ ( me - shift + np_involved ) % np_involved ];
 
             INSERT_TASK_zperm_allreduce_send( options, U, A->myrank, p_send, n );
-            INSERT_TASK_zperm_allreduce_recv( options, U, ipiv, ipivk, A->myrank, p_recv,
+            INSERT_TASK_zperm_allreduce_recv( options, dir, U, ipiv, ipivk, A->myrank, p_recv,
                                               n, k == (A->mt-1) ? A->m - k * A->mb : A->mb,
-                                              chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1), shift, np_involved, p_first );
+                                              chameleon_desc_datadist_get_iparam(A, 0),
+                                              chameleon_desc_datadist_get_iparam(A, 1),
+                                              shift, np_involved, p_first );
 
             shift   = shift << 1;
             np_iter = chameleon_ceil( np_iter, 2 );
@@ -265,6 +288,7 @@ zperm_allreduce_chameleon_starpu_task( const RUNTIME_option_t     *options,
 
 void
 INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
+                             cham_dir_t              dir,
                              const CHAM_desc_t      *A,
                              CHAM_desc_t            *U,
                              int                     Um,
@@ -280,7 +304,7 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
     switch( alg ) {
     case ChamStarPUTasks:
     default:
-        zperm_allreduce_chameleon_starpu_task( options, A, U, Um, Un, ipiv, ipivk, k, n, tmp );
+        zperm_allreduce_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, k, n, tmp );
     }
 }
 
@@ -307,33 +331,51 @@ INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        int                     myrank,
                                        int                     np,
                                        int                    *proc_involved )
 {
-    int p;
+    int   p;
+    void *ipiv_handle;
+
+    if ( dir == ChamDirForward ) {
+        ipiv_handle = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
 
     for ( p = 0; p < np; p++ ) {
         if ( proc_involved[ p ] == myrank ) {
             continue;
         }
         starpu_mpi_get_data_on_node_detached( options->sequence->comm,
-                                              RUNTIME_perm_getaddr( ipiv, ipivk ),
+                                              ipiv_handle,
                                               proc_involved[ p ], NULL, NULL );
     }
 }
 
 void
 INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        const CHAM_desc_t      *A,
                                        int                     k,
                                        int                     n )
 {
-    int b, rank;
+    int   b, rank;
+    void *ipiv_handle;
+
+    if ( dir == ChamDirForward ) {
+        ipiv_handle = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
 
     for ( b = k+1; (b < A->mt) && ((b-(k+1)) < chameleon_desc_datadist_get_iparam(A, 0)); b ++ ) {
         rank = A->get_rankof( A, b, n );
@@ -341,10 +383,11 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
             continue;
         }
         starpu_mpi_get_data_on_node_detached( options->sequence->comm,
-                                              RUNTIME_invp_getaddr( ipiv, ipivk ),
+                                              ipiv_handle,
                                               rank, NULL, NULL );
     }
 }
+
 #else
 void
 INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
@@ -353,7 +396,7 @@ INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
                                     int                     An,
                                     int                     myrank,
                                     int                     np,
-                                    int                    *proc_involved  )
+                                    int                    *proc_involved )
 {
     (void)options;
     (void)A;
@@ -366,11 +409,13 @@ INSERT_TASK_zperm_allreduce_send_A( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        int                     myrank,
                                        int                     np,
-                                       int                    *proc_involved  )
+                                       int                    *proc_involved )
 {
     (void)options;
+    (void)dir; /* unused by this stub; cast like the other parameters to avoid an unused-parameter warning */
     (void)ipiv;
@@ -382,6 +427,7 @@ INSERT_TASK_zperm_allreduce_send_perm( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        CHAM_ipiv_t            *ipiv,
                                        int                     ipivk,
                                        const CHAM_desc_t      *A,
@@ -398,6 +444,7 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options,
 
 void
 INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
+                             cham_dir_t              dir,
                              const CHAM_desc_t      *A,
                              CHAM_desc_t            *U,
                              int                     Um,
@@ -419,4 +466,6 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
     (void)n;
     (void)ws;
+    (void)dir; /* unused by this stub; cast like the other parameters to avoid an unused-parameter warning */
 }
+
 #endif
diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt
index 81268734d901cbdf99cf27592207b53046423bf6..2221ee12853a7d47c04541f68dea65e3d39b953c 100644
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -26,7 +26,8 @@
 #  @author Alycia Lisito
 #  @author Matthieu Kuhn
 #  @author Abel Calluaud
-#  @date 2025-01-24
+#  @author Matteo Marcos
+#  @date 2025-03-24
 #
 ###
 
@@ -52,6 +53,8 @@ set(ZSRC_W_STDAPI
   testing_zlantr.c
   testing_zgemm.c
   testing_zgetrf.c
+  testing_zgesv.c
+  testing_zgetrs.c
   testing_zhemm.c
   testing_zherk.c
   testing_zher2k.c
@@ -81,6 +84,7 @@ set(ZSRC_WO_STDAPI
   testing_zgenm2.c
   testing_zgesv_nopiv.c
   testing_zgesvd.c
+  testing_zlaswp.c
   testing_zgetrf_nopiv.c
   testing_zgetrs_nopiv.c
   testing_zgeqrf.c
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index 39b7e89e04daf060dcacc87d17b851ee83e3191d..297d2628a1d114bc9ec2472b1caf2ad381fa442c 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -110,8 +110,13 @@ if (NOT CHAMELEON_SIMULATION)
                 add_test( test_${cat}_${prec}getrf_ppivblocked_batch ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf.in )
                 set_tests_properties( test_${cat}_${prec}getrf_ppivblocked_batch
                                       PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=3" )
-
+                add_test( test_${cat}_${prec}laswp ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/laswp.in )
+                add_test( test_${cat}_${prec}getrs ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrs.in )
+                add_test( test_${cat}_${prec}gesv ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/gesv.in )
                 if ( ${cat} STREQUAL "mpi" )
+                    add_test( test_${cat}_${prec}laswp_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/laswp.in )
+                    add_test( test_${cat}_${prec}getrs_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrs.in )
+                    add_test( test_${cat}_${prec}gesv_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/gesv.in )
                     add_test( test_${cat}_${prec}getrf_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrf.in )
                     set_tests_properties( test_${cat}_${prec}getrf_ppiv_comm_with_task
                                           PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_GETRF_ALL_REDUCE=cham_spu_tasks" )
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index 52f552a7c69d947b4fb25a336603bb8f4cebbc84..979abaf12ef505f892758a130a7d79da7252172d 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -22,7 +22,8 @@
  * @author Lucas Nesi
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2025-01-15
+ * @author Matteo Marcos
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -111,10 +112,11 @@ parameter_t parameters[] = {
     { "trans",  "Value of the trans parameter ('ConjTrans', 'Trans', 'NoTrans')",  -11, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans,    {0}, NULL, pread_trans, sprint_trans },
     { "transA", "Value of the transA parameter ('ConjTrans', 'Trans', 'NoTrans')", -12, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans,    {0}, NULL, pread_trans, sprint_trans },
     { "transB", "Value of the transB parameter ('ConjTrans', 'Trans', 'NoTrans')", -13, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans,    {0}, NULL, pread_trans, sprint_trans },
-    { "uplo",   "Value of the uplo parameter ('Upper', 'Lower', 'UpperLower')",   -14, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 7, TestUplo,     {0}, NULL, pread_uplo,  sprint_uplo  },
-    { "diag",   "Value of the diag parameter ('NonUnit', 'Unit')",   -15, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 7, TestDiag,     {0}, NULL, pread_diag,  sprint_diag  },
-    { "side",   "Value of the side parameter ('Left', 'Right')",   -16, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestSide,     {0}, NULL, pread_side,  sprint_side  },
-    { "norm",   "Value of the norm parameter ('One', 'Frobenius', 'Inf', 'Max')",   -17, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestNormtype, {0}, NULL, pread_norm,  sprint_norm  },
+    { "uplo",   "Value of the uplo parameter ('Upper', 'Lower', 'UpperLower')",    -14, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 7, TestUplo,     {0}, NULL, pread_uplo,  sprint_uplo  },
+    { "diag",   "Value of the diag parameter ('NonUnit', 'Unit')",                 -15, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 7, TestDiag,     {0}, NULL, pread_diag,  sprint_diag  },
+    { "side",   "Value of the side parameter ('Left', 'Right')",                   -16, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestSide,     {0}, NULL, pread_side,  sprint_side  },
+    { "norm",   "Value of the norm parameter ('One', 'Frobenius', 'Inf', 'Max')",  -17, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestNormtype, {0}, NULL, pread_norm,  sprint_norm  },
+    { "dir",    "Value of the dir parameter ('Forward', 'Backward')",              -18, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 7, TestDir,      {0}, NULL, pread_dir,   sprint_dir   },
 
     { NULL, "Operation specific scalar", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "alpha", "Value of the scalar alpha",                       'x', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
@@ -131,6 +133,10 @@ parameter_t parameters[] = {
     { NULL, "SVD parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "jobu",  "Value of the jobu parameter ('NoVec', 'Vec', 'Ivec', 'AllVec', 'SVec', 'OVec')",  -50, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestJob, {0}, NULL, pread_job, sprint_job },
     { "jobvt", "Value of the jobvt parameter ('NoVec', 'Vec', 'Ivec', 'AllVec', 'SVec', 'OVec')", -51, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestJob, {0}, NULL, pread_job, sprint_job },
+
+    { NULL, "LASWP parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
+    { "k1",    "Index of the first element to permute",             -70, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int },
+    { "k2",    "Index of the last element to permute",              -71, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int },
 #endif
 
     { "tsub",          "Graph submission time in s",             999, PARAM_OUTPUT, 2, 13, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl },
diff --git a/testing/input/gesv.in b/testing/input/gesv.in
new file mode 100644
index 0000000000000000000000000000000000000000..95ecf582575d4956ad8732e2b8fe54a923f90bea
--- /dev/null
+++ b/testing/input/gesv.in
@@ -0,0 +1,20 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Not given parameters will receive default values
+
+# GESV
+
+# nb: Tile size
+# ib: Inner tile size
+# n: Order of the matrix A and number of rows of matrix B
+# nrhs: The number of columns of matrix B
+# lda: Leading dimension of matrix A
+# ldb: Leading dimension of matrix B
+
+op = gesv
+nb = 4, 16, 17
+ib = 4, 12, 50
+n = 15, 21, 35
+nrhs = 1, 13, 22, 33
+lda = 40
+ldb = 41
+
diff --git a/testing/input/getrs.in b/testing/input/getrs.in
new file mode 100644
index 0000000000000000000000000000000000000000..9714143c4ddb4a953f2a9e756715a4ffe44b8735
--- /dev/null
+++ b/testing/input/getrs.in
@@ -0,0 +1,18 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Not given parameters will receive default values
+
+# GETRS
+
+# nb: Tile size
+# n: Order of the matrix A and number of rows of matrix B
+# nrhs: The number of columns of matrix B
+# lda: Leading dimension of matrix A
+# ldb: Leading dimension of matrix B
+
+op = getrs
+nb = 16, 17
+ib = 16, 17
+n = 15, 21, 35
+nrhs = 1, 13, 22, 33
+lda = 40
+ldb = 41
diff --git a/testing/input/laswp.in b/testing/input/laswp.in
new file mode 100644
index 0000000000000000000000000000000000000000..41037f5e5cd0b79357b2d0df8beafc6e30c2b2a8
--- /dev/null
+++ b/testing/input/laswp.in
@@ -0,0 +1,20 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Not given parameters will receive default values
+
+# LASWP
+
+# nb: Tile size
+# n: Order of the matrix A
+# lda: Leading dimension of matrix A
+# k1: First element of ipiv to apply the permutation.
+# k2: Last element of ipiv to apply the permutation.
+# dir: Specifies the order of the permutation.
+
+op = laswp
+nb = 4, 16, 17
+n = 15, 21, 35
+lda = 40
+k1 = 1, 2, 10
+k2 = 1, 2, 10
+dir = Forward, Backward
+
diff --git a/testing/run_list.c b/testing/run_list.c
index a6900e88e637536a16d1cb65664195f9f62a2d07..a8fefce450a464200d804f20123f401b79bcf364 100644
--- a/testing/run_list.c
+++ b/testing/run_list.c
@@ -13,7 +13,8 @@
  * @author Mathieu Faverge
  * @author Philippe Swartvagher
  * @author Alycia Lisito
- * @date 2024-02-18
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #include "testings.h"
@@ -442,6 +443,32 @@ run_arg_get_side( run_arg_list_t *arglist, const char *name, cham_side_t defval
     return rval.side;
 }
 
+/**
+ * @brief Searches for a cham_dir_t value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+cham_dir_t
+run_arg_get_dir( run_arg_list_t *arglist, const char *name, cham_dir_t defval )
+{
+    val_t fallback;
+    /* Wrap the default value: the generic lookup stores it when the name is missing */
+    fallback.dir = defval;
+    return run_arg_get( arglist, name, fallback ).dir;
+}
+
 /**
  * @brief Searches for a cham_job_t value by its name.
  *
diff --git a/testing/testing_zgesv.c b/testing/testing_zgesv.c
new file mode 100644
index 0000000000000000000000000000000000000000..ac5ffe62194ecafeb589c215db6a83efd5c1f564
--- /dev/null
+++ b/testing/testing_zgesv.c
@@ -0,0 +1,256 @@
+/**
+ *
+ * @file testing_zgesv.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgesv testing
+ *
+ * @version 1.3.0
+ * @author Lucas Barros de Assis
+ * @author Mathieu Faverge
+ * @author Alycia Lisito
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> c d s
+ *
+ */
+#include "testings.h"
+#include "chameleon/chameleon_z.h"
+#include "testing_zcheck.h"
+#include <chameleon/flops.h>
+#include <chameleon/getenv.h>
+#include <coreblas/lapacke.h>
+
+static cham_fixdbl_t
+flops_zgesv( int N, int NRHS )
+{
+    /* Cost of the N-by-N LU factorization plus the solve on NRHS right-hand sides */
+    return flops_zgetrf( N, N ) + flops_zgetrs( N, NRHS );
+}
+
+#if !defined(CHAMELEON_TESTINGS_VENDOR)
+/**
+ * @brief Test of the tile (descriptor) interface of zgesv.
+ *
+ * Generates random A and X, times the factorization + solve, and, when
+ * check is non-zero, validates the LU factors and then the solution.
+ *
+ * @retval The accumulated return codes of the tested call and of the checks.
+ */
+int
+testing_zgesv_desc( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Test parameters */
+    int         async = parameters_getvalue_int( "async" );
+    int         nb    = run_arg_get_nb( args );
+    int         ib    = run_arg_get_ib( args );
+    int         N     = run_arg_get_int( args, "N", 1000 );
+    int         NRHS  = run_arg_get_int( args, "NRHS", 1 );
+    int         LDA   = run_arg_get_int( args, "LDA", N );
+    int         LDB   = run_arg_get_int( args, "LDB", N );
+    int         seedA = run_arg_get_int( args, "seedA", testing_ialea() );
+    int         seedB = run_arg_get_int( args, "seedB", testing_ialea() );
+
+    /* Matrix and pivot descriptors, plus the workspaces of the asynchronous call */
+    CHAM_desc_t *descA, *descX;
+    CHAM_ipiv_t *descIPIV;
+    void        *wsA = NULL;
+    void        *wsB = NULL;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Creates the descriptors of A, X and of the pivots */
+    parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, N, N );
+    parameters_desc_create( "X", &descX, ChamComplexDouble, nb, nb, LDB, NRHS, N, NRHS );
+    CHAMELEON_Ipiv_Create( &descIPIV, descA, NULL );
+
+    /* Random system matrix and right-hand side */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    CHAMELEON_zplrnt_Tile( descX, seedB );
+
+    if ( async ) {
+        wsA = CHAMELEON_zgetrf_WS_Alloc( descA );
+        wsB = CHAMELEON_zgetrf_WS_Alloc( descX );
+    }
+
+    /* Timed region: factorization and solve */
+    testing_start( &test_data );
+    if ( !async ) {
+        hres = CHAMELEON_zgesv_Tile( descA, descIPIV, descX );
+    }
+    else {
+        hres = CHAMELEON_zgesv_Tile_Async( descA, descIPIV, descX, wsA, wsB,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgesv( N, NRHS ) );
+
+    if ( async ) {
+        CHAMELEON_zgetrf_WS_Free( wsA );
+        CHAMELEON_zgetrf_WS_Free( wsB );
+    }
+
+    /* Checks the factorisation, then the residual of the solve */
+    if ( check ) {
+        CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE );
+
+        /* Check the factorization against the row-permuted original matrix P * A0 */
+        CHAMELEON_zplrnt_Tile( descA0, seedA );
+        CHAMELEON_zlaswp_Tile( ChamLeft, ChamDirForward, descA0, 1, N, descIPIV );
+        hres += check_zxxtrf( args, ChamGeneral, ChamUpperLower, descA0, descA );
+
+        /* Check the solve only when no error has been reported so far */
+        if ( hres == 0 ) {
+            CHAM_desc_t *descB = CHAMELEON_Desc_Copy( descX, CHAMELEON_MAT_ALLOC_TILE );
+
+            CHAMELEON_zplrnt_Tile( descB,  seedB );
+            CHAMELEON_zplrnt_Tile( descA0, seedA );
+            hres += check_zsolve( args, ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB );
+
+            CHAMELEON_Desc_Destroy( &descB );
+        }
+
+        CHAMELEON_Desc_Destroy( &descA0 );
+    }
+
+    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    parameters_desc_destroy( &descA );
+    parameters_desc_destroy( &descX );
+
+    return hres;
+}
+#endif
+
+/* LAPACK-layout interface test of zgesv (or of LAPACKE_zgesv in vendor builds), with optional checks */
+int
+testing_zgesv_std( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+#if !defined(CHAMELEON_TESTINGS_VENDOR)
+    int    api   = parameters_getvalue_int( "api" );
+#endif
+    int    nb    = run_arg_get_nb( args );
+    int    ib    = run_arg_get_ib( args );
+    int    N     = run_arg_get_int( args, "N", 1000 );
+    int    NRHS  = run_arg_get_int( args, "NRHS", 1 );
+    int    LDA   = run_arg_get_int( args, "LDA", N );
+    int    LDB   = run_arg_get_int( args, "LDB", N );
+    int    seedA = run_arg_get_int( args, "seedA", testing_ialea() );
+    int    seedB = run_arg_get_int( args, "seedB", testing_ialea() );
+
+    /* Column-major matrices and pivot vector, all released before returning */
+    CHAMELEON_Complex64_t *A    = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N );
+    CHAMELEON_Complex64_t *X    = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS );
+    int                   *IPIV = malloc( sizeof(int) * N );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Fills the matrix with random values */
+    CHAMELEON_zplrnt( N, N,    A, LDA, seedA );
+    CHAMELEON_zplrnt( N, NRHS, X, LDB, seedB );
+
+    /* Calculates the solution */
+#if defined(CHAMELEON_TESTINGS_VENDOR)
+    testing_start( &test_data );
+    hres = LAPACKE_zgesv( LAPACK_COL_MAJOR, N, NRHS, A, LDA, IPIV, X, LDB );
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgesv( N, NRHS ) );
+#else
+    testing_start( &test_data );
+    switch ( api ) {
+    case 1:
+        hres = CHAMELEON_zgesv( N, NRHS, A, LDA, IPIV, X, LDB );
+        break;
+#if !defined(CHAMELEON_SIMULATION) && 0
+    case 2:
+        CHAMELEON_lapacke_zgesv( CblasColMajor, N, NRHS, A, LDA, IPIV, X, LDB );
+        break;
+#endif
+    default:
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            fprintf( stderr, "SKIPPED: This function can only be used with the option --api 1 or --api 2.\n" );
+        }
+        free( IPIV ); /* do not leak the buffers when the test is skipped */
+        free( A );
+        free( X );
+        return -1;
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgesv( N, NRHS ) );
+
+    /* Checks the factorisation and residual */
+    if ( check ) {
+        CHAMELEON_Complex64_t *A0 = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N );
+        CHAMELEON_Complex64_t *B  = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS );
+
+        /* Check the factorization against the row-permuted original matrix P * A0 */
+        CHAMELEON_zplrnt( N, N, A0, LDA, seedA );
+        CHAMELEON_zlaswp( ChamLeft, ChamDirForward, N, N, A0, LDA, 1, N, IPIV );
+
+        hres += check_zxxtrf_std( args, ChamGeneral, ChamUpperLower, N, N, A0, A, LDA );
+
+        /* Check the solve with the regenerated (unpermuted) A0 and right-hand side B */
+        CHAMELEON_zplrnt( N, N,    A0, LDA, seedA );
+        CHAMELEON_zplrnt( N, NRHS, B,  LDB, seedB );
+        hres += check_zsolve_std( args, ChamGeneral, ChamNoTrans, ChamUpperLower, N, NRHS, A0, LDA, X, B, LDB );
+
+        free( A0 );
+        free( B );
+    }
+#endif
+
+    free( IPIV );
+    free( A );
+    free( X );
+
+    (void)check;
+    return hres;
+}
+
+testing_t   test_zgesv; /* Test descriptor filled and registered by testing_zgesv_init() */
+#if defined(CHAMELEON_TESTINGS_VENDOR)
+const char *zgesv_params[] = { "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+#else
+const char *zgesv_params[] = { "mtxfmt", "nb", "ib", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+#endif
+const char *zgesv_output[] = { NULL };
+const char *zgesv_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Fills the zgesv test descriptor and registers it (declared with the constructor attribute)
+ */
+void testing_zgesv_init( void ) __attribute__( ( constructor ) );
+void
+testing_zgesv_init( void )
+{
+    test_zgesv.name      = "zgesv";
+    test_zgesv.helper    = "General linear system solve (LU with partial pivoting)";
+    test_zgesv.params    = zgesv_params;
+    test_zgesv.output    = zgesv_output;
+    test_zgesv.outchk    = zgesv_outchk;
+#if defined(CHAMELEON_TESTINGS_VENDOR)
+    test_zgesv.fptr_desc = NULL; /* testing_zgesv_desc is not compiled in vendor builds */
+#else
+    test_zgesv.fptr_desc = testing_zgesv_desc;
+#endif
+    test_zgesv.fptr_std  = testing_zgesv_std;
+    test_zgesv.next      = NULL;
+
+    testing_register( &test_zgesv );
+}
+
diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c
index 1db2d0030f335ea521bc12bf22ae37a830c920cc..5e489b7f7c1e1f8c66e3c9f7ba86f3308c3fb75e 100644
--- a/testing/testing_zgetrf.c
+++ b/testing/testing_zgetrf.c
@@ -17,7 +17,8 @@
  * @author Lionel Eyraud-Dubois
  * @author Xavier Lacoste
  * @author Florent Pruvost
- * @date 2025-01-29
+ * @author Matteo Marcos
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -106,47 +107,24 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
     testing_stop( &test_data, flops_zgetrf( M, N ) );
 
     /* Checks the factorization and residual */
-#if !defined(CHAMELEON_SIMULATION)
     if ( check ) {
-        CHAM_desc_t *descA0c;
         CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE );
 
-        /* Create A0c as local to rank 0 on all nodes to gather the matrix */
-        CHAMELEON_Desc_Create_User(
-            &descA0c, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble,
-            nb, nb, nb*nb, M, N, 0, 0, M, N, 1, 1,
-            chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL, NULL );
-
         if ( diag == ChamUnit ) {
-            CHAMELEON_zplgtr_Tile( 0,     ChamUpper, descA0c, seedA   );
-            CHAMELEON_zplgtr_Tile( minMN, ChamLower, descA0c, seedA+1 );
-        }
-        else {
-            CHAMELEON_zplrnt_Tile( descA0c, seedA );
-        }
-
-        /* Compute the permutation of A0: P * A0 */
-        if ( CHAMELEON_Comm_rank() == 0 ) {
-            int *ipiv;
-
-            ipiv = malloc( sizeof(int) * minMN );
-            CHAMELEON_Ipiv_Gather( descIPIV, ipiv, 0 );
-            LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, 1, minMN, ipiv, 1 );
-            free( ipiv );
+            CHAMELEON_zplgtr_Tile( 0,     ChamUpper, descA0, seedA   );
+            CHAMELEON_zplgtr_Tile( minMN, ChamLower, descA0, seedA+1 );
         }
         else {
-            CHAMELEON_Ipiv_Gather( descIPIV, NULL, 0 );
+            CHAMELEON_zplrnt_Tile( descA0, seedA );
         }
 
-        CHAMELEON_zlacpy_Tile( ChamUpperLower, descA0c, descA0 );
-        CHAMELEON_Desc_Destroy( &descA0c );
+        CHAMELEON_zlaswp_Tile( ChamLeft, ChamDirForward, descA0, 1, descA0->m, descIPIV );
 
         hres += check_zxxtrf( args, ChamGeneral, ChamUpperLower,
                               descA0, descA );
 
         CHAMELEON_Desc_Destroy( &descA0 );
     }
-#endif /* !defined(CHAMELEON_SIMULATION) */
 
     if ( ws != NULL ) {
         CHAMELEON_zgetrf_WS_Free( ws );
@@ -223,7 +201,7 @@ testing_zgetrf_std( run_arg_list_t *args, int check )
         CHAMELEON_zplrnt( M, N, A0, LDA, seedA );
 
         /* Compute the permutation of A0: P * A0 */
-        LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, A0, M, 1, minMN, IPIV, 1 );
+        CHAMELEON_zlaswp( ChamLeft, ChamDirForward, M, N, A0, LDA, 1, minMN, IPIV );
 
         hres += check_zxxtrf_std( args, ChamGeneral, ChamUpperLower, M, N, A0, A, LDA );
 
diff --git a/testing/testing_zgetrs.c b/testing/testing_zgetrs.c
new file mode 100644
index 0000000000000000000000000000000000000000..4a3713be3db61d974b151d1100edb63dac512495
--- /dev/null
+++ b/testing/testing_zgetrs.c
@@ -0,0 +1,230 @@
+/**
+ *
+ * @file testing_zgetrs.c
+ *
+ * @copyright 2019-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrs testing
+ *
+ * @version 1.3.0
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include <chameleon_lapack.h>
+#include "chameleon/chameleon_z.h"
+#include "testings.h"
+#include "testing_zcheck.h"
+#include <chameleon/flops.h>
+#include <chameleon/getenv.h>
+#if defined(CHAMELEON_TESTINGS_VENDOR) || !defined(CHAMELEON_SIMULATION)
+#include <coreblas.h>
+#include <coreblas/lapacke.h>
+#endif
+
+#if !defined(CHAMELEON_TESTINGS_VENDOR)
+/* Tile (descriptor) interface test of zgetrs: A is factorized beforehand, only the solve is timed */
+int
+testing_zgetrs_desc( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int         async = parameters_getvalue_int( "async" );
+    int         nb    = run_arg_get_nb( args );
+    int         ib    = run_arg_get_ib( args );
+    int         N     = run_arg_get_int( args, "N", 1000 );
+    int         NRHS  = run_arg_get_int( args, "NRHS", 1 );
+    int         LDA   = run_arg_get_int( args, "LDA", N );
+    int         LDB   = run_arg_get_int( args, "LDB", N );
+    int         seedA = run_arg_get_int( args, "seedA", testing_ialea() );
+    int         seedB = run_arg_get_int( args, "seedB", testing_ialea() );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX;
+    CHAM_ipiv_t *descIPIV;
+    void        *ws = NULL;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Creates the matrices */
+    parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, N, N );
+    parameters_desc_create( "X", &descX, ChamComplexDouble, nb, nb, LDB, NRHS, N, NRHS );
+    CHAMELEON_Ipiv_Create( &descIPIV, descA, NULL );
+
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    CHAMELEON_zplrnt_Tile( descX, seedB );
+
+    CHAMELEON_zgetrf_Tile( descA, descIPIV ); /* done before testing_start(): not part of the timing */
+
+    if ( async ) {
+        ws = CHAMELEON_zgetrf_WS_Alloc( descX );
+    }
+
+    /* Calculates the solution */
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgetrs_Tile_Async( ChamNoTrans, descA, descIPIV, descX, ws, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgetrs_Tile( ChamNoTrans, descA, descIPIV, descX );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgetrs( N, NRHS ) );
+
+    /* Checks the solve with the regenerated A0 and right-hand side B */
+#if !defined(CHAMELEON_SIMULATION)
+    if ( check ) {
+        CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE );
+        CHAM_desc_t *descB  = CHAMELEON_Desc_Copy( descX, CHAMELEON_MAT_ALLOC_TILE );
+
+        CHAMELEON_zplrnt_Tile( descA0, seedA );
+        CHAMELEON_zplrnt_Tile( descB,  seedB );
+
+        hres += check_zsolve( args, ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB );
+
+        CHAMELEON_Desc_Destroy( &descA0 );
+        CHAMELEON_Desc_Destroy( &descB );
+    }
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+    if ( ws != NULL ) {
+        CHAMELEON_zgetrf_WS_Free( ws );
+    }
+
+    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    parameters_desc_destroy( &descA );
+    parameters_desc_destroy( &descX );
+
+    return hres;
+}
+#endif
+
+/* LAPACK-layout interface test of zgetrs (or of LAPACKE_zgetrs in vendor builds); A is factorized before the timed solve */
+int
+testing_zgetrs_std( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+#if !defined(CHAMELEON_TESTINGS_VENDOR)
+    int         api   = parameters_getvalue_int( "api" );
+#endif
+    int         nb    = run_arg_get_nb( args );
+    int         ib    = run_arg_get_ib( args );
+    int         N     = run_arg_get_int( args, "N", 1000 );
+    int         NRHS  = run_arg_get_int( args, "NRHS", 1 );
+    int         LDA   = run_arg_get_int( args, "LDA", N );
+    int         LDB   = run_arg_get_int( args, "LDB", N );
+    int         seedA = run_arg_get_int( args, "seedA", testing_ialea() );
+    int         seedB = run_arg_get_int( args, "seedB", testing_ialea() );
+
+    /* Column-major matrices and pivot vector, all released before returning */
+    CHAMELEON_Complex64_t *A    = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N );
+    CHAMELEON_Complex64_t *X    = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS );
+    int                   *IPIV = malloc( sizeof(int) * N );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Fills the matrices with random values and factorizes A (outside of the timed region) */
+    CHAMELEON_zplrnt( N, N, A, LDA, seedA );
+    CHAMELEON_zplrnt( N, NRHS, X, LDB, seedB );
+    CHAMELEON_zgetrf( N, N, A, LDA, IPIV );
+
+    /* Calculates the solution */
+#if defined(CHAMELEON_TESTINGS_VENDOR)
+    testing_start( &test_data );
+    hres = LAPACKE_zgetrs( LAPACK_COL_MAJOR, 'N', N, NRHS, A, LDA, IPIV, X, LDB );
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgetrs( N, NRHS ) );
+#else
+    testing_start( &test_data );
+    switch ( api ) {
+    case 1:
+        hres = CHAMELEON_zgetrs( ChamNoTrans, N, NRHS, A, LDA, IPIV, X, LDB );
+        break;
+#if !defined(CHAMELEON_SIMULATION) && 0
+    case 2:
+        CHAMELEON_lapacke_zgetrs( CblasColMajor, N, NRHS, A, LDA, IPIV, X, LDB );
+        break;
+#endif
+    default:
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            fprintf( stderr, "SKIPPED: This function can only be used with the option --api 1 or --api 2.\n" );
+        }
+        free( IPIV ); /* do not leak the buffers when the test is skipped */
+        free( A );
+        free( X );
+        return -1;
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgetrs( N, NRHS ) );
+
+#if !defined(CHAMELEON_SIMULATION)
+    /* Checks the solve with the regenerated A0 and right-hand side B */
+    if ( check ) {
+        CHAMELEON_Complex64_t *A0 = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N );
+        CHAMELEON_Complex64_t *B  = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS );
+
+        CHAMELEON_zplrnt( N, N, A0, LDA, seedA );
+        CHAMELEON_zplrnt( N, NRHS, B,  LDB, seedB );
+
+        hres += check_zsolve_std( args, ChamGeneral, ChamNoTrans, ChamUpperLower, N, NRHS, A0, LDA, X, B, LDB );
+
+        free( A0 );
+        free( B );
+    }
+#endif
+#endif
+
+    free( IPIV );
+    free( A );
+    free( X );
+
+    (void)check;
+    return hres;
+}
+
+testing_t   test_zgetrs; /* Test descriptor filled and registered by testing_zgetrs_init() */
+#if defined(CHAMELEON_TESTINGS_VENDOR)
+const char *zgetrs_params[] = { "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+#else
+const char *zgetrs_params[] = { "mtxfmt", "nb", "ib", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+#endif
+const char *zgetrs_output[] = { NULL };
+const char *zgetrs_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Fills the zgetrs test descriptor and registers it (declared with the constructor attribute)
+ */
+void testing_zgetrs_init( void ) __attribute__( ( constructor ) );
+void
+testing_zgetrs_init( void )
+{
+    test_zgetrs.name      = "zgetrs";
+    test_zgetrs.helper    = "General triangular solve (LU with partial pivoting)";
+    test_zgetrs.params    = zgetrs_params;
+    test_zgetrs.output    = zgetrs_output;
+    test_zgetrs.outchk    = zgetrs_outchk;
+#if defined(CHAMELEON_TESTINGS_VENDOR)
+    test_zgetrs.fptr_desc = NULL; /* testing_zgetrs_desc is not compiled in vendor builds */
+#else
+    test_zgetrs.fptr_desc = testing_zgetrs_desc;
+#endif
+    test_zgetrs.fptr_std  = testing_zgetrs_std;
+    test_zgetrs.next      = NULL;
+
+    testing_register( &test_zgetrs );
+}
diff --git a/testing/testing_zlaswp.c b/testing/testing_zlaswp.c
new file mode 100644
index 0000000000000000000000000000000000000000..56c7a8a84afcacae9b143ecdfeb24369f9713668
--- /dev/null
+++ b/testing/testing_zlaswp.c
@@ -0,0 +1,141 @@
+/**
+ *
+ * @file testing_zlaswp.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlaswp testing
+ *
+ * @version 1.3.0
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon/constants.h"
+#include "chameleon/struct.h"
+#include "testings.h"
+#include "chameleon/chameleon_z.h"
+#include "testing_zcheck.h"
+#include <chameleon/flops.h>
+#include <chameleon/getenv.h>
+#include <coreblas/lapacke.h>
+#include <chameleon/tasks.h>
+
+/* Fill IPIV[0..M-1] with random pivots: IPIV[i] = i + 1 + ( testing_ialea() % ( M - i ) ) */
+static void testing_zlaswp_ipiv_gen( int *IPIV,
+                                     int  M )
+{
+    /* NOTE(review): entries lie in [i+1, M] only if testing_ialea() is non-negative -- confirm */
+    for ( int row = 0; row < M; row++ ) {
+        IPIV[row] = ( row + 1 ) + testing_ialea() % ( M - row );
+    }
+}
+
+int
+testing_zlaswp_desc( run_arg_list_t *args, int check )
+{
+    /* Runs CHAMELEON_zlaswp_Tile[_Async] on a random M x N matrix; if check, compares with LAPACKE_zlaswp */
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+    /* Read arguments */
+    int         async   = parameters_getvalue_int( "async" );
+    cham_side_t side    = run_arg_get_side( args, "side", ChamLeft );
+    cham_dir_t  dir     = run_arg_get_dir( args,  "dir", ChamDirForward );
+    int         nb      = run_arg_get_nb(  args );
+    int         N       = run_arg_get_int( args, "N", 1000 );
+    int         M       = run_arg_get_int( args, "M", N );
+    int         LDA     = run_arg_get_int( args, "LDA", M ); /* A has M rows: the default leading dimension is M */
+    int         seedA   = run_arg_get_int( args, "seedA", testing_ialea() );
+    int         K1      = run_arg_get_int( args, "K1", 1 );
+    int         K2      = run_arg_get_int( args, "K2", M );
+
+    int *IPIV     = malloc( sizeof(int) * M );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
+    CHAM_ipiv_t *descIPIV;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Creates the matrices */
+    parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N );
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+
+    testing_zlaswp_ipiv_gen( IPIV, M );
+    CHAMELEON_Ipiv_Create( &descIPIV, descA, IPIV );
+    CHAMELEON_Ipiv_Init( descA, descIPIV );
+
+    /* Applies the permutation */
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlaswp_Tile_Async( side, dir, descA, K1, K2, descIPIV, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zlaswp_Tile( side, dir, descA, K1, K2, descIPIV );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, 0 );
+
+#if !defined(CHAMELEON_SIMULATION)
+    if ( check ) {
+        CHAM_desc_t *descA0, *descA0c;
+        int          INCX = ( dir == ChamDirForward ) ? 1 : -1;
+
+        descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE );
+        /* Reference: A is regenerated from seedA in a buffer of leading dimension M for LAPACKE_zlaswp */
+        CHAMELEON_Desc_Create_User(
+            &descA0c, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble,
+            nb, nb, nb*nb, M, N, 0, 0, M, N, 1, 1,
+            chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL, NULL );
+
+        CHAMELEON_zplrnt_Tile( descA0c, seedA );
+        /* NOTE(review): `side` is not used here; LAPACKE_zlaswp interchanges rows -- confirm for ChamRight */
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, K1, K2, IPIV, INCX );
+        }
+
+        CHAMELEON_zlacpy_Tile( ChamUpperLower, descA0c, descA0 );
+        CHAMELEON_Desc_Destroy( &descA0c );
+
+        hres += check_zmatrices( args, ChamUpperLower, descA, descA0 );
+
+        CHAMELEON_Desc_Destroy( &descA0 );
+    }
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    parameters_desc_destroy( &descA );
+    free( IPIV );
+
+    return hres;
+}
+
+testing_t   test_zlaswp; /* Test entry filled and registered by testing_zlaswp_init() */
+const char *zlaswp_params[] = { "mtxfmt", "nb", "n", "m", "lda", "seedA", "k1", "k2", "side", "dir", NULL };
+const char *zlaswp_output[] = { NULL }; /* NULL-terminated list with no entry */
+const char *zlaswp_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Fill the zlaswp test entry and register it (declared as a constructor)
+ */
+void testing_zlaswp_init( void ) __attribute__( ( constructor ) );
+void
+testing_zlaswp_init( void )
+{
+    testing_t *test = &test_zlaswp;
+    test->name      = "zlaswp";
+    test->helper    = "Row interchange on general matrices";
+    test->params    = zlaswp_params;
+    test->output    = zlaswp_output;
+    test->outchk    = zlaswp_outchk;
+    test->next      = NULL;
+    test->fptr_desc = testing_zlaswp_desc; /* fptr_std is not assigned in this function */
+    testing_register( test );
+}
+
diff --git a/testing/testing_zposv.c b/testing/testing_zposv.c
index c2a739a9d263cc1c95e4a949bf93ac9718be443c..21237c75a5be31d75b45f46ccea89ff8bc3c42c4 100644
--- a/testing/testing_zposv.c
+++ b/testing/testing_zposv.c
@@ -14,7 +14,7 @@
  * @author Florent Pruvost
  * @author Mathieu Faverge
  * @author Alycia Lisito
- * @date 2025-01-29
+ * @date 2025-03-21
  * @precisions normal z -> c d s
  *
  */
@@ -80,7 +80,7 @@ testing_zposv_desc( run_arg_list_t *args, int check )
     test_data.hres = hres;
     testing_stop( &test_data, flops_zposv( N, NRHS ) );
 
-    /* Checks the factorisation and residue */
+    /* Checks the factorisation and the residual */
     if ( check ) {
         CHAM_desc_t *descA0, *descB;
 
@@ -167,7 +167,7 @@ testing_zposv_std( run_arg_list_t *args, int check )
     test_data.hres = hres;
     testing_stop( &test_data, flops_zposv( N, NRHS ) );
 
-    /* Checks the factorisation and residue */
+    /* Checks the factorisation and the residual */
     if ( check ) {
         CHAMELEON_Complex64_t *A0 = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N );
         CHAMELEON_Complex64_t *B  = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS );
diff --git a/testing/testings.h b/testing/testings.h
index d341be66bbe4fe56964aea72fa2d4eeb4d8ec042..25283a6630e2c673cecda41380bbc32544b6085f 100644
--- a/testing/testings.h
+++ b/testing/testings.h
@@ -14,7 +14,8 @@
  * @author Alycia Lisito
  * @author Florent Pruvost
  * @author Lionel Eyraud-Dubois
- * @date 2024-03-21
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #ifndef _testings_h_
@@ -45,6 +46,7 @@ typedef enum valtype_ {
     TestUplo,
     TestDiag,
     TestSide,
+    TestDir,
     TestJob,
     TestNormtype,
     TestString,
@@ -59,6 +61,7 @@ union val_u {
     cham_uplo_t           uplo;
     cham_diag_t           diag;
     cham_side_t           side;
+    cham_dir_t            dir;
     cham_job_t            job;
     cham_normtype_t       ntype;
     CHAMELEON_Complex64_t zval;
@@ -168,6 +171,7 @@ val_t pread_trans    ( const char *str );
 val_t pread_uplo     ( const char *str );
 val_t pread_diag     ( const char *str );
 val_t pread_side     ( const char *str );
+val_t pread_dir      ( const char *str );
 val_t pread_job      ( const char *str );
 val_t pread_norm     ( const char *str );
 val_t pread_string   ( const char *str );
@@ -185,6 +189,7 @@ char *sprint_trans    ( val_t val, int human, int nbchar, char *str_in );
 char *sprint_uplo     ( val_t val, int human, int nbchar, char *str_in );
 char *sprint_diag     ( val_t val, int human, int nbchar, char *str_in );
 char *sprint_side     ( val_t val, int human, int nbchar, char *str_in );
+char *sprint_dir      ( val_t val, int human, int nbchar, char *str_in );
 char *sprint_job      ( val_t val, int human, int nbchar, char *str_in );
 char *sprint_norm     ( val_t val, int human, int nbchar, char *str_in );
 char *sprint_string   ( val_t val, int human, int nbchar, char *str_in );
@@ -209,6 +214,7 @@ cham_trans_t          run_arg_get_trans    ( run_arg_list_t *arglist, const char
 cham_uplo_t           run_arg_get_uplo     ( run_arg_list_t *arglist, const char *name, cham_uplo_t           defval );
 cham_diag_t           run_arg_get_diag     ( run_arg_list_t *arglist, const char *name, cham_diag_t           defval );
 cham_side_t           run_arg_get_side     ( run_arg_list_t *arglist, const char *name, cham_side_t           defval );
+cham_dir_t            run_arg_get_dir      ( run_arg_list_t *arglist, const char *name, cham_dir_t            defval );
 cham_job_t            run_arg_get_job      ( run_arg_list_t *arglist, const char *name, cham_job_t            defval );
 cham_normtype_t       run_arg_get_ntype    ( run_arg_list_t *arglist, const char *name, cham_normtype_t       defval );
 int                   run_arg_get_ib       ( run_arg_list_t *arglist );
diff --git a/testing/values.c b/testing/values.c
index 3f3ed0bbca83a1a82c6b5e660cf3705e2251d7c2..36e42b256569d32f5a3b9740cc784553b6527345 100644
--- a/testing/values.c
+++ b/testing/values.c
@@ -14,7 +14,8 @@
  * @author Alycia Lisito
  * @author Florent Pruvost
  * @author Philippe Swartvagher
- * @date 2024-08-28
+ * @author Matteo Marcos
+ * @date 2025-03-24
  *
  */
 #include "testings.h"
@@ -266,6 +267,41 @@ val_t pread_side( const char *str )
     return val;
 }
 
+
+/**
+ * @brief Parse a direction given by name or by integer value
+ * @param[in] str
+ *    Case-insensitive "Forward"/"ChamDirForward" or "Backward"/"ChamDirBackward";
+ *    any other string is converted with atoi() and compared to the enum values.
+ * @return A val_t whose dir field is ChamDirForward or ChamDirBackward.
+ */
+val_t pread_dir( const char *str )
+{
+    val_t val;
+    int   is_forward;
+
+    /* Keep in sync with help documentation in testing/{chameleon,vendor}_ztesting.c */
+    if ( !strcasecmp( "ChamDirForward", str ) || !strcasecmp( "Forward", str ) ) {
+        is_forward = 1;
+    }
+    else if ( !strcasecmp( "ChamDirBackward", str ) || !strcasecmp( "Backward", str ) ) {
+        is_forward = 0;
+    }
+    else {
+        /*
+         * NOTE(review): anything that is not a known name, including text for which
+         * atoi() yields 0, goes through this numeric test. Confirm that the offset
+         * ( ChamDirForward - ChamDirBackward ) is the intended shorthand for Forward.
+         */
+        int v = atoi( str );
+
+        is_forward = ( v == ChamDirForward ) || ( v == ( ChamDirForward - ChamDirBackward ) );
+    }
+
+    val.dir = is_forward ? ChamDirForward : ChamDirBackward;
+    return val;
+}
+
 /**
  * @brief Convert the input string to a cham_job_t
  * @param[in] str
@@ -582,6 +618,25 @@ char *sprint_side( val_t val, int human, int nbchar, char *str_in )
     return str_in+rc;
 }
 
+/**
+ * @brief Print a cham_dir_t value at the given position of an output string
+ * @param[in] val        Value whose dir field is printed
+ * @param[in] human      Non-zero for " Forward"/" Backward", zero for ";<integer value>"
+ * @param[in] nbchar     Minimum width of the left-justified name (human format only)
+ * @param[in,out] str_in Position in the buffer where the text is written
+ * @return str_in advanced by the value returned by sprintf().
+ */
+char *sprint_dir( val_t val, int human, int nbchar, char *str_in )
+{
+    const char *name = (val.dir == ChamDirForward) ? "Forward" : "Backward";
+    int         len;
+
+    len = human ? sprintf( str_in, " %-*s", nbchar, name )
+                : sprintf( str_in, ";%d", val.dir );
+
+    return str_in + len;
+}
+
 /**
  * @brief Convert the input string to a cham_job_t
  * @param[in] str