diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index ee148020a6833c8ad44a19f4e9f51c708d4af1d2..5d3f25e30fa1bc324eb1c4e796aeabb6857fe931 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -476,10 +476,10 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws,
                                 ipiv, k, A(k, n), Wu(A->myrank, n) );
 
         for(m=k+1; m<A->mt; m++){
-            INSERT_TASK_zlaswp_batched( options, m*A->mb, minmn, (void *)ws, ipiv, k,
+            INSERT_TASK_zlaswp_batched( options, ChamDirForward, m*A->mb, minmn, (void *)ws->laswp, ipiv, k,
                                         A(m, n), A(k, n), Wu(A->myrank, n), clargs );
         }
-        INSERT_TASK_zlaswp_batched_flush( options, ipiv, k, A(k, n), Wu(A->myrank, n), clargs );
+        INSERT_TASK_zlaswp_batched_flush( options, ChamDirForward, ipiv, k, A(k, n), Wu(A->myrank, n), clargs );
 
         INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws->laswp );
 
@@ -515,7 +515,7 @@ chameleon_pzgetrf_panel_permute_forward( struct chameleon_pzgetrf_s *ws,
     }
 #endif
 
-    if ( ws->batch_size_swap > 0 ) {
+    if ( ws->laswp->batch_size_swap > 0 ) {
         chameleon_pzgetrf_panel_permute_batched( ws, A, ipiv, k, n, options );
     }
     else {
@@ -550,7 +550,7 @@ chameleon_pzgetrf_panel_permute_backward( struct chameleon_pzgetrf_s *ws,
     }
 #endif
 
-    if ( ws->batch_size_swap > 0 ) {
+    if ( ws->laswp->batch_size_swap > 0 ) {
         chameleon_pzgetrf_panel_permute_batched( ws, A, ipiv, k, n, options );
     }
     else {
diff --git a/compute/pzlaswp.c b/compute/pzlaswp.c
index 2afedfd5a0bf6f0bf1fb38567f4043fcfdc7b5ea..ed672ae397855138aa88e9c69497003238994920 100644
--- a/compute/pzlaswp.c
+++ b/compute/pzlaswp.c
@@ -7,11 +7,9 @@
  *
  ***
  *
- * @brief Chameleon zlaswp parallel algorithm
+ * @brief Chameleon zlaswp parallel algorithm for row permutation.
  *
  * @version 1.3.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Alycia Lisito
  * @author Matteo Marcos
  * @date 2025-03-24
@@ -20,8 +18,8 @@
  */
 #include "control/common.h"
 
-#define A(m,n)   A,        m, n
-#define Wu(m,n)  &(ws->W), m, n
+#define A(m,n)   A,       m, n
+#define W(m,n)  &(ws->W), m, n
 
 /**
  *  Permutation of the panel n at step k
@@ -46,22 +44,65 @@ chameleon_pzlaswp_panel_permute( struct chameleon_pzlaswp_s *ws,
     withlacpy = options->withlacpy;
     options->withlacpy = 1;
     INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
-                       A(k, n), Wu(A->myrank, n) );
+                        A(k, n), W(A->myrank, n) );
     options->withlacpy = withlacpy;
 
     INSERT_TASK_zlaswp_get( options, dir, k*A->mb, tempkm,
-                           ipiv, k, A(k, n), Wu(A->myrank, n) );
+                           ipiv, k, A(k, n), W(A->myrank, n) );
 
     for ( m = k + 1; m < A->mt; m++ ) {
         /* Extract selected rows into A(k, n) */
         INSERT_TASK_zlaswp_get( options, dir, m*A->mb, tempkm,
-                               ipiv, k, A(m, n), Wu(A->myrank, n) );
+                                ipiv, k, A(m, n), W(A->myrank, n) );
         /* Copy rows from A(k,n) into their final position */
         INSERT_TASK_zlaswp_set( options, dir, m*A->mb, tempkm,
-                               ipiv, k, A(k, n), A(m, n) );
+                                ipiv, k, A(k, n), A(m, n) );
     }
 
-    INSERT_TASK_zperm_allreduce_row( options, dir, A, Wu(A->myrank, n), ipiv, k, k, n, ws );
+    INSERT_TASK_zperm_allreduce_row( options, dir, A, W(A->myrank, n), ipiv, k, k, n, ws );
+}
+
+/**
+ *  Batched permutation of the panel n at step k: INSERT_TASK_zlaswp_batched is
+ *  called for each tile A(m, n), m > k, before the flush and the row allreduce.
+ */
+static inline void
+chameleon_pzlaswp_panel_permute_batched( struct chameleon_pzlaswp_s *ws,
+                                         cham_dir_t                  dir,
+                                         CHAM_desc_t                *A,
+                                         CHAM_ipiv_t                *ipiv,
+                                         int                         k,
+                                         int                         n,
+                                         RUNTIME_option_t           *options )
+{
+    int    m;
+    int    tempkm, tempnn;
+    int    withlacpy;
+    /* Cell handed to the batched insertions; only used up to the flush below */
+    void  *batch_cell = NULL;
+    void **clargs     = &batch_cell;
+
+    tempkm = A->get_blkdim( A, k, DIM_m, A->m );
+    tempnn = A->get_blkdim( A, n, DIM_n, A->n );
+
+    /* Copy A(k, n) into W; withlacpy is forced to 1 for this insertion only */
+    withlacpy = options->withlacpy;
+    options->withlacpy = 1;
+    INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
+                        A(k, n), W(A->myrank, n) );
+    options->withlacpy = withlacpy;
+
+    INSERT_TASK_zlaswp_get( options, dir, k*A->mb, tempkm,
+                            ipiv, k, A(k, n), W(A->myrank, n) );
+
+    /* Tiles A(m, n), m > k, go through the batched interface */
+    for ( m = k + 1; m < A->mt; m++ ) {
+        INSERT_TASK_zlaswp_batched( options, dir, m*A->mb, tempkm, (void *)ws, ipiv, k,
+                                    A(m, n), A(k, n), W(A->myrank, n), clargs );
+    }
+    INSERT_TASK_zlaswp_batched_flush( options, dir, ipiv, k, A(k, n), W(A->myrank, n), clargs );
+
+    INSERT_TASK_zperm_allreduce_row( options, dir, A, W(A->myrank, n), ipiv, k, k, n, ws );
 }
 
 static inline void
@@ -92,14 +133,19 @@ chameleon_pzlaswp_panel( struct chameleon_pzlaswp_s *ws,
     }
 #endif
 
-    chameleon_pzlaswp_panel_permute( ws, dir, A, ipiv, k, n, options );
+    if ( ws->batch_size_swap > 0 ) { /* batch only for a strictly positive size, as pzgetrf does */
+        chameleon_pzlaswp_panel_permute_batched( ws, dir, A, ipiv, k, n, options );
+    }
+    else {
+        chameleon_pzlaswp_panel_permute( ws, dir, A, ipiv, k, n, options );
+    }
 
     if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) {
 
         tempkm = A->get_blkdim( A, k, DIM_m, A->m );
         tempnn = A->get_blkdim( A, n, DIM_n, A->n );
         INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
-                            Wu(A->myrank, n), A(k, n) );
+                            W(A->myrank, n), A(k, n) );
         RUNTIME_data_flush( sequence, A(k, n) );
     }
     (void)reduce;
@@ -145,4 +191,3 @@ chameleon_pzlaswp( struct chameleon_pzlaswp_s *ws,
     }
     RUNTIME_options_finalize( &options, chamctxt );
 }
-
diff --git a/compute/pzlaswpc.c b/compute/pzlaswpc.c
index a3328070bdf6717ed4614f42f9493f8ffdcb36db..a62e28319d0561add56479bec503810611cd1bf0 100644
--- a/compute/pzlaswpc.c
+++ b/compute/pzlaswpc.c
@@ -18,8 +18,8 @@
  */
 #include "control/common.h"
 
-#define A(m,n)   A,        m, n
-#define Wc(m,n)  &(ws->W), m, n
+#define A(m,n)   A,       m, n
+#define W(m,n)  &(ws->W), m, n
 
 /**
  *  Permutation of the panel n at step k
@@ -34,32 +34,75 @@ chameleon_pzlaswpc_panel_permute( struct chameleon_pzlaswp_s *ws,
                                   RUNTIME_option_t           *options )
 {
     int                        n;
-    int                        tempkn, tempmm;
+    int                        tempmm, tempkn;
     int                        withlacpy;
 
-    tempkn = A->get_blkdim( A, k, DIM_n, A->n );
     tempmm = A->get_blkdim( A, m, DIM_m, A->m );
+    tempkn = A->get_blkdim( A, k, DIM_n, A->n );
 
     /* Extract selected rows into U */
     withlacpy = options->withlacpy;
     options->withlacpy = 1;
     INSERT_TASK_zlacpy( options, ChamUpperLower, tempmm, tempkn,
-                       A(m, k), Wc(m, A->myrank) );
+                        A(m, k), W(m, A->myrank) );
     options->withlacpy = withlacpy;
 
     INSERT_TASK_zlaswpc_get( options, dir, tempkn, k*A->nb,
-                                ipiv, k, A(m, k), Wc(m, A->myrank) );
+                             ipiv, k, A(m, k), W(m, A->myrank) );
 
     for ( n = k + 1; n < A->nt; n++ ) {
         /* Extract selected rows into A(k, n) */
         INSERT_TASK_zlaswpc_get( options, dir, tempkn, n*A->nb,
-                                    ipiv, k, A(m, n), Wc(m, A->myrank) );
+                                 ipiv, k, A(m, n), W(m, A->myrank) );
         /* Copy rows from A(k,n) into their final position */
         INSERT_TASK_zlaswpc_set( options, dir, tempkn, n*A->nb,
-                                    ipiv, k, A(m, k), A(m, n) );
+                                 ipiv, k, A(m, k), A(m, n) );
+    }
+
+    INSERT_TASK_zperm_allreduce_col( options, dir, A, W(m, A->myrank), ipiv, k, m, k, ws );
+}
+
+/**
+ *  Batched permutation of the row panel m at step k: INSERT_TASK_zlaswpc_batched
+ *  is called for each tile A(m, n), n > k, before the flush and the column allreduce.
+ */
+static inline void
+chameleon_pzlaswpc_panel_permute_batched( struct chameleon_pzlaswp_s *ws,
+                                          cham_dir_t                  dir,
+                                          CHAM_desc_t                *A,
+                                          CHAM_ipiv_t                *ipiv,
+                                          int                         m,
+                                          int                         k,
+                                          RUNTIME_option_t           *options )
+{
+    int    n;
+    int    tempmm, tempkn;
+    int    withlacpy;
+    /* Cell handed to the batched insertions; only used up to the flush below */
+    void  *batch_cell = NULL;
+    void **clargs     = &batch_cell;
+
+    tempmm = A->get_blkdim( A, m, DIM_m, A->m );
+    tempkn = A->get_blkdim( A, k, DIM_n, A->n );
+
+    /* Copy A(m, k) into W; withlacpy is forced to 1 for this insertion only */
+    withlacpy = options->withlacpy;
+    options->withlacpy = 1;
+    INSERT_TASK_zlacpy( options, ChamUpperLower, tempmm, tempkn,
+                        A(m, k), W(m, A->myrank) );
+    options->withlacpy = withlacpy;
+
+    INSERT_TASK_zlaswpc_get( options, dir, tempkn, k*A->nb,
+                             ipiv, k, A(m, k), W(m, A->myrank) );
+
+    /* Tiles A(m, n), n > k, go through the batched interface */
+    for ( n = k + 1; n < A->nt; n++ ) {
+        INSERT_TASK_zlaswpc_batched( options, dir, n*A->nb, tempkn, (void *)ws, ipiv, k,
+                                     A(m, n), A(m, k), W(m, A->myrank), clargs );
     }
+    INSERT_TASK_zlaswpc_batched_flush( options, dir, ipiv, k, A(m, k), W(m, A->myrank), clargs );
 
-    INSERT_TASK_zperm_allreduce_col( options, dir, A, Wc(m, A->myrank), ipiv, k, m, k, ws );
+    INSERT_TASK_zperm_allreduce_col( options, dir, A, W(m, A->myrank), ipiv, k, m, k, ws );
 }
 
 static inline void
@@ -73,7 +116,7 @@ chameleon_pzlaswpc_panel( struct chameleon_pzlaswp_s *ws,
                           RUNTIME_sequence_t         *sequence )
 {
     CHAM_reduce_t *reduce = &(ws->reduce);
-    int            tempkn, tempmm;
+    int            tempmm, tempkn;
 
 #if defined(CHAMELEON_USE_MPI)
     chameleon_get_proc_involved_in_rowpanelk_2dbc( A, m, k, reduce );
@@ -90,14 +133,19 @@ chameleon_pzlaswpc_panel( struct chameleon_pzlaswp_s *ws,
     }
 #endif
 
-    chameleon_pzlaswpc_panel_permute( ws, dir, A, ipiv, m, k, options );
+    if ( ws->batch_size_swap > 0 ) { /* batch only for a strictly positive size, as pzgetrf does */
+        chameleon_pzlaswpc_panel_permute_batched( ws, dir, A, ipiv, m, k, options );
+    }
+    else {
+        chameleon_pzlaswpc_panel_permute( ws, dir, A, ipiv, m, k, options );
+    }
 
     if ( A->myrank == chameleon_getrankof_2d( A, m, k ) ) {
 
-        tempkn = A->get_blkdim( A, k, DIM_n, A->n );
         tempmm = A->get_blkdim( A, m, DIM_m, A->m );
+        tempkn = A->get_blkdim( A, k, DIM_n, A->n );
         INSERT_TASK_zlacpy( options, ChamUpperLower, tempmm, tempkn,
-                            Wc(m, A->myrank), A(m, k) );
+                            W(m, A->myrank), A(m, k) );
         RUNTIME_data_flush( sequence, A(m, k) );
     }
     (void)reduce;
@@ -143,4 +191,3 @@ chameleon_pzlaswpc( struct chameleon_pzlaswp_s *ws,
     }
     RUNTIME_options_finalize( &options, chamctxt );
 }
-
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index ea1d50bc8c7251ffeb37a0b8c9e0668cfe31792f..8d733241ed174d240078dcb918b3fa3d8c4d0d9c 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -105,8 +105,9 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
     ws->batch_size_blas2 = ( ws->batch_size_blas2 > CHAMELEON_BATCH_SIZE ) ? CHAMELEON_BATCH_SIZE : ws->batch_size_blas2;
     ws->batch_size_blas3 = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE_BLAS3", batch_size );
     ws->batch_size_blas3 = ( ws->batch_size_blas3 > CHAMELEON_BATCH_SIZE ) ? CHAMELEON_BATCH_SIZE : ws->batch_size_blas3;
-    ws->batch_size_swap = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE_SWAP", batch_size );
-    ws->batch_size_swap = ( ws->batch_size_swap > CHAMELEON_BATCH_SIZE ) ? CHAMELEON_BATCH_SIZE : ws->batch_size_swap;
+
+    ws->laswp->batch_size_swap = ( ws->laswp->batch_size_swap == 0 ) ? batch_size : ws->laswp->batch_size_swap;
+    ws->laswp->batch_size_swap = ( ws->laswp->batch_size_swap > CHAMELEON_BATCH_SIZE ) ? CHAMELEON_BATCH_SIZE : ws->laswp->batch_size_swap;
 
     ws->ringswitch = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_RINGSWITCH", INT_MAX );
 
diff --git a/compute/zlaswp.c b/compute/zlaswp.c
index f00140efedbd5cc829d73cb581820720bfa0217c..5b0783d9e5ae1b31842919cb38b515c9b32a58d7 100644
--- a/compute/zlaswp.c
+++ b/compute/zlaswp.c
@@ -85,6 +85,12 @@ CHAMELEON_zlaswp_WS_Alloc( cham_side_t side, const CHAM_desc_t *A )
         chameleon_cleanenv( allreduce );
     }
 
+    ws->batch_size_swap = chameleon_getenv_get_value_int( "CHAMELEON_LASWP_BATCH_SIZE", 0 );
+    if ( ws->batch_size_swap > CHAMELEON_BATCH_SIZE ) {
+        chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_LASWP_BATCH_SIZE must be smaller than CHAMELEON_BATCH_SIZE, please recompile with the right CHAMELEON_BATCH_SIZE, or reduce the CHAMELEON_LASWP_BATCH_SIZE value\n" );
+        ws->batch_size_swap = CHAMELEON_BATCH_SIZE;
+    }
+
     if ( side == ChamLeft ) {
         chameleon_desc_init( &(ws->W), CHAMELEON_MAT_ALLOC_TILE,
                             ChamComplexDouble, A->mb, A->nb, A->mb*A->nb,
diff --git a/control/compute_z.h b/control/compute_z.h
index e747c2ac2203c63266acf4c928ad5a28495e0a86..d7482d0dc69d5910835b2bc81579db7bed04342a 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -44,8 +44,9 @@ struct chameleon_pzgemm_s {
  * @brief Data structure to handle the LASWP workspaces
  */
 struct chameleon_pzlaswp_s {
-    CHAM_desc_t   W;      /**< Workspace used for the row/column permutation. */
-    CHAM_reduce_t reduce; /**< Structure for reduction operations             */
+    CHAM_desc_t   W;                /**< Workspace used for the row/column permutation. */
+    CHAM_reduce_t reduce;           /**< Structure for reduction operations             */
+    int           batch_size_swap;  /**< Batch size for the permutation                 */
 };
 
 /**
@@ -57,7 +58,6 @@ struct chameleon_pzgetrf_s {
     int                         ib;               /**< Internal blocking parameter                                          */
     int                         batch_size_blas2; /**< Batch size for the blas 2 operations of the panel factorization      */
     int                         batch_size_blas3; /**< Batch size for the blas 3 operations of the panel factorization      */
-    int                         batch_size_swap;  /**< Batch size for the permutation                                       */
     int                         ringswitch;       /**< Define when to switch to ring bcast                                  */
     CHAM_desc_t                 U;                /**< Workspaces used for the panels permutation in getrf without pivoting */
     CHAM_desc_t                 Up;               /**< Workspace used for the panel factorization                           */
diff --git a/coreblas/compute/core_zlaswp.c b/coreblas/compute/core_zlaswp.c
index 0160e5c2e0d6cd0edb8dacb4b772142d79431a3f..b0d6c075055ea23e37a6e9b3cbaaf1ddc3d29216 100644
--- a/coreblas/compute/core_zlaswp.c
+++ b/coreblas/compute/core_zlaswp.c
@@ -24,8 +24,8 @@
  *
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
- * CORE_zlaswp_get extracts the rows from the tile B that have been selected as
- * pivot into the tile A.
+ * CORE_zlaswp_get extracts the rows from the tile A that have been selected as
+ * pivot into the tile B.
  *
  *******************************************************************************
  *
@@ -54,7 +54,7 @@
  *          On entry, a matrix of size ldb-by-n with 0s or already collected
  *          rows.
  *          On exit, B is filled with the selected rows from A, such that for
- *          each row i, B[i] = A[perm[i]-m0-1].
+ *          each row i, B[i,:] = A[perm[i]-m0-1,:].
  *
  * @param[in] ldb
  *          The leading dimension of the array B. ldb >= max(1,k).
@@ -155,7 +155,7 @@ CORE_zlaswp_get( int m0, int m, int n, int k,
  * @param[inout] B
  *          On entry, a matrix of size ldb-by-n that may require some pivoted rows.
  *          On exit, B is updated with the pivoted rows it needs to receive, such that for
- *          each row i, A[i] = B[invp[i]-m0-1].
+ *          each row i, A[i,:] = B[invp[i]-m0-1,:].
  *
  * @param[in] ldb
  *          The leading dimension of the array B. ldb >= max(1,m).
diff --git a/coreblas/compute/core_zlaswpc.c b/coreblas/compute/core_zlaswpc.c
index 6ba0f627835b08124b61b041a76cced4823baee4..50f6cd684ce2fd24541e6457704259780a0698ec 100644
--- a/coreblas/compute/core_zlaswpc.c
+++ b/coreblas/compute/core_zlaswpc.c
@@ -23,8 +23,8 @@
  *
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
- * CORE_zlaswpc_get extracts the columns from the tile B that have been selected as
- * pivot into the tile A.
+ * CORE_zlaswpc_get extracts the columns from the tile A that have been selected as
+ * pivot into the tile B.
  *
  *******************************************************************************
  *
@@ -33,10 +33,10 @@
  *         belongs to.
  *
  * @param[in] m
- *          The number of rows of the matrix A.
+ *          The number of rows of the matrices A and B.
  *
  * @param[in] n
- *         The number of columns of the matrices A and B.
+ *         The number of columns of the matrix A.
  *
  * @param[in] k
  *         The number of columns of the matrix B. This is the number of potential
@@ -44,19 +44,19 @@
  *
  * @param[in] A
  *          On entry, the matrix A of dimension lda-by-n where to extract the
- *          pivot columns if some are selected in the range m0..m0+m.
+ *          pivot columns if some are selected in the range n0..n0+n.
  *
  * @param[in] lda
  *          The leading dimension of the array A. lda >= max(1,m).
  *
  * @param[inout] B
- *          On entry, a matrix of size ldb-by-n with 0s or already collected
+ *          On entry, a matrix of size ldb-by-k with 0s or already collected
  *          columns.
  *          On exit, B is filled with the selected columns from A, such that for
- *          each row i, B[i] = A[perm[i]-m0-1].
+ *          each column i, B[:,i] = A[:,perm[i]-n0-1].
  *
  * @param[in] ldb
- *          The leading dimension of the array B. ldb >= max(1,k).
+ *          The leading dimension of the array B. ldb >= max(1,m).
  *
  * @param[in] perm
  *          The permutation array of dimension k.
@@ -110,7 +110,6 @@ CORE_zlaswpc_get( int n0, int m, int n, int k,
     {
         int idx = perm[i] - n0;
 
-
         if ( ( idx >= 0 ) && (idx < n ) )
         {
             cblas_zcopy( m, A + idx * lda, 1,
@@ -136,10 +135,10 @@ CORE_zlaswpc_get( int n0, int m, int n, int k,
  *         belongs to.
  *
  * @param[in] m
- *          The number of rows of the matrix B.
+ *          The number of rows of the matrices A and B.
  *
  * @param[in] n
- *         The number of columns of the matrices A and B.
+ *         The number of columns of the matrix B.
  *
  * @param[in] k
  *         The number of columns of the matrix A. This is the number of potential
@@ -150,12 +149,12 @@ CORE_zlaswpc_get( int n0, int m, int n, int k,
  *          pivoted columns.
  *
  * @param[in] lda
- *          The leading dimension of the array A. lda >= max(1,k).
+ *          The leading dimension of the array A. lda >= max(1,m).
  *
  * @param[inout] B
  *          On entry, a matrix of size ldb-by-n that may require some pivoted columns.
  *          On exit, B is updated with the pivoted columns it needs to receive, such that for
- *          each column i, A[i] = B[invp[i]-m0-1].
+ *          each column i, A[:,i] = B[:,invp[i]-n0-1].
  *
  * @param[in] ldb
  *          The leading dimension of the array B. ldb >= max(1,m).
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index abe3a2c543dfe1e196163f9ae84e8812e97b0bed..916208a605c4182a5272055077429bd4c97178b4 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -209,7 +209,8 @@ void INSERT_TASK_zlaswpc_set( const RUNTIME_option_t *options, cham_dir_t dir,
                              const CHAM_desc_t *tileA, int tileAm, int tileAn,
                              const CHAM_desc_t *tileB, int tileBm, int tileBn );
 void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
-                                 int m0, int minmn,
+                                 cham_dir_t              dir,
+                                 int m0, int m,
                                  void *ws,
                                  const CHAM_ipiv_t *ipiv, int ipivk,
                                  const CHAM_desc_t *Am, int Amm, int Amn,
@@ -217,10 +218,26 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
                                  const CHAM_desc_t *U,  int Um,  int Un,
                                  void **clargs_ptr );
 void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        const CHAM_ipiv_t *ipiv, int ipivk,
                                        const CHAM_desc_t *Ak, int Akm, int Akn,
                                        const CHAM_desc_t *U,  int Um,  int Un,
                                        void **clargs_ptr );
+void INSERT_TASK_zlaswpc_batched( const RUNTIME_option_t *options,
+                                  cham_dir_t              dir,
+                                  int n0, int n,
+                                  void *ws,
+                                  const CHAM_ipiv_t *ipiv, int ipivk,
+                                  const CHAM_desc_t *An, int Anm, int Ann,
+                                  const CHAM_desc_t *Ak, int Akm, int Akn,
+                                  const CHAM_desc_t *U,  int Um,  int Un,
+                                  void **clargs_ptr );
+void INSERT_TASK_zlaswpc_batched_flush( const RUNTIME_option_t *options,
+                                        cham_dir_t              dir,
+                                        const CHAM_ipiv_t *ipiv, int ipivk,
+                                        const CHAM_desc_t *Ak, int Akm, int Akn,
+                                        const CHAM_desc_t *U,  int Um,  int Un,
+                                        void **clargs_ptr );
 void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb,
                          const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 2f1254d2c02993e5b4b96a177e42b4b760fd6020..0d2f52bd30b54580d806cd1bf3158089f3da5153 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -85,6 +85,7 @@ set(CODELETS_ZSRC
     codelets/codelet_zlaswp.c
     codelets/codelet_zlaswpc.c
     codelets/codelet_zlaswp_batched.c
+    codelets/codelet_zlaswpc_batched.c
     codelets/codelet_zlatro.c
     codelets/codelet_zlauum.c
     codelets/codelet_zperm_allreduce.c
diff --git a/runtime/openmp/codelets/codelet_zlaswp_batched.c b/runtime/openmp/codelets/codelet_zlaswp_batched.c
index 3ce953cf8f888d83b74abe0550dccf381d390df5..f3d922de628f9299434fb63d6c4ba83b3421e6b9 100644
--- a/runtime/openmp/codelets/codelet_zlaswp_batched.c
+++ b/runtime/openmp/codelets/codelet_zlaswp_batched.c
@@ -19,8 +19,9 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
+                                 cham_dir_t              dir,
                                  int                     m0,
-                                 int                     minmn,
+                                 int                     m,
                                  void                   *ws,
                                  const CHAM_ipiv_t      *ipiv,
                                  int                     ipivk,
@@ -36,8 +37,9 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
                                  void                  **clargs_ptr )
 {
     (void)options;
+    (void)dir;
     (void)m0;
-    (void)minmn;
+    (void)m;
     (void)ws;
     (void)ipiv;
     (void)ipivk;
@@ -54,6 +56,7 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        const CHAM_ipiv_t      *ipiv,
                                        int                     ipivk,
                                        const CHAM_desc_t      *Ak,
@@ -65,6 +68,7 @@ void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
                                        void                  **clargs_ptr )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)Ak;
diff --git a/runtime/openmp/codelets/codelet_zlaswpc.c b/runtime/openmp/codelets/codelet_zlaswpc.c
index eabe24c2a4961a0dcd57b42852d9e2530d85b8c3..08c61b79116f9a0d451ad22589eb644bf0661c60 100644
--- a/runtime/openmp/codelets/codelet_zlaswpc.c
+++ b/runtime/openmp/codelets/codelet_zlaswpc.c
@@ -2,7 +2,7 @@
  *
  * @file openmp/codelet_zlaswpc.c
  *
- * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
diff --git a/runtime/openmp/codelets/codelet_zlaswpc_batched.c b/runtime/openmp/codelets/codelet_zlaswpc_batched.c
new file mode 100644
index 0000000000000000000000000000000000000000..c3cf522ce2f4601b48a44548e9094c2097ba0b63
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlaswpc_batched.c
@@ -0,0 +1,82 @@
+/**
+ *
+ * @file openmp/codelet_zlaswpc_batched.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP codelets to apply zlaswpc on a panel
+ *
+ * @version 1.3.0
+ * @author Alycia Lisito
+ * @author Matteo Marcos
+ * @date 2024-11-12
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+
+void INSERT_TASK_zlaswpc_batched( const RUNTIME_option_t *options,
+                                  cham_dir_t              dir,
+                                  int                     n0,
+                                  int                     n,
+                                  void                   *ws,
+                                  const CHAM_ipiv_t      *ipiv,
+                                  int                     ipivk,
+                                  const CHAM_desc_t      *An,
+                                  int                     Anm,
+                                  int                     Ann,
+                                  const CHAM_desc_t      *Ak,
+                                  int                     Akm,
+                                  int                     Akn,
+                                  const CHAM_desc_t      *U,
+                                  int                     Um,
+                                  int                     Un,
+                                  void                  **clargs_ptr )
+{
+    (void)options; /* Empty stub: no task is inserted, every argument is only cast to void */
+    (void)dir;
+    (void)n0;
+    (void)n;
+    (void)ws;
+    (void)ipiv;
+    (void)ipivk;
+    (void)An;
+    (void)Anm;
+    (void)Ann;
+    (void)Ak;
+    (void)Akm;
+    (void)Akn;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)clargs_ptr;
+}
+
+void INSERT_TASK_zlaswpc_batched_flush( const RUNTIME_option_t *options,
+                                        cham_dir_t              dir,
+                                        const CHAM_ipiv_t      *ipiv,
+                                        int                     ipivk,
+                                        const CHAM_desc_t      *Ak,
+                                        int                     Akm,
+                                        int                     Akn,
+                                        const CHAM_desc_t      *U,
+                                        int                     Um,
+                                        int                     Un,
+                                        void                  **clargs_ptr )
+{
+    (void)options; /* Empty stub: nothing is flushed, every argument is only cast to void */
+    (void)dir;
+    (void)ipiv;
+    (void)ipivk;
+    (void)Ak;
+    (void)Akm;
+    (void)Akn;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)clargs_ptr;
+}
diff --git a/runtime/parsec/codelets/codelet_zlaswp_batched.c b/runtime/parsec/codelets/codelet_zlaswp_batched.c
index 646b823aaa926255049ac96e56770023e8b2f08b..db683cded28ce6b36b257e006e76bb9db1fa9b48 100644
--- a/runtime/parsec/codelets/codelet_zlaswp_batched.c
+++ b/runtime/parsec/codelets/codelet_zlaswp_batched.c
@@ -19,8 +19,9 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
+                                 cham_dir_t              dir,
                                  int                     m0,
-                                 int                     minmn,
+                                 int                     m,
                                  void                   *ws,
                                  const CHAM_ipiv_t      *ipiv,
                                  int                     ipivk,
@@ -36,8 +37,9 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
                                  void                  **clargs_ptr )
 {
     (void)options;
+    (void)dir;
     (void)m0;
-    (void)minmn;
+    (void)m;
     (void)ws;
     (void)ipiv;
     (void)ipivk;
@@ -54,6 +56,7 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        const CHAM_ipiv_t      *ipiv,
                                        int                     ipivk,
                                        const CHAM_desc_t      *Ak,
@@ -65,6 +68,7 @@ void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
                                        void                  **clargs_ptr )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)Ak;
diff --git a/runtime/parsec/codelets/codelet_zlaswpc.c b/runtime/parsec/codelets/codelet_zlaswpc.c
index d7d55850738eab5f4201f69e2c280cf7dafff6c5..42a8b03c97a589afd606f31d25de7042734f8f47 100644
--- a/runtime/parsec/codelets/codelet_zlaswpc.c
+++ b/runtime/parsec/codelets/codelet_zlaswpc.c
@@ -2,7 +2,7 @@
  *
  * @file parsec/codelet_zlaswpc.c
  *
- * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
diff --git a/runtime/parsec/codelets/codelet_zlaswpc_batched.c b/runtime/parsec/codelets/codelet_zlaswpc_batched.c
new file mode 100644
index 0000000000000000000000000000000000000000..6336558d6a6a669a6681bf7cbfb17ff1c6aa42e2
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zlaswpc_batched.c
@@ -0,0 +1,82 @@
+/**
+ *
+ * @file parsec/codelet_zlaswpc_batched.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon Parsec codelets to apply zlaswp on a row panel
+ *
+ * @version 1.3.0
+ * @author Alycia Lisito
+ * @author Matteo Marcos
+ * @date 2025-04-10
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_parsec.h"
+#include "chameleon/tasks_z.h"
+
+/*
+ * PaRSEC version of the batched zlaswpc task insertion.
+ *
+ * Placeholder only: nothing is submitted to the runtime. Every argument is
+ * cast to void to silence unused-parameter warnings, so calling this
+ * function has no effect.
+ */
+void INSERT_TASK_zlaswpc_batched( const RUNTIME_option_t *options,
+                                  cham_dir_t              dir,
+                                  int                     n0,
+                                  int                     n,
+                                  void                   *ws,
+                                  const CHAM_ipiv_t      *ipiv, int ipivk,
+                                  const CHAM_desc_t      *An,   int Anm, int Ann,
+                                  const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                  const CHAM_desc_t      *U,    int Um,  int Un,
+                                  void                  **clargs_ptr )
+{
+    (void)options;
+    (void)dir;
+    (void)n0;
+    (void)n;
+    (void)ws;
+    (void)ipiv;
+    (void)ipivk;
+    (void)An;
+    (void)Anm;
+    (void)Ann;
+    (void)Ak;
+    (void)Akm;
+    (void)Akn;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)clargs_ptr;
+}
+
+/*
+ * PaRSEC version of the batched zlaswpc flush.
+ * Placeholder only: no task is submitted and *clargs_ptr is left untouched;
+ * the arguments are cast to void to silence unused-parameter warnings.
+ */
+void INSERT_TASK_zlaswpc_batched_flush( const RUNTIME_option_t *options,
+                                        cham_dir_t              dir,
+                                        const CHAM_ipiv_t      *ipiv, int ipivk,
+                                        const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                        const CHAM_desc_t      *U,    int Um,  int Un,
+                                        void                  **clargs_ptr )
+{
+    (void)options;
+    (void)dir;
+    (void)ipiv;
+    (void)ipivk;
+    (void)Ak;
+    (void)Akm;
+    (void)Akn;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)clargs_ptr;
+}
diff --git a/runtime/quark/codelets/codelet_zlaswp_batched.c b/runtime/quark/codelets/codelet_zlaswp_batched.c
index 2193c0943300f99ed2f0226688dcd3fcf1b0d6db..a285177b717adf9642429e6a902471317f9bf62d 100644
--- a/runtime/quark/codelets/codelet_zlaswp_batched.c
+++ b/runtime/quark/codelets/codelet_zlaswp_batched.c
@@ -19,8 +19,9 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
+                                 cham_dir_t              dir,
                                  int                     m0,
-                                 int                     minmn,
+                                 int                     m,
                                  void                   *ws,
                                  const CHAM_ipiv_t      *ipiv,
                                  int                     ipivk,
@@ -36,8 +37,9 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
                                  void                  **clargs_ptr )
 {
     (void)options;
+    (void)dir;
     (void)m0;
-    (void)minmn;
+    (void)m;
     (void)ws;
     (void)ipiv;
     (void)ipivk;
@@ -54,6 +56,7 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
+                                       cham_dir_t              dir,
                                        const CHAM_ipiv_t      *ipiv,
                                        int                     ipivk,
                                        const CHAM_desc_t      *Ak,
@@ -65,6 +68,7 @@ void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
                                        void                  **clargs_ptr )
 {
     (void)options;
+    (void)dir;
     (void)ipiv;
     (void)ipivk;
     (void)Ak;
diff --git a/runtime/quark/codelets/codelet_zlaswpc.c b/runtime/quark/codelets/codelet_zlaswpc.c
index 1ef79931f706278e73d3ff628126b03081f4102f..bff7d84200f63add99feb5f2f2544a1ad2540bb8 100644
--- a/runtime/quark/codelets/codelet_zlaswpc.c
+++ b/runtime/quark/codelets/codelet_zlaswpc.c
@@ -2,7 +2,7 @@
  *
  * @file quark/codelet_zlaswpc.c
  *
- * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
diff --git a/runtime/quark/codelets/codelet_zlaswpc_batched.c b/runtime/quark/codelets/codelet_zlaswpc_batched.c
new file mode 100644
index 0000000000000000000000000000000000000000..d5152eababe0d24929a27fef5bb4b9ac615f3b48
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zlaswpc_batched.c
@@ -0,0 +1,82 @@
+/**
+ *
+ * @file quark/codelet_zlaswpc_batched.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon Quark codelets to apply zlaswp on a row panel
+ *
+ * @version 1.3.0
+ * @author Alycia Lisito
+ * @author Matteo Marcos
+ * @date 2025-04-10
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_quark.h"
+#include "chameleon/tasks_z.h"
+
+/*
+ * Quark version of the batched zlaswpc task insertion.
+ *
+ * Placeholder only: nothing is submitted to the runtime. Every argument is
+ * cast to void to silence unused-parameter warnings, so calling this
+ * function has no effect.
+ */
+void INSERT_TASK_zlaswpc_batched( const RUNTIME_option_t *options,
+                                  cham_dir_t              dir,
+                                  int                     n0,
+                                  int                     n,
+                                  void                   *ws,
+                                  const CHAM_ipiv_t      *ipiv, int ipivk,
+                                  const CHAM_desc_t      *An,   int Anm, int Ann,
+                                  const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                  const CHAM_desc_t      *U,    int Um,  int Un,
+                                  void                  **clargs_ptr )
+{
+    (void)options;
+    (void)dir;
+    (void)n0;
+    (void)n;
+    (void)ws;
+    (void)ipiv;
+    (void)ipivk;
+    (void)An;
+    (void)Anm;
+    (void)Ann;
+    (void)Ak;
+    (void)Akm;
+    (void)Akn;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)clargs_ptr;
+}
+
+/*
+ * Quark version of the batched zlaswpc flush.
+ * Placeholder only: no task is submitted and *clargs_ptr is left untouched;
+ * the arguments are cast to void to silence unused-parameter warnings.
+ */
+void INSERT_TASK_zlaswpc_batched_flush( const RUNTIME_option_t *options,
+                                        cham_dir_t              dir,
+                                        const CHAM_ipiv_t      *ipiv, int ipivk,
+                                        const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                        const CHAM_desc_t      *U,    int Um,  int Un,
+                                        void                  **clargs_ptr )
+{
+    (void)options;
+    (void)dir;
+    (void)ipiv;
+    (void)ipivk;
+    (void)Ak;
+    (void)Akm;
+    (void)Akn;
+    (void)U;
+    (void)Um;
+    (void)Un;
+    (void)clargs_ptr;
+}
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index e7f330d1be0fc38d4e66eb8fa9baf5889ca77dd9..995508105593cf948182355f68eeba307b317969 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -110,9 +110,9 @@ if ( STARPU_FOUND )
 #include <starpu.h>
 int main() {
   if (STARPU_NONE == 0)
-    return 1;
+    return EXIT_FAILURE;
   else
-    return 0;
+    return EXIT_SUCCESS;
 }
 ")
 
diff --git a/runtime/starpu/codelets/codelet_zlaswp_batched.c b/runtime/starpu/codelets/codelet_zlaswp_batched.c
index 8cc2a3adc593c698f3d79163781f44bd59b92d6e..25b0a3cde6519eb62d6616d46763dfd6941a9f4f 100644
--- a/runtime/starpu/codelets/codelet_zlaswp_batched.c
+++ b/runtime/starpu/codelets/codelet_zlaswp_batched.c
@@ -11,7 +11,8 @@
  *
  * @version 1.3.0
  * @author Alycia Lisito
- * @date 2024-11-12
+ * @author Matteo Marcos
+ * @date 2025-04-10
  * @precisions normal z -> c d s
  *
  */
@@ -20,7 +21,7 @@
 
 struct cl_zlaswp_batched_args_s {
     int                      tasks_nbr;
-    int                      minmn;
+    int                      m;
     int                      m0[CHAMELEON_BATCH_SIZE];
     struct starpu_data_descr handle_mode[CHAMELEON_BATCH_SIZE];
 };
@@ -30,21 +31,21 @@ static void
 cl_zlaswp_batched_cpu_func( void *descr[],
                             void *cl_arg )
 {
-    int          i, m0, minmn, *perm, *invp;
+    int          i, m0, m, *permget, *permset;
     CHAM_tile_t *A, *U, *B;
     struct cl_zlaswp_batched_args_s *clargs = ( struct cl_zlaswp_batched_args_s * ) cl_arg;
 
-    minmn = clargs->minmn;
-    perm = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
-    invp = (int *)STARPU_VECTOR_GET_PTR( descr[1] );
-    U    = (CHAM_tile_t *) cti_interface_get( descr[2] );
-    B    = (CHAM_tile_t *) cti_interface_get( descr[3] );
+    m       = clargs->m;
+    permget = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
+    permset = (int *)STARPU_VECTOR_GET_PTR( descr[1] );
+    U       = (CHAM_tile_t *) cti_interface_get( descr[2] );
+    B       = (CHAM_tile_t *) cti_interface_get( descr[3] );
 
     for ( i = 0; i < clargs->tasks_nbr; i++ ) {
         A  = (CHAM_tile_t *) cti_interface_get( descr[ i + 4 ] );
         m0 = clargs->m0[ i ];
-        TCORE_zlaswp_get( m0, A->m, A->n, minmn, A, U, perm );
-        TCORE_zlaswp_set( m0, A->m, A->n, minmn, B, A, invp );
+        TCORE_zlaswp_get( m0, A->m, A->n, m, A, U, permget );
+        TCORE_zlaswp_set( m0, A->m, A->n, m, B, A, permset );
     }
 }
 #endif
@@ -55,24 +56,18 @@ cl_zlaswp_batched_cpu_func( void *descr[],
 CODELETS_CPU( zlaswp_batched, cl_zlaswp_batched_cpu_func )
 
 void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
+                                 cham_dir_t              dir,
                                  int                     m0,
-                                 int                     minmn,
+                                 int                     m,
                                  void                   *ws,
-                                 const CHAM_ipiv_t      *ipiv,
-                                 int                     ipivk,
-                                 const CHAM_desc_t      *Am,
-                                 int                     Amm,
-                                 int                     Amn,
-                                 const CHAM_desc_t      *Ak,
-                                 int                     Akm,
-                                 int                     Akn,
-                                 const CHAM_desc_t      *U,
-                                 int                     Um,
-                                 int                     Un,
+                                 const CHAM_ipiv_t      *ipiv, int ipivk,
+                                 const CHAM_desc_t      *Am,   int Amm, int Amn,
+                                 const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                 const CHAM_desc_t      *U,    int Um,  int Un,
                                  void                  **clargs_ptr )
 {
     int task_num   = 0;
-    int batch_size = ((struct chameleon_pzgetrf_s *)ws)->batch_size_swap;
+    int batch_size = ((struct chameleon_pzlaswp_s *)ws)->batch_size_swap;
     struct cl_zlaswp_batched_args_s *clargs = *clargs_ptr;
     if ( Am->get_rankof( Am, Amm, Amn) != Am->myrank ) {
         return;
@@ -81,7 +76,7 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
     if( clargs == NULL ) {
         clargs = malloc( sizeof( struct cl_zlaswp_batched_args_s ) ) ;
         clargs->tasks_nbr = 0;
-        clargs->minmn     = minmn;
+        clargs->m         = m;
         *clargs_ptr       = clargs;
     }
 
@@ -92,36 +87,43 @@ void INSERT_TASK_zlaswp_batched( const RUNTIME_option_t *options,
     clargs->tasks_nbr ++;
 
     if ( clargs->tasks_nbr == batch_size ) {
-        INSERT_TASK_zlaswp_batched_flush( options, ipiv, ipivk, Ak, Akm, Akn, U, Um, Un, clargs_ptr );
+        INSERT_TASK_zlaswp_batched_flush( options, dir, ipiv, ipivk, Ak, Akm, Akn, U, Um, Un, clargs_ptr );
     }
 }
 
 #if defined(CHAMELEON_STARPU_USE_INSERT)
 
 void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
-                                       const CHAM_ipiv_t      *ipiv,
-                                       int                     ipivk,
-                                       const CHAM_desc_t      *Ak,
-                                       int                     Akm,
-                                       int                     Akn,
-                                       const CHAM_desc_t      *U,
-                                       int                     Um,
-                                       int                     Un,
+                                       cham_dir_t              dir,
+                                       const CHAM_ipiv_t      *ipiv, int ipivk,
+                                       const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                       const CHAM_desc_t      *U,    int Um,  int Un,
                                        void                  **clargs_ptr )
 {
     struct cl_zlaswp_batched_args_s *clargs   = *clargs_ptr;
-    int                             nhandles;
+    int                              nhandles;
+    void                            *ipiv_handle_get;
+    void                            *ipiv_handle_set;
 
     if( clargs == NULL ) {
         return;
     }
 
+    if ( dir == ChamDirForward ){
+        ipiv_handle_get = RUNTIME_perm_getaddr( ipiv, ipivk );
+        ipiv_handle_set = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle_get = RUNTIME_invp_getaddr( ipiv, ipivk );
+        ipiv_handle_set = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+
     nhandles = clargs->tasks_nbr;
     rt_starpu_insert_task(
         &cl_zlaswp_batched,
         STARPU_CL_ARGS,             clargs, sizeof(struct cl_zlaswp_batched_args_s),
-        STARPU_R,                   RUNTIME_perm_getaddr( ipiv, ipivk ),
-        STARPU_R,                   RUNTIME_invp_getaddr( ipiv, ipivk ),
+        STARPU_R,                   ipiv_handle_get,
+        STARPU_R,                   ipiv_handle_set,
         STARPU_RW | STARPU_COMMUTE, RTBLKADDR(U, ChamComplexDouble, Um, Un),
         STARPU_R,                   RTBLKADDR(Ak, ChamComplexDouble, Akm, Akn),
         STARPU_DATA_MODE_ARRAY,     clargs->handle_mode, nhandles,
@@ -136,24 +138,31 @@ void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
 #else /* defined(CHAMELEON_STARPU_USE_INSERT) */
 
 void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
-                                       const CHAM_ipiv_t      *ipiv,
-                                       int                     ipivk,
-                                       const CHAM_desc_t      *Ak,
-                                       int                     Akm,
-                                       int                     Akn,
-                                       const CHAM_desc_t      *U,
-                                       int                     Um,
-                                       int                     Un,
+                                       cham_dir_t              dir,
+                                       const CHAM_ipiv_t      *ipiv, int ipivk,
+                                       const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                       const CHAM_desc_t      *U,    int Um,  int Un,
                                        void                  **clargs_ptr )
 {
-    int ret, k;
-    struct starpu_task *task;
+    int                              ret, k;
+    struct starpu_task              *task;
     struct cl_zlaswp_batched_args_s *myclargs = *clargs_ptr;
+    void                            *ipiv_handle_get;
+    void                            *ipiv_handle_set;
 
     if( myclargs == NULL ) {
         return;
     }
 
+    if ( dir == ChamDirForward ){
+        ipiv_handle_get = RUNTIME_perm_getaddr( ipiv, ipivk );
+        ipiv_handle_set = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle_get = RUNTIME_invp_getaddr( ipiv, ipivk );
+        ipiv_handle_set = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+
     INSERT_TASK_COMMON_PARAMETERS( zlaswp_batched, myclargs->tasks_nbr + 4 );
 
     /*
@@ -161,10 +170,10 @@ void INSERT_TASK_zlaswp_batched_flush( const RUNTIME_option_t *options,
      */
     starpu_cham_exchange_init_params( options, &params, Ak->myrank );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
-                                                  RUNTIME_perm_getaddr( ipiv, ipivk ),
+                                                  ipiv_handle_get,
                                                   STARPU_R );
     starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
-                                                  RUNTIME_invp_getaddr( ipiv, ipivk ),
+                                                  ipiv_handle_set,
                                                   STARPU_R );
     starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ),
                                 STARPU_RW | STARPU_COMMUTE );
diff --git a/runtime/starpu/codelets/codelet_zlaswpc.c b/runtime/starpu/codelets/codelet_zlaswpc.c
index 6b468fb8b62cb39de982b13ec7f6b6b4ce84037c..ac2ca5108431785ad8faf2609a2401ab0417ff04 100644
--- a/runtime/starpu/codelets/codelet_zlaswpc.c
+++ b/runtime/starpu/codelets/codelet_zlaswpc.c
@@ -2,7 +2,7 @@
  *
  * @file starpu/codelet_zlaswpc.c
  *
- * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
diff --git a/runtime/starpu/codelets/codelet_zlaswpc_batched.c b/runtime/starpu/codelets/codelet_zlaswpc_batched.c
new file mode 100644
index 0000000000000000000000000000000000000000..8f59e7eb33e7564e79239dedcc36860478e1b1d6
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zlaswpc_batched.c
@@ -0,0 +1,214 @@
+/**
+ *
+ * @file starpu/codelet_zlaswpc_batched.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon StarPU codelets to apply zlaswp on a row panel
+ *
+ * @version 1.3.0
+ * @author Alycia Lisito
+ * @author Matteo Marcos
+ * @date 2025-04-10
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_starpu_internal.h"
+#include "runtime_codelet_z.h"
+
+struct cl_zlaswpc_batched_args_s { /* Codelet arguments shared by all tiles of one batched zlaswpc task */
+    int                      tasks_nbr; /* Number of tiles currently queued in n0[] and handle_mode[] */
+    int                      n; /* n given when the batch was created; forwarded to TCORE_zlaswpc_get/set */
+    int                      n0[CHAMELEON_BATCH_SIZE]; /* n0 argument recorded for each queued tile */
+    struct starpu_data_descr handle_mode[CHAMELEON_BATCH_SIZE]; /* StarPU handle and access mode of each queued tile */
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+/* CPU kernel: calls TCORE_zlaswpc_get then TCORE_zlaswpc_set on every tile of the batch. */
+static void
+cl_zlaswpc_batched_cpu_func( void *descr[], void *cl_arg )
+{
+    struct cl_zlaswpc_batched_args_s *batch = ( struct cl_zlaswpc_batched_args_s * ) cl_arg;
+    /* Fixed buffers: descr[0..1] are the get/set permutation vectors, descr[2..3] the U and Ak tiles */
+    int         *perm_get = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
+    int         *perm_set = (int *)STARPU_VECTOR_GET_PTR( descr[1] );
+    CHAM_tile_t *tileU    = (CHAM_tile_t *) cti_interface_get( descr[2] );
+    CHAM_tile_t *tileB    = (CHAM_tile_t *) cti_interface_get( descr[3] );
+    int          n        = batch->n;
+    int          idx;
+
+    /* The batched tiles follow the four fixed buffers in descr[] */
+    for ( idx = 0; idx < batch->tasks_nbr; idx++ ) {
+        CHAM_tile_t *tileA = (CHAM_tile_t *) cti_interface_get( descr[ idx + 4 ] );
+
+        TCORE_zlaswpc_get( batch->n0[ idx ], tileA->m, tileA->n, n, tileA, tileU, perm_get );
+        TCORE_zlaswpc_set( batch->n0[ idx ], tileA->m, tileA->n, n, tileB, tileA, perm_set );
+    }
+}
+#endif
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU( zlaswpc_batched, cl_zlaswpc_batched_cpu_func )
+
+/* Queue tile An(Anm, Ann) in the pending batch; flush when batch_size_swap tiles are queued. */
+void INSERT_TASK_zlaswpc_batched( const RUNTIME_option_t *options,
+                                  cham_dir_t              dir,
+                                  int                     n0, int n,
+                                  void                   *ws,
+                                  const CHAM_ipiv_t      *ipiv, int ipivk,
+                                  const CHAM_desc_t      *An,   int Anm, int Ann,
+                                  const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                  const CHAM_desc_t      *U,    int Um,  int Un,
+                                  void                  **clargs_ptr )
+{
+    int batch_size = ((struct chameleon_pzlaswp_s *)ws)->batch_size_swap;
+    struct cl_zlaswpc_batched_args_s *clargs = *clargs_ptr;
+
+    if ( An->get_rankof( An, Anm, Ann) != An->myrank ) { /* tile owned by another rank */
+        return;
+    }
+    if( clargs == NULL ) { /* first tile of a new batch */
+        clargs = malloc( sizeof( struct cl_zlaswpc_batched_args_s ) ) ;
+        if ( clargs == NULL ) {
+            chameleon_error( "INSERT_TASK_zlaswpc_batched", "malloc() failed" );
+            return;
+        }
+        clargs->tasks_nbr = 0;
+        clargs->n         = n; /* recorded once for the whole batch */
+        *clargs_ptr       = clargs;
+    }
+    clargs->n0[ clargs->tasks_nbr ]                 = n0;
+    clargs->handle_mode[ clargs->tasks_nbr ].handle = RTBLKADDR(An, CHAMELEON_Complex64_t, Anm, Ann);
+    clargs->handle_mode[ clargs->tasks_nbr ].mode   = STARPU_RW;
+    clargs->tasks_nbr ++;
+    if ( clargs->tasks_nbr == batch_size ) {
+        INSERT_TASK_zlaswpc_batched_flush( options, dir, ipiv, ipivk, Ak, Akm, Akn, U, Um, Un, clargs_ptr );
+    }
+}
+
+#if defined(CHAMELEON_STARPU_USE_INSERT)
+
+/*
+ * Submit the tiles queued in *clargs_ptr as one StarPU task and reset
+ * *clargs_ptr to NULL; returns immediately when no batch is pending.
+ * The direction only decides which of the perm/invp vectors of ipiv is
+ * passed as the "get" permutation and which as the "set" permutation.
+ */
+void INSERT_TASK_zlaswpc_batched_flush( const RUNTIME_option_t *options,
+                                        cham_dir_t              dir,
+                                        const CHAM_ipiv_t      *ipiv, int ipivk,
+                                        const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                        const CHAM_desc_t      *U,    int Um,  int Un,
+                                        void                  **clargs_ptr )
+{
+    struct cl_zlaswpc_batched_args_s *batch = *clargs_ptr;
+    int                               forward;
+    void                             *perm_get, *perm_set;
+
+    if( batch == NULL ) {
+        return;
+    }
+
+    /* Forward: get <- perm, set <- invp. Otherwise the two vectors are swapped. */
+    forward  = ( dir == ChamDirForward );
+    perm_get = forward ? RUNTIME_perm_getaddr( ipiv, ipivk ) : RUNTIME_invp_getaddr( ipiv, ipivk );
+    perm_set = forward ? RUNTIME_invp_getaddr( ipiv, ipivk ) : RUNTIME_perm_getaddr( ipiv, ipivk );
+
+    rt_starpu_insert_task(
+        &cl_zlaswpc_batched,
+        STARPU_CL_ARGS,             batch, sizeof(struct cl_zlaswpc_batched_args_s),
+        STARPU_R,                   perm_get,
+        STARPU_R,                   perm_set,
+        STARPU_RW | STARPU_COMMUTE, RTBLKADDR(U, ChamComplexDouble, Um, Un),
+        STARPU_R,                   RTBLKADDR(Ak, ChamComplexDouble, Akm, Akn),
+        STARPU_DATA_MODE_ARRAY,     batch->handle_mode, batch->tasks_nbr,
+        STARPU_PRIORITY,            options->priority,
+        STARPU_EXECUTE_ON_WORKER,   options->workerid,
+        0 );
+
+    /* clargs is freed by starpu. */
+    *clargs_ptr = NULL;
+}
+
+#else /* defined(CHAMELEON_STARPU_USE_INSERT) */
+
+/*
+ * Build and submit one StarPU task for the tiles queued in *clargs_ptr
+ * (no-op when no batch is pending); *clargs_ptr is NULL on return.
+ */
+void INSERT_TASK_zlaswpc_batched_flush( const RUNTIME_option_t *options,
+                                        cham_dir_t              dir,
+                                        const CHAM_ipiv_t      *ipiv, int ipivk,
+                                        const CHAM_desc_t      *Ak,   int Akm, int Akn,
+                                        const CHAM_desc_t      *U,    int Um,  int Un,
+                                        void                  **clargs_ptr )
+{
+    int                               ret, k;
+    struct starpu_task               *task;
+    struct cl_zlaswpc_batched_args_s *myclargs = *clargs_ptr;
+    void                             *ipiv_handle_get, *ipiv_handle_set;
+
+    if( myclargs == NULL ) {
+        return;
+    }
+
+    if ( dir == ChamDirForward ){
+        ipiv_handle_get = RUNTIME_perm_getaddr( ipiv, ipivk );
+        ipiv_handle_set = RUNTIME_invp_getaddr( ipiv, ipivk );
+    }
+    else {
+        ipiv_handle_get = RUNTIME_invp_getaddr( ipiv, ipivk );
+        ipiv_handle_set = RUNTIME_perm_getaddr( ipiv, ipivk );
+    }
+
+    INSERT_TASK_COMMON_PARAMETERS( zlaswpc_batched, myclargs->tasks_nbr + 4 );
+
+    /* Register the data handles, might need to receive perm and invp */
+    starpu_cham_exchange_init_params( options, &params, Ak->myrank );
+    starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
+                                                  ipiv_handle_get, STARPU_R );
+    starpu_cham_exchange_handle_before_execution( options, &params, &nbdata, descrs,
+                                                  ipiv_handle_set, STARPU_R );
+    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ),
+                                STARPU_RW | STARPU_COMMUTE );
+    starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( Ak, ChamComplexDouble, Akm, Akn ), STARPU_R );
+    for ( k = 0; k < myclargs->tasks_nbr; k++ ) {
+        starpu_cham_register_descr( &nbdata, descrs, myclargs->handle_mode[ k ].handle, STARPU_RW );
+    }
+
+    task = starpu_task_create();
+    task->cl = cl;
+
+    /* Set codelet parameters */
+    task->cl_arg      = myclargs;
+    task->cl_arg_size = sizeof( struct cl_zlaswpc_batched_args_s );
+    task->cl_arg_free = 1;
+
+    /* Set common parameters */
+    starpu_cham_task_set_options( options, task, nbdata, descrs, NULL );
+
+    /* Flops */
+    task->flops = 0.;
+
+    ret = starpu_task_submit( task );
+    if ( ret == -ENODEV ) {
+        task->destroy = 0;
+        starpu_task_destroy( task );
+        *clargs_ptr = NULL; /* myclargs was freed with the task (cl_arg_free), do not leave it dangling */
+        chameleon_error( "INSERT_TASK_zlaswpc_batched", "Failed to submit the task to StarPU" );
+        return;
+    }
+    starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
+
+    /* clargs is freed by starpu. */
+    *clargs_ptr = NULL;
+    (void)clargs;
+    (void)cl_name;
+}
+
+#endif /* defined(CHAMELEON_STARPU_USE_INSERT) */
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index e04ed9503b376caa599e5d92fa905b53b74a7f2d..145838d04e6b6faa7bf2dccb082196eef73f3192 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -113,6 +113,10 @@ if (NOT CHAMELEON_SIMULATION)
                 add_test( test_${cat}_${prec}laswp ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/laswp.in )
                 add_test( test_${cat}_${prec}getrs ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrs.in )
                 add_test( test_${cat}_${prec}gesv ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/gesv.in )
+
+                add_test( test_${cat}_${prec}laswp_batch ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/laswp.in )
+                set_tests_properties( test_${cat}_${prec}laswp_batch
+                                      PROPERTIES ENVIRONMENT "CHAMELEON_LASWP_BATCH_SIZE=3" )
                 if ( ${cat} STREQUAL "mpi" )
                     add_test( test_${cat}_${prec}laswp_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/laswp.in )
                     add_test( test_${cat}_${prec}getrs_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrs.in )