From 1176bdaa366374b36f68412d11364578784fe2e1 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Mon, 17 Mar 2025 17:00:02 +0100
Subject: [PATCH] laswp: Add zlaswp operation and testing

---
 compute/CMakeLists.txt          |   2 +-
 compute/zlaswp.c                | 388 ++++++++++++++++++++++++++++++++
 control/descriptor_ipiv.c       |  41 +++-
 include/chameleon/chameleon_z.h |   9 +-
 testing/CMakeLists.txt          |   3 +-
 testing/CTestLists.cmake        |   2 +
 testing/chameleon_ztesting.c    |   4 +
 testing/input/laswp.in          |  20 ++
 testing/testing_zlaswp.c        | 141 ++++++++++++
 9 files changed, 601 insertions(+), 9 deletions(-)
 create mode 100644 compute/zlaswp.c
 create mode 100644 testing/input/laswp.in
 create mode 100644 testing/testing_zlaswp.c

diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index a5d95ed43..fafab2d2e 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -237,7 +237,7 @@ set(ZSRC
     #zhegv.c
     #zhegvd.c
     zhetrd.c
-    #zlaswp.c
+    zlaswp.c
     #zlaswpc.c
     #ztrsmrv.c
     ##################
diff --git a/compute/zlaswp.c b/compute/zlaswp.c
new file mode 100644
index 000000000..6d7955e78
--- /dev/null
+++ b/compute/zlaswp.c
@@ -0,0 +1,388 @@
+/**
+ *
+ * @file zlaswp.c
+ *
+ * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlaswp wrappers
+ *
+ * @version 1.3.0
+ * @author Alycia Lisito
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  @brief Computes the permutation P*op(A) or op(A)*P where P is the permutation
+ *         matrix generated from IPIV.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[in] K1
+ *          The first element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] K2
+ *          The last element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] IPIV
+ *          Vector of pivot indices.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp_Tile
+ * @sa CHAMELEON_zlaswp_Tile_Async
+ * @sa CHAMELEON_claswp
+ * @sa CHAMELEON_dlaswp
+ * @sa CHAMELEON_slaswp
+ *
+ */
+int CHAMELEON_zlaswp( cham_side_t            side,
+                      cham_dir_t             dir,
+                      int                    M,
+                      int                    N,
+                      CHAMELEON_Complex64_t *A,
+                      int                    LDA,
+                      int                    K1,
+                      int                    K2,
+                      int                   *IPIV )
+{
+    int                 status;
+    int                 NB;
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t         descAl, descAt;
+    CHAM_ipiv_t        *descIPIV;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    /* Check input arguments: an illegal i-th argument is reported by returning -i */
+    if ( M < 0 ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of M");
+        return -3;
+    }
+    if ( N < 0 ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of N");
+        return -4;
+    }
+    if ( LDA < chameleon_max( 1, M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of LDA");
+        return -6;
+    }
+    if ( ( K1 < 1 ) || ( K1 > M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of K1");
+        return -7;
+    }
+    if ( ( K2 < 1 ) || ( K2 > M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of K2");
+        return -8;
+    }
+
+    /* Quick return */
+    if ( chameleon_min( N, M ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    /* Tune NB depending on M and N */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if ( status != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zlaswp", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NB */
+    NB = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion: A is both read and overwritten, hence Inout */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+    CHAMELEON_Ipiv_Create( &descIPIV, &descAt, IPIV );
+    CHAMELEON_Ipiv_Init( &descAt, descIPIV );
+
+    /* Call the tile interface */
+    CHAMELEON_zlaswp_Tile_Async( side, dir, &descAt, K1, K2, descIPIV, sequence, &request );
+
+    /* Submit the matrix conversion back so that the swapped rows reach the user array */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInout, ChamUpperLower, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt );
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    status = sequence->status; /* Report failures recorded by the submitted tasks */
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ *  @brief Tile equivalent of CHAMELEON_zlaswp().
+ *
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] K1
+ *          The first element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] K2
+ *          The last element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] IPIV
+ *          Vector of pivot indices.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp
+ * @sa CHAMELEON_zlaswp_Tile_Async
+ * @sa CHAMELEON_claswp_Tile
+ * @sa CHAMELEON_dlaswp_Tile
+ * @sa CHAMELEON_slaswp_Tile
+ *
+ */
+int CHAMELEON_zlaswp_Tile( cham_side_t  side,
+                           cham_dir_t   dir,
+                           CHAM_desc_t *A,
+                           int          K1,
+                           int          K2,
+                           CHAM_ipiv_t *IPIV )
+{
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    int                 status;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    if ( ( K1 < 1 ) || ( K1 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile", "illegal value of K1");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    if ( ( K2 < 1 ) || ( K2 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile", "illegal value of K2");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+    /* Submit through the asynchronous interface on a private sequence */
+    CHAMELEON_zlaswp_Tile_Async( side, dir, A, K1, K2, IPIV, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+    CHAMELEON_Ipiv_Flush( IPIV, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ *  @brief Non-blocking equivalent of CHAMELEON_zlaswp_Tile().
+ *
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] K1
+ *          The first element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] K2
+ *          The last element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] IPIV
+ *          Vector of pivot indices.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp
+ * @sa CHAMELEON_zlaswp_Tile
+ * @sa CHAMELEON_claswp_Tile_Async
+ * @sa CHAMELEON_dlaswp_Tile_Async
+ * @sa CHAMELEON_slaswp_Tile_Async
+ *
+ */
+int CHAMELEON_zlaswp_Tile_Async( cham_side_t         side,
+                                 cham_dir_t          dir,
+                                 CHAM_desc_t        *A,
+                                 int                 K1,
+                                 int                 K2,
+                                 CHAM_ipiv_t        *IPIV,
+                                 RUNTIME_sequence_t *sequence,
+                                 RUNTIME_request_t  *request )
+{
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;
+    RUNTIME_option_t            options;
+    int                         k, tempkm;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    if ( sequence == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if ( request == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if ( sequence->status == CHAMELEON_SUCCESS ) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if ( chameleon_desc_check(A) != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "invalid first descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments (A->m is only read once the descriptor has been checked) */
+    if ( A->mb != A->nb ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "only matching tile sizes supported");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    if ( ( K1 < 1 ) || ( K1 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "illegal value of K1");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    if ( ( K2 < 1 ) || ( K2 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "illegal value of K2");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return */
+    if ( chameleon_min( A->m, A->n ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+    /* One ipiv_to_perm task per tile row; the sequence wait below makes this step blocking */
+    if ( IPIV->data != NULL ) {
+        RUNTIME_options_init( &options, chamctxt, sequence, request );
+        for ( k = 0; k < A->mt; k++ ) {
+            tempkm = A->get_blkdim( A, k, DIM_m, A->m );
+            INSERT_TASK_ipiv_to_perm( &options, k * A->mb, tempkm, tempkm, K1 - 1, K2 - 1,
+                                      IPIV, k );
+        }
+        chameleon_sequence_wait( chamctxt, sequence );
+    }
+
+    ws = CHAMELEON_zgetrf_WS_Alloc( A );
+    /* NOTE(review): ws is freed right after submission; confirm WS_Free is safe at that point */
+    chameleon_pzlaswp( ws, dir, A, IPIV, sequence, request );
+
+    CHAMELEON_zgetrf_WS_Free( ws );
+
+    return CHAMELEON_SUCCESS;
+}
+
diff --git a/control/descriptor_ipiv.c b/control/descriptor_ipiv.c
index 84067cf5f..d46269d32 100644
--- a/control/descriptor_ipiv.c
+++ b/control/descriptor_ipiv.c
@@ -14,7 +14,7 @@
  * @author Matthieu Kuhn
  * @author Alycia Lisito
  * @author Florent Pruvost
- * @date 2024-08-29
+ * @date 2025-03-24
  *
  ***
  *
@@ -148,6 +148,45 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void
     return CHAMELEON_SUCCESS;
 }
 
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  @brief Initialize the IPIV descriptor.
+ *
+ *******************************************************************************
+ *
+ * @param[in] descA
+ *          Descriptor of the matrix A.
+ *
+ * @param[in,out] descIPIV
+ *          Descriptor of the pivot array. Should be initialized using
+ *          CHAMELEON_Ipiv_Create() with data filled with the vector of pivot.
+ *
+ *******************************************************************************
+ *
+ *
+ */
+void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA,
+                          CHAM_ipiv_t       *descIPIV )
+{
+    RUNTIME_option_t    options;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    RUNTIME_sequence_t *sequence = NULL;
+    CHAM_context_t     *chamctxt = chameleon_context_self();
+    /* descA is not read here: only descIPIV is handed to the init task */
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_Ipiv_Init", "CHAMELEON not initialized");
+        return;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+    RUNTIME_options_init( &options, chamctxt, sequence, &request );
+    INSERT_TASK_ipiv_init_data( &options, descIPIV );
+    chameleon_sequence_wait( chamctxt, sequence );
+    chameleon_sequence_destroy( chamctxt, sequence );
+}
+
 /**
  *****************************************************************************
  *
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 37d352bfd..25cfa63ec 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -76,8 +76,7 @@ double CHAMELEON_zlansy(cham_normtype_t norm, cham_uplo_t uplo, int N, CHAMELEON
 double CHAMELEON_zlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, int M, int N, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zlascal(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zlaset(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *A, int LDA);
-//int CHAMELEON_zlaswp(int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX);
-//int CHAMELEON_zlaswpc(int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX);
+int CHAMELEON_zlaswp( cham_side_t side, cham_dir_t dir, int M, int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV );
 int CHAMELEON_zlatms( int M, int N, cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAMELEON_Complex64_t *A, int LDA );
 int CHAMELEON_zlauum(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed );
@@ -157,8 +156,7 @@ double CHAMELEON_zlansy_Tile(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t
 double CHAMELEON_zlantr_Tile(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A);
 int CHAMELEON_zlascal_Tile(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A);
 int CHAMELEON_zlaset_Tile(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A);
-//int CHAMELEON_zlaswp_Tile(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX);
-//int CHAMELEON_zlaswpc_Tile(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX);
+int CHAMELEON_zlaswp_Tile( cham_side_t side, cham_dir_t dir, CHAM_desc_t *A, int K1, int K2, CHAM_ipiv_t *IPIV );
 int CHAMELEON_zlatms_Tile( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A );
 int CHAMELEON_zlauum_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
 int CHAMELEON_zplghe_Tile(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed );
@@ -237,8 +235,7 @@ int CHAMELEON_zlansy_Tile_Async(cham_normtype_t norm, cham_uplo_t uplo, CHAM_des
 int CHAMELEON_zlantr_Tile_Async(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, double *value, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zlascal_Tile_Async(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zlaset_Tile_Async(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-//int CHAMELEON_zlaswp_Tile_Async(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-//int CHAMELEON_zlaswpc_Tile_Async(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zlaswp_Tile_Async( cham_side_t side, cham_dir_t dir, CHAM_desc_t *A, int K1, int K2, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zlatms_Tile_Async( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zlauum_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zplghe_Tile_Async(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt
index 81268734d..083804058 100644
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -26,7 +26,7 @@
 #  @author Alycia Lisito
 #  @author Matthieu Kuhn
 #  @author Abel Calluaud
-#  @date 2025-01-24
+#  @date 2025-03-24
 #
 ###
 
@@ -81,6 +81,7 @@ set(ZSRC_WO_STDAPI
   testing_zgenm2.c
   testing_zgesv_nopiv.c
   testing_zgesvd.c
+  testing_zlaswp.c
   testing_zgetrf_nopiv.c
   testing_zgetrs_nopiv.c
   testing_zgeqrf.c
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index 39b7e89e0..000c8fb3e 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -110,8 +110,10 @@ if (NOT CHAMELEON_SIMULATION)
                 add_test( test_${cat}_${prec}getrf_ppivblocked_batch ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf.in )
                 set_tests_properties( test_${cat}_${prec}getrf_ppivblocked_batch
                                       PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=3" )
+                add_test( test_${cat}_${prec}laswp ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/laswp.in )
 
                 if ( ${cat} STREQUAL "mpi" )
+                    add_test( test_${cat}_${prec}laswp_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/laswp.in )
                     add_test( test_${cat}_${prec}getrf_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrf.in )
                     set_tests_properties( test_${cat}_${prec}getrf_ppiv_comm_with_task
                                           PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_GETRF_ALL_REDUCE=cham_spu_tasks" )
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index 27e2ad9e4..979abaf12 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -133,6 +133,10 @@ parameter_t parameters[] = {
     { NULL, "SVD parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "jobu",  "Value of the jobu parameter ('NoVec', 'Vec', 'Ivec', 'AllVec', 'SVec', 'OVec')",  -50, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestJob, {0}, NULL, pread_job, sprint_job },
     { "jobvt", "Value of the jobvt parameter ('NoVec', 'Vec', 'Ivec', 'AllVec', 'SVec', 'OVec')", -51, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestJob, {0}, NULL, pread_job, sprint_job },
+
+    { NULL, "LASWP parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
+    { "k1",    "Index of the first element to permute",             -70, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int },
+    { "k2",    "Index of the last element to permute",              -71, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int },
 #endif
 
     { "tsub",          "Graph submission time in s",             999, PARAM_OUTPUT, 2, 13, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl },
diff --git a/testing/input/laswp.in b/testing/input/laswp.in
new file mode 100644
index 000000000..41037f5e5
--- /dev/null
+++ b/testing/input/laswp.in
@@ -0,0 +1,20 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Parameters that are not given receive their default values
+
+# LASWP
+
+# nb: Tile size
+# n: Order of the matrix A
+# lda: Leading dimension of matrix A
+# k1: First element of ipiv to apply the permutation.
+# k2: Last element of ipiv to apply the permutation.
+# dir: Specifies the order of the permutation.
+
+op = laswp
+nb = 4, 16, 17
+n = 15, 21, 35
+lda = 40
+k1 = 1, 2, 10
+k2 = 1, 2, 10
+dir = Forward, Backward
+
diff --git a/testing/testing_zlaswp.c b/testing/testing_zlaswp.c
new file mode 100644
index 000000000..56c7a8a84
--- /dev/null
+++ b/testing/testing_zlaswp.c
@@ -0,0 +1,141 @@
+/**
+ *
+ * @file testing_zlaswp.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlaswp testing
+ *
+ * @version 1.3.0
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon/constants.h"
+#include "chameleon/struct.h"
+#include "testings.h"
+#include "chameleon/chameleon_z.h"
+#include "testing_zcheck.h"
+#include <chameleon/flops.h>
+#include <chameleon/getenv.h>
+#include <coreblas/lapacke.h>
+#include <chameleon/tasks.h>
+
+static void testing_zlaswp_ipiv_gen( int *IPIV,
+                                     int  M )
+{
+    int row, remaining;
+    /* Entry row gets a 1-based pivot in [row+1, M] (assuming testing_ialea() >= 0) */
+    for ( row = 0, remaining = M; row < M; row++, remaining-- ) {
+        IPIV[row] = ( row + 1 ) + ( testing_ialea() % remaining );
+    }
+}
+
+int
+testing_zlaswp_desc( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments (LDA is the leading dimension of an M-row matrix: default to M) */
+    int         async   = parameters_getvalue_int( "async" );
+    cham_side_t side    = run_arg_get_side( args, "side", ChamLeft );
+    cham_dir_t  dir     = run_arg_get_dir( args,  "dir", ChamDirForward );
+    int         nb      = run_arg_get_nb(  args );
+    int         N       = run_arg_get_int( args, "N", 1000 );
+    int         M       = run_arg_get_int( args, "M", N );
+    int         LDA     = run_arg_get_int( args, "LDA", M );
+    int         seedA   = run_arg_get_int( args, "seedA", testing_ialea() );
+    int         K1      = run_arg_get_int( args, "K1", 1 );
+    int         K2      = run_arg_get_int( args, "K2", M );
+
+    int *IPIV     = malloc( sizeof(int) * M );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
+    CHAM_ipiv_t *descIPIV;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Creates the matrices */
+    parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N );
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+
+    testing_zlaswp_ipiv_gen( IPIV, M );
+    CHAMELEON_Ipiv_Create( &descIPIV, descA, IPIV );
+    CHAMELEON_Ipiv_Init( descA, descIPIV );
+
+    /* Applies the row interchanges through the tile interface */
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlaswp_Tile_Async( side, dir, descA, K1, K2, descIPIV, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zlaswp_Tile( side, dir, descA, K1, K2, descIPIV );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, 0 );
+
+#if !defined(CHAMELEON_SIMULATION)
+    if ( check ) {
+        CHAM_desc_t *descA0, *descA0c;
+        int          INCX = ( dir == ChamDirForward ) ? 1 : -1;
+
+        descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE );
+
+        CHAMELEON_Desc_Create_User(
+            &descA0c, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble,
+            nb, nb, nb*nb, M, N, 0, 0, M, N, 1, 1,
+            chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL, NULL );
+
+        CHAMELEON_zplrnt_Tile( descA0c, seedA );
+
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, K1, K2, IPIV, INCX );
+        }
+
+        CHAMELEON_zlacpy_Tile( ChamUpperLower, descA0c, descA0 );
+        CHAMELEON_Desc_Destroy( &descA0c );
+
+        hres += check_zmatrices( args, ChamUpperLower, descA, descA0 );
+
+        CHAMELEON_Desc_Destroy( &descA0 );
+    }
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    parameters_desc_destroy( &descA );
+    free( IPIV );
+
+    return hres;
+}
+
+testing_t   test_zlaswp;
+const char *zlaswp_params[] = { "mtxfmt", "nb", "n", "m", "lda", "seedA", "k1", "k2", "side", "dir", NULL };
+const char *zlaswp_output[] = { NULL };
+const char *zlaswp_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Fills the test_zlaswp entry and registers it (runs as a constructor)
+ */
+void testing_zlaswp_init( void ) __attribute__( ( constructor ) );
+void
+testing_zlaswp_init( void )
+{
+    testing_t *test = &test_zlaswp;
+    test->next      = NULL;
+    test->fptr_desc = testing_zlaswp_desc;
+    test->outchk    = zlaswp_outchk;
+    test->output    = zlaswp_output;
+    test->params    = zlaswp_params;
+    test->helper    = "Row interchange on general matrices";
+    test->name      = "zlaswp";
+    testing_register( test );
+}
+
-- 
GitLab