From abb9f173eba2578e9ac671badc28582c68a343ba Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Thu, 18 May 2023 10:26:58 +0200
Subject: [PATCH] zgetrf: Add IPIV to the prototype of the function

---
 compute/pzgetrf.c               |  1 +
 compute/zgetrf.c                | 22 +++++++++++++++++-----
 control/compute_z.h             |  2 +-
 include/chameleon/chameleon_z.h |  6 +++---
 4 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 165801efb..c4501d591 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -183,6 +183,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
  */
 void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
                         CHAM_desc_t                *A,
+                        CHAM_desc_t                *IPIV,
                         RUNTIME_sequence_t         *sequence,
                         RUNTIME_request_t          *request )
 {
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index bcb8ee0c8..72c595373 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -167,7 +167,7 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
  *
  */
 int
-CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
+CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
 {
     int                 NB;
     int                 status;
@@ -271,7 +271,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
  *
  */
 int
-CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
+CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
 {
     CHAM_context_t     *chamctxt;
     RUNTIME_sequence_t *sequence = NULL;
@@ -287,8 +287,7 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
     chameleon_sequence_create( chamctxt, &sequence );
 
     ws = CHAMELEON_zgetrf_WS_Alloc( A );
-    CHAMELEON_zgetrf_Tile_Async( A, ws, sequence, &request );
-
+    CHAMELEON_zgetrf_Tile_Async( A, IPIV, ws, sequence, &request );
     CHAMELEON_Desc_Flush( A, sequence );
 
     chameleon_sequence_wait( chamctxt, sequence );
@@ -334,6 +333,7 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
  */
 int
 CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
+                             CHAM_desc_t        *IPIV,
                              void               *user_ws,
                              RUNTIME_sequence_t *sequence,
                              RUNTIME_request_t  *request )
@@ -375,12 +375,24 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
         chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid first descriptor" );
         return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
     }
+    if ( chameleon_desc_check( IPIV ) != CHAMELEON_SUCCESS ) {
+        chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid second descriptor" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
 
     /* Check input arguments */
     if ( A->nb != A->mb ) {
         chameleon_error( "CHAMELEON_zgetrf_Tile", "only square tiles supported" );
         return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
     }
+    if ( IPIV->mb != A->mb ) {
+        chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must have the same number of rows as tiles of A" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+    if ( IPIV->nb != 1 ) {
+        chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must be vectors with only one column per tile" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
 
     if ( user_ws == NULL ) {
         ws = CHAMELEON_zgetrf_WS_Alloc( A );
@@ -389,7 +401,7 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
         ws = user_ws;
     }
 
-    chameleon_pzgetrf( user_ws, A, sequence, request );
+    chameleon_pzgetrf( user_ws, A, IPIV, sequence, request );
 
     if ( user_ws == NULL ) {
         CHAMELEON_Desc_Flush( A, sequence );
diff --git a/control/compute_z.h b/control/compute_z.h
index 1dd9b1305..634bd2d5c 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -87,7 +87,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc
 void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_desc_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 1f04c61aa..da5a5bc65 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -53,7 +53,7 @@ int CHAMELEON_zgesvd(cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_
 //int CHAMELEON_zgetrf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV);
 int CHAMELEON_zgetrf_incpiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV);
 int CHAMELEON_zgetrf_nopiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA);
-int CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA );
+int CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV );
 //int CHAMELEON_zgetri(int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV);
 //int CHAMELEON_zgetrs(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zgetrs_incpiv(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV, CHAMELEON_Complex64_t *B, int LDB);
@@ -134,7 +134,7 @@ int CHAMELEON_zgesvd_Tile(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, dou
 //int CHAMELEON_zgetrf_Tile(CHAM_desc_t *A, int *IPIV);
 int CHAMELEON_zgetrf_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV);
 int CHAMELEON_zgetrf_nopiv_Tile(CHAM_desc_t *A);
-int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A );
+int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV );
 //int CHAMELEON_zgetri_Tile(CHAM_desc_t *A, int *IPIV);
 //int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B);
 int CHAMELEON_zgetrs_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B);
@@ -211,7 +211,7 @@ int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *
 //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrs_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-- 
GitLab