From 61dbad0ed513d2bd36bdf86e8817e4784b45bb23 Mon Sep 17 00:00:00 2001
From: Matthieu KUHN <bkuhnm@l0.spartan.bench.local>
Date: Thu, 31 Mar 2022 17:16:11 +0200
Subject: [PATCH] getrf_nopiv: Add Alloc/Free functions to manage temporary
 buffers of the pzgetrf_nopiv algorithm to better control the MPI transfers
 overhead

---
 compute/zgetrf_nopiv.c          | 94 ++++++++++++++++++++++++++++++++-
 include/chameleon/chameleon_z.h |  2 +
 2 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c
index d6e1c27ec..07ebfa2e9 100644
--- a/compute/zgetrf_nopiv.c
+++ b/compute/zgetrf_nopiv.c
@@ -11,19 +11,109 @@
  *
  * @brief Chameleon zgetrf_nopiv wrappers
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Omar Zenati
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Alycia Lisito
- * @date 2022-02-22
+ * @author Matthieu Kuhn
+ * @date 2024-10-17
  *
  * @precisions normal z -> s d c
  *
  */
 #include "control/common.h"
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  @brief Allocate the required workspaces for asynchronous getrf_nopiv
+ *
+ *******************************************************************************
+ *
+ * @param[in] A
+ *          The descriptor of the matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval An allocated opaque pointer to use in CHAMELEON_zgetrf_nopiv_Tile_Async()
+ * and to free with CHAMELEON_zgetrf_nopiv_WS_Free().
+ * @retval NULL if chameleon_context_self() returns NULL or if the allocation fails.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrf_nopiv_Tile_Async
+ * @sa CHAMELEON_zgetrf_nopiv_WS_Free
+ *
+ */
+void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt = chameleon_context_self();
+    struct chameleon_pzgetrf_nopiv_s *options;
+
+    if ( chamctxt == NULL ) {
+        return NULL;
+    }
+    options = calloc( 1, sizeof(*options) ); /* zero-filled: use_workspace starts at 0 */
+    if ( options == NULL ) {
+        return NULL;
+    }
+
+    if ( ( ( A->p > 1 ) || ( A->q > 1 ) ) &&
+         ( A->get_rankof_init == chameleon_getrankof_2d ) &&
+         ( chamctxt->generic_enabled != CHAMELEON_TRUE ) )
+    {
+        int lookahead = chamctxt->lookahead;
+        options->use_workspace = 1;
+
+        chameleon_desc_init( &(options->WL), CHAMELEON_MAT_ALLOC_TILE,
+                             ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb),
+                             A->mt * A->mb, A->nb * A->q * lookahead, 0, 0,
+                             A->mt * A->mb, A->nb * A->q * lookahead, A->p, A->q,
+                             NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
+
+        chameleon_desc_init( &(options->WU), CHAMELEON_MAT_ALLOC_TILE,
+                             ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb),
+                             A->mb * A->p * lookahead, A->nt * A->nb, 0, 0,
+                             A->mb * A->p * lookahead, A->nt * A->nb, A->p, A->q,
+                             NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
+    }
+
+    return (void*)options;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Free the allocated workspaces for asynchronous getrf_nopiv
+ *
+ *******************************************************************************
+ *
+ * @param[in,out] user_ws
+ *          On entry, the opaque pointer returned by CHAMELEON_zgetrf_nopiv_WS_Alloc(),
+ *          which may be NULL (then nothing is done). On exit, all data are freed.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrf_nopiv_Tile_Async
+ * @sa CHAMELEON_zgetrf_nopiv_WS_Alloc
+ *
+ */
+void CHAMELEON_zgetrf_nopiv_WS_Free( void *user_ws )
+{
+    struct chameleon_pzgetrf_nopiv_s *ws = user_ws;
+
+    if ( ( ws != NULL ) && ws->use_workspace ) {
+        chameleon_desc_destroy( &(ws->WL) );
+        chameleon_desc_destroy( &(ws->WU) );
+    }
+    free( ws ); /* no-op when ws is NULL */
+}
 
 /**
  ********************************************************************************
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 3f33260f4..9bd22083b 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -333,6 +333,8 @@ void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A );
 void  CHAMELEON_zgram_WS_Free( void *ws );
 void *CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A );
 void  CHAMELEON_zgetrf_WS_Free( void *ws );
+void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A );
+void  CHAMELEON_zgetrf_nopiv_WS_Free( void *ws );
 
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv(        int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
 int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
-- 
GitLab