From 8c9825d5cdf91145d317446254bf34098e32ab63 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Thu, 17 Oct 2024 11:54:05 +0200
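Subject: [PATCH] context: Add an optlacpy context option

Add an optlacpy_enabled flag to the CHAMELEON context. The flag is
initialized in chameleon_context_create() from the
CHAMELEON_OPTIMIZED_LACPY environment variable through
chameleon_env_on_off(), with CHAMELEON_TRUE passed as its second argument.

In the StarPU runtime, RUNTIME_options_init() now sets options->withlacpy
to the negation of this flag instead of hard-coding it to 0.

Also fix the Doxygen member comment markers in struct_context.h
("/**>" replaced by "/**<").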

---
 control/context.c                        |  3 ++-
 include/chameleon/struct_context.h       | 17 +++++++++--------
 runtime/starpu/control/runtime_options.c |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/control/context.c b/control/context.c
index cfbe13a71..5bcd6d598 100644
--- a/control/context.c
+++ b/control/context.c
@@ -21,7 +21,7 @@
  * @author Matthieu Kuhn
  * @author Loris Lucido
  * @author Terry Cojean
- * @date 2023-09-11
+ * @date 2024-10-17
  *
  ***
  *
@@ -140,6 +140,7 @@ CHAM_context_t *chameleon_context_create()
     chamctxt->progress_enabled   = chameleon_env_on_off( "CHAMELEON_PROGRESS",        CHAMELEON_FALSE );
     chamctxt->generic_enabled    = chameleon_env_on_off( "CHAMELEON_GENERIC",         CHAMELEON_FALSE );
     chamctxt->autominmax_enabled = chameleon_env_on_off( "CHAMELEON_AUTOMINMAX",      CHAMELEON_TRUE  );
+    chamctxt->optlacpy_enabled   = chameleon_env_on_off( "CHAMELEON_OPTIMIZED_LACPY", CHAMELEON_TRUE  );
 
     chamctxt->runtime_paused     = CHAMELEON_FALSE;
 
diff --git a/include/chameleon/struct_context.h b/include/chameleon/struct_context.h
index 471dcfc11..b66621cca 100644
--- a/include/chameleon/struct_context.h
+++ b/include/chameleon/struct_context.h
@@ -14,7 +14,7 @@
  * @version 1.3.0
  * @author Mathieu Faverge
  * @author Florent Pruvost
- * @date 2024-03-16
+ * @date 2024-10-17
  *
  */
 #ifndef _struct_context_h_
@@ -43,18 +43,19 @@ typedef struct chameleon_context_s {
     cham_bool_t        progress_enabled;
     cham_bool_t        generic_enabled;
     cham_bool_t        autominmax_enabled;
+    cham_bool_t        optlacpy_enabled;   /**< Use the runtime copy instead of the lacpy kernel      */
     cham_bool_t        runtime_paused;
 
-    cham_householder_t householder;        /**> "domino" (flat) or tree-based (reduction) Householder */
-    cham_translation_t translation;        /**> In place or Out of place layout conversion            */
+    cham_householder_t householder;        /**< "domino" (flat) or tree-based (reduction) Householder */
+    cham_translation_t translation;        /**< In place or Out of place layout conversion            */
 
     int                nb;
     int                ib;
-    int                rhblock;            /**> block size for tree-based (reduction) Householder     */
-    int                lookahead;          /**> depth of the look ahead in algorithms                 */
-    void              *schedopt;           /**> structure for runtimes                                */
-    int                mpi_outer_init;     /**> MPI has been initialized outside our functions        */
-    MPI_Comm           comm;               /**> MPI communicator                                      */
+    int                rhblock;            /**< block size for tree-based (reduction) Householder     */
+    int                lookahead;          /**< depth of the look ahead in algorithms                 */
+    void              *schedopt;           /**< structure for runtimes                                */
+    int                mpi_outer_init;     /**< MPI has been initialized outside our functions        */
+    MPI_Comm           comm;               /**< MPI communicator                                      */
 } CHAM_context_t;
 
 END_C_DECLS
diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c
index 64f3b569e..8423d9d7d 100644
--- a/runtime/starpu/control/runtime_options.c
+++ b/runtime/starpu/control/runtime_options.c
@@ -32,7 +32,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
     options->priority  = RUNTIME_PRIORITY_MIN;
     options->workerid  = (schedopt == NULL) ? -1 : schedopt->workerid;
     options->forcesub  = 0;
-    options->withlacpy = 0;
+    options->withlacpy = !(chamctxt->optlacpy_enabled);
     options->withcuda  = (chamctxt->ncudas > 0);
     options->ws_wsize  = 0;
     options->ws_hsize  = 0;
-- 
GitLab