diff --git a/control/context.c b/control/context.c
index cfbe13a714ec97d1f817aaff6e4762e7e85b7ef4..5bcd6d598f9c2a6d90590934b51787a7e0b50162 100644
--- a/control/context.c
+++ b/control/context.c
@@ -21,7 +21,7 @@
  * @author Matthieu Kuhn
  * @author Loris Lucido
  * @author Terry Cojean
- * @date 2023-09-11
+ * @date 2024-10-17
  *
  ***
  *
@@ -140,6 +140,7 @@ CHAM_context_t *chameleon_context_create()
     chamctxt->progress_enabled   = chameleon_env_on_off( "CHAMELEON_PROGRESS",        CHAMELEON_FALSE );
     chamctxt->generic_enabled    = chameleon_env_on_off( "CHAMELEON_GENERIC",         CHAMELEON_FALSE );
     chamctxt->autominmax_enabled = chameleon_env_on_off( "CHAMELEON_AUTOMINMAX",      CHAMELEON_TRUE  );
+    chamctxt->optlacpy_enabled   = chameleon_env_on_off( "CHAMELEON_OPTIMIZED_LACPY", CHAMELEON_TRUE  );
 
     chamctxt->runtime_paused     = CHAMELEON_FALSE;
 
diff --git a/include/chameleon/struct_context.h b/include/chameleon/struct_context.h
index 471dcfc11acfbdf27fc58593950ae739ed4b32f9..b66621cca2b625d35a7b92f9aaf6dbd70f5f2113 100644
--- a/include/chameleon/struct_context.h
+++ b/include/chameleon/struct_context.h
@@ -14,7 +14,7 @@
  * @version 1.3.0
  * @author Mathieu Faverge
  * @author Florent Pruvost
- * @date 2024-03-16
+ * @date 2024-10-17
  *
  */
 #ifndef _struct_context_h_
@@ -43,18 +43,19 @@ typedef struct chameleon_context_s {
     cham_bool_t        progress_enabled;
     cham_bool_t        generic_enabled;
     cham_bool_t        autominmax_enabled;
+    cham_bool_t        optlacpy_enabled;   /**< Use the runtime copy instead of the lacpy kernel      */
     cham_bool_t        runtime_paused;
 
-    cham_householder_t householder;        /**> "domino" (flat) or tree-based (reduction) Householder */
-    cham_translation_t translation;        /**> In place or Out of place layout conversion            */
+    cham_householder_t householder;        /**< "domino" (flat) or tree-based (reduction) Householder */
+    cham_translation_t translation;        /**< In place or Out of place layout conversion            */
 
     int                nb;
     int                ib;
-    int                rhblock;            /**> block size for tree-based (reduction) Householder     */
-    int                lookahead;          /**> depth of the look ahead in algorithms                 */
-    void              *schedopt;           /**> structure for runtimes                                */
-    int                mpi_outer_init;     /**> MPI has been initialized outside our functions        */
-    MPI_Comm           comm;               /**> MPI communicator                                      */
+    int                rhblock;            /**< block size for tree-based (reduction) Householder     */
+    int                lookahead;          /**< depth of the look ahead in algorithms                 */
+    void              *schedopt;           /**< structure for runtimes                                */
+    int                mpi_outer_init;     /**< MPI has been initialized outside our functions        */
+    MPI_Comm           comm;               /**< MPI communicator                                      */
 } CHAM_context_t;
 
 END_C_DECLS
diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c
index 64f3b569ef2fb8ed732fb1589a3a6391563d88d0..8423d9d7d5530bb1fe15bee327be2c0b2a3d7f11 100644
--- a/runtime/starpu/control/runtime_options.c
+++ b/runtime/starpu/control/runtime_options.c
@@ -32,7 +32,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
     options->priority  = RUNTIME_PRIORITY_MIN;
     options->workerid  = (schedopt == NULL) ? -1 : schedopt->workerid;
     options->forcesub  = 0;
-    options->withlacpy = 0;
+    options->withlacpy = !(chamctxt->optlacpy_enabled);
     options->withcuda  = (chamctxt->ncudas > 0);
     options->ws_wsize  = 0;
     options->ws_hsize  = 0;