diff --git a/ChangeLog b/ChangeLog
index 2517d5d027d2a8c248466b97428388ce6cdab625..1e299d40f81f0d0b6ca075464c76cffbc7a3224d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,6 @@
 chameleon-1.3.0
 ------------------------------------------------------------------------
- - mixed-precision: introduce descripto with precision adapted to local norms
+ - mixed-precision: introduce descriptor with precision adapted to local norms
         - Add CHAMELEON_[dz]gered... functions to reduce the precision of the tiles based on a requested accuracy
         - Add CHAMELEON_[dz]gerst... functions to restore the original numerical precision of the tiles in a descriptor
  - types: add support for half precision arithmetic into the data descriptors
@@ -11,6 +11,8 @@ chameleon-1.3.0
    CHAMELEON_Desc_Create_User that requires aan additional `, NULL`
    parameters in the general case.
  - compute/poinv: Add the possibility to use an intermediate distribution for the TRTRI operation
+ - compute/getrf_nopiv: Add lookahead through temporary buffers to better regulate the communication allocations
+ - runtime/starpu: Whenever possible replace the lacpy codelet by a direct memory copy from the input handler to the output one
 
 chameleon-1.2.0
 ------------------------------------------------------------------------
diff --git a/compute/pzgepdf_qdwh.c b/compute/pzgepdf_qdwh.c
index 420ad6f502ceccea3a253363e5b00333c5f49aea..0d1b66cb267f374b5ce493c01a2ecb619603d645 100644
--- a/compute/pzgepdf_qdwh.c
+++ b/compute/pzgepdf_qdwh.c
@@ -15,7 +15,7 @@
  * @author Mathieu Faverge
  * @author Hatem Ltaief
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-05
+ * @date 2024-10-17
  * @precisions normal z -> s d c
  *
  */
@@ -35,8 +35,8 @@ static int _zgepdf_qdwh_opt_genD = 1;
 static int _zgepdf_qdwh_opt_genD = 0;
 #endif
 
-static int _zgepdf_qdwh_opt_qr = 1;
-static int _zgepdf_qdwh_opt_id = 1; // There is a numerical issue when combining this optimization and the StarPU lacpy
+static int _zgepdf_qdwh_opt_qr  = 1;
+static int _zgepdf_qdwh_opt_id  = 1;
 static int _zgepdf_qdwh_verbose = 0;
 
 /**
@@ -603,6 +603,7 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
     double conv = 100.;
     double normest, Unorm;
     int it, itconv, facto = -1;
+    cham_bool_t optlacpy_backup;
 
     double eps  = CHAMELEON_dlamch();
     double tol1 = 5. * eps;
@@ -615,6 +616,10 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
     }
     assert( chamctxt->scheduler != RUNTIME_SCHED_PARSEC );
 
+    /* Force unoptimized lacpy */
+    optlacpy_backup = chamctxt->optlacpy_enabled;
+    chamctxt->optlacpy_enabled = CHAMELEON_FALSE;
+
     if ( info ) {
         info->itQR = 0;
         info->itPO = 0;
@@ -848,5 +853,8 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
                                  &descB2, &descTS2, &descTT2, &descQ2, &descD2 );
     CHAMELEON_zgemm_WS_Free( gemm_ws );
 
+    /* Restore optimized lacpy value */
+    chamctxt->optlacpy_enabled = optlacpy_backup;
+
     return;
 }
diff --git a/compute/pzgetrf_nopiv.c b/compute/pzgetrf_nopiv.c
index daac4bad9652cd1512b023431c5ad1b325cd8c5c..18fb33ed81656b462cdcff0f52fe82e6080b90de 100644
--- a/compute/pzgetrf_nopiv.c
+++ b/compute/pzgetrf_nopiv.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zgetrf_nopiv parallel algorithm
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Omar Zenati
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
@@ -20,20 +20,23 @@
  * @author Samuel Thibault
  * @author Terry Cojean
  * @author Matthieu Kuhn
- * @date 2022-02-22
+ * @date 2024-10-17
  * @precisions normal z -> s d c
  *
  */
 #include "control/common.h"
 
-#define A(m,n) A,  m,  n
+#define A(m, n)  A,  m, n
+#define WD(m)    WL, m, m
+#define WL(m, n) WL, m, n
+#define WU(m, n) WU, m, n
 
 /**
  *  Parallel tile LU factorization with no pivoting - dynamic scheduling
  */
-void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
-                              RUNTIME_sequence_t *sequence,
-                              RUNTIME_request_t *request )
+void chameleon_pzgetrf_nopiv_generic( CHAM_desc_t        *A,
+                                      RUNTIME_sequence_t *sequence,
+                                      RUNTIME_request_t  *request )
 {
     CHAM_context_t *chamctxt;
     RUNTIME_option_t options;
@@ -121,3 +124,196 @@ void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
 
     RUNTIME_options_finalize(&options, chamctxt);
 }
+/* Tile LU without pivoting reading the panel tiles from their copies in WL and WU */
+void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t        *A,
+                                 CHAM_desc_t        *WL,
+                                 CHAM_desc_t        *WU,
+                                 RUNTIME_sequence_t *sequence,
+                                 RUNTIME_request_t  *request )
+{
+    CHAM_context_t  *chamctxt;
+    RUNTIME_option_t options;
+
+    int k, m, n, ib, p, q, lp, lq;
+    int tempkm, tempkn, tempmm, tempnn;
+    int lookahead, myp, myq;
+
+    CHAMELEON_Complex64_t zone  = (CHAMELEON_Complex64_t) 1.0;
+    CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
+
+    chamctxt = chameleon_context_self();
+    if (sequence->status != CHAMELEON_SUCCESS) {
+        return;
+    }
+    RUNTIME_options_init(&options, chamctxt, sequence, request);
+
+    ib = CHAMELEON_IB;
+    lookahead = chamctxt->lookahead; /* NOTE(review): used as a modulus below, assumed to be >= 1 */
+    myp = A->myrank / A->q; /* presumably the row of this rank in the p x q grid */
+    myq = A->myrank % A->q; /* presumably the column of this rank in the p x q grid */
+
+    for (k = 0; k < chameleon_min(A->mt, A->nt); k++) {
+        RUNTIME_iteration_push(chamctxt, k);
+        lp = (k % lookahead) * A->p; /* row offset in WU of the slot used at step k */
+        lq = (k % lookahead) * A->q; /* column offset in WL of the slot used at step k */
+
+        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+
+        options.priority = 2*A->nt - 2*k;
+        INSERT_TASK_zgetrf_nopiv(
+            &options,
+            tempkm, tempkn, ib, A->mb,
+            A(k, k), A->mb*k);
+
+        /**
+         * Copy A(k,k) into WL and WU, then chain copies over the q columns of WL and the p rows of WU
+         */
+        {
+            INSERT_TASK_zlacpy(
+                &options,
+                ChamUpperLower, tempkm, tempkn,
+                A(  k, k ),
+                WL( k, (k % A->q) + lq ) );
+            INSERT_TASK_zlacpy(
+                &options,
+                ChamUpperLower, tempkm, tempkn,
+                A(  k, k ),
+                WU( (k % A->p) + lp, k ) );
+
+            for ( q=1; q < A->q; q++ ) {
+                INSERT_TASK_zlacpy(
+                    &options,
+                    ChamUpperLower, tempkm, tempkn,
+                    WL( k, ((k+q-1) % A->q) + lq ),
+                    WL( k, ((k+q)   % A->q) + lq ) );
+            }
+
+            for ( p=1; p < A->p; p++ ) {
+                INSERT_TASK_zlacpy(
+                    &options,
+                    ChamUpperLower, tempkm, tempkn,
+                    WU( ((k+p-1) % A->p) + lp, k ),
+                    WU( ((k+p)   % A->p) + lp, k ) );
+            }
+        }
+        RUNTIME_data_flush( sequence, A( k, k ) );
+
+        for (m = k+1; m < A->mt; m++) {
+
+            /* Only handle the rows m such that m % p == myp */
+            if ( m%A->p != myp ) {
+                continue;
+            }
+
+            options.priority = 2*A->nt - 2*k - m;
+            tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+
+            assert( A->get_rankof( A, m, k ) == WU->get_rankof( WU, myp + lp, k) );
+            INSERT_TASK_ztrsm(
+                &options,
+                ChamRight, ChamUpper, ChamNoTrans, ChamNonUnit,
+                tempmm, tempkn, A->mb,
+                zone, WU( myp + lp, k ),
+                      A( m, k ) );
+
+            /* Copy A(m,k) into WL, then chain copies through the q columns of the current slot */
+            {
+                assert( A->get_rankof( A, m, k ) == WL->get_rankof( WL,  m, (k % A->q) + lq) );
+                INSERT_TASK_zlacpy(
+                    &options,
+                    ChamUpperLower, tempmm, tempkn,
+                    A(  m, k ),
+                    WL( m, (k % A->q) + lq) );
+
+                for ( q=1; q < A->q; q++ ) {
+                    INSERT_TASK_zlacpy(
+                        &options,
+                        ChamUpperLower, tempmm, tempkn,
+                        WL( m, ((k+q-1) % A->q) + lq ),
+                        WL( m, ((k+q)   % A->q) + lq ) );
+                }
+            }
+            RUNTIME_data_flush( sequence, A( m, k ) );
+        }
+
+        for (n = k+1; n < A->nt; n++) {
+
+            /* Only handle the columns n such that n % q == myq */
+            if ( n%A->q != myq ) {
+                continue;
+            }
+
+            tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+            options.priority = 2*A->nt - 2*k - n;
+
+            assert( A->get_rankof( A, k, n ) == WL->get_rankof( WL, k, myq+lq) );
+            INSERT_TASK_ztrsm(
+                &options,
+                ChamLeft, ChamLower, ChamNoTrans, ChamUnit,
+                tempkm, tempnn, A->mb,
+                zone, WL( k, myq + lq ),
+                      A( k, n ));
+
+            /* Copy A(k,n) into WU, then chain copies through the p rows of the current slot */
+            {
+                assert( A->get_rankof( A, k, n ) == WU->get_rankof( WU, (k%A->p) + lp, n) );
+                INSERT_TASK_zlacpy(
+                    &options,
+                    ChamUpperLower, tempkm, tempnn,
+                    A(  k, n ),
+                    WU( (k % A->p) + lp, n ) );
+
+                for ( p=1; p < A->p; p++ ) {
+                    INSERT_TASK_zlacpy(
+                        &options,
+                        ChamUpperLower, tempkm, tempnn,
+                        WU( ((k+p-1) % A->p) + lp, n ),
+                        WU( ((k+p)   % A->p) + lp, n ) );
+                }
+            }
+            RUNTIME_data_flush( sequence, A( k, n ) );
+
+            for (m = k+1; m < A->mt; m++) {
+
+                /* Only handle the rows m such that m % p == myp */
+                if ( m%A->p != myp ) {
+                    continue;
+                }
+
+                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+                options.priority = 2*A->nt - 2*k  - n - m;
+
+                assert( A->get_rankof( A, m, n ) == WL->get_rankof( WL, m, myq + lq) );
+                assert( A->get_rankof( A, m, n ) == WU->get_rankof( WU, myp + lp, n) );
+
+                INSERT_TASK_zgemm(
+                    &options,
+                    ChamNoTrans, ChamNoTrans,
+                    tempmm, tempnn, A->mb, A->mb,
+                    mzone, WL( m, myq + lq ),
+                           WU( myp + lp, n ),
+                    zone,  A( m, n ));
+            }
+        }
+        RUNTIME_iteration_pop( chamctxt );
+    }
+
+    CHAMELEON_Desc_Flush( WL, sequence );
+    CHAMELEON_Desc_Flush( WU, sequence );
+
+    RUNTIME_options_finalize( &options, chamctxt );
+}
+
+void chameleon_pzgetrf_nopiv( struct chameleon_pzgetrf_nopiv_s *ws,
+                              CHAM_desc_t                      *A,
+                              RUNTIME_sequence_t               *sequence,
+                              RUNTIME_request_t                *request )
+{
+    /* Without a usable workspace, fall back on the generic algorithm */
+    if ( ( ws == NULL ) || !ws->use_workspace ) {
+        chameleon_pzgetrf_nopiv_generic( A, sequence, request );
+        return;
+    }
+    chameleon_pzgetrf_nopiv_ws( A, &(ws->WL), &(ws->WU), sequence, request );
+}
diff --git a/compute/zgesv_nopiv.c b/compute/zgesv_nopiv.c
index c7c703a45dcf31b7129d31b5baefd549fb78d5f9..cf5c756eb00259ded3da6bc1c8dcfb5965768089 100644
--- a/compute/zgesv_nopiv.c
+++ b/compute/zgesv_nopiv.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zgesv_nopiv wrappers
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Jakub Kurzak
@@ -19,7 +19,8 @@
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @author Matthieu Kuhn
+ * @date 2024-10-17
  * @precisions normal z -> s d c
  *
  */
@@ -82,8 +83,8 @@
  *
  */
 int CHAMELEON_zgesv_nopiv( int N, int NRHS,
-                       CHAMELEON_Complex64_t *A, int LDA,
-                       CHAMELEON_Complex64_t *B, int LDB )
+                           CHAMELEON_Complex64_t *A, int LDA,
+                           CHAMELEON_Complex64_t *B, int LDB )
 {
     int NB;
     int status;
@@ -92,6 +93,7 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
     RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
     CHAM_desc_t descAl, descAt;
     CHAM_desc_t descBl, descBt;
+    void *ws = NULL;
 
     chamctxt = chameleon_context_self();
     if (chamctxt == NULL) {
@@ -138,7 +140,8 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
                      B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );
 
     /* Call the tile interface */
-    CHAMELEON_zgesv_nopiv_Tile_Async( &descAt, &descBt, sequence, &request );
+    ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( &descAt );
+    CHAMELEON_zgesv_nopiv_Tile_Async( &descAt, &descBt, ws, sequence, &request );
 
     /* Submit the matrix conversion back */
     chameleon_ztile2lap( chamctxt, &descAl, &descAt,
@@ -149,6 +152,7 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
     chameleon_sequence_wait( chamctxt, sequence );
 
     /* Cleanup the temporary data */
+    CHAMELEON_zgetrf_nopiv_WS_Free( ws );
     chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
     chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
 
@@ -195,10 +199,11 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
  */
 int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B )
 {
-    CHAM_context_t *chamctxt;
+    CHAM_context_t     *chamctxt;
     RUNTIME_sequence_t *sequence = NULL;
-    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
-    int status;
+    RUNTIME_request_t   request = RUNTIME_REQUEST_INITIALIZER;
+    int                 status;
+    void               *ws;
 
     chamctxt = chameleon_context_self();
     if (chamctxt == NULL) {
@@ -207,12 +212,15 @@ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B )
     }
     chameleon_sequence_create( chamctxt, &sequence );
 
-    CHAMELEON_zgesv_nopiv_Tile_Async( A, B, sequence, &request );
+    ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A );
+    CHAMELEON_zgesv_nopiv_Tile_Async( A, B, ws, sequence, &request );
 
     CHAMELEON_Desc_Flush( A, sequence );
     CHAMELEON_Desc_Flush( B, sequence );
 
     chameleon_sequence_wait( chamctxt, sequence );
+    CHAMELEON_zgetrf_nopiv_WS_Free( ws );
+
     status = sequence->status;
     chameleon_sequence_destroy( chamctxt, sequence );
     return status;
@@ -248,10 +256,14 @@ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B )
  * @sa CHAMELEON_zcgesv_Tile_Async
  *
  */
-int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *B,
-                                  RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t        *A,
+                                      CHAM_desc_t        *B,
+                                      void               *user_ws,
+                                      RUNTIME_sequence_t *sequence,
+                                      RUNTIME_request_t *request )
 {
-    CHAM_context_t *chamctxt;
+    CHAM_context_t                   *chamctxt;
+    struct chameleon_pzgetrf_nopiv_s *ws;
 
     chamctxt = chameleon_context_self();
     if (chamctxt == NULL) {
@@ -294,11 +306,23 @@ int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *B,
      return CHAMELEON_SUCCESS;
      */
 
-    chameleon_pzgetrf_nopiv( A, sequence, request );
+   if ( user_ws == NULL ) {
+        ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A );
+    }
+    else {
+        ws = user_ws;
+    }
+    chameleon_pzgetrf_nopiv( ws, A, sequence, request );
 
     chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
 
     chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
 
+    if ( user_ws == NULL ) {
+        CHAMELEON_Desc_Flush( A, sequence );
+        CHAMELEON_Desc_Flush( B, sequence );
+        chameleon_sequence_wait( chamctxt, sequence );
+        CHAMELEON_zgetrf_nopiv_WS_Free( ws );
+    }
     return CHAMELEON_SUCCESS;
 }
diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c
index d6e1c27ec6423bb5c0db35a0f96615003aa4fbe8..3409e97f7a80cfb0844b4eefa66d6646be6239f1 100644
--- a/compute/zgetrf_nopiv.c
+++ b/compute/zgetrf_nopiv.c
@@ -11,19 +11,109 @@
  *
  * @brief Chameleon zgetrf_nopiv wrappers
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Omar Zenati
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Alycia Lisito
- * @date 2022-02-22
+ * @author Matthieu Kuhn
+ * @date 2024-10-17
  *
  * @precisions normal z -> s d c
  *
  */
 #include "control/common.h"
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Allocate the workspaces required by the asynchronous getrf_nopiv
+ *
+ *******************************************************************************
+ *
+ * @param[in] A
+ *          The descriptor of the matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval An opaque pointer to give to CHAMELEON_zgetrf_nopiv_Tile_Async() and
+ * to release with CHAMELEON_zgetrf_nopiv_WS_Free(); NULL if it cannot be built.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrf_nopiv_Tile_Async
+ * @sa CHAMELEON_zgetrf_nopiv_WS_Free
+ *
+ */
+void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    struct chameleon_pzgetrf_nopiv_s *options;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        return NULL;
+    }
+
+    /* calloc zero-fills the structure: use_workspace is 0 unless set below */
+    options = calloc( 1, sizeof(*options) );
+    if ( options == NULL ) {
+        return NULL;
+    }
+
+    if ( ( ( A->p > 1 ) || ( A->q > 1 ) ) &&
+         ( A->get_rankof_init == chameleon_getrankof_2d ) &&
+         ( chamctxt->generic_enabled != CHAMELEON_TRUE ) )
+    {
+        int lookahead = chamctxt->lookahead;
+        options->use_workspace = 1;
+        chameleon_desc_init( &(options->WL), CHAMELEON_MAT_ALLOC_TILE,
+                             ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb),
+                             A->mt * A->mb, A->nb * A->q * lookahead, 0, 0,
+                             A->mt * A->mb, A->nb * A->q * lookahead, A->p, A->q,
+                             NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
+
+        chameleon_desc_init( &(options->WU), CHAMELEON_MAT_ALLOC_TILE,
+                             ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb),
+                             A->mb * A->p * lookahead, A->nt * A->nb, 0, 0,
+                             A->mb * A->p * lookahead, A->nt * A->nb, A->p, A->q,
+                             NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
+    }
+    return (void*)options;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Free the workspaces allocated for the asynchronous getrf_nopiv
+ *
+ *******************************************************************************
+ *
+ * @param[in,out] user_ws
+ *          On entry, the pointer returned by CHAMELEON_zgetrf_nopiv_WS_Alloc() (may be NULL).
+ *          On exit, all data are freed.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrf_nopiv_Tile_Async
+ * @sa CHAMELEON_zgetrf_nopiv_WS_Alloc
+ *
+ */
+void CHAMELEON_zgetrf_nopiv_WS_Free( void *user_ws )
+{
+    struct chameleon_pzgetrf_nopiv_s *ws = user_ws;
+    /* WS_Alloc() may return NULL: nothing to destroy then, and free(NULL) is a no-op */
+    if ( ( ws != NULL ) && ws->use_workspace ) {
+        chameleon_desc_destroy( &(ws->WL) );
+        chameleon_desc_destroy( &(ws->WU) );
+    }
+    free( ws );
+}
 
 /**
  ********************************************************************************
@@ -69,7 +159,7 @@
  *
  */
 int CHAMELEON_zgetrf_nopiv( int M, int N,
-                        CHAMELEON_Complex64_t *A, int LDA )
+                            CHAMELEON_Complex64_t *A, int LDA )
 {
     int NB;
     int status;
@@ -77,6 +167,7 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
     CHAM_context_t *chamctxt;
     RUNTIME_sequence_t *sequence = NULL;
     RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    void *ws = NULL;
 
     chamctxt = chameleon_context_self();
     if (chamctxt == NULL) {
@@ -117,7 +208,8 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
                      A, NB, NB, LDA, N, M, N, sequence, &request );
 
     /* Call the tile interface */
-    CHAMELEON_zgetrf_nopiv_Tile_Async( &descAt, sequence, &request );
+    ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( &descAt );
+    CHAMELEON_zgetrf_nopiv_Tile_Async( &descAt, ws, sequence, &request );
 
     /* Submit the matrix conversion back */
     chameleon_ztile2lap( chamctxt, &descAl, &descAt,
@@ -126,6 +218,7 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
     chameleon_sequence_wait( chamctxt, sequence );
 
     /* Cleanup the temporary data */
+    CHAMELEON_zgetrf_nopiv_WS_Free( ws );
     chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
 
     status = sequence->status;
@@ -169,10 +262,11 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
  */
 int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A )
 {
-    CHAM_context_t *chamctxt;
+    CHAM_context_t     *chamctxt;
     RUNTIME_sequence_t *sequence = NULL;
-    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
-    int status;
+    RUNTIME_request_t   request = RUNTIME_REQUEST_INITIALIZER;
+    int                 status;
+    void               *ws;
 
     chamctxt = chameleon_context_self();
     if (chamctxt == NULL) {
@@ -181,11 +275,14 @@ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A )
     }
     chameleon_sequence_create( chamctxt, &sequence );
 
-    CHAMELEON_zgetrf_nopiv_Tile_Async( A, sequence, &request );
+    ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A );
+    CHAMELEON_zgetrf_nopiv_Tile_Async( A, ws, sequence, &request );
 
     CHAMELEON_Desc_Flush( A, sequence );
 
     chameleon_sequence_wait( chamctxt, sequence );
+    CHAMELEON_zgetrf_nopiv_WS_Free( ws );
+
     status = sequence->status;
     chameleon_sequence_destroy( chamctxt, sequence );
     return status;
@@ -224,11 +321,13 @@ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A )
  * @sa CHAMELEON_zgetrs_Tile_Async
  *
  */
-int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A,
-                                   RUNTIME_sequence_t *sequence,
-                                   RUNTIME_request_t *request )
+int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t        *A,
+                                       void               *user_ws,
+                                       RUNTIME_sequence_t *sequence,
+                                       RUNTIME_request_t  *request )
 {
-    CHAM_context_t *chamctxt;
+    CHAM_context_t                   *chamctxt;
+    struct chameleon_pzgetrf_nopiv_s *ws;
 
     chamctxt = chameleon_context_self();
     if (chamctxt == NULL) {
@@ -263,7 +362,19 @@ int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A,
         return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
     }
 
-    chameleon_pzgetrf_nopiv( A, sequence, request );
+   if ( user_ws == NULL ) {
+        ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A );
+    }
+    else {
+        ws = user_ws;
+    }
+
+    chameleon_pzgetrf_nopiv( ws, A, sequence, request );
 
+    if ( user_ws == NULL ) {
+        CHAMELEON_Desc_Flush( A, sequence );
+        chameleon_sequence_wait( chamctxt, sequence );
+        CHAMELEON_zgetrf_nopiv_WS_Free( ws );
+    }
     return CHAMELEON_SUCCESS;
 }
diff --git a/control/chameleon_zf77.c b/control/chameleon_zf77.c
index 70541b792b2b3ee527c1737af4f314a86a7f57d3..64fad284a4f118b79bef98e0c622f415e3a2ac0a 100644
--- a/control/chameleon_zf77.c
+++ b/control/chameleon_zf77.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon Fortran77 computational routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @comment This file is automatically generated by tools/genf77interface.pl
@@ -21,7 +21,8 @@
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Alycia Lisito
- * @date 2022-02-22
+ * @author Matthieu Kuhn
+ * @date 2024-10-17
  * @precisions normal z -> c d s
  *
  */
@@ -727,7 +728,7 @@ void CHAMELEON_ZGETRF_INCPIV_TILE_ASYNC(CHAM_desc_t *A, CHAM_desc_t *L, int *IPI
 { *info = CHAMELEON_zgetrf_incpiv_Tile_Async(A, L, IPIV, sequence, request); }
 
 void CHAMELEON_ZGETRF_NOPIV_TILE_ASYNC(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request, int *info)
-{ *info = CHAMELEON_zgetrf_nopiv_Tile_Async(A, sequence, request); }
+{ *info = CHAMELEON_zgetrf_nopiv_Tile_Async(A, NULL, sequence, request); }
 
 //void CHAMELEON_ZGETRI_TILE_ASYNC(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request, int *info)
 //{ *info = CHAMELEON_zgetri_Tile_Async(A, IPIV, W, sequence, request); }
diff --git a/control/compute_z.h b/control/compute_z.h
index 06c8854c1b05a7c1bffbca0fa8e615218363672d..c0bac97556b86fbe2aaa5b61abcb9a645abf246b 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -23,7 +23,7 @@
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
  * @author Ana Hourcau
- * @date 2024-07-17
+ * @date 2024-10-17
  * @precisions normal z -> c d s
  *
  */
@@ -52,6 +52,49 @@ struct chameleon_pzgetrf_s {
     int          involved:1;
 };
 
+/**
+ * @brief   Data structure to handle the GETRF temporary workspaces
+ *          for MPI transfers.
+ *
+ * @comment The idea is to explicitly manage the temporary
+ *          blocks arising from the MPI transfers automatically
+ *          inferred by StarPU, hence limiting the total number
+ *          of temporary data allocated for these blocks.
+ *
+ *          The blocks to be sent/received on the network are
+ *          copied into those buffers. These copies are
+ *          then used by the algorithm in place of the regular
+ *          blocks of the problem matrix.
+ *
+ *          For WL (resp. WU), the number of allocated blocks
+ *          corresponds to the number of blocks on the column
+ *          (resp. on the line) multiplied by the lookahead number
+ *          from the current chameleon context.
+ *
+ *          Then, depending on the block panel index, we access
+ *          one of the temporary column blocks of WL and row blocks
+ *          of WU in a circular way.
+ *
+ *          For instance, for the block panel index k, the block
+ *          A(m,k) produced by the TRSM(A(k,k),A(m,k)) is stored
+ *          into temporary buffer WL(m,k%chamctxt->lookahead).
+ *          Similarly, the block A(k,n) is stored into the temporary
+ *          block WU(k%chamctxt->lookahead, n).
+ *
+ *          Notice that, by doing so, the notion of lookahead is
+ *          reintroduced: artificial dependencies are implied by
+ *          the circular usage of WL and WU temporary workspaces.
+ *
+ */
+struct chameleon_pzgetrf_nopiv_s {
+    int use_workspace; /* Non-zero when WL and WU are initialized and must be used */
+
+    CHAM_desc_t WL; /* Workspace to store temporary blocks of the */
+                    /* diagonal and the lower part of the problem matrix */
+    CHAM_desc_t WU; /* Workspace to store temporary blocks of the */
+                    /* upper part of the problem matrix */
+};
+
 /**
  * @brief Data structure to handle the Centering-Scaled workspaces
  */
@@ -103,7 +146,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
 void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+void chameleon_pzgetrf_nopiv(struct chameleon_pzgetrf_nopiv_s *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_rectil(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzhegst(int itype, cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/control/context.c b/control/context.c
index cfbe13a714ec97d1f817aaff6e4762e7e85b7ef4..5bcd6d598f9c2a6d90590934b51787a7e0b50162 100644
--- a/control/context.c
+++ b/control/context.c
@@ -21,7 +21,7 @@
  * @author Matthieu Kuhn
  * @author Loris Lucido
  * @author Terry Cojean
- * @date 2023-09-11
+ * @date 2024-10-17
  *
  ***
  *
@@ -140,6 +140,7 @@ CHAM_context_t *chameleon_context_create()
     chamctxt->progress_enabled   = chameleon_env_on_off( "CHAMELEON_PROGRESS",        CHAMELEON_FALSE );
     chamctxt->generic_enabled    = chameleon_env_on_off( "CHAMELEON_GENERIC",         CHAMELEON_FALSE );
     chamctxt->autominmax_enabled = chameleon_env_on_off( "CHAMELEON_AUTOMINMAX",      CHAMELEON_TRUE  );
+    chamctxt->optlacpy_enabled   = chameleon_env_on_off( "CHAMELEON_OPTIMIZED_LACPY", CHAMELEON_TRUE  );
 
     chamctxt->runtime_paused     = CHAMELEON_FALSE;
 
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 3f33260f4436ec195323874d3c13b9b44d2c62e4..9b085201650e266247ade2308e6522b58d4070a9 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -24,7 +24,7 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Ana Hourcau
- * @date 2024-07-17
+ * @date 2024-10-17
  * @precisions normal z -> c d s
  *
  */
@@ -211,11 +211,11 @@ int CHAMELEON_zgeqrf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence
 int CHAMELEON_zgeqrs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 //int CHAMELEON_zgesv_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgesv_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, double *S, CHAM_desc_t *T, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
@@ -333,6 +333,8 @@ void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A );
 void  CHAMELEON_zgram_WS_Free( void *ws );
 void *CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A );
 void  CHAMELEON_zgetrf_WS_Free( void *ws );
+void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A );
+void  CHAMELEON_zgetrf_nopiv_WS_Free( void *ws );
 
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv(        int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
 int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
diff --git a/include/chameleon/runtime_struct.h b/include/chameleon/runtime_struct.h
index 72e353accaba3542c4ac36dd129fdc4552f72989..4c1b9c5210f9153221a2b6668c1868e6fbd5af42 100644
--- a/include/chameleon/runtime_struct.h
+++ b/include/chameleon/runtime_struct.h
@@ -17,7 +17,7 @@
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Philippe Virouleau
- * @date 2024-03-16
+ * @date 2024-10-17
  *
  */
 #ifndef _chameleon_runtime_struct_h_
@@ -103,6 +103,7 @@ typedef struct runtime_option_s {
     int                 priority;  /**< Define the submitted task priority                       */
     int                 workerid;  /**< Define the prefered worker id to perform the tasks       */
     int                 forcesub;  /**< Force task submission if true                            */
+    int                 withlacpy; /**< Flag to force the use of lacpy copies                    */
     int                 withcuda;  /**< Flag to know if cuda is enabled/disabled                 */
     size_t              ws_wsize;  /**< Define the worker workspace size                         */
     size_t              ws_hsize;  /**< Define the host workspace size for hybrid CPU/GPU kernel */
diff --git a/include/chameleon/struct_context.h b/include/chameleon/struct_context.h
index 471dcfc11acfbdf27fc58593950ae739ed4b32f9..b66621cca2b625d35a7b92f9aaf6dbd70f5f2113 100644
--- a/include/chameleon/struct_context.h
+++ b/include/chameleon/struct_context.h
@@ -14,7 +14,7 @@
  * @version 1.3.0
  * @author Mathieu Faverge
  * @author Florent Pruvost
- * @date 2024-03-16
+ * @date 2024-10-17
  *
  */
 #ifndef _struct_context_h_
@@ -43,18 +43,19 @@ typedef struct chameleon_context_s {
     cham_bool_t        progress_enabled;
     cham_bool_t        generic_enabled;
     cham_bool_t        autominmax_enabled;
+    cham_bool_t        optlacpy_enabled;   /**< Enable runtime copy instead of the lacpy kernel       */
     cham_bool_t        runtime_paused;
 
-    cham_householder_t householder;        /**> "domino" (flat) or tree-based (reduction) Householder */
-    cham_translation_t translation;        /**> In place or Out of place layout conversion            */
+    cham_householder_t householder;        /**< "domino" (flat) or tree-based (reduction) Householder */
+    cham_translation_t translation;        /**< In place or Out of place layout conversion            */
 
     int                nb;
     int                ib;
-    int                rhblock;            /**> block size for tree-based (reduction) Householder     */
-    int                lookahead;          /**> depth of the look ahead in algorithms                 */
-    void              *schedopt;           /**> structure for runtimes                                */
-    int                mpi_outer_init;     /**> MPI has been initialized outside our functions        */
-    MPI_Comm           comm;               /**> MPI communicator                                      */
+    int                rhblock;            /**< block size for tree-based (reduction) Householder     */
+    int                lookahead;          /**< depth of the look ahead in algorithms                 */
+    void              *schedopt;           /**< structure for runtimes                                */
+    int                mpi_outer_init;     /**< MPI has been initialized outside our functions        */
+    MPI_Comm           comm;               /**< MPI communicator                                      */
 } CHAM_context_t;
 
 END_C_DECLS
diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c
index 77f4a51320216dc19eb043937dd2758bce71869a..11ecfb4dee30a518cb127114483cec03e0942d5d 100644
--- a/runtime/openmp/control/runtime_options.c
+++ b/runtime/openmp/control/runtime_options.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon OpenMP options routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Cedric Augonnet
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2022-02-22
+ * @date 2024-10-17
  *
  */
 #include "chameleon_openmp.h"
@@ -31,6 +31,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
     options->priority  = RUNTIME_PRIORITY_MIN;
     options->workerid  = -1;
     options->forcesub  = 0;
+    options->withlacpy = 0;
     options->ws_wsize  = 0;
     options->ws_hsize  = 0;
     options->ws_worker = NULL;
diff --git a/runtime/parsec/control/runtime_options.c b/runtime/parsec/control/runtime_options.c
index edb2017e599215a46c683013a311c9c07883e23d..a4f8c0ffffec83029f3d2d1bdbaac3da9bf53a72 100644
--- a/runtime/parsec/control/runtime_options.c
+++ b/runtime/parsec/control/runtime_options.c
@@ -11,10 +11,10 @@
  *
  * @brief Chameleon PaRSEC options routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Reazul Hoque
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2024-10-17
  *
  */
 #include "chameleon_parsec.h"
@@ -22,17 +22,18 @@
 void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
                            RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
-    options->sequence   = sequence;
-    options->request    = request;
-    options->profiling  = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
-    options->parallel   = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
-    options->priority   = RUNTIME_PRIORITY_MIN;
-    options->workerid   = -1;
-    options->forcesub   = 0;
-    options->ws_wsize   = 0;
-    options->ws_hsize   = 0;
-    options->ws_worker  = NULL;
-    options->ws_host    = NULL;
+    options->sequence  = sequence;
+    options->request   = request;
+    options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
+    options->parallel  = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
+    options->priority  = RUNTIME_PRIORITY_MIN;
+    options->workerid  = -1;
+    options->forcesub  = 0;
+    options->withlacpy = 0;
+    options->ws_wsize  = 0;
+    options->ws_hsize  = 0;
+    options->ws_worker = NULL;
+    options->ws_host   = NULL;
     return;
 }
 
diff --git a/runtime/quark/control/runtime_options.c b/runtime/quark/control/runtime_options.c
index 8c090263308a8a861fdaab945c3d3759228da37e..5cb4ac31371ac8fcce34002ae14ef4f78f2d522a 100644
--- a/runtime/quark/control/runtime_options.c
+++ b/runtime/quark/control/runtime_options.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon Quark options routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Vijay Joshi
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2024-10-17
  *
  */
 #include "chameleon_quark.h"
@@ -34,18 +34,19 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
     qopt->flags.thread_set_to_manual_scheduling = -1;
 
     /* Initialize options */
-    options->sequence   = sequence;
-    options->request    = request;
-    options->profiling  = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
-    options->parallel   = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
-    options->priority   = RUNTIME_PRIORITY_MIN;
-    options->workerid   = -1;
-    options->forcesub   = 0;
+    options->sequence  = sequence;
+    options->request   = request;
+    options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
+    options->parallel  = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
+    options->priority  = RUNTIME_PRIORITY_MIN;
+    options->workerid  = -1;
+    options->forcesub  = 0;
+    options->withlacpy = 0;
 
-    options->ws_wsize   = 0;
-    options->ws_hsize   = 0;
-    options->ws_worker  = NULL;
-    options->ws_host    = NULL;
+    options->ws_wsize  = 0;
+    options->ws_hsize  = 0;
+    options->ws_worker = NULL;
+    options->ws_host   = NULL;
 
     /* quark in options */
     qopt->quark = (Quark*)(chamctxt->schedopt);
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index aa8d73ed3774d60b34f1b9d6641c984db59fe820..93c951a246545a970c272e50a544c62b9fd986d8 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -21,7 +21,7 @@
  * @author Florent Pruvost
  * @author Samuel Thibault
  * @author Alycia Lisito
- * @date 2024-03-16
+ * @date 2024-10-17
  * @precisions normal z -> c d s
  *
  */
@@ -148,7 +148,8 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
 
 #if !defined(CHAMELEON_USE_MPI) || defined(HAVE_STARPU_MPI_DATA_CPY_PRIORITY)
     /* Insert the task */
-    if ( (uplo == ChamUpperLower) &&
+    if ( (!options->withlacpy) &&
+         (uplo == ChamUpperLower) &&
          (tileA->m == m) && (tileA->n == n) &&
          (tileB->m == m) && (tileB->n == n) &&
          (displA == 0) && (displB == 0) )
@@ -225,7 +226,8 @@ void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
 
 #if !defined(CHAMELEON_USE_MPI) || defined(HAVE_STARPU_MPI_DATA_CPY_PRIORITY)
     /* Insert the task */
-    if ( (uplo == ChamUpperLower) &&
+    if ( (!options->withlacpy) &&
+         (uplo == ChamUpperLower) &&
          (tileA->m == m) && (tileA->n == n) &&
          (tileB->m == m) && (tileB->n == n) )
     {
diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c
index 8ec2551f59d59e94c0eb763196576911db49d0cc..8423d9d7d5530bb1fe15bee327be2c0b2a3d7f11 100644
--- a/runtime/starpu/control/runtime_options.c
+++ b/runtime/starpu/control/runtime_options.c
@@ -16,7 +16,7 @@
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
- * @date 2023-07-04
+ * @date 2024-10-17
  *
  */
 #include "chameleon_starpu_internal.h"
@@ -32,6 +32,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
     options->priority  = RUNTIME_PRIORITY_MIN;
     options->workerid  = (schedopt == NULL) ? -1 : schedopt->workerid;
     options->forcesub  = 0;
+    options->withlacpy = !(chamctxt->optlacpy_enabled);
     options->withcuda  = (chamctxt->ncudas > 0);
     options->ws_wsize  = 0;
     options->ws_hsize  = 0;
diff --git a/testing/testing_zgesv_nopiv.c b/testing/testing_zgesv_nopiv.c
index 8855334349498a9a28f1ca2cc3e70cfe1f95ea9e..d1fd9b1df9c7b745c178f42b69892b781c87efed 100644
--- a/testing/testing_zgesv_nopiv.c
+++ b/testing/testing_zgesv_nopiv.c
@@ -13,7 +13,7 @@
  * @author Lucas Barros de Assis
  * @author Mathieu Faverge
  * @author Alycia Lisito
- * @date 2023-07-05
+ * @date 2024-10-17
  * @precisions normal z -> c d s
  *
  */
@@ -64,7 +64,7 @@ testing_zgesv_nopiv_desc( run_arg_list_t *args, int check )
     /* Calculates the solution */
     testing_start( &test_data );
     if ( async ) {
-        hres = CHAMELEON_zgesv_nopiv_Tile_Async( descA, descX,
+        hres = CHAMELEON_zgesv_nopiv_Tile_Async( descA, descX, NULL,
                                                  test_data.sequence, &test_data.request );
         CHAMELEON_Desc_Flush( descA, test_data.sequence );
         CHAMELEON_Desc_Flush( descX, test_data.sequence );
diff --git a/testing/testing_zgetrf_nopiv.c b/testing/testing_zgetrf_nopiv.c
index afa44ede3d749a4caa695a8f780ee9f30615e793..657513eaf26371399a7402e1e0b305afdc8fb854 100644
--- a/testing/testing_zgetrf_nopiv.c
+++ b/testing/testing_zgetrf_nopiv.c
@@ -13,7 +13,9 @@
  * @author Lucas Barros de Assis
  * @author Mathieu Faverge
  * @author Alycia Lisito
- * @date 2023-07-05
+ * @author Lucas Barros De Assis
+ * @author Matthieu Kuhn
+ * @date 2024-10-17
  * @precisions normal z -> c d s
  *
  */
@@ -53,7 +55,7 @@ testing_zgetrf_nopiv_desc( run_arg_list_t *args, int check )
     /* Calculates the solution */
     testing_start( &test_data );
     if ( async ) {
-        hres = CHAMELEON_zgetrf_nopiv_Tile_Async( descA, test_data.sequence, &test_data.request );
+        hres = CHAMELEON_zgetrf_nopiv_Tile_Async( descA, NULL, test_data.sequence, &test_data.request );
         CHAMELEON_Desc_Flush( descA, test_data.sequence );
     }
     else {