diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c
index 40af52744e74444a23d6d4505df24c61af0d5a9b..dcec74350500739b6fdbed7c38cd09f9588447e7 100644
--- a/coreblas/compute/core_ztile.c
+++ b/coreblas/compute/core_ztile.c
@@ -367,6 +367,17 @@ TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t
     CORE_zlacpy( uplo, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld );
 }
 
+void
+TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB, const CHAM_tile_t *A, int LDA, CHAM_tile_t *B, int LDB )
+{
+    /* Calls CORE_zlacpy from A's buffer + displA (ld LDA) to B's buffer + displB (ld LDB); both tiles must be FULLRANK or DESC. */
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    const CHAMELEON_Complex64_t *src = (const CHAMELEON_Complex64_t *)CHAM_tile_get_ptr( A ) + displA;
+    CHAMELEON_Complex64_t       *dst = (CHAMELEON_Complex64_t *)CHAM_tile_get_ptr( B ) + displB;
+    CORE_zlacpy( uplo, M, N, src, LDA, dst, LDB );
+}
+
 void
 TCORE_zlange( cham_normtype_t    norm,
               int                M,
diff --git a/coreblas/compute/core_ztile_empty.c b/coreblas/compute/core_ztile_empty.c
index 90bfbd3e2c5ce5838ff97b6a28fd8333d389c2c7..30347d3320ba80c657ffa39795021357a43d9b7f 100644
--- a/coreblas/compute/core_ztile_empty.c
+++ b/coreblas/compute/core_ztile_empty.c
@@ -263,6 +263,12 @@ TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t
     return;
 }
 
+void
+TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB, const CHAM_tile_t *A, int LDA, CHAM_tile_t *B, int LDB )
+{
+    /* Stub implementation: copies nothing and ignores every argument. */
+}
+
 void
 TCORE_zlange( cham_normtype_t    norm,
               int                M,
diff --git a/coreblas/eztrace_module/coreblas_tcore_eztrace_module b/coreblas/eztrace_module/coreblas_tcore_eztrace_module
index 18698b8a1949840e2eeb9cf7e6bccc8b936ed363..fd60e50fbee88238b51e97130f5907e808b79e91 100644
--- a/coreblas/eztrace_module/coreblas_tcore_eztrace_module
+++ b/coreblas/eztrace_module/coreblas_tcore_eztrace_module
@@ -172,6 +172,7 @@ void TCORE_zher2k( int uplo, int trans, int N, int K, void *alpha, const void *A
 int  TCORE_zherfb( int uplo, int N, int K, int IB, int NB, const void *A, const void *T, void *C, void *WORK, int ldwork );
 int  TCORE_zhessq( int storev, int uplo, int N, const void *A, void *sclssq );
 void TCORE_zlacpy( int uplo, int M, int N, const void *A, void *B );
+void TCORE_zlacpyx( int uplo, int M, int N, int displA, int displB, const void *A, int LDA, void *B, int LDB );
 void TCORE_zlange( int norm, int M, int N, const void *A, double *work, double *normA );
 void TCORE_zlanhe( int norm, int uplo, int N, const void *A, double *work, double *normA );
 void TCORE_zlansy( int norm, int uplo, int N, const void *A, double *work, double *normA );
diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h
index aefd6461b4d57107a48585ef697e7a14bfb625f0..74443b88724db7a787cb93be514bdd7108372cb3 100644
--- a/coreblas/include/coreblas/coreblas_ztile.h
+++ b/coreblas/include/coreblas/coreblas_ztile.h
@@ -43,6 +43,7 @@ int  TCORE_zherfb( cham_uplo_t uplo, int N, int K, int IB, int NB, const CHAM_ti
 int  TCORE_zhessq( cham_store_t storev, cham_uplo_t uplo, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq );
 #endif
 void TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
+void TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB, const CHAM_tile_t *A, int LDA, CHAM_tile_t *B, int LDB );
 void TCORE_zlange( cham_normtype_t norm, int M, int N, const CHAM_tile_t *A, double *work, double *normA );
 #if defined(PRECISION_z) || defined(PRECISION_c)
 void TCORE_zlanhe( cham_normtype_t norm, cham_uplo_t uplo, int N, const CHAM_tile_t *A, double *work, double *normA );
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index 00fa905af5b0ad44ecf9df9b6d682e8ee711a7f5..bcca068490306ad8eb346b18ec1d6331c650089e 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -50,6 +50,7 @@ CHAMELEON_CL_CB(zher2k,        cti_handle_get_m(task->handles[0]), cti_handle_ge
 CHAMELEON_CL_CB(zherk,         cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                     ( 1.+   M)*M*N)
 #endif
 CHAMELEON_CL_CB(zlacpy,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zlacpyx,       cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
 CHAMELEON_CL_CB(zlange,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
 CHAMELEON_CL_CB(zlaset,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
 CHAMELEON_CL_CB(zlaset2,       cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index 9453453af38dd61bc1fe2b0b80a04752bac140c7..7302ec42c94f7aa69adfa5692fa2ded758b797c9 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -33,6 +33,8 @@ struct cl_zlacpy_args_s {
     int n;
     int displA;
     int displB;
+    int lda;
+    int ldb;
     CHAM_tile_t *tileA;
     CHAM_tile_t *tileB;
 };
@@ -50,17 +52,32 @@ cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
 
     assert( clargs->displA == 0 );
     assert( clargs->displB == 0 );
-    /* A = tileA->mat; */
-    /* B = tileB->mat; */
-    /* CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); */
+    /* Whole-tile copy: displacements must be zero (asserted above) and the
+     * leading dimensions come from the tiles themselves inside TCORE_zlacpy;
+     * copies with offsets are handled by cl_zlacpyx_cpu_func. */
     TCORE_zlacpy( clargs->uplo, clargs->m, clargs->n, tileA, tileB );
 }
+
+static void
+cl_zlacpyx_cpu_func(void *descr[], void *cl_arg)
+{
+    /* CPU kernel of the zlacpyx codelet: unpack the codelet arguments, fetch the
+     * source (descr[0]) and destination (descr[1]) tiles, and call TCORE_zlacpyx. */
+    struct cl_zlacpy_args_s *args    = (struct cl_zlacpy_args_s *)cl_arg;
+    CHAM_tile_t             *srcTile = cti_interface_get(descr[0]);
+    CHAM_tile_t             *dstTile = cti_interface_get(descr[1]);
+
+    TCORE_zlacpyx( args->uplo, args->m, args->n,
+                   args->displA, args->displB,
+                   srcTile, args->lda, dstTile, args->ldb );
+}
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU( zlacpy, cl_zlacpy_cpu_func )
+CODELETS_CPU( zlacpy,  cl_zlacpy_cpu_func  )
+CODELETS_CPU( zlacpyx, cl_zlacpyx_cpu_func )
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int m, int n, int nb,
@@ -70,7 +87,7 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
     struct cl_zlacpy_args_s *clargs = NULL;
     void (*callback)(void*);
     int                      exec = 0;
-    char                    *cl_name = "zlacpy";
+    char                    *cl_name = "zlacpyx";
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
@@ -88,14 +105,16 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
         clargs->displB = displB;
         clargs->tileA  = A->get_blktile( A, Am, An );
         clargs->tileB  = B->get_blktile( B, Bm, Bn );
+        clargs->lda    = clargs->tileA->ld;
+        clargs->ldb    = clargs->tileB->ld;
     }
 
     /* Callback fro profiling information */
-    callback = options->profiling ? cl_zlacpy_callback : NULL;
+    callback = options->profiling ? cl_zlacpyx_callback : NULL;
 
     /* Insert the task */
     rt_starpu_insert_task(
-        &cl_zlacpy,
+        &cl_zlacpyx,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zlacpy_args_s),
         STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
@@ -119,7 +138,51 @@ void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
-    INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                         0, A, Am, An,
-                         0, B, Bm, Bn );
+    struct cl_zlacpy_args_s *clargs = NULL;
+    void (*callback)(void*);
+    int                      exec    = 0;
+    char                    *cl_name = "zlacpy";
+
+    /* Handle cache */
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_W(B, Bm, Bn);
+    exec = __chameleon_need_exec;
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    if ( exec ) {
+        clargs = malloc( sizeof( struct cl_zlacpy_args_s ) );
+        clargs->uplo   = uplo;
+        clargs->m      = m;
+        clargs->n      = n;
+        clargs->displA = 0;
+        clargs->displB = 0;
+        clargs->tileA  = A->get_blktile( A, Am, An );
+        clargs->tileB  = B->get_blktile( B, Bm, Bn );
+        clargs->lda    = clargs->tileA->ld;
+        clargs->ldb    = clargs->tileB->ld;
+    }
+
+    /* Callback for profiling information */
+    callback = options->profiling ? cl_zlacpy_callback : NULL;
+
+    /* Insert the task */
+    rt_starpu_insert_task(
+        &cl_zlacpy,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_zlacpy_args_s),
+        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_W,      RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME,              cl_name,
+#endif
+
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 013a6222f66807dbaf947dcfaed016b6f693f342..bd823f41066f1958b5396b10a7bc4589de887b23 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -94,6 +94,7 @@ CODELETS_HEADER(zhe2ge);
 CODELETS_HEADER(zlascal);
 CODELETS_HEADER(ztradd);
 CODELETS_HEADER(zlacpy);
+CODELETS_HEADER(zlacpyx);
 CODELETS_HEADER(zlange);
 CODELETS_HEADER(zlange_max);
 CODELETS_HEADER(zlansy);