diff --git a/compute/map.c b/compute/map.c
index cef41364590a7a7f4c295cf4a4686556c6212676..1fcb883812652baa91f61db61c20542eaa6d199a 100644
--- a/compute/map.c
+++ b/compute/map.c
@@ -27,7 +27,16 @@
  *
  *******************************************************************************
  *
- * @param[in,out] uplo
+ * @param[in] access
+ *          - ChamR: A is accessed in read-only mode.
+ *          - ChamW: A is accessed in write-only mode.
+ *           WARNING: if the descriptor is set for allocation on the fly, the
+ *           flush call included in this synchronous API will free all allocated
+ *           data; prefer the asynchronous call if you want to initialize data
+ *           before submitting another algorithm.
+ *          - ChamRW: A is accessed in read-write mode.
+ *
+ * @param[in] uplo
  *          - ChamUpper: Only the upper triangular part of the matrix is touched
  *          - ChamLower: Only the lower triangular part of the matrix is touched
  *          - ChamUpperLower: The entire the matrix is touched
@@ -51,7 +60,8 @@
  * @sa CHAMELEON_map_Tile_Async
  *
  */
-int CHAMELEON_map_Tile( cham_uplo_t           uplo,
+int CHAMELEON_map_Tile( cham_access_t         access,
+                        cham_uplo_t           uplo,
                         CHAM_desc_t          *A,
                         cham_unary_operator_t op_fct,
                         void                 *op_args )
@@ -68,7 +78,7 @@ int CHAMELEON_map_Tile( cham_uplo_t           uplo,
     }
     chameleon_sequence_create( chamctxt, &sequence );
 
-    CHAMELEON_map_Tile_Async( uplo, A, op_fct, op_args, sequence, &request );
+    CHAMELEON_map_Tile_Async( access, uplo, A, op_fct, op_args, sequence, &request );
 
     CHAMELEON_Desc_Flush( A, sequence );
 
@@ -89,6 +99,13 @@ int CHAMELEON_map_Tile( cham_uplo_t           uplo,
  *
  *******************************************************************************
  *
+ * @param[in] access
+ *          - ChamR: A is accessed in read-only mode.
+ *          - ChamW: A is accessed in write-only mode.
+ *          INFO: tiles of A may be unallocated before the call if the
+ *          descriptor is set for allocation on the fly.
+ *          - ChamRW: A is accessed in read-write mode.
+ *
  * @param[in] sequence
  *          Identifies the sequence of function calls that this call belongs to
  *          (for completion checks and exception handling purposes).
@@ -105,7 +122,8 @@ int CHAMELEON_map_Tile( cham_uplo_t           uplo,
  * @sa CHAMELEON_map_Tile
  *
  */
-int CHAMELEON_map_Tile_Async( cham_uplo_t           uplo,
+int CHAMELEON_map_Tile_Async( cham_access_t         access,
+                              cham_uplo_t           uplo,
                               CHAM_desc_t          *A,
                               cham_unary_operator_t op_fct,
                               void                 *op_args,
@@ -146,7 +164,7 @@ int CHAMELEON_map_Tile_Async( cham_uplo_t           uplo,
         return CHAMELEON_SUCCESS;
     }
 
-    chameleon_pmap( uplo, A, op_fct, op_args, sequence, request );
+    chameleon_pmap( access, uplo, A, op_fct, op_args, sequence, request );
 
     return CHAMELEON_SUCCESS;
 }
diff --git a/compute/pmap.c b/compute/pmap.c
index 265186150fa808579aa3971a4915c895ef1d7c81..a2ddf33a4da40d5e8c42f3a10e91c99a5a0d75ec 100644
--- a/compute/pmap.c
+++ b/compute/pmap.c
@@ -20,7 +20,7 @@
 /**
  *  chameleon_pmap - Generate a random matrix by tiles.
  */
-void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
+void chameleon_pmap( cham_access_t access, cham_uplo_t uplo, CHAM_desc_t *A,
                      cham_unary_operator_t op_fct, void *op_args,
                      RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
@@ -39,12 +39,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
             for (m = 0; m < n; m++) {
                 INSERT_TASK_map(
                     &options,
-                    ChamUpperLower, A(m, n),
+                    access, ChamUpperLower, A(m, n),
                     op_fct, op_args );
             }
             INSERT_TASK_map(
                 &options,
-                uplo, A(n, n),
+                access, uplo, A(n, n),
                 op_fct, op_args );
         }
         break;
@@ -53,12 +53,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
         for (n = 0; n < A->nt; n++) {
             INSERT_TASK_map(
                 &options,
-                uplo, A(n, n),
+                access, uplo, A(n, n),
                 op_fct, op_args );
             for (m = n+1; m < A->mt; m++) {
                 INSERT_TASK_map(
                     &options,
-                    ChamUpperLower, A(m, n),
+                    access, ChamUpperLower, A(m, n),
                     op_fct, op_args );
             }
         }
@@ -70,7 +70,7 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
             for (n = 0; n < A->nt; n++) {
                 INSERT_TASK_map(
                     &options,
-                    uplo, A(m, n),
+                    access, uplo, A(m, n),
                     op_fct, op_args );
             }
         }
diff --git a/compute/pzlatms.c b/compute/pzlatms.c
index a96b68755ce99e1a6d3143b1b2f2286794b2d108..3881a9f9d43fdd8cc4dd65e977abf8e4cff24842 100644
--- a/compute/pzlatms.c
+++ b/compute/pzlatms.c
@@ -165,7 +165,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
     for (n = 0; n < kt; n++) {
         INSERT_TASK_map(
             &options,
-            ChamUpperLower, A(n, n),
+            ChamRW, ChamUpperLower, A(n, n),
             zlaset_diag, D );
     }
 
diff --git a/compute/zlacpy.c b/compute/zlacpy.c
index de3ff01320540b9648996366ad6c02d1c821ac1a..272bc55d54f5fc3c7529d72af3a42375a5d168f5 100644
--- a/compute/zlacpy.c
+++ b/compute/zlacpy.c
@@ -280,8 +280,8 @@ int CHAMELEON_zlacpy_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *
         return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
     }
     /* Check input arguments */
-    if (A->nb != A->mb) {
-        chameleon_error("CHAMELEON_zlacpy_Tile_Async", "only square tiles supported");
+    if ((A->mb != B->mb) || (A->nb != B->nb) ){
+        chameleon_error("CHAMELEON_zlacpy_Tile_Async", "only matching tile sizes supported");
         return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
     }
     /* Check input arguments */
diff --git a/compute/zprint.c b/compute/zprint.c
index fe44e05d4026610e716744b722447b2e8b9dadd1..2329d1a9bbfe25b286f29857e0cfea41b9254b62 100644
--- a/compute/zprint.c
+++ b/compute/zprint.c
@@ -152,7 +152,7 @@ int CHAMELEON_zprint( FILE *file, const char *header,
 
     /* Call the tile interface */
     zprint_runtime_id = chamctxt->scheduler;
-    chameleon_pmap( uplo, &descAt, zprint, &options, sequence, &request );
+    chameleon_pmap( ChamR, uplo, &descAt, zprint, &options, sequence, &request );
 
     /* Submit the matrix conversion back */
     chameleon_ztile2lap( chamctxt, &descAl, &descAt,
@@ -216,7 +216,7 @@ int CHAMELEON_zprint_Tile( FILE *file, const char *header,
     chameleon_sequence_create( chamctxt, &sequence );
 
     zprint_runtime_id = chamctxt->scheduler;
-    chameleon_pmap( uplo, A, zprint, &options, sequence, &request );
+    chameleon_pmap( ChamR, uplo, A, zprint, &options, sequence, &request );
     CHAMELEON_Desc_Flush( A, sequence );
 
     chameleon_sequence_wait( chamctxt, sequence );
diff --git a/control/common.h b/control/common.h
index f31ec1af3bf5f0fd5af475e2630d1cd9c575bcd2..c86bf94ad82144c2cad722efee87dc948161ded8 100644
--- a/control/common.h
+++ b/control/common.h
@@ -102,7 +102,7 @@ extern char *chameleon_lapack_constants[];
 extern "C" {
 #endif
 
-void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
+void chameleon_pmap( cham_access_t access, cham_uplo_t uplo, CHAM_desc_t *A,
                      cham_unary_operator_t operator, void *op_args,
                      RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 
@@ -127,7 +127,7 @@ static inline int chameleon_asprintf( char **strp, const char *fmt, ... )
     int rc;
 
     va_start( ap, fmt );
-    rc = asprintf( strp, fmt, ap );
+    rc = vasprintf( strp, fmt, ap );
     va_end( ap );
 
     assert( rc != -1 );
diff --git a/include/chameleon.h b/include/chameleon.h
index 8c3669af397ac25e53bdbd55c33feced0947a48e..77b6544f0af5e0e599baf8575cd8755b5e6c5f27 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -33,6 +33,10 @@
 #include "chameleon/struct.h"
 #include "chameleon/descriptor_helpers.h"
 
+#if defined(CHAMELEON_USE_MPI)
+#include <mpi.h>
+#endif
+
 /* ****************************************************************************
  * CHAMELEON runtime common API
  */
@@ -74,11 +78,13 @@ BEGIN_C_DECLS
 /* ****************************************************************************
  * CHAMELEON functionnalities
  */
-int CHAMELEON_map_Tile( cham_uplo_t           uplo,
+int CHAMELEON_map_Tile( cham_access_t         access,
+                        cham_uplo_t           uplo,
                         CHAM_desc_t          *A,
                         cham_unary_operator_t op_fct,
                         void                 *op_args );
-int CHAMELEON_map_Tile_Async( cham_uplo_t           uplo,
+int CHAMELEON_map_Tile_Async( cham_access_t         access,
+                              cham_uplo_t           uplo,
                               CHAM_desc_t          *A,
                               cham_unary_operator_t op_fct,
                               void                 *op_args,
diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h
index d3d2b43178ad3f0dcca7532a5c3f9b233875eb54..cff1f56529ce56ce1ddf52e81bac66e8c5f219d2 100644
--- a/include/chameleon/constants.h
+++ b/include/chameleon/constants.h
@@ -183,6 +183,15 @@ typedef enum chameleon_store_e {
     ChamEltwise    = 403, /**< Element by element storage */
 } cham_store_t;
 
+/**
+ * @brief Data access modes, encoded as bit flags: ChamRW == (ChamR | ChamW).
+ */
+typedef enum chameleon_access_e {
+    ChamR  = (1 << 0),        /**< Read only  (bit 0)       */
+    ChamW  = (1 << 1),        /**< Write only (bit 1)       */
+    ChamRW = (ChamR | ChamW), /**< Read-Write (bits 0 and 1) */
+} cham_access_t;
+
 /**
  * @brief Chameleon GEMM-like algorithms
  */
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index c4fa9ca9f87db2ef6427aefac5fd5a348c4df6f6..b7281f13566b0c9a4ed274d7a9be762a5c0f3cc9 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -97,7 +97,7 @@ typedef int (*cham_unary_operator_t)( const CHAM_desc_t *desc,
                                       CHAM_tile_t *data, void *op_args );
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args );
 
 #include "chameleon/tasks_z.h"
diff --git a/runtime/openmp/codelets/codelet_map.c b/runtime/openmp/codelets/codelet_map.c
index 7abf7c38e51cf97e2762106b8787014b9d086624..ae37e4ddf755ef9439b0d09f5845e0d3b2cddfa6 100644
--- a/runtime/openmp/codelets/codelet_map.c
+++ b/runtime/openmp/codelets/codelet_map.c
@@ -18,15 +18,34 @@
 #include "chameleon_openmp.h"
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
     CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
+    switch( accessA ) {
+    case ChamW:
+#pragma omp task depend( out: tileA[0] )
+    {
+        op_fct( A, uplo, Am, An, tileA, op_args );
+    }
+    break;
+
+    case ChamR:
+#pragma omp task depend( in: tileA[0] )
+    {
+        op_fct( A, uplo, Am, An, tileA, op_args );
+    }
+
+    break;
+
+    case ChamRW:
+    default:
 #pragma omp task depend( inout: tileA[0] )
     {
         op_fct( A, uplo, Am, An, tileA, op_args );
     }
+    }
 
     (void)options;
 }
diff --git a/runtime/parsec/codelets/codelet_map.c b/runtime/parsec/codelets/codelet_map.c
index f2c10bfe901717448229dc177ff875cf31bf3017..a5a4b7f9a90c9a36905fc2b619ffc0d92cca2054 100644
--- a/runtime/parsec/codelets/codelet_map.c
+++ b/runtime/parsec/codelets/codelet_map.c
@@ -38,18 +38,20 @@ CORE_map_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
+    int parsec_accessA = cham_to_parsec_access( accessA );
+
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_map_parsec, options->priority, "map",
         sizeof(CHAM_desc_t*),             &A,    VALUE,
         sizeof(cham_uplo_t),              &uplo, VALUE,
         sizeof(int),                      &Am,   VALUE,
         sizeof(int),                      &An,   VALUE,
-        PASSED_BY_REF, RTBLKADDR(A, void, Am, An), chameleon_parsec_get_arena_index( A ) | INOUT,
+        PASSED_BY_REF, RTBLKADDR(A, void, Am, An), chameleon_parsec_get_arena_index( A ) | parsec_accessA,
         sizeof(cham_unary_operator_t),    &op_fct,  VALUE,
         sizeof(void*),                    &op_args, VALUE,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/include/chameleon_parsec.h b/runtime/parsec/include/chameleon_parsec.h
index e0d5cd190f38f40b04951eb7ec7fa9f8a7e71a34..30518fb809779ec0ba9c5ce45701a1187fb07621 100644
--- a/runtime/parsec/include/chameleon_parsec.h
+++ b/runtime/parsec/include/chameleon_parsec.h
@@ -42,6 +42,16 @@ chameleon_parsec_get_arena_index(const CHAM_desc_t *desc) {
     return ((chameleon_parsec_desc_t *)desc->schedopt)->arena_index;
 }
 
+static inline int cham_to_parsec_access( cham_access_t accessA ) {
+    /* Translate the Chameleon access mode into the PaRSEC access flag;
+     * anything that is neither ChamR nor ChamW is treated as read-write. */
+    switch ( accessA ) {
+    case ChamR: return INPUT;
+    case ChamW: return OUTPUT;
+    default:    return INOUT;
+    }
+}
+
 /*
  * Access to block pointer and leading dimension
  */
diff --git a/runtime/quark/codelets/codelet_map.c b/runtime/quark/codelets/codelet_map.c
index b4c2807cbe4e25b277226240b6a34195b9b281ef..f9781af957bd35cc7ce2d05ef751e3fef88cf9cb 100644
--- a/runtime/quark/codelets/codelet_map.c
+++ b/runtime/quark/codelets/codelet_map.c
@@ -32,7 +32,7 @@ void CORE_map_quark(Quark *quark)
 }
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -43,7 +43,7 @@ void INSERT_TASK_map( const RUNTIME_option_t *options,
         sizeof(cham_uplo_t),              &uplo, VALUE,
         sizeof(int),                      &Am,   VALUE,
         sizeof(int),                      &An,   VALUE,
-        sizeof(void*), RTBLKADDR(A, void, Am, An), INOUT,
+        sizeof(void*), RTBLKADDR(A, void, Am, An), cham_to_quark_access( accessA ),
         sizeof(cham_unary_operator_t),    &op_fct,  VALUE,
         sizeof(void*),                    &op_args, VALUE,
         0);
diff --git a/runtime/quark/include/chameleon_quark.h b/runtime/quark/include/chameleon_quark.h
index 3405f2bb0150dbfabb9061b03a8e9a65ab5be40c..8e415b7c564c486fa9ffd9f4de9585adc3a9410e 100644
--- a/runtime/quark/include/chameleon_quark.h
+++ b/runtime/quark/include/chameleon_quark.h
@@ -36,6 +36,16 @@ typedef struct quark_option_s {
     Quark *quark;
 } quark_option_t;
 
+static inline int cham_to_quark_access( cham_access_t accessA ) {
+    /* Translate the Chameleon access mode into the QUARK access flag;
+     * anything that is neither ChamR nor ChamW is treated as read-write. */
+    switch ( accessA ) {
+    case ChamR: return INPUT;
+    case ChamW: return OUTPUT;
+    default:    return INOUT;
+    }
+}
+
 /*
  * Access to block pointer and leading dimension
  */
diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c
index 14f4e8e7acd073f9f20089acb0f54c010e2561d0..97de57e26efa988a6e425573d637d13115f8f2d3 100644
--- a/runtime/starpu/codelets/codelet_map.c
+++ b/runtime/starpu/codelets/codelet_map.c
@@ -43,7 +43,7 @@ static void cl_map_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(map, cl_map_cpu_func)
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
 
@@ -60,7 +60,7 @@ void INSERT_TASK_map( const RUNTIME_option_t *options,
         STARPU_VALUE,    &uplo,                   sizeof(cham_uplo_t),
         STARPU_VALUE,    &Am,                     sizeof(int),
         STARPU_VALUE,    &An,                     sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, void, Am, An),
+        cham_to_starpu_access(accessA), RTBLKADDR(A, void, Am, An),
         STARPU_VALUE,    &op_fct,                 sizeof(cham_unary_operator_t),
         STARPU_VALUE,    &op_args,                sizeof(void*),
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index f98a29b177a5febd622385ca75c59a467a7fe5db..22c3ca53498ab24640f6620cc76a4052a96852bd 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -111,6 +111,13 @@ typedef struct starpu_option_request_s {
 
 /**/
 
+static inline int cham_to_starpu_access( cham_access_t accessA ) {
+    /* Explicit mapping (no reliance on ChamX == STARPU_X); other values mean RW. */
+    if ( accessA == ChamR ) { return STARPU_R; }
+    if ( accessA == ChamW ) { return STARPU_W; }
+    return STARPU_RW;
+}
+
 /*
  * MPI Redefinitions
  */
diff --git a/testing/test_fembem b/testing/test_fembem
index 906d73c7abb0821e8df787f05fab2d503a2e76ff..a0056374bf0163ba878d842cbc81999fece362b4 160000
--- a/testing/test_fembem
+++ b/testing/test_fembem
@@ -1 +1 @@
-Subproject commit 906d73c7abb0821e8df787f05fab2d503a2e76ff
+Subproject commit a0056374bf0163ba878d842cbc81999fece362b4
diff --git a/testing/values.c b/testing/values.c
index 5d53e80b3d25dbe72d4d0a527e3698b0fe9ccbb6..5e4bdda1408b6dce022f0083dec90c9fe703555c 100644
--- a/testing/values.c
+++ b/testing/values.c
@@ -729,7 +729,11 @@ testing_salea()
 long
 testing_ialea()
 {
-    return random();
+    long r = random();
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast( &r, 1, MPI_LONG, 0, MPI_COMM_WORLD );
+#endif
+    return r;
 }
 
 /**