diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7d6eef21ee1892a7a0976681b3fa775956f4cbe..55d8bca913e2a784ce9a322231fdfcc7987b1db7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -771,6 +771,11 @@ if( CHAMELEON_SCHED_STARPU )
                 " - starpu_mpi_comm_rank() test fails in StarPU${ColourReset}")
                 message("-- ${Red}Check in CMakeFiles/CMakeError.log to figure out why it fails${ColourReset}")
             endif()
+            check_function_exists(starpu_mpi_cached_receive STARPU_MPI_CACHED_RECEIVE)
+            if ( STARPU_MPI_CACHED_RECEIVE )
+                set(HAVE_STARPU_MPI_CACHED_RECEIVE 1) # matches the #cmakedefine entry in control/config.h.in
+                message("-- ${Blue}Add definition HAVE_STARPU_MPI_CACHED_RECEIVE${ColourReset}")
+            endif()
         endif()
         if(HWLOC_FOUND AND HWLOC_LIBRARY_DIRS)
             # the RPATH to be used when installing
diff --git a/control/config.h.in b/control/config.h.in
index a59b3e8bb41f12cd292666c77ce2806947288967..6691c4c16293cf8a3c8aec769a1496d16d1e6973 100644
--- a/control/config.h.in
+++ b/control/config.h.in
@@ -40,6 +40,7 @@
 #cmakedefine HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS
 #cmakedefine HAVE_STARPU_MPI_DATA_REGISTER
 #cmakedefine HAVE_STARPU_MPI_COMM_RANK
+#cmakedefine HAVE_STARPU_MPI_CACHED_RECEIVE
 
 /* MAGMA functions */
 #cmakedefine HAVE_MAGMA_GETRF_INCPIV_GPU
diff --git a/control/descriptor.h b/control/descriptor.h
index e60b3909fd3d3483a935c309a532a1dafe8699fe..7d1cb20577e03e7c25e45a93f2253d295130f784 100644
--- a/control/descriptor.h
+++ b/control/descriptor.h
@@ -210,26 +210,26 @@ inline static int morse_desc_islocal( const MORSE_desc_t *A, int m, int n )
 
 #define MORSE_ACCESS_R(A, Am, An) do { \
     if (morse_desc_islocal(A, Am, An)) __morse_need_submit = 1; \
-    RUNTIME_ACCESS_R(A, Am, An) \
+    RUNTIME_ACCESS_R(A, Am, An); /* runtime read hook; terminated here because the hook has no ';' of its own (StarPU's expands to nothing) */ \
 } while(0)
 
 #define MORSE_ACCESS_W(A, Am, An) do { \
     if (morse_desc_islocal(A, Am, An)) __morse_need_submit = 1; \
-    RUNTIME_ACCESS_W(A, Am, An) \
+    RUNTIME_ACCESS_W(A, Am, An); /* runtime write hook; StarPU's expansion ends in an unterminated statement, so the ';' is supplied here */ \
 } while(0)
 
 #define MORSE_ACCESS_RW(A, Am, An) do { \
     if (morse_desc_islocal(A, Am, An)) __morse_need_submit = 1; \
-    RUNTIME_ACCESS_RW(A, Am, An) \
+    RUNTIME_ACCESS_RW(A, Am, An); /* runtime read-write hook; StarPU's expansion ends in an unterminated statement, so the ';' is supplied here */ \
 } while(0)
 
 #define MORSE_RANK_CHANGED(rank) do {\
     __morse_need_submit = 1; \
-    RUNTIME_RANK_CHANGED(rank) \
+    RUNTIME_RANK_CHANGED(rank); /* runtime rank-change hook, terminated here like the access hooks */ \
 } while (0)
 
 #define MORSE_END_ACCESS_DECLARATION \
-    RUNTIME_END_ACCESS_DECLARATION \
+    RUNTIME_END_ACCESS_DECLARATION; /* NOTE(review): StarPU's definition already ends in ';', so that build gets a harmless empty statement here */ \
     if (!__morse_need_submit) return; \
 }
 
diff --git a/runtime/starpu/codelets/codelet_dataflush.c b/runtime/starpu/codelets/codelet_dataflush.c
index 6418890ff0bffd73c9d9a8a9a61ef0168f03bec6..7f746330ce122b77cb31a00313f4d0a4ab4dc3f4 100644
--- a/runtime/starpu/codelets/codelet_dataflush.c
+++ b/runtime/starpu/codelets/codelet_dataflush.c
@@ -40,6 +40,18 @@ static void data_release(void *handle)
 }
 #endif
 
+#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
+/* Returns starpu_mpi_cached_receive() for the handle stored for tile (Am, An)
+ * in A->schedopt, or 0 when that slot is NULL so StarPU never gets a NULL handle. */
+int RUNTIME_desc_iscached(const MORSE_desc_t *A, int Am, int An)
+{
+    starpu_data_handle_t *ptrtile = (starpu_data_handle_t*)(A->schedopt);
+    ptrtile += ((int64_t)(A->lmt) * (int64_t)An + (int64_t)Am); /* slot index = lmt * An + Am */
+
+    return (*ptrtile) ? starpu_mpi_cached_receive(*ptrtile) : 0;
+}
+#endif
+
 void MORSE_TASK_dataflush(const MORSE_option_t *options,
                           const MORSE_desc_t *A, int Am, int An)
 {
diff --git a/runtime/starpu/include/morse_starpu.h b/runtime/starpu/include/morse_starpu.h
index 987d179f0abd8f93927e3a72e406b27cbccdb3d2..c1980d3b615f7ef49d51d2797195c7851b2471a7 100644
--- a/runtime/starpu/include/morse_starpu.h
+++ b/runtime/starpu/include/morse_starpu.h
@@ -85,6 +85,22 @@ typedef struct starpu_conf starpu_conf_t;
 #define RTBLKADDR( desc, type, m, n ) ( (starpu_data_handle_t)RUNTIME_desc_getaddr( desc, m, n ) )
 
 void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp);
+
+/*
+ * RUNTIME_ACCESS_WRITE_CACHED(A, Am, An): hook run when a write to tile
+ * (Am, An) is declared.  With MPI and starpu_mpi_cached_receive() both
+ * available it sets __morse_need_submit when RUNTIME_desc_iscached() returns
+ * non-zero; in every other configuration it expands to nothing.
+ */
+#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
+int RUNTIME_desc_iscached(const MORSE_desc_t *A, int Am, int An);
+#  define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do { if (RUNTIME_desc_iscached(A, Am, An)) __morse_need_submit = 1; } while(0)
+#else
+#  if defined(CHAMELEON_USE_MPI)
+#    warning "WAR dependencies need starpu_mpi_cached_receive support from StarPU 1.2.1 or greater"
+#  endif
+#  define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
+#endif
 
 #ifdef CHAMELEON_ENABLE_PRUNING_STATS
 
@@ -118,21 +134,23 @@ void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp)
 #define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
 #endif
 
-#define RUNTIME_BEGIN_ACCESS_DECLARATION \
+#define RUNTIME_BEGIN_ACCESS_DECLARATION        /* forwards to the pruning-statistics hook and nothing else */ \
     RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
 
-#define RUNTIME_ACCESS_R(A, Am, An) \
+#define RUNTIME_ACCESS_R(A, Am, An) /* expands to nothing: a declared read triggers no work in this header */
 
-#define RUNTIME_ACCESS_W(A, Am, An) \
-    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
+#define RUNTIME_ACCESS_W(A, Am, An)             /* NOTE(review): two statements, the last unterminated; use only inside a braced block and follow with ';' */ \
+    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);  \
+    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) /* may set __morse_need_submit; empty unless MPI and starpu_mpi_cached_receive are both available */
 
-#define RUNTIME_ACCESS_RW(A, Am, An) \
-    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
+#define RUNTIME_ACCESS_RW(A, Am, An)            /* NOTE(review): same expansion as RUNTIME_ACCESS_W; two statements, the last unterminated */ \
+    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);  \
+    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) /* may set __morse_need_submit; empty unless MPI and starpu_mpi_cached_receive are both available */
 
-#define RUNTIME_RANK_CHANGED(rank) \
+#define RUNTIME_RANK_CHANGED(rank)              /* forwards to the pruning-statistics hook and nothing else */ \
     RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
 
-#define RUNTIME_END_ACCESS_DECLARATION \
+#define RUNTIME_END_ACCESS_DECLARATION          /* NOTE(review): the body below already ends in ';', so a caller's own ';' becomes an empty statement */ \
     RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION;
 
 #endif /* _MORSE_STARPU_H_ */