From bab9be6034cb9fb47811c7d01ac611788741bdb9 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Fri, 17 Mar 2017 13:36:01 +0100
Subject: [PATCH] Reimplement taking iscached into account on top of memaccess

---
 CMakeLists.txt                              |  5 ++++
 control/config.h.in                         |  1 +
 runtime/starpu/codelets/codelet_dataflush.c | 12 ++++++++
 runtime/starpu/include/morse_starpu.h       | 32 +++++++++++++++------
 4 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7d6eef21..55d8bca91 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -771,6 +771,11 @@ if( CHAMELEON_SCHED_STARPU )
                 " - starpu_mpi_comm_rank() test fails in StarPU${ColourReset}")
                 message("-- ${Red}Check in CMakeFiles/CMakeError.log to figure out why it fails${ColourReset}")
             endif()
+            check_function_exists(starpu_mpi_cached_receive STARPU_MPI_CACHED_RECEIVE)
+            if ( STARPU_MPI_CACHED_RECEIVE )
+                set(HAVE_STARPU_MPI_CACHED_RECEIVE 1)  # picked up by the #cmakedefine in control/config.h.in
+                message("-- ${Blue}Add definition HAVE_STARPU_MPI_CACHED_RECEIVE${ColourReset}")
+            endif()
         endif()
         if(HWLOC_FOUND AND HWLOC_LIBRARY_DIRS)
             # the RPATH to be used when installing
diff --git a/control/config.h.in b/control/config.h.in
index a59b3e8bb..6691c4c16 100644
--- a/control/config.h.in
+++ b/control/config.h.in
@@ -40,6 +40,7 @@
 #cmakedefine HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS
 #cmakedefine HAVE_STARPU_MPI_DATA_REGISTER
 #cmakedefine HAVE_STARPU_MPI_COMM_RANK
+#cmakedefine HAVE_STARPU_MPI_CACHED_RECEIVE
 
 /* MAGMA functions */
 #cmakedefine HAVE_MAGMA_GETRF_INCPIV_GPU
diff --git a/runtime/starpu/codelets/codelet_dataflush.c b/runtime/starpu/codelets/codelet_dataflush.c
index 6418890ff..7f746330c 100644
--- a/runtime/starpu/codelets/codelet_dataflush.c
+++ b/runtime/starpu/codelets/codelet_dataflush.c
@@ -40,6 +40,18 @@ static void data_release(void *handle)
 }
 #endif
 
+#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
+/* Return starpu_mpi_cached_receive() for the handle stored at tile (Am, An) of A, or 0 if that slot is empty. */
+int RUNTIME_desc_iscached(const MORSE_desc_t *A, int Am, int An)
+{
+    starpu_data_handle_t *ptrtile = (starpu_data_handle_t*)(A->schedopt);
+    ptrtile += ((int64_t)(A->lmt) * (int64_t)An + (int64_t)Am);
+
+    /* Guard: an empty slot has no handle to query (presumably handles are registered lazily -- verify). */
+    return *ptrtile ? starpu_mpi_cached_receive(*ptrtile) : 0;
+}
+#endif
+
 void MORSE_TASK_dataflush(const MORSE_option_t *options,
                           const MORSE_desc_t *A, int Am, int An)
 {
diff --git a/runtime/starpu/include/morse_starpu.h b/runtime/starpu/include/morse_starpu.h
index 987d179f0..687dc2648 100644
--- a/runtime/starpu/include/morse_starpu.h
+++ b/runtime/starpu/include/morse_starpu.h
@@ -85,6 +85,20 @@ typedef struct starpu_conf starpu_conf_t;
 #define RTBLKADDR( desc, type, m, n ) ( (starpu_data_handle_t)RUNTIME_desc_getaddr( desc, m, n ) )
 
 void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp);
+#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
+/* Defined in codelets/codelet_dataflush.c; queries starpu_mpi_cached_receive() for tile (Am, An) of A. */
+int RUNTIME_desc_iscached(const MORSE_desc_t *A, int Am, int An);
+#endif
+
+#if defined(CHAMELEON_USE_MPI) && defined(MORSE_WAR_DEPENDENCIES)
+#  ifndef HAVE_STARPU_MPI_CACHED_RECEIVE
+#    error "WAR dependencies need starpu_mpi_cached_receive support from StarPU"
+#  endif
+/* Sets the caller's __morse_need_submit when the tile is reported cached; do/while(0) keeps it one statement. */
+#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do { if (RUNTIME_desc_iscached(A, Am, An)) __morse_need_submit = 1; } while (0)
+#else
+#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
+#endif
 
 #ifdef CHAMELEON_ENABLE_PRUNING_STATS
 
@@ -118,21 +132,23 @@ void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp)
 #define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
 #endif
 
-#define RUNTIME_BEGIN_ACCESS_DECLARATION \
+#define RUNTIME_BEGIN_ACCESS_DECLARATION        \
     RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
 
-#define RUNTIME_ACCESS_R(A, Am, An) \
+#define RUNTIME_ACCESS_R(A, Am, An)
 
-#define RUNTIME_ACCESS_W(A, Am, An) \
-    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
+#define RUNTIME_ACCESS_W(A, Am, An)             \
+    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);  /* NOTE(review): expands to more than one statement; brace it when used under if/else */ \
+    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) /* empty unless CHAMELEON_USE_MPI and MORSE_WAR_DEPENDENCIES are both defined */
 
-#define RUNTIME_ACCESS_RW(A, Am, An) \
-    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
+#define RUNTIME_ACCESS_RW(A, Am, An)            \
+    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);  /* read-write is recorded with the same hooks as a plain write */ \
+    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) /* empty unless CHAMELEON_USE_MPI and MORSE_WAR_DEPENDENCIES are both defined */
 
-#define RUNTIME_RANK_CHANGED(rank) \
+#define RUNTIME_RANK_CHANGED(rank)              \
     RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
 
-#define RUNTIME_END_ACCESS_DECLARATION \
+#define RUNTIME_END_ACCESS_DECLARATION          \
     RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION;
 
 #endif /* _MORSE_STARPU_H_ */
-- 
GitLab