diff --git a/CMakeLists.txt b/CMakeLists.txt index c7d6eef21ee1892a7a0976681b3fa775956f4cbe..55d8bca913e2a784ce9a322231fdfcc7987b1db7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -771,6 +771,11 @@ if( CHAMELEON_SCHED_STARPU ) " - starpu_mpi_comm_rank() test fails in StarPU${ColourReset}") message("-- ${Red}Check in CMakeFiles/CMakeError.log to figure out why it fails${ColourReset}") endif() + check_function_exists(starpu_mpi_cached_receive STARPU_MPI_CACHED_RECEIVE) + if ( STARPU_MPI_CACHED_RECEIVE ) + set(HAVE_STARPU_MPI_CACHED_RECEIVE 1) + message("-- ${Blue}Add definition HAVE_STARPU_MPI_CACHED_RECEIVE${ColourReset}") + endif() endif() if(HWLOC_FOUND AND HWLOC_LIBRARY_DIRS) # the RPATH to be used when installing diff --git a/control/config.h.in b/control/config.h.in index a59b3e8bb41f12cd292666c77ce2806947288967..6691c4c16293cf8a3c8aec769a1496d16d1e6973 100644 --- a/control/config.h.in +++ b/control/config.h.in @@ -40,6 +40,7 @@ #cmakedefine HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS #cmakedefine HAVE_STARPU_MPI_DATA_REGISTER #cmakedefine HAVE_STARPU_MPI_COMM_RANK +#cmakedefine HAVE_STARPU_MPI_CACHED_RECEIVE /* MAGMA functions */ #cmakedefine HAVE_MAGMA_GETRF_INCPIV_GPU diff --git a/control/descriptor.h b/control/descriptor.h index e60b3909fd3d3483a935c309a532a1dafe8699fe..7d1cb20577e03e7c25e45a93f2253d295130f784 100644 --- a/control/descriptor.h +++ b/control/descriptor.h @@ -210,26 +210,26 @@ inline static int morse_desc_islocal( const MORSE_desc_t *A, int m, int n ) #define MORSE_ACCESS_R(A, Am, An) do { \ if (morse_desc_islocal(A, Am, An)) __morse_need_submit = 1; \ - RUNTIME_ACCESS_R(A, Am, An) \ + RUNTIME_ACCESS_R(A, Am, An); \ } while(0) #define MORSE_ACCESS_W(A, Am, An) do { \ if (morse_desc_islocal(A, Am, An)) __morse_need_submit = 1; \ - RUNTIME_ACCESS_W(A, Am, An) \ + RUNTIME_ACCESS_W(A, Am, An); \ } while(0) #define MORSE_ACCESS_RW(A, Am, An) do { \ if (morse_desc_islocal(A, Am, An)) __morse_need_submit = 1; \ - RUNTIME_ACCESS_RW(A, Am, An) \ + RUNTIME_ACCESS_RW(A, Am, An); \ } while(0) #define MORSE_RANK_CHANGED(rank) do {\ __morse_need_submit = 1; \ - RUNTIME_RANK_CHANGED(rank) \ + RUNTIME_RANK_CHANGED(rank); \ } while (0) #define MORSE_END_ACCESS_DECLARATION \ - RUNTIME_END_ACCESS_DECLARATION \ + RUNTIME_END_ACCESS_DECLARATION; \ if (!__morse_need_submit) return; \ } diff --git a/runtime/starpu/codelets/codelet_dataflush.c b/runtime/starpu/codelets/codelet_dataflush.c index 6418890ff0bffd73c9d9a8a9a61ef0168f03bec6..7f746330ce122b77cb31a00313f4d0a4ab4dc3f4 100644 --- a/runtime/starpu/codelets/codelet_dataflush.c +++ b/runtime/starpu/codelets/codelet_dataflush.c @@ -40,6 +40,18 @@ static void data_release(void *handle) } #endif +#ifdef CHAMELEON_USE_MPI +#ifdef HAVE_STARPU_MPI_CACHED_RECEIVE +int RUNTIME_desc_iscached(const MORSE_desc_t *A, int Am, int An) +{ + starpu_data_handle_t *ptrtile = (starpu_data_handle_t*)(A->schedopt); + ptrtile += ((int64_t)(A->lmt) * (int64_t)An + (int64_t)Am); + + return starpu_mpi_cached_receive(*ptrtile); +} +#endif +#endif + void MORSE_TASK_dataflush(const MORSE_option_t *options, const MORSE_desc_t *A, int Am, int An) { diff --git a/runtime/starpu/include/morse_starpu.h b/runtime/starpu/include/morse_starpu.h index 987d179f0abd8f93927e3a72e406b27cbccdb3d2..c1980d3b615f7ef49d51d2797195c7851b2471a7 100644 --- a/runtime/starpu/include/morse_starpu.h +++ b/runtime/starpu/include/morse_starpu.h @@ -85,6 +85,22 @@ typedef struct starpu_conf starpu_conf_t; #define RTBLKADDR( desc, type, m, n ) ( (starpu_data_handle_t)RUNTIME_desc_getaddr( desc, m, n ) ) void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp); +#ifdef CHAMELEON_USE_MPI +#ifdef HAVE_STARPU_MPI_CACHED_RECEIVE +int RUNTIME_desc_iscached(const MORSE_desc_t *A, int Am, int An); +#endif +#endif + +#if defined(CHAMELEON_USE_MPI) +# if defined(HAVE_STARPU_MPI_CACHED_RECEIVE) +# define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do { if (RUNTIME_desc_iscached(A, Am, An)) __morse_need_submit = 1; } while(0) +# else +# warning "WAR dependencies need starpu_mpi_cached_receive support from StarPU 1.2.1 or greater" +# define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) +# endif +#else +#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) +#endif #ifdef CHAMELEON_ENABLE_PRUNING_STATS @@ -118,21 +134,23 @@ void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp) #define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) #endif -#define RUNTIME_BEGIN_ACCESS_DECLARATION \ +#define RUNTIME_BEGIN_ACCESS_DECLARATION \ RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION -#define RUNTIME_ACCESS_R(A, Am, An) \ +#define RUNTIME_ACCESS_R(A, Am, An) -#define RUNTIME_ACCESS_W(A, Am, An) \ - RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) +#define RUNTIME_ACCESS_W(A, Am, An) \ + RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An); \ + RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) -#define RUNTIME_ACCESS_RW(A, Am, An) \ - RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) +#define RUNTIME_ACCESS_RW(A, Am, An) \ + RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An); \ + RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) -#define RUNTIME_RANK_CHANGED(rank) \ +#define RUNTIME_RANK_CHANGED(rank) \ RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) -#define RUNTIME_END_ACCESS_DECLARATION \ +#define RUNTIME_END_ACCESS_DECLARATION \ RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION; #endif /* _MORSE_STARPU_H_ */