diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index 3ba1927ecdca670bba780e27990bfc3ab46ec0b7..98cde2e99338479488dd82191307eb2923c90dc4 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -28,6 +28,8 @@ ### cmake_minimum_required(VERSION 3.1) +include(CheckStructHasMember) + set(CHAMELEON_STARPU_VERSION "1.3" CACHE STRING "necessary STARPU API version") find_package(STARPU ${CHAMELEON_STARPU_VERSION} REQUIRED) @@ -38,6 +40,7 @@ if ( STARPU_FOUND ) message("-- ${Blue}Add definition CHAMELEON_SCHED_STARPU" " - Activate StarPU in Chameleon${ColourReset}") get_target_property(CMAKE_REQUIRED_LIBRARIES MORSE::STARPU INTERFACE_LINK_LIBRARIES) + get_target_property(CMAKE_REQUIRED_INCLUDES MORSE::STARPU INTERFACE_INCLUDE_DIRECTORIES) check_function_exists(starpu_data_idle_prefetch_on_node HAVE_STARPU_IDLE_PREFETCH) if ( HAVE_STARPU_IDLE_PREFETCH ) message("-- ${Blue}Add definition HAVE_STARPU_IDLE_PREFETCH${ColourReset}") @@ -70,6 +73,10 @@ if ( STARPU_FOUND ) if ( HAVE_STARPU_DATA_PEEK ) message("-- ${Blue}Add definition HAVE_STARPU_DATA_PEEK${ColourReset}") endif() + check_struct_has_member( "struct starpu_data_interface_ops" reuse_data_on_node "starpu_data_interfaces.h" HAVE_STARPU_REUSE_DATA_ON_NODE LANGUAGE "C" ) + if ( HAVE_STARPU_REUSE_DATA_ON_NODE ) + message("-- ${Blue}Add definition HAVE_STARPU_REUSE_DATA_ON_NODE${ColourReset}") + endif() if (CHAMELEON_USE_MPI) # Add MPI in case StarPU don't have a public dependency on it @@ -120,6 +127,7 @@ if ( STARPU_FOUND ) endif() endif() unset(CMAKE_REQUIRED_LIBRARIES) + unset(CMAKE_REQUIRED_INCLUDES) endif ( STARPU_FOUND ) configure_file("include/chameleon_starpu.h.in" diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index e9fc23d52967610ab53ad3a99fd2736f80041782..3156108174b12ea2f34637d24a2bc7b66458a192 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in 
@@ -34,6 +34,7 @@ #cmakedefine HAVE_STARPU_DATA_SET_OOC_FLAG #cmakedefine HAVE_STARPU_INTERFACE_COPY2D #cmakedefine HAVE_STARPU_DATA_PEEK +#cmakedefine HAVE_STARPU_REUSE_DATA_ON_NODE #cmakedefine HAVE_STARPU_MPI_DATA_MIGRATE #cmakedefine HAVE_STARPU_MPI_DATA_REGISTER #cmakedefine HAVE_STARPU_MPI_COMM_RANK diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c index 0065c9901f582f803a928813a434aeba41f8b66a..30348afe17667e5a2f005a19f2ea7b470c07e06a 100644 --- a/runtime/starpu/interface/cham_tile_interface.c +++ b/runtime/starpu/interface/cham_tile_interface.c @@ -115,7 +115,7 @@ cti_init( void *data_interface ) static void cti_register_data_handle( starpu_data_handle_t handle, - unsigned home_node, + int home_node, void *data_interface ) { starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) data_interface; @@ -194,6 +194,23 @@ cti_free_data_on_node( void *data_interface, unsigned node ) cham_tile_interface->dev_handle = 0; } +#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE) +static void +cti_reuse_data_on_node( void *dst_data_interface, const void *cached_interface, unsigned node ) +{ + (void)node; + starpu_cham_tile_interface_t *dst_cham_tile = + (starpu_cham_tile_interface_t *) dst_data_interface; + const starpu_cham_tile_interface_t *cached_cham_tile = + (const starpu_cham_tile_interface_t *) cached_interface; + + /* Adopt the cached allocation: reuse its matrix pointer and device handle, and set ld to the destination's own m (node is unused; the cached interface is only read) */ + dst_cham_tile->tile.mat = cached_cham_tile->tile.mat; + dst_cham_tile->tile.ld = dst_cham_tile->tile.m; + dst_cham_tile->dev_handle = cached_cham_tile->dev_handle; +} +#endif + static void * cti_to_pointer( void *data_interface, unsigned node ) { @@ -565,6 +582,9 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, void *src_mat = CHAM_tile_get_ptr( &(cham_tile_src->tile) ); void *dst_mat = CHAM_tile_get_ptr( &(cham_tile_dst->tile) ); + assert( ld_src >= m ); + assert( ld_dst >= m ); + #if defined(CHAMELEON_KERNELS_TRACE) fprintf( 
stderr, "[ANY->ANY] src(%s, type:%s, m=%d, n=%d, ld=%d, ptr:%p) dest(%s, type:%s, m=%d, n=%d, ld=%d, ptr:%p)\n", @@ -574,12 +594,13 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, cham_tile_dst->tile.m, cham_tile_dst->tile.n, cham_tile_dst->tile.ld, dst_mat ); #endif + m = m * elemsize; + ld_src = ld_src * elemsize; + ld_dst = ld_dst * elemsize; #if defined(HAVE_STARPU_INTERFACE_COPY2D) - ld_src *= elemsize; - ld_dst *= elemsize; if (starpu_interface_copy2d( (uintptr_t) src_mat, 0, src_node, (uintptr_t) dst_mat, 0, dst_node, - m * elemsize, n, ld_src, ld_dst, async_data ) ) { + m, n, ld_src, ld_dst, async_data ) ) { ret = -EAGAIN; } #else @@ -588,7 +609,7 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, /* Optimize unpartitioned and y-partitioned cases */ if ( starpu_interface_copy( (uintptr_t) src_mat, 0, src_node, (uintptr_t) dst_mat, 0, dst_node, - m * n * elemsize, async_data ) ) + m * n, async_data ) ) { ret = -EAGAIN; } @@ -596,9 +617,6 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, else { unsigned y; - ld_src *= elemsize; - ld_dst *= elemsize; - for (y = 0; y < n; y++) { uint32_t src_offset = y * ld_src; @@ -606,7 +624,7 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, if ( starpu_interface_copy( (uintptr_t) src_mat, src_offset, src_node, (uintptr_t) dst_mat, dst_offset, dst_node, - m * elemsize, async_data ) ) + m, async_data ) ) { ret = -EAGAIN; } @@ -614,7 +632,7 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, } #endif - starpu_interface_data_copy( src_node, dst_node, (size_t) n*m*elemsize ); + starpu_interface_data_copy( src_node, dst_node, m * n ); return ret; } @@ -630,6 +648,10 @@ struct starpu_data_interface_ops starpu_interface_cham_tile_ops = .register_data_handle = cti_register_data_handle, .allocate_data_on_node = cti_allocate_data_on_node, .free_data_on_node = cti_free_data_on_node, +#if 
defined(HAVE_STARPU_REUSE_DATA_ON_NODE) + .reuse_data_on_node = cti_reuse_data_on_node, + .alloc_compare = cti_alloc_compare, +#endif .to_pointer = cti_to_pointer, .pointer_is_inside = cti_pointer_is_inside, .get_size = cti_get_size, @@ -637,7 +659,6 @@ struct starpu_data_interface_ops starpu_interface_cham_tile_ops = .footprint = cti_footprint, .alloc_footprint = cti_alloc_footprint, .compare = cti_compare, - .alloc_compare = cti_alloc_compare, .display = cti_display, .pack_data = cti_pack_data, #if defined(HAVE_STARPU_DATA_PEEK)