cham_tile_interface: problem when reusing a memory chunk of the same size but a different layout
I ran into a problem with the latest git versions of Chameleon and StarPU that I think is directly related to (and can be solved thanks to) the recent StarPU commit a56a30b3.
Basically, I was getting an "invalid pitch argument" error from StarPU's calls to cudaMemcpy2D when transferring data from the CPU to the GPU.
The m and n fields of the tile stored in Chameleon's starpu_cham_tile_interface_s (tile.m and tile.n) are used as arguments to cudaMemcpy2D.
The handle in question, a 2D block of size 45x320, was reusing the memory chunk of a 320x45 block through StarPU's reuse_mem_chunk function. Both blocks have the same size and, according to Chameleon's alloc_footprint, the same footprint. As discussed in the commit, a simple memcpy inside reuse_mem_chunk will fail (as m and n are different).
I have implemented .reuse_data_on_node in starpu_interface_cham_tile_ops to solve this problem, as discussed in the StarPU commit. Everything works fine after the following patch:
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index 3ba1927e..e7d0a0ee 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -28,6 +28,8 @@
###
cmake_minimum_required(VERSION 3.1)
+include(CheckStructHasMember)
+
set(CHAMELEON_STARPU_VERSION "1.3" CACHE STRING "necessary STARPU API version")
find_package(STARPU ${CHAMELEON_STARPU_VERSION} REQUIRED)
@@ -71,6 +73,15 @@ if ( STARPU_FOUND )
message("-- ${Blue}Add definition HAVE_STARPU_DATA_PEEK${ColourReset}")
endif()
+ set(STARPU_REUSE_SAVE_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
+ list(APPEND CMAKE_REQUIRED_INCLUDES ${STARPU_INCLUDE_DIRS})
+ message("- ${CMAKE_REQUIRED_INCLUDES}")
+ check_struct_has_member( "struct starpu_data_interface_ops" reuse_data_on_node starpu.h HAVE_STARPU_REUSE_DATA_ON_NODE )
+ if ( HAVE_STARPU_REUSE_DATA_ON_NODE )
+ message("-- ${Blue}Add definition HAVE_STARPU_REUSE_DATA_ON_NODE${ColourReset}")
+ endif()
+ set(CMAKE_REQUIRED_INCLUDES ${STARPU_REUSE_SAVE_CMAKE_REQUIRED_INCLUDES})
+
if (CHAMELEON_USE_MPI)
# Add MPI in case StarPU don't have a public dependency on it
set(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES};MPI::MPI_C")
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index e9fc23d5..4ae050fe 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -41,6 +41,7 @@
#cmakedefine HAVE_STARPU_MPI_COMM_GET_ATTR
#cmakedefine HAVE_STARPU_MPI_INIT_CONF
#cmakedefine HAVE_STARPU_MPI_WAIT_FOR_ALL
+#cmakedefine HAVE_STARPU_REUSE_DATA_ON_NODE
#cmakedefine HAVE_STARPU_MPI_INTERFACE_DATATYPE_NODE_REGISTER
#cmakedefine HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER
diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c
index 0065c990..981ebfde 100644
--- a/runtime/starpu/interface/cham_tile_interface.c
+++ b/runtime/starpu/interface/cham_tile_interface.c
@@ -624,12 +624,28 @@ static const struct starpu_data_copy_methods cti_copy_methods =
.any_to_any = cti_copy_any_to_any,
};
+#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE)
+static void
+cti_reuse_data_on_node( void *data_interface, const void *new_interface, unsigned node )
+{
+ starpu_cham_tile_interface_t *cham_tile_interface = data_interface;
+ const starpu_cham_tile_interface_t *new_cti_interface = new_interface;
+
+ cham_tile_interface->tile.mat = new_cti_interface->tile.mat;
+ cham_tile_interface->tile.ld = cham_tile_interface->tile.m;
+ cham_tile_interface->dev_handle = new_cti_interface->dev_handle;
+}
+#endif
+
struct starpu_data_interface_ops starpu_interface_cham_tile_ops =
{
.init = cti_init,
.register_data_handle = cti_register_data_handle,
.allocate_data_on_node = cti_allocate_data_on_node,
.free_data_on_node = cti_free_data_on_node,
+#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE)
+ .reuse_data_on_node = cti_reuse_data_on_node,
+#endif
.to_pointer = cti_to_pointer,
.pointer_is_inside = cti_pointer_is_inside,
.get_size = cti_get_size,
I could not create a merge request, so I am opening this issue instead. Thanks.
EDIT: I've updated the line:
cham_tile_interface->tile.ld = cham_tile_interface->tile.m;
I believe "ld" should always be taken from the tile's own m, not from the interface being reused.