diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ecad49b276fd783d159dfc0a022c8a7f6f47ad6..de8df69df660c388518ce08b652504db6f4d024f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,9 +101,6 @@ set( RP_CHAMELEON_DICTIONNARY ${CMAKE_SOURCE_DIR}/cmake_modules/local_subs.py ) set( RP_CHAMELEON_PRECISIONS "s;d;c;z" ) include(RulesPrecisions) -option(CHAMELEON_USE_MIGRATE - "This options enables the data migration in QR algorithms" ON) - # Options to select the runtime # ----------------------------- @@ -175,6 +172,14 @@ if (CHAMELEON_ENABLE_MPI AND NOT CHAMELEON_USE_MPI) message("-- ${BoldGreen}CHAMELEON_USE_MPI is set to OFF, turn it ON to use MPI (unsupported by Quark)${ColourReset}") endif() +cmake_dependent_option(CHAMELEON_USE_MIGRATE + "This options enables the data migration in QR algorithms" ON + "CHAMELEON_USE_MPI" OFF) + +cmake_dependent_option(CHAMELEON_USE_MPI_DATATYPES + "Enable MPI datatypes wheneve supported by the runtime" ON + "CHAMELEON_USE_MPI" OFF) + # Enable Cuda kernels if StarPU (only if StarPU or PaRSEC runtime is enabled) # TODO: Default should be changed to ON/OFF when it will be ok cmake_dependent_option(CHAMELEON_USE_CUDA @@ -658,7 +663,11 @@ if( CHAMELEON_SCHED_STARPU ) endif() check_function_exists(starpu_mpi_interface_datatype_register HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER) if ( HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER ) - message("-- ${Blue}Add definition HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER${ColourReset}") + message("-- ${Blue}Add definition HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER${ColourReset}") + else() + if( CHAMELEON_USE_MPI_DATATYPES ) + set( CHAMELEON_USE_MPI_DATATYPES OFF ) + endif() endif() check_function_exists(starpu_interface_copy2d HAVE_STARPU_INTERFACE_COPY2D) if ( HAVE_STARPU_INTERFACE_COPY2D ) diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in index 688df8866668276dbda279cc97b070c0ae130454..10eb9136608e47c242bb7e4ad553b2b7531ca7e5 100644 --- 
a/include/chameleon/config.h.in +++ b/include/chameleon/config.h.in @@ -38,6 +38,10 @@ #if !defined(CHAMELEON_USE_MPI) && defined(CHAMELEON_USE_MIGRATE) #undef CHAMELEON_USE_MIGRATE #endif +#cmakedefine CHAMELEON_USE_MPI_DATATYPES +#if !defined(CHAMELEON_USE_MPI) && defined(CHAMELEON_USE_MPI_DATATYPES) +#undef CHAMELEON_USE_MPI_DATATYPES +#endif /* GPU Support */ #cmakedefine CHAMELEON_USE_CUDA diff --git a/runtime/starpu/include/cham_tile_interface.h b/runtime/starpu/include/cham_tile_interface.h index 1a7f887aae261c7c23f043f163ffbe16c7557239..0e04100a515861b4b68988a81755a3bcb21c6324 100644 --- a/runtime/starpu/include/cham_tile_interface.h +++ b/runtime/starpu/include/cham_tile_interface.h @@ -34,7 +34,7 @@ struct starpu_cham_tile_interface_s cham_flttype_t flttype; /**< Type of the elements of the matrix */ size_t allocsize; /**< size actually currently allocated */ size_t tilesize; /**< size of the elements of the matrix */ - CHAM_tile_t tile; /**< Internal tile structure used to store + CHAM_tile_t tile; /**< Internal tile structure used to store information on non memory home_node */ }; diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index f5a4fc1e6819f07549341639eb0c08324d01a799..9b8dc1372ee5bfdaeb0022d0de0dbb75e5460525 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in @@ -40,6 +40,10 @@ #cmakedefine HAVE_STARPU_MPI_WAIT_FOR_ALL #cmakedefine HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER +#if !defined(HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER) && defined(CHAMELEON_USE_MPI_DATATYPES) +#error "This version of StarPU does not support MPI datatypes (Please compile with -DCHAMELEON_USE_MPI_DATATYPES=OFF)" +#endif + #if defined(CHAMELEON_USE_MPI) #include <starpu_mpi.h> #else diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c index 
eaab054d4c17844f19152095558cb4ad51dcc7d6..ea335c6404d8d52ec38a477c73c11c7909bc1c90 100644 --- a/runtime/starpu/interface/cham_tile_interface.c +++ b/runtime/starpu/interface/cham_tile_interface.c @@ -319,30 +319,47 @@ cti_unpack_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t c starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) starpu_data_get_interface_on_node(handle, node); - CHAM_tile_t dsttile; char *tmp = ptr; - /* Extract the size of the information t unpack */ - memcpy( &(cham_tile_interface->allocsize), tmp, sizeof(size_t) ); - tmp += sizeof(size_t); +#if defined(CHAMELEON_USE_MPI_DATATYPES) + /* + * We may end up here if an early reception occurred before the handle of the + * received data has been registered. Thus, datatype was not existent and we + * need to unpack the data ourselves + */ + STARPU_ASSERT( count == cham_tile_interface->allocsize ); + STARPU_ASSERT( cham_tile_interface->tile.format & CHAMELEON_TILE_FULLRANK ); +#else + { + CHAM_tile_t dsttile; - /* Extract the tile metadata of the remote tile */ - memcpy( &dsttile, tmp, sizeof(CHAM_tile_t) ); - tmp += sizeof(CHAM_tile_t); + /* Extract the size of the information to unpack */ + memcpy( &(cham_tile_interface->allocsize), tmp, sizeof(size_t) ); + tmp += sizeof(size_t); - cham_tile_interface->tile.format = dsttile.format; - cham_tile_interface->tile.ld = cham_tile_interface->tile.m; - STARPU_ASSERT( cham_tile_interface->tile.m == dsttile.m ); - STARPU_ASSERT( cham_tile_interface->tile.n == dsttile.n ); - STARPU_ASSERT( count == cham_tile_interface->allocsize + sizeof(size_t) + sizeof(CHAM_tile_t) ); + /* Extract the tile metadata of the remote tile */ + memcpy( &dsttile, tmp, sizeof(CHAM_tile_t) ); + tmp += sizeof(CHAM_tile_t); + /* + * Update with the local information.
Data is packed now, and does not + * need a leading dimension anymore + */ + cham_tile_interface->tile.format = dsttile.format; + cham_tile_interface->tile.ld = cham_tile_interface->tile.m; + + STARPU_ASSERT( cham_tile_interface->tile.m == dsttile.m ); + STARPU_ASSERT( cham_tile_interface->tile.n == dsttile.n ); + STARPU_ASSERT( count == cham_tile_interface->allocsize + sizeof(size_t) + sizeof(CHAM_tile_t) ); + } +#endif /* Unpack the real data */ if ( cham_tile_interface->tile.format & CHAMELEON_TILE_FULLRANK ) { cti_unpack_data_fullrank( cham_tile_interface, tmp ); } else { - STARPU_ASSERT_MSG( 1, "Unsupported format for pack." ); + STARPU_ASSERT_MSG( 1, "Unsupported format for unpack." ); } /* Free the received information */ @@ -481,7 +498,7 @@ cti_handle_get_allocsize( starpu_data_handle_t handle ) return cham_tile_interface->allocsize; } -#if defined(HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER) +#if defined(CHAMELEON_USE_MPI_DATATYPES) int cti_allocate_datatype( starpu_data_handle_t handle, MPI_Datatype *datatype ) @@ -518,7 +535,7 @@ starpu_cham_tile_interface_init() if ( starpu_interface_cham_tile_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID ) { starpu_interface_cham_tile_ops.interfaceid = starpu_data_interface_get_next_id(); -#if defined(HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER) +#if defined(CHAMELEON_USE_MPI_DATATYPES) starpu_mpi_interface_datatype_register( starpu_interface_cham_tile_ops.interfaceid, cti_allocate_datatype, cti_free_datatype ); @@ -531,7 +548,7 @@ starpu_cham_tile_interface_fini() { if ( starpu_interface_cham_tile_ops.interfaceid != STARPU_UNKNOWN_INTERFACE_ID ) { -#if defined(HAVE_STARPU_MPI_INTERFACE_DATATYPE_REGISTER) +#if defined(CHAMELEON_USE_MPI_DATATYPES) starpu_mpi_interface_datatype_unregister( starpu_interface_cham_tile_ops.interfaceid ); #endif }