diff --git a/control/control.c b/control/control.c index c3cacb10a68b4328b6b2bc9ce24b0b7915a37f23..fbc777a11f487ae9a56a4dac4fc1ee12c708ea95 100644 --- a/control/control.c +++ b/control/control.c @@ -20,7 +20,7 @@ * @author Samuel Thibault * @author Philippe Swartvagher * @author Loris Lucido - * @date 2023-07-04 + * @date 2024-03-16 * *** * @@ -35,7 +35,8 @@ * * @ingroup Control * - * @brief Initialize CHAMELEON. + * @brief Initialize CHAMELEON with number of cpus and gpus (using + * MPI_COMM_WORLD). * ****************************************************************************** * @@ -59,7 +60,8 @@ int __chameleon_init(int cores, int gpus) * * @ingroup Control * - * @brief Initialize CHAMELEON. + * @brief Initialize CHAMELEON with number of cpus and gpus and threads per + * worker (using MPI_COMM_WORLD). * ****************************************************************************** * @@ -78,6 +80,37 @@ int __chameleon_init(int cores, int gpus) * */ int __chameleon_initpar(int ncpus, int ngpus, int nthreads_per_worker) +{ + return __chameleon_initparcomm( ncpus, ngpus, nthreads_per_worker, MPI_COMM_WORLD ); +} + +/** + * + * @ingroup Control + * + * @brief Initialize CHAMELEON with number of cpus and gpus and threads per + * worker and using a given MPI communicator. + * + ****************************************************************************** + * + * @param[in] ncpus + * Number of cores to use. + * + * @param[in] ngpus + * Number of cuda devices to use. + * + * @param[in] nthreads_per_worker + * Number of threads per worker (cpu, cuda device). + * + * @param[in] comm + * The MPI communicator. 
+ * + ****************************************************************************** + * + * @retval CHAMELEON_SUCCESS successful exit + * + */ +int __chameleon_initparcomm(int ncpus, int ngpus, int nthreads_per_worker, MPI_Comm comm) { CHAM_context_t *chamctxt; @@ -124,6 +157,7 @@ int __chameleon_initpar(int ncpus, int ngpus, int nthreads_per_worker) #endif chamctxt->ncudas = ngpus; + chamctxt->comm = comm; return RUNTIME_init( chamctxt, ncpus, ngpus, nthreads_per_worker ); } @@ -145,15 +179,23 @@ int __chameleon_finalize(void) chameleon_error("CHAMELEON_Finalize", "CHAMELEON not initialized"); return CHAMELEON_ERR_NOT_INITIALIZED; } - RUNTIME_flush(); + + /* Make sure all data are flushed */ + RUNTIME_flush( chamctxt ); + + /* Wait for anything running */ # if !defined(CHAMELEON_SIMULATION) RUNTIME_barrier(chamctxt); # endif + + /* Stop the runtime system */ RUNTIME_finalize( chamctxt ); #if defined(CHAMELEON_USE_MPI) - if (!chamctxt->mpi_outer_init) + /* Finalize MPI if initialized by Chameleon */ + if ( !chamctxt->mpi_outer_init ) { MPI_Finalize(); + } #endif chameleon_context_destroy(); diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index 137adfbba9179c7f5dc49ca0d0ad8f4bc1ac1dbe..c314957fa12d6e2e09c7ac48cec96637c3904c78 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -24,7 +24,7 @@ # @author Florent Pruvost # @author Guillaume Sylvand # @author Matthieu Kuhn -# @date 2023-08-31 +# @date 2024-03-16 # ### @@ -164,6 +164,9 @@ endif() target_link_libraries(coreblas PRIVATE MORSE::LAPACKE) target_link_libraries(coreblas PRIVATE MORSE::CBLAS) target_link_libraries(coreblas PUBLIC MORSE::M) +if (CHAMELEON_USE_MPI) + target_link_libraries(coreblas PUBLIC MPI::MPI_C) +endif() # export target coreblas install(EXPORT coreblasTargets diff --git a/example/lapack_to_chameleon/step6.h b/example/lapack_to_chameleon/step6.h index 
b834863c1f91b6606f3fe985030df8f8c9ba06cb..d60683987471c510aafa20ce55a6a87f0e358bcb 100644 --- a/example/lapack_to_chameleon/step6.h +++ b/example/lapack_to_chameleon/step6.h @@ -11,10 +11,10 @@ * * @brief Chameleon step6 example header * - * @version 1.2.0 + * @version 1.3.0 * @author Florent Pruvost * @author Mathieu Faverge - * @date 2022-02-22 + * @date 2024-03-16 * */ #ifndef _step6_h_ @@ -34,7 +34,7 @@ enum iparam_step6 { IPARAM_THRDNBR, /* Number of cores */ IPARAM_NCUDAS, /* Number of cuda devices */ - IPARAM_NMPI, /* Number of cuda devices */ + IPARAM_NMPI, /* Number of MPI PROCS */ IPARAM_N, /* Number of columns of the matrix */ IPARAM_NB, /* Number of columns in a tile */ IPARAM_IB, /* Inner-blocking size */ diff --git a/include/chameleon.h b/include/chameleon.h index 2abadc0dfd0fa4156c3fbd8647f3a126981223c2..7ac09793f4931b420310728d04760ea74172ea20 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -18,7 +18,7 @@ * @author Florent Pruvost * @author Philippe Virouleau * @author Lionel Eyraud-Dubois - * @date 2024-03-11 + * @date 2024-03-16 * */ #ifndef _chameleon_h_ @@ -117,6 +117,7 @@ int CHAMELEON_Initialized (void); int CHAMELEON_My_Mpi_Rank (void) __attribute__((deprecated)); int __chameleon_init (int nworkers, int ncudas); int __chameleon_initpar (int nworkers, int ncudas, int nthreads_per_worker); +int __chameleon_initparcomm (int nworkers, int ncudas, int nthreads_per_worker, MPI_Comm comm); int __chameleon_finalize (void); int CHAMELEON_Pause (void); int CHAMELEON_Resume (void); @@ -237,16 +238,23 @@ void CHAMELEON_Ipiv_Print ( const CHAM_ipiv_t *ipiv ); * */ #if defined(CHAMELEON_SCHED_OPENMP) -#define CHAMELEON_Init( _nworkers_, _ncudas_ ) \ + +#define CHAMELEON_Init( _nworkers_, _ncudas_ ) \ __chameleon_init( (_nworkers_), (_ncudas_) ); \ - _Pragma("omp parallel") \ - _Pragma("omp master") \ + _Pragma("omp parallel") \ + _Pragma("omp master") \ { -#define CHAMELEON_InitPar( _nworkers_, _ncudas_, _nthreads_per_worker_ ) \ +#define 
CHAMELEON_InitPar( _nworkers_, _ncudas_, _nthreads_per_worker_ ) \ __chameleon_initpar( (_nworkers_), (_ncudas_), (_nthreads_per_worker_) ); \ - _Pragma("omp parallel")\ - _Pragma("omp master")\ + _Pragma("omp parallel") \ + _Pragma("omp master") \ + { + +#define CHAMELEON_InitParComm( _nworkers_, _ncudas_, _nthreads_per_worker_, _comm_ ) \ + __chameleon_initparcomm( (_nworkers_), (_ncudas_), (_nthreads_per_worker_), (_comm_) ); \ + _Pragma("omp parallel") \ + _Pragma("omp master") \ { #define CHAMELEON_Finalize() \ @@ -255,11 +263,14 @@ void CHAMELEON_Ipiv_Print ( const CHAM_ipiv_t *ipiv ); #else -#define CHAMELEON_Init( _nworkers_, _ncudas_ ) \ +#define CHAMELEON_Init( _nworkers_, _ncudas_ ) \ __chameleon_init( (_nworkers_), (_ncudas_) ); #define CHAMELEON_InitPar( _nworkers_, _ncudas_, _nthreads_per_worker_ ) \ - __chameleon_initpar( (_nworkers_), (_ncudas_), (_nthreads_per_worker_) ); + __chameleon_initpar( (_nworkers_), (_ncudas_), (_nthreads_per_worker_) ); + +#define CHAMELEON_InitParComm( _nworkers_, _ncudas_, _nthreads_per_worker_, _comm_ ) \ + __chameleon_initparcomm( (_nworkers_), (_ncudas_), (_nthreads_per_worker_), (_comm_) ); #define CHAMELEON_Finalize() \ __chameleon_finalize(); diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h index 9b2239c1bc53591a5d0fec3609b2ef7add1953b7..010b8b1b5eb68c9f841021a7072f8192579bb545 100644 --- a/include/chameleon/runtime.h +++ b/include/chameleon/runtime.h @@ -480,7 +480,7 @@ RUNTIME_desc_flush( const CHAM_desc_t *desc, * This function flushes all data from the distributed cache of the runtime system. */ void -RUNTIME_flush( ); +RUNTIME_flush( CHAM_context_t *chamctxt ); /** * @brief Flush a single piece of data. 
diff --git a/include/chameleon/runtime_struct.h b/include/chameleon/runtime_struct.h index 3028d328bac41300310dd4f1eadeb9656466798f..c83da6fd9d1af72851219f06c9aba880331ccd8f 100644 --- a/include/chameleon/runtime_struct.h +++ b/include/chameleon/runtime_struct.h @@ -17,12 +17,23 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Philippe Virouleau - * @date 2023-07-04 + * @date 2024-03-16 * */ #ifndef _chameleon_runtime_struct_h_ #define _chameleon_runtime_struct_h_ +#if defined(CHAMELEON_USE_MPI) +#include <mpi.h> +#else +#ifndef MPI_Comm +typedef uintptr_t MPI_Comm; +#endif +#ifndef MPI_COMM_WORLD +#define MPI_COMM_WORLD 0 +#endif +#endif + BEGIN_C_DECLS /** @@ -70,6 +81,7 @@ typedef struct runtime_sequence_s { int status; /**< Return status registered by the tasks for the request */ RUNTIME_request_t *request; /**< Pointer to the request that failed if any, NULL otherwise */ void *schedopt; /**< Specific runtime data pointer to handle the sequence */ + MPI_Comm comm; /**< MPI communicator */ } RUNTIME_sequence_t; /** diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index 00a79664c17b4d051ea0858ae910b8f87460a8d9..3664d5c2bdca39950d494c4863f1cb4b2b150a42 100644 --- a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -19,7 +19,7 @@ * @author Samuel Thibault * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois - * @date 2023-08-31 + * @date 2024-03-16 * */ #ifndef _chameleon_struct_h_ @@ -30,6 +30,17 @@ #include "chameleon/constants.h" #include "chameleon/runtime_struct.h" +#if defined(CHAMELEON_USE_MPI) +#include <mpi.h> +#else +#ifndef MPI_Comm +typedef uintptr_t MPI_Comm; +#endif +#ifndef MPI_COMM_WORLD +#define MPI_COMM_WORLD 0 +#endif +#endif + BEGIN_C_DECLS #define CHAMELEON_TILE_FULLRANK (1 << 0) @@ -191,6 +202,7 @@ typedef struct chameleon_context_s { int lookahead; // depth of the look ahead in algorithms void *schedopt; // structure for runtimes int mpi_outer_init; // MPI has been initialized outside our 
functions + MPI_Comm comm; // MPI communicator } CHAM_context_t; static inline void * diff --git a/runtime/openmp/control/runtime_descriptor.c b/runtime/openmp/control/runtime_descriptor.c index 38ea0a3b1dbb198bee1d1bd5db1b55d49dd3c572..075724b9b9e5dbcfd250fd34eaf946391ea3ad26 100644 --- a/runtime/openmp/control/runtime_descriptor.c +++ b/runtime/openmp/control/runtime_descriptor.c @@ -11,12 +11,12 @@ * * @brief Chameleon OpenMP descriptor routines * - * @version 1.2.0 + * @version 1.3.0 * @author Vijay Joshi * @author Cedric Castagnede * @author Philippe Virouleau * @author Mathieu Faverge - * @date 2022-02-22 + * @date 2024-03-16 * */ #include "chameleon_openmp.h" @@ -59,7 +59,7 @@ int RUNTIME_desc_release( const CHAM_desc_t *desc ) } void -RUNTIME_desc_flush( const CHAM_desc_t *desc, +RUNTIME_desc_flush( const CHAM_desc_t *desc, const RUNTIME_sequence_t *sequence ) { (void)desc; @@ -69,8 +69,9 @@ RUNTIME_desc_flush( const CHAM_desc_t *desc, void -RUNTIME_flush( ) +RUNTIME_flush( CHAM_context_t *chamctxt ) { + (void)chamctxt; return; } diff --git a/runtime/parsec/control/runtime_descriptor.c b/runtime/parsec/control/runtime_descriptor.c index 360d673261c5493d196e7f2638b427fa9df8b0cb..b1266e284513e234c72859355a79b5f3dc342b3f 100644 --- a/runtime/parsec/control/runtime_descriptor.c +++ b/runtime/parsec/control/runtime_descriptor.c @@ -11,12 +11,12 @@ * * @brief Chameleon PaRSEC descriptor routines * - * @version 1.2.0 + * @version 1.3.0 * @author Reazul Hoque * @author Mathieu Faverge * @author Guillaume Sylvand * @author Samuel Thibault - * @date 2022-02-22 + * @date 2024-03-16 * */ #include "chameleon_parsec.h" @@ -345,8 +345,10 @@ int RUNTIME_desc_release( const CHAM_desc_t *desc ) /** * Flush cached data */ -void RUNTIME_flush() +void RUNTIME_flush( CHAM_context_t *chamctxt ) { + (void)chamctxt; + return; } void RUNTIME_desc_flush( const CHAM_desc_t *desc, diff --git a/runtime/quark/control/runtime_descriptor.c b/runtime/quark/control/runtime_descriptor.c index 
6301b9c1408feb4f8883b73a338bcc31dedcf441..4435e06629694e45492cb5884d9797d3d3501195 100644 --- a/runtime/quark/control/runtime_descriptor.c +++ b/runtime/quark/control/runtime_descriptor.c @@ -11,13 +11,13 @@ * * @brief Chameleon Quark descriptor routines * - * @version 1.2.0 + * @version 1.3.0 * @author Vijay Joshi * @author Cedric Castagnede * @author Florent Pruvost * @author Mathieu Faverge * @author Samuel Thibault - * @date 2022-02-22 + * @date 2024-03-16 * */ #include "chameleon_quark.h" @@ -60,7 +60,7 @@ int RUNTIME_desc_release( const CHAM_desc_t *desc ) } void -RUNTIME_desc_flush( const CHAM_desc_t *desc, +RUNTIME_desc_flush( const CHAM_desc_t *desc, const RUNTIME_sequence_t *sequence ) { (void)desc; @@ -70,8 +70,9 @@ RUNTIME_desc_flush( const CHAM_desc_t *desc, void -RUNTIME_flush( ) +RUNTIME_flush( CHAM_context_t *chamctxt ) { + (void)chamctxt; return; } diff --git a/runtime/starpu/codelets/codelet_zgersum.c b/runtime/starpu/codelets/codelet_zgersum.c index 8f8b2eaebc2d5701e734bbf59111c4c0474d13f2..dd44fb9f61150ebf4219af469b79bfd779cf8085 100644 --- a/runtime/starpu/codelets/codelet_zgersum.c +++ b/runtime/starpu/codelets/codelet_zgersum.c @@ -15,7 +15,7 @@ * @author Romain Peressoni * @author Mathieu Faverge * @author Antoine Jego - * @date 2023-07-06 + * @date 2024-03-16 * @precisions normal z -> c d s * */ @@ -128,7 +128,7 @@ RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An ) { #if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI) - starpu_mpi_redux_data_prio_tree( MPI_COMM_WORLD, + starpu_mpi_redux_data_prio_tree( options->sequence->comm, RTBLKADDR(A, ChamComplexDouble, Am, An), options->priority + 1, 2 /* Binary tree */ ); diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index af940bf2879a6b5a8985f163cb3d349b34d784b3..53647940eac27d0b6ac51d7f2f64042100841b5e 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ 
b/runtime/starpu/codelets/codelet_zlacpy.c @@ -21,7 +21,7 @@ * @author Florent Pruvost * @author Samuel Thibault * @author Alycia Lisito - * @date 2023-07-06 + * @date 2024-03-16 * @precisions normal z -> c d s * */ @@ -120,9 +120,9 @@ insert_task_zlacpy_on_remote_node( const RUNTIME_option_t *options, { void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL; #if defined(CHAMELEON_RUNTIME_SYNC) - starpu_mpi_data_cpy_priority( handleB, handleA, MPI_COMM_WORLD, 0, callback, NULL, options->priority ); + starpu_mpi_data_cpy_priority( handleB, handleA, options->sequence->comm, 0, callback, NULL, options->priority ); #else - starpu_mpi_data_cpy_priority( handleB, handleA, MPI_COMM_WORLD, 1, callback, NULL, options->priority ); + starpu_mpi_data_cpy_priority( handleB, handleA, options->sequence->comm, 1, callback, NULL, options->priority ); #endif } #endif diff --git a/runtime/starpu/control/runtime_async.c b/runtime/starpu/control/runtime_async.c index a439e5d8fe17986708508dceeab859ae922356dc..ea19203fcf95effe511255ff690ccdb7810f28eb 100644 --- a/runtime/starpu/control/runtime_async.c +++ b/runtime/starpu/control/runtime_async.c @@ -11,12 +11,12 @@ * * @brief Chameleon StarPU asynchronous routines * - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Cedric Castagnede * @author Florent Pruvost * @author Samuel Thibault - * @date 2022-02-22 + * @date 2024-03-16 * */ #include "chameleon_starpu.h" @@ -28,7 +28,7 @@ int RUNTIME_sequence_create( CHAM_context_t *chamctxt, RUNTIME_sequence_t *sequence ) { (void)chamctxt; - (void)sequence; + sequence->comm = chamctxt->comm; return CHAMELEON_SUCCESS; } @@ -58,10 +58,10 @@ int RUNTIME_sequence_wait( CHAM_context_t *chamctxt, #if defined(CHAMELEON_USE_MPI) # if defined(HAVE_STARPU_MPI_WAIT_FOR_ALL) - starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_wait_for_all(sequence->comm); # else starpu_task_wait_for_all(); - starpu_mpi_barrier(MPI_COMM_WORLD); + starpu_mpi_barrier(sequence->comm); 
# endif #else starpu_task_wait_for_all(); diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c index b8fd4003fa742629c5ffb518609067d67e97609d..96e1c3ff7cbad2e0c5336a2804ab5d98e08ee56d 100644 --- a/runtime/starpu/control/runtime_control.c +++ b/runtime/starpu/control/runtime_control.c @@ -21,7 +21,7 @@ * @author Matthieu Kuhn * @author Loris Lucido * @author Terry Cojean - * @date 2023-08-22 + * @date 2024-03-16 * */ #include "chameleon_starpu.h" @@ -100,7 +100,7 @@ void chameleon_starpu_parallel_worker_fini( starpu_sched_opt_t *sched_opt ) /** * */ -static int chameleon_starpu_init( struct starpu_conf *conf ) +static int chameleon_starpu_init( MPI_Comm comm, struct starpu_conf *conf ) { int hres = CHAMELEON_SUCCESS; int rc; @@ -118,7 +118,7 @@ static int chameleon_starpu_init( struct starpu_conf *conf ) # endif # if defined(HAVE_STARPU_MPI_INIT_CONF) - rc = starpu_mpi_init_conf(NULL, NULL, !flag, MPI_COMM_WORLD, conf); + rc = starpu_mpi_init_conf(NULL, NULL, !flag, comm, conf); # else rc = starpu_init(conf); if (rc < 0) { @@ -186,7 +186,7 @@ int RUNTIME_init( CHAM_context_t *chamctxt, if ((ncpus == -1)||(nthreads_per_worker == -1)) { - hres = chameleon_starpu_init( conf ); + hres = chameleon_starpu_init( chamctxt->comm, conf ); chamctxt->nworkers = ncpus; chamctxt->nthreads_per_worker = nthreads_per_worker; @@ -202,7 +202,7 @@ int RUNTIME_init( CHAM_context_t *chamctxt, conf->use_explicit_workers_bindid = 1; - hres = chameleon_starpu_init( conf ); + hres = chameleon_starpu_init( chamctxt->comm, conf ); chamctxt->nworkers = ncpus; chamctxt->nthreads_per_worker = nthreads_per_worker; @@ -300,11 +300,11 @@ void RUNTIME_barrier( CHAM_context_t *chamctxt ) #if defined(CHAMELEON_USE_MPI) # if defined(HAVE_STARPU_MPI_WAIT_FOR_ALL) - starpu_mpi_wait_for_all(MPI_COMM_WORLD); - starpu_mpi_barrier(MPI_COMM_WORLD); + starpu_mpi_wait_for_all( chamctxt->comm ); + starpu_mpi_barrier( chamctxt->comm ); # else starpu_task_wait_for_all(); 
- starpu_mpi_barrier(MPI_COMM_WORLD); + starpu_mpi_barrier( chamctxt->comm ); # endif #else starpu_task_wait_for_all(); @@ -380,9 +380,9 @@ int RUNTIME_comm_rank( CHAM_context_t *chamctxt ) #if defined(CHAMELEON_USE_MPI) # if defined(HAVE_STARPU_MPI_COMM_RANK) - starpu_mpi_comm_rank( MPI_COMM_WORLD, &rank ); + starpu_mpi_comm_rank( chamctxt->comm, &rank ); # else - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_rank( chamctxt->comm, &rank ); # endif #endif @@ -398,9 +398,9 @@ int RUNTIME_comm_size( CHAM_context_t *chamctxt ) int size; #if defined(CHAMELEON_USE_MPI) # if defined(HAVE_STARPU_MPI_COMM_RANK) - starpu_mpi_comm_size( MPI_COMM_WORLD, &size ); + starpu_mpi_comm_size( chamctxt->comm, &size ); # else - MPI_Comm_size( MPI_COMM_WORLD, &size ); + MPI_Comm_size( chamctxt->comm, &size ); # endif #else size = 1; diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index 1e98660118946a75eec93f00bed0091b3301b7c0..2e72133c6acf979cba775db9488a0da93ffd2ab1 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -20,7 +20,7 @@ * @author Raphael Boucherie * @author Samuel Thibault * @author Loris Lucido - * @date 2023-08-22 + * @date 2024-03-16 * */ #include "chameleon_starpu.h" @@ -149,7 +149,7 @@ void RUNTIME_desc_create( CHAM_desc_t *desc ) * Book the number of tags required to describe this matrix */ { - chameleon_starpu_tag_init(); + chameleon_starpu_tag_init( ); desc->mpitag = chameleon_starpu_tag_book( nbtiles ); if ( desc->mpitag == -1 ) { @@ -267,10 +267,10 @@ int RUNTIME_desc_release( const CHAM_desc_t *desc ) /** * Flush cached data */ -void RUNTIME_flush() +void RUNTIME_flush( CHAM_context_t *chamctxt ) { #if defined(CHAMELEON_USE_MPI) - starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD); + starpu_mpi_cache_flush_all_data( chamctxt->comm ); #endif } @@ -317,7 +317,7 @@ void RUNTIME_data_flush( const RUNTIME_sequence_t *sequence, } #if 
defined(CHAMELEON_USE_MPI) - starpu_mpi_cache_flush( MPI_COMM_WORLD, *handlebis ); + starpu_mpi_cache_flush( sequence->comm, *handlebis ); #endif if ( local ) { @@ -345,7 +345,7 @@ void RUNTIME_data_migrate( const RUNTIME_sequence_t *sequence, old_rank = starpu_mpi_data_get_rank( lhandle ); if ( old_rank != new_rank ) { - starpu_mpi_data_migrate( MPI_COMM_WORLD, lhandle, new_rank ); + starpu_mpi_data_migrate( sequence->comm, lhandle, new_rank ); } (void)sequence; diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c index 640ffa83e0a3dac177b72d0e95b049ba8a4427cf..634378e8dc41bb41b9a0610fa7642711c9371f10 100644 --- a/runtime/starpu/control/runtime_descriptor_ipiv.c +++ b/runtime/starpu/control/runtime_descriptor_ipiv.c @@ -223,7 +223,7 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) - starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); + starpu_mpi_cache_flush( sequence->comm, *handle ); if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) #endif { @@ -236,7 +236,7 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) - starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); + starpu_mpi_cache_flush( sequence->comm, *handle ); if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) #endif { @@ -272,7 +272,7 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) - starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); + starpu_mpi_cache_flush( sequence->comm, *handle ); if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) #endif { @@ -285,7 +285,7 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) - starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); + starpu_mpi_cache_flush( sequence->comm, *handle ); if ( starpu_mpi_data_get_rank( *handle ) 
== A->myrank ) #endif { @@ -323,7 +323,7 @@ void RUNTIME_ipiv_gather( const RUNTIME_sequence_t *sequence, if (already_received == 0) { MPI_Status status; - starpu_mpi_recv( ipiv_src, owner, tag, MPI_COMM_WORLD, &status ); + starpu_mpi_recv( ipiv_src, owner, tag, sequence->comm, &status ); } } else if ( rank == owner ) @@ -332,7 +332,7 @@ void RUNTIME_ipiv_gather( const RUNTIME_sequence_t *sequence, int already_sent = starpu_mpi_cached_send_set( ipiv_src, node ); if (already_sent == 0) { - starpu_mpi_send( ipiv_src, node, tag, MPI_COMM_WORLD ); + starpu_mpi_send( ipiv_src, node, tag, sequence->comm ); } } } diff --git a/runtime/starpu/control/runtime_tags.c b/runtime/starpu/control/runtime_tags.c index 57c9f859e4d3343eedc0660b98ace968e99e0335..031a556641fd4a59b57f1dd78a548d84e0787628 100644 --- a/runtime/starpu/control/runtime_tags.c +++ b/runtime/starpu/control/runtime_tags.c @@ -5,11 +5,11 @@ * @copyright 2017-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * Univ. Bordeaux. All rights reserved. * - * @version 1.2.0 + * @version 1.3.0 * @author Pierre Ramet * @author Mathieu Faverge * @author Florent Pruvost - * @date 2021-10-04 + * @date 2024-03-16 * * Functions to manage the MPI data tags with StarPU (originated from PaStiX). 
* @@ -58,7 +58,8 @@ chameleon_starpu_tag_init( void ) int ok = 0; void *tag_ub_p = NULL; - starpu_mpi_comm_get_attr( MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &tag_ub_p, &ok ); + CHAM_context_t *chamctxt = chameleon_context_self(); + starpu_mpi_comm_get_attr( chamctxt->comm, STARPU_MPI_TAG_UB, &tag_ub_p, &ok ); starpu_tag_ub = (uint64_t)((intptr_t)tag_ub_p); if ( !ok ) { diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index fd6d0e4688bd4ce9e8ba80b1084a00c999e5cb56..b795b4c79454e65ad9e22ca9a37b124bfe6c734a 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in @@ -20,7 +20,7 @@ * @author Loris Lucido * @author Terry Cojean * @author Matthieu Kuhn - * @date 2023-08-22 + * @date 2024-03-16 * */ #ifndef _chameleon_starpu_h_ @@ -131,10 +131,10 @@ void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options, #if defined(CHAMELEON_RUNTIME_SYNC) #define rt_starpu_insert_task( _codelet_, ... ) \ - starpu_mpi_insert_task( MPI_COMM_WORLD, (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) + starpu_mpi_insert_task( options->sequence->comm, (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) #else #define rt_starpu_insert_task( _codelet_, ... ) \ - starpu_mpi_insert_task( MPI_COMM_WORLD, (_codelet_), ##__VA_ARGS__ ) + starpu_mpi_insert_task( options->sequence->comm, (_codelet_), ##__VA_ARGS__ ) #endif #else