diff --git a/control/control.c b/control/control.c
index c3cacb10a68b4328b6b2bc9ce24b0b7915a37f23..fbc777a11f487ae9a56a4dac4fc1ee12c708ea95 100644
--- a/control/control.c
+++ b/control/control.c
@@ -20,7 +20,7 @@
  * @author Samuel Thibault
  * @author Philippe Swartvagher
  * @author Loris Lucido
- * @date 2023-07-04
+ * @date 2024-03-16
  *
  ***
  *
@@ -35,7 +35,8 @@
  *
  * @ingroup Control
  *
- * @brief Initialize CHAMELEON.
+ * @brief Initialize CHAMELEON with number of cpus and gpus (using
+ * MPI_COMM_WORLD).
  *
  ******************************************************************************
  *
@@ -59,7 +60,8 @@ int __chameleon_init(int cores, int gpus)
  *
  * @ingroup Control
  *
- * @brief Initialize CHAMELEON.
+ * @brief Initialize CHAMELEON with number of cpus and gpus and threads per
+ * worker (using MPI_COMM_WORLD).
  *
  ******************************************************************************
  *
@@ -78,6 +80,37 @@ int __chameleon_init(int cores, int gpus)
  *
  */
 int __chameleon_initpar(int ncpus, int ngpus, int nthreads_per_worker)
+{
+    return __chameleon_initparcomm( ncpus, ngpus, nthreads_per_worker, MPI_COMM_WORLD );
+}
+
+/**
+ *
+ * @ingroup Control
+ *
+ * @brief Initialize CHAMELEON with number of cpus and gpus and threads per
+ * worker and using a given MPI communicator.
+ *
+ ******************************************************************************
+ *
+ * @param[in] ncpus
+ *          Number of cores to use.
+ *
+ * @param[in] ngpus
+ *          Number of cuda devices to use.
+ *
+ * @param[in] nthreads_per_worker
+ *          Number of threads per worker (cpu, cuda device).
+ *
+ * @param[in] comm
+ *          The MPI communicator.
+ *
+ ******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ */
+int __chameleon_initparcomm(int ncpus, int ngpus, int nthreads_per_worker, MPI_Comm comm)
 {
     CHAM_context_t *chamctxt;
 
@@ -124,6 +157,7 @@ int __chameleon_initpar(int ncpus, int ngpus, int nthreads_per_worker)
 #endif
 
     chamctxt->ncudas = ngpus;
+    chamctxt->comm = comm;
     return RUNTIME_init( chamctxt, ncpus, ngpus, nthreads_per_worker );
 }
 
@@ -145,15 +179,23 @@ int __chameleon_finalize(void)
         chameleon_error("CHAMELEON_Finalize", "CHAMELEON not initialized");
         return CHAMELEON_ERR_NOT_INITIALIZED;
     }
-    RUNTIME_flush();
+
+    /* Make sure all data are flushed */
+    RUNTIME_flush( chamctxt );
+
+    /* Wait for anything running */
 #  if !defined(CHAMELEON_SIMULATION)
     RUNTIME_barrier(chamctxt);
 #  endif
+
+    /* Stop the runtime system */
     RUNTIME_finalize( chamctxt );
 
 #if defined(CHAMELEON_USE_MPI)
-    if (!chamctxt->mpi_outer_init)
+    /* Finalize MPI if initialized by Chameleon */
+    if ( !chamctxt->mpi_outer_init ) {
         MPI_Finalize();
+    }
 #endif
 
     chameleon_context_destroy();
diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index 137adfbba9179c7f5dc49ca0d0ad8f4bc1ac1dbe..c314957fa12d6e2e09c7ac48cec96637c3904c78 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -24,7 +24,7 @@
 #  @author Florent Pruvost
 #  @author Guillaume Sylvand
 #  @author Matthieu Kuhn
-#  @date 2023-08-31
+#  @date 2024-03-16
 #
 ###
 
@@ -164,6 +164,9 @@ endif()
 target_link_libraries(coreblas PRIVATE MORSE::LAPACKE)
 target_link_libraries(coreblas PRIVATE MORSE::CBLAS)
 target_link_libraries(coreblas PUBLIC MORSE::M)
+if (CHAMELEON_USE_MPI)
+  target_link_libraries(coreblas PUBLIC MPI::MPI_C)
+endif()
 
 # export target coreblas
 install(EXPORT coreblasTargets
diff --git a/example/lapack_to_chameleon/step6.h b/example/lapack_to_chameleon/step6.h
index b834863c1f91b6606f3fe985030df8f8c9ba06cb..d60683987471c510aafa20ce55a6a87f0e358bcb 100644
--- a/example/lapack_to_chameleon/step6.h
+++ b/example/lapack_to_chameleon/step6.h
@@ -11,10 +11,10 @@
  *
  * @brief Chameleon step6 example header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Florent Pruvost
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2024-03-16
  *
  */
 #ifndef _step6_h_
@@ -34,7 +34,7 @@
 enum iparam_step6 {
     IPARAM_THRDNBR,        /* Number of cores                            */
     IPARAM_NCUDAS,         /* Number of cuda devices                     */
-    IPARAM_NMPI,           /* Number of cuda devices                     */
+    IPARAM_NMPI,           /* Number of MPI PROCS                        */
     IPARAM_N,              /* Number of columns of the matrix            */
     IPARAM_NB,             /* Number of columns in a tile                */
     IPARAM_IB,             /* Inner-blocking size                        */
diff --git a/include/chameleon.h b/include/chameleon.h
index 2abadc0dfd0fa4156c3fbd8647f3a126981223c2..7ac09793f4931b420310728d04760ea74172ea20 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -18,7 +18,7 @@
  * @author Florent Pruvost
  * @author Philippe Virouleau
  * @author Lionel Eyraud-Dubois
- * @date 2024-03-11
+ * @date 2024-03-16
  *
  */
 #ifndef _chameleon_h_
@@ -117,6 +117,7 @@ int CHAMELEON_Initialized       (void);
 int CHAMELEON_My_Mpi_Rank       (void) __attribute__((deprecated));
 int __chameleon_init            (int nworkers, int ncudas);
 int __chameleon_initpar         (int nworkers, int ncudas, int nthreads_per_worker);
+int __chameleon_initparcomm     (int nworkers, int ncudas, int nthreads_per_worker, MPI_Comm comm);
 int __chameleon_finalize        (void);
 int CHAMELEON_Pause             (void);
 int CHAMELEON_Resume            (void);
@@ -237,16 +238,23 @@ void CHAMELEON_Ipiv_Print ( const CHAM_ipiv_t *ipiv );
  *
  */
 #if defined(CHAMELEON_SCHED_OPENMP)
-#define CHAMELEON_Init( _nworkers_, _ncudas_ )           \
+
+#define CHAMELEON_Init( _nworkers_, _ncudas_ )          \
     __chameleon_init( (_nworkers_), (_ncudas_) );       \
-    _Pragma("omp parallel")                                    \
-    _Pragma("omp master")                                      \
+    _Pragma("omp parallel")                             \
+    _Pragma("omp master")                               \
     {
 
-#define CHAMELEON_InitPar( _nworkers_, _ncudas_, _nthreads_per_worker_ ) \
+#define CHAMELEON_InitPar( _nworkers_, _ncudas_, _nthreads_per_worker_ )      \
     __chameleon_initpar( (_nworkers_), (_ncudas_), (_nthreads_per_worker_) ); \
-    _Pragma("omp parallel")\
-    _Pragma("omp master")\
+    _Pragma("omp parallel")                                                   \
+    _Pragma("omp master")                                                     \
+    {
+
+#define CHAMELEON_InitParComm( _nworkers_, _ncudas_, _nthreads_per_worker_, _comm_ )        \
+    __chameleon_initparcomm( (_nworkers_), (_ncudas_), (_nthreads_per_worker_), (_comm_) ); \
+    _Pragma("omp parallel")                                                                 \
+    _Pragma("omp master")                                                                   \
     {
 
 #define CHAMELEON_Finalize()                    \
@@ -255,11 +263,14 @@ void CHAMELEON_Ipiv_Print ( const CHAM_ipiv_t *ipiv );
 
 #else
 
-#define CHAMELEON_Init( _nworkers_, _ncudas_ )            \
+#define CHAMELEON_Init( _nworkers_, _ncudas_ )          \
     __chameleon_init( (_nworkers_), (_ncudas_) );
 
 #define CHAMELEON_InitPar( _nworkers_, _ncudas_, _nthreads_per_worker_ ) \
     __chameleon_initpar( (_nworkers_), (_ncudas_), (_nthreads_per_worker_) );
+
+#define CHAMELEON_InitParComm( _nworkers_, _ncudas_, _nthreads_per_worker_, _comm_ ) \
+    __chameleon_initparcomm( (_nworkers_), (_ncudas_), (_nthreads_per_worker_), (_comm_) );
 
 #define CHAMELEON_Finalize()                    \
     __chameleon_finalize();
diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h
index 9b2239c1bc53591a5d0fec3609b2ef7add1953b7..010b8b1b5eb68c9f841021a7072f8192579bb545 100644
--- a/include/chameleon/runtime.h
+++ b/include/chameleon/runtime.h
@@ -480,7 +480,7 @@ RUNTIME_desc_flush( const CHAM_desc_t     *desc,
  * This function flushes all data from the distributed cache of the runtime system.
  */
 void
-RUNTIME_flush( );
+RUNTIME_flush( CHAM_context_t *chamctxt );
 
 /**
  * @brief Flush a single piece of data.
diff --git a/include/chameleon/runtime_struct.h b/include/chameleon/runtime_struct.h
index 3028d328bac41300310dd4f1eadeb9656466798f..c83da6fd9d1af72851219f06c9aba880331ccd8f 100644
--- a/include/chameleon/runtime_struct.h
+++ b/include/chameleon/runtime_struct.h
@@ -17,12 +17,23 @@
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Philippe Virouleau
- * @date 2023-07-04
+ * @date 2024-03-16
  *
  */
 #ifndef _chameleon_runtime_struct_h_
 #define _chameleon_runtime_struct_h_
 
+#if defined(CHAMELEON_USE_MPI)
+#include <mpi.h>
+#else
+#ifndef MPI_Comm
+typedef uintptr_t MPI_Comm;
+#endif
+#ifndef MPI_COMM_WORLD
+#define MPI_COMM_WORLD 0
+#endif
+#endif
+
 BEGIN_C_DECLS
 
 /**
@@ -70,6 +81,7 @@ typedef struct runtime_sequence_s {
     int                status;   /**< Return status registered by the tasks for the request     */
     RUNTIME_request_t *request;  /**< Pointer to the request that failed if any, NULL otherwise */
     void              *schedopt; /**< Specific runtime data pointer to handle the sequence      */
+    MPI_Comm           comm;     /**< MPI communicator                                         */
 } RUNTIME_sequence_t;
 
 /**
diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h
index 00a79664c17b4d051ea0858ae910b8f87460a8d9..3664d5c2bdca39950d494c4863f1cb4b2b150a42 100644
--- a/include/chameleon/struct.h
+++ b/include/chameleon/struct.h
@@ -19,7 +19,7 @@
  * @author Samuel Thibault
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2023-08-31
+ * @date 2024-03-16
  *
  */
 #ifndef _chameleon_struct_h_
@@ -30,6 +30,17 @@
 #include "chameleon/constants.h"
 #include "chameleon/runtime_struct.h"
 
+#if defined(CHAMELEON_USE_MPI)
+#include <mpi.h>
+#else
+#ifndef MPI_Comm
+typedef uintptr_t MPI_Comm;
+#endif
+#ifndef MPI_COMM_WORLD
+#define MPI_COMM_WORLD 0
+#endif
+#endif
+
 BEGIN_C_DECLS
 
 #define CHAMELEON_TILE_FULLRANK (1 << 0)
@@ -191,6 +202,7 @@ typedef struct chameleon_context_s {
     int                lookahead;          // depth of the look ahead in algorithms
     void              *schedopt;           // structure for runtimes
     int                mpi_outer_init;     // MPI has been initialized outside our functions
+    MPI_Comm           comm;               // MPI communicator
 } CHAM_context_t;
 
 static inline void *
diff --git a/runtime/openmp/control/runtime_descriptor.c b/runtime/openmp/control/runtime_descriptor.c
index 38ea0a3b1dbb198bee1d1bd5db1b55d49dd3c572..075724b9b9e5dbcfd250fd34eaf946391ea3ad26 100644
--- a/runtime/openmp/control/runtime_descriptor.c
+++ b/runtime/openmp/control/runtime_descriptor.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon OpenMP descriptor routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Vijay Joshi
  * @author Cedric Castagnede
  * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2024-03-16
  *
  */
 #include "chameleon_openmp.h"
@@ -59,7 +59,7 @@ int RUNTIME_desc_release( const CHAM_desc_t *desc )
 }
 
 void
-RUNTIME_desc_flush( const CHAM_desc_t     *desc,
+RUNTIME_desc_flush( const CHAM_desc_t        *desc,
                     const RUNTIME_sequence_t *sequence )
 {
     (void)desc;
@@ -69,8 +69,9 @@ RUNTIME_desc_flush( const CHAM_desc_t     *desc,
 
 
 void
-RUNTIME_flush( )
+RUNTIME_flush( CHAM_context_t *chamctxt )
 {
+    (void)chamctxt;
     return;
 }
 
diff --git a/runtime/parsec/control/runtime_descriptor.c b/runtime/parsec/control/runtime_descriptor.c
index 360d673261c5493d196e7f2638b427fa9df8b0cb..b1266e284513e234c72859355a79b5f3dc342b3f 100644
--- a/runtime/parsec/control/runtime_descriptor.c
+++ b/runtime/parsec/control/runtime_descriptor.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon PaRSEC descriptor routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Reazul Hoque
  * @author Mathieu Faverge
  * @author Guillaume Sylvand
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2024-03-16
  *
  */
 #include "chameleon_parsec.h"
@@ -345,8 +345,10 @@ int RUNTIME_desc_release( const CHAM_desc_t *desc )
 /**
  *  Flush cached data
  */
-void RUNTIME_flush()
+void RUNTIME_flush( CHAM_context_t *chamctxt )
 {
+    (void)chamctxt;
+    return;
 }
 
 void RUNTIME_desc_flush( const CHAM_desc_t        *desc,
diff --git a/runtime/quark/control/runtime_descriptor.c b/runtime/quark/control/runtime_descriptor.c
index 6301b9c1408feb4f8883b73a338bcc31dedcf441..4435e06629694e45492cb5884d9797d3d3501195 100644
--- a/runtime/quark/control/runtime_descriptor.c
+++ b/runtime/quark/control/runtime_descriptor.c
@@ -11,13 +11,13 @@
  *
  * @brief Chameleon Quark descriptor routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Vijay Joshi
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Mathieu Faverge
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2024-03-16
  *
  */
 #include "chameleon_quark.h"
@@ -60,7 +60,7 @@ int RUNTIME_desc_release( const CHAM_desc_t *desc )
 }
 
 void
-RUNTIME_desc_flush( const CHAM_desc_t     *desc,
+RUNTIME_desc_flush( const CHAM_desc_t        *desc,
                     const RUNTIME_sequence_t *sequence )
 {
     (void)desc;
@@ -70,8 +70,9 @@ RUNTIME_desc_flush( const CHAM_desc_t     *desc,
 
 
 void
-RUNTIME_flush( )
+RUNTIME_flush( CHAM_context_t *chamctxt )
 {
+    (void)chamctxt;
     return;
 }
 
diff --git a/runtime/starpu/codelets/codelet_zgersum.c b/runtime/starpu/codelets/codelet_zgersum.c
index 8f8b2eaebc2d5701e734bbf59111c4c0474d13f2..dd44fb9f61150ebf4219af469b79bfd779cf8085 100644
--- a/runtime/starpu/codelets/codelet_zgersum.c
+++ b/runtime/starpu/codelets/codelet_zgersum.c
@@ -15,7 +15,7 @@
  * @author Romain Peressoni
  * @author Mathieu Faverge
  * @author Antoine Jego
- * @date 2023-07-06
+ * @date 2024-03-16
  * @precisions normal z -> c d s
  *
  */
@@ -128,7 +128,7 @@ RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
                              const CHAM_desc_t *A, int Am, int An )
 {
 #if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI)
-    starpu_mpi_redux_data_prio_tree( MPI_COMM_WORLD,
+    starpu_mpi_redux_data_prio_tree( options->sequence->comm,
                                      RTBLKADDR(A, ChamComplexDouble, Am, An),
                                      options->priority + 1,
                                      2 /* Binary tree */ );
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index af940bf2879a6b5a8985f163cb3d349b34d784b3..53647940eac27d0b6ac51d7f2f64042100841b5e 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -21,7 +21,7 @@
  * @author Florent Pruvost
  * @author Samuel Thibault
  * @author Alycia Lisito
- * @date 2023-07-06
+ * @date 2024-03-16
  * @precisions normal z -> c d s
  *
  */
@@ -120,9 +120,9 @@ insert_task_zlacpy_on_remote_node( const RUNTIME_option_t *options,
 {
     void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL;
 #if defined(CHAMELEON_RUNTIME_SYNC)
-    starpu_mpi_data_cpy_priority( handleB, handleA, MPI_COMM_WORLD, 0, callback, NULL, options->priority );
+    starpu_mpi_data_cpy_priority( handleB, handleA, options->sequence->comm, 0, callback, NULL, options->priority );
 #else
-    starpu_mpi_data_cpy_priority( handleB, handleA, MPI_COMM_WORLD, 1, callback, NULL, options->priority );
+    starpu_mpi_data_cpy_priority( handleB, handleA, options->sequence->comm, 1, callback, NULL, options->priority );
 #endif
 }
 #endif
diff --git a/runtime/starpu/control/runtime_async.c b/runtime/starpu/control/runtime_async.c
index a439e5d8fe17986708508dceeab859ae922356dc..ea19203fcf95effe511255ff690ccdb7810f28eb 100644
--- a/runtime/starpu/control/runtime_async.c
+++ b/runtime/starpu/control/runtime_async.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon StarPU asynchronous routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2024-03-16
  *
  */
 #include "chameleon_starpu.h"
@@ -28,7 +28,7 @@ int RUNTIME_sequence_create( CHAM_context_t  *chamctxt,
                              RUNTIME_sequence_t *sequence )
 {
     (void)chamctxt;
-    (void)sequence;
+    sequence->comm = chamctxt->comm;
     return CHAMELEON_SUCCESS;
 }
 
@@ -58,10 +58,10 @@ int RUNTIME_sequence_wait( CHAM_context_t     *chamctxt,
 
 #if defined(CHAMELEON_USE_MPI)
 #  if defined(HAVE_STARPU_MPI_WAIT_FOR_ALL)
-    starpu_mpi_wait_for_all(MPI_COMM_WORLD);
+    starpu_mpi_wait_for_all(sequence->comm);
 #  else
     starpu_task_wait_for_all();
-    starpu_mpi_barrier(MPI_COMM_WORLD);
+    starpu_mpi_barrier(sequence->comm);
 #  endif
 #else
     starpu_task_wait_for_all();
diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c
index b8fd4003fa742629c5ffb518609067d67e97609d..96e1c3ff7cbad2e0c5336a2804ab5d98e08ee56d 100644
--- a/runtime/starpu/control/runtime_control.c
+++ b/runtime/starpu/control/runtime_control.c
@@ -21,7 +21,7 @@
  * @author Matthieu Kuhn
  * @author Loris Lucido
  * @author Terry Cojean
- * @date 2023-08-22
+ * @date 2024-03-16
  *
  */
 #include "chameleon_starpu.h"
@@ -100,7 +100,7 @@ void chameleon_starpu_parallel_worker_fini( starpu_sched_opt_t *sched_opt )
 /**
  *
  */
-static int chameleon_starpu_init( struct starpu_conf *conf )
+static int chameleon_starpu_init( MPI_Comm comm, struct starpu_conf *conf )
 {
     int hres = CHAMELEON_SUCCESS;
     int rc;
@@ -118,7 +118,7 @@ static int chameleon_starpu_init( struct starpu_conf *conf )
 #  endif
 
 #  if defined(HAVE_STARPU_MPI_INIT_CONF)
-        rc = starpu_mpi_init_conf(NULL, NULL, !flag, MPI_COMM_WORLD, conf);
+        rc = starpu_mpi_init_conf(NULL, NULL, !flag, comm, conf);
 #  else
         rc = starpu_init(conf);
         if (rc < 0) {
@@ -186,7 +186,7 @@ int RUNTIME_init( CHAM_context_t *chamctxt,
 
     if ((ncpus == -1)||(nthreads_per_worker == -1))
     {
-        hres = chameleon_starpu_init( conf );
+        hres = chameleon_starpu_init( chamctxt->comm, conf );
 
         chamctxt->nworkers = ncpus;
         chamctxt->nthreads_per_worker = nthreads_per_worker;
@@ -202,7 +202,7 @@ int RUNTIME_init( CHAM_context_t *chamctxt,
 
         conf->use_explicit_workers_bindid = 1;
 
-        hres = chameleon_starpu_init( conf );
+        hres = chameleon_starpu_init( chamctxt->comm, conf );
 
         chamctxt->nworkers = ncpus;
         chamctxt->nthreads_per_worker = nthreads_per_worker;
@@ -300,11 +300,11 @@ void RUNTIME_barrier( CHAM_context_t *chamctxt )
 
 #if defined(CHAMELEON_USE_MPI)
 #  if defined(HAVE_STARPU_MPI_WAIT_FOR_ALL)
-    starpu_mpi_wait_for_all(MPI_COMM_WORLD);
-    starpu_mpi_barrier(MPI_COMM_WORLD);
+    starpu_mpi_wait_for_all( chamctxt->comm );
+    starpu_mpi_barrier( chamctxt->comm );
 #  else
     starpu_task_wait_for_all();
-    starpu_mpi_barrier(MPI_COMM_WORLD);
+    starpu_mpi_barrier( chamctxt->comm );
 #  endif
 #else
     starpu_task_wait_for_all();
@@ -380,9 +380,9 @@ int RUNTIME_comm_rank( CHAM_context_t *chamctxt )
 
 #if defined(CHAMELEON_USE_MPI)
 #  if defined(HAVE_STARPU_MPI_COMM_RANK)
-    starpu_mpi_comm_rank( MPI_COMM_WORLD, &rank );
+    starpu_mpi_comm_rank( chamctxt->comm, &rank );
 #  else
-    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+    MPI_Comm_rank( chamctxt->comm, &rank );
 #  endif
 #endif
 
@@ -398,9 +398,9 @@ int RUNTIME_comm_size( CHAM_context_t *chamctxt )
     int size;
 #if defined(CHAMELEON_USE_MPI)
 #  if defined(HAVE_STARPU_MPI_COMM_RANK)
-    starpu_mpi_comm_size( MPI_COMM_WORLD, &size );
+    starpu_mpi_comm_size( chamctxt->comm, &size );
 #  else
-    MPI_Comm_size( MPI_COMM_WORLD, &size );
+    MPI_Comm_size( chamctxt->comm, &size );
 #  endif
 #else
     size = 1;
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index 1e98660118946a75eec93f00bed0091b3301b7c0..2e72133c6acf979cba775db9488a0da93ffd2ab1 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -20,7 +20,7 @@
  * @author Raphael Boucherie
  * @author Samuel Thibault
  * @author Loris Lucido
- * @date 2023-08-22
+ * @date 2024-03-16
  *
  */
 #include "chameleon_starpu.h"
@@ -149,7 +149,7 @@ void RUNTIME_desc_create( CHAM_desc_t *desc )
      * Book the number of tags required to describe this matrix
      */
     {
-        chameleon_starpu_tag_init();
+        chameleon_starpu_tag_init( );
         desc->mpitag = chameleon_starpu_tag_book( nbtiles );
 
         if ( desc->mpitag == -1 ) {
@@ -267,10 +267,10 @@ int RUNTIME_desc_release( const CHAM_desc_t *desc )
 /**
  *  Flush cached data
  */
-void RUNTIME_flush()
+void RUNTIME_flush( CHAM_context_t *chamctxt )
 {
 #if defined(CHAMELEON_USE_MPI)
-    starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD);
+    starpu_mpi_cache_flush_all_data( chamctxt->comm );
 #endif
 }
 
@@ -317,7 +317,7 @@ void RUNTIME_data_flush( const RUNTIME_sequence_t *sequence,
         }
 
 #if defined(CHAMELEON_USE_MPI)
-        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handlebis );
+        starpu_mpi_cache_flush( sequence->comm, *handlebis );
 #endif
 
         if ( local ) {
@@ -345,7 +345,7 @@ void RUNTIME_data_migrate( const RUNTIME_sequence_t *sequence,
     old_rank = starpu_mpi_data_get_rank( lhandle );
 
     if ( old_rank != new_rank ) {
-        starpu_mpi_data_migrate( MPI_COMM_WORLD, lhandle, new_rank );
+        starpu_mpi_data_migrate( sequence->comm, lhandle, new_rank );
     }
 
     (void)sequence;
diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c
index 640ffa83e0a3dac177b72d0e95b049ba8a4427cf..634378e8dc41bb41b9a0610fa7642711c9371f10 100644
--- a/runtime/starpu/control/runtime_descriptor_ipiv.c
+++ b/runtime/starpu/control/runtime_descriptor_ipiv.c
@@ -223,7 +223,7 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
 
     if ( *handle != NULL ) {
 #if defined(CHAMELEON_USE_MPI)
-        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        starpu_mpi_cache_flush( sequence->comm, *handle );
         if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
 #endif
         {
@@ -236,7 +236,7 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence,
 
     if ( *handle != NULL ) {
 #if defined(CHAMELEON_USE_MPI)
-        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        starpu_mpi_cache_flush( sequence->comm, *handle );
         if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
 #endif
         {
@@ -272,7 +272,7 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
 
     if ( *handle != NULL ) {
 #if defined(CHAMELEON_USE_MPI)
-        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        starpu_mpi_cache_flush( sequence->comm, *handle );
         if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
 #endif
         {
@@ -285,7 +285,7 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence,
 
     if ( *handle != NULL ) {
 #if defined(CHAMELEON_USE_MPI)
-        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        starpu_mpi_cache_flush( sequence->comm, *handle );
         if ( starpu_mpi_data_get_rank( *handle ) == A->myrank )
 #endif
         {
@@ -323,7 +323,7 @@ void RUNTIME_ipiv_gather( const RUNTIME_sequence_t *sequence,
                 if (already_received == 0)
                 {
                     MPI_Status status;
-                    starpu_mpi_recv( ipiv_src, owner, tag, MPI_COMM_WORLD, &status );
+                    starpu_mpi_recv( ipiv_src, owner, tag, sequence->comm, &status );
                 }
             }
             else if ( rank == owner )
@@ -332,7 +332,7 @@ void RUNTIME_ipiv_gather( const RUNTIME_sequence_t *sequence,
                 int already_sent = starpu_mpi_cached_send_set( ipiv_src, node );
                 if (already_sent == 0)
                 {
-                    starpu_mpi_send( ipiv_src, node, tag, MPI_COMM_WORLD );
+                    starpu_mpi_send( ipiv_src, node, tag, sequence->comm );
                 }
             }
         }
diff --git a/runtime/starpu/control/runtime_tags.c b/runtime/starpu/control/runtime_tags.c
index 57c9f859e4d3343eedc0660b98ace968e99e0335..031a556641fd4a59b57f1dd78a548d84e0787628 100644
--- a/runtime/starpu/control/runtime_tags.c
+++ b/runtime/starpu/control/runtime_tags.c
@@ -5,11 +5,11 @@
  * @copyright 2017-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Pierre Ramet
  * @author Mathieu Faverge
  * @author Florent Pruvost
- * @date 2021-10-04
+ * @date 2024-03-16
  *
  * Functions to manage the MPI data tags with StarPU (originated from PaStiX).
  *
@@ -58,7 +58,8 @@ chameleon_starpu_tag_init( void )
         int          ok       = 0;
         void        *tag_ub_p = NULL;
 
-        starpu_mpi_comm_get_attr( MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &tag_ub_p, &ok );
+        CHAM_context_t *chamctxt = chameleon_context_self();
+        starpu_mpi_comm_get_attr( chamctxt->comm, STARPU_MPI_TAG_UB, &tag_ub_p, &ok );
         starpu_tag_ub = (uint64_t)((intptr_t)tag_ub_p);
 
         if ( !ok ) {
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index fd6d0e4688bd4ce9e8ba80b1084a00c999e5cb56..b795b4c79454e65ad9e22ca9a37b124bfe6c734a 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -20,7 +20,7 @@
  * @author Loris Lucido
  * @author Terry Cojean
  * @author Matthieu Kuhn
- * @date 2023-08-22
+ * @date 2024-03-16
  *
  */
 #ifndef _chameleon_starpu_h_
@@ -131,10 +131,10 @@ void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options,
 
 #if defined(CHAMELEON_RUNTIME_SYNC)
 #define rt_starpu_insert_task( _codelet_, ... )                         \
-    starpu_mpi_insert_task( MPI_COMM_WORLD, (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
+    starpu_mpi_insert_task( options->sequence->comm, (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
 #else
 #define rt_starpu_insert_task( _codelet_, ... )                         \
-    starpu_mpi_insert_task( MPI_COMM_WORLD, (_codelet_), ##__VA_ARGS__ )
+    starpu_mpi_insert_task( options->sequence->comm, (_codelet_), ##__VA_ARGS__ )
 #endif
 
 #else