diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 842724a6550a70ce96106b61b319c001c04d6bc4..93d2335244b5677cd5ed51cb019fa435659a1cda 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -41,16 +41,33 @@ void MORSE_TASK_zgemm(MORSE_option_t *options,
                       MORSE_enum transA, int transB,
                       int m, int n, int k, int nb,
                       MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda,
-                                                MORSE_desc_t *B, int Bm, int Bn, int ldb,
-                      MORSE_Complex64_t beta, MORSE_desc_t *C, int Cm, int Cn, int ldc)
+                                               MORSE_desc_t *B, int Bm, int Bn, int ldb,
+                      MORSE_Complex64_t beta,  MORSE_desc_t *C, int Cm, int Cn, int ldc)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgemm;
     void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
+    int sizeA = m*k;
+    int sizeB = k*n;
+    int sizeC = m*n;
+    int execution_rank = C->get_rankof( C, Cm, Cn );
+    int rank_changed=0;
+
+    // force execution on the rank owning the largest data (tile)
+    // the numerical factor 10 should be an environment variable
+    if ( sizeA > 10*sizeC ){
+        execution_rank = A->get_rankof( A, Am, An );
+        rank_changed = 1;
+    }else if( sizeB > 10*sizeC ){
+        execution_rank = B->get_rankof( B, Bm, Bn );
+        rank_changed = 1;
+    }
 
     if ( morse_desc_islocal( A, Am, An ) ||
          morse_desc_islocal( B, Bm, Bn ) ||
-         morse_desc_islocal( C, Cm, Cn ) )
+         morse_desc_islocal( C, Cm, Cn ) ||
+         rank_changed
+       )
     {
         starpu_insert_task(
             codelet,
@@ -69,6 +86,9 @@ void MORSE_TASK_zgemm(MORSE_option_t *options,
             STARPU_VALUE,    &ldc,               sizeof(int),
             STARPU_PRIORITY,  options->priority,
             STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_USE_MPI)
+            STARPU_EXECUTE_ON_NODE, execution_rank,
+#endif
             0);
     }
 }
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index b88131bb6dbfd914007a47011bc19bdf86b6d4dd..2ed832347a7ffa3dfbad0386785138443430186f 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -46,9 +46,22 @@ void MORSE_TASK_ztrsm(MORSE_option_t *options,
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztrsm;
     void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL;
+    int sizeA = m*m;
+    int sizeB = m*n;
+    int execution_rank = B->get_rankof( B, Bm, Bn );
+    int rank_changed=0;
+
+    // force execution on the rank owning the largest data (tile)
+    // the numerical factor 10 should be an environment variable
+    if ( sizeA > 10*sizeB ){
+        execution_rank = A->get_rankof( A, Am, An );
+        rank_changed=1;
+    }
 
     if ( morse_desc_islocal( A, Am, An ) ||
-	 morse_desc_islocal( B, Bm, Bn ) )
+         morse_desc_islocal( B, Bm, Bn ) ||
+         rank_changed
+       )
     {
         starpu_insert_task(
             codelet,
@@ -65,6 +78,9 @@ void MORSE_TASK_ztrsm(MORSE_option_t *options,
             STARPU_VALUE,    &ldb,                sizeof(int),
             STARPU_PRIORITY,  options->priority,
             STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_USE_MPI)
+            STARPU_EXECUTE_ON_NODE, execution_rank,
+#endif
             0);
     }
 }