From 2349938e0e22dfe96e6b49cc65d3e08b57b91d35 Mon Sep 17 00:00:00 2001
From: Florent Pruvost <florent.pruvost@inria.fr>
Date: Wed, 21 Jan 2015 09:42:54 +0000
Subject: [PATCH] force execution on the rank owning the largest data (tile)
 for potrf+potrs

---
 runtime/starpu/codelets/codelet_zgemm.c | 26 ++++++++++++++++++++++---
 runtime/starpu/codelets/codelet_ztrsm.c | 18 ++++++++++++++++-
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 842724a65..93d233524 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -41,16 +41,33 @@ void MORSE_TASK_zgemm(MORSE_option_t *options,
                       MORSE_enum transA, int transB,
                       int m, int n, int k, int nb,
                       MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda,
-                                                MORSE_desc_t *B, int Bm, int Bn, int ldb,
-                      MORSE_Complex64_t beta, MORSE_desc_t *C, int Cm, int Cn, int ldc)
+                                               MORSE_desc_t *B, int Bm, int Bn, int ldb,
+                      MORSE_Complex64_t beta,  MORSE_desc_t *C, int Cm, int Cn, int ldc)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgemm;
     void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
+    int sizeA = m*k;
+    int sizeB = k*n;
+    int sizeC = m*n;
+    int execution_rank = C->get_rankof( C, Cm, Cn );
+    int rank_changed=0;
+
+    // force execution on the rank owning the largest data (tile)
+    // the numerical factor 10 should be an environment variable
+    if ( sizeA > 10*sizeC ){
+        execution_rank = A->get_rankof( A, Am, An );
+        rank_changed = 1;
+    }else if( sizeB > 10*sizeC ){
+        execution_rank = B->get_rankof( B, Bm, Bn );
+        rank_changed = 1;
+    }
 
     if ( morse_desc_islocal( A, Am, An ) ||
          morse_desc_islocal( B, Bm, Bn ) ||
-         morse_desc_islocal( C, Cm, Cn ) )
+         morse_desc_islocal( C, Cm, Cn ) ||
+         rank_changed
+       )
     {
         starpu_insert_task(
             codelet,
@@ -69,6 +86,9 @@ void MORSE_TASK_zgemm(MORSE_option_t *options,
             STARPU_VALUE,    &ldc,               sizeof(int),
             STARPU_PRIORITY,  options->priority,
             STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_USE_MPI)
+            STARPU_EXECUTE_ON_NODE, execution_rank,
+#endif
             0);
     }
 }
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index b88131bb6..2ed832347 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -46,9 +46,22 @@ void MORSE_TASK_ztrsm(MORSE_option_t *options,
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztrsm;
     void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL;
+    int sizeA = m*m;
+    int sizeB = m*n;
+    int execution_rank = B->get_rankof( B, Bm, Bn );
+    int rank_changed=0;
+
+    // force execution on the rank owning the largest data (tile)
+    // the numerical factor 10 should be an environment variable
+    if ( sizeA > 10*sizeB ){
+        execution_rank = A->get_rankof( A, Am, An );
+        rank_changed=1;
+    }
 
     if ( morse_desc_islocal( A, Am, An ) ||
-	 morse_desc_islocal( B, Bm, Bn ) )
+         morse_desc_islocal( B, Bm, Bn ) ||
+         rank_changed
+       )
     {
         starpu_insert_task(
             codelet,
@@ -65,6 +78,9 @@ void MORSE_TASK_ztrsm(MORSE_option_t *options,
             STARPU_VALUE,    &ldb,                sizeof(int),
             STARPU_PRIORITY,  options->priority,
             STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_USE_MPI)
+            STARPU_EXECUTE_ON_NODE, execution_rank,
+#endif
             0);
     }
 }
-- 
GitLab