diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 842724a6550a70ce96106b61b319c001c04d6bc4..93d2335244b5677cd5ed51cb019fa435659a1cda 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -41,16 +41,33 @@ void MORSE_TASK_zgemm(MORSE_option_t *options, MORSE_enum transA, int transB, int m, int n, int k, int nb, MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda, - MORSE_desc_t *B, int Bm, int Bn, int ldb, - MORSE_Complex64_t beta, MORSE_desc_t *C, int Cm, int Cn, int ldc) + MORSE_desc_t *B, int Bm, int Bn, int ldb, + MORSE_Complex64_t beta, MORSE_desc_t *C, int Cm, int Cn, int ldc) { (void)nb; struct starpu_codelet *codelet = &cl_zgemm; void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; + int sizeA = m*k; + int sizeB = k*n; + int sizeC = m*n; + int execution_rank = C->get_rankof( C, Cm, Cn ); + int rank_changed=0; + + // force execution on the rank owning the largest data (tile) + // the numerical factor 10 should be an environment variable + if ( sizeA > 10*sizeC ){ + execution_rank = A->get_rankof( A, Am, An ); + rank_changed = 1; + }else if( sizeB > 10*sizeC ){ + execution_rank = B->get_rankof( B, Bm, Bn ); + rank_changed = 1; + } if ( morse_desc_islocal( A, Am, An ) || morse_desc_islocal( B, Bm, Bn ) || - morse_desc_islocal( C, Cm, Cn ) ) + morse_desc_islocal( C, Cm, Cn ) || + rank_changed + ) { starpu_insert_task( codelet, @@ -69,6 +86,9 @@ void MORSE_TASK_zgemm(MORSE_option_t *options, STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + STARPU_EXECUTE_ON_NODE, execution_rank, +#endif 0); } } diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index b88131bb6dbfd914007a47011bc19bdf86b6d4dd..2ed832347a7ffa3dfbad0386785138443430186f 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ 
b/runtime/starpu/codelets/codelet_ztrsm.c @@ -46,9 +46,22 @@ void MORSE_TASK_ztrsm(MORSE_option_t *options, (void)nb; struct starpu_codelet *codelet = &cl_ztrsm; void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL; + int sizeA = m*m; + int sizeB = m*n; + int execution_rank = B->get_rankof( B, Bm, Bn ); + int rank_changed=0; + + // force execution on the rank owning the largest data (tile) + // the numerical factor 10 should be an environment variable + if ( sizeA > 10*sizeB ){ + execution_rank = A->get_rankof( A, Am, An ); + rank_changed=1; + } if ( morse_desc_islocal( A, Am, An ) || - morse_desc_islocal( B, Bm, Bn ) ) + morse_desc_islocal( B, Bm, Bn ) || + rank_changed + ) { starpu_insert_task( codelet, @@ -65,6 +78,9 @@ void MORSE_TASK_ztrsm(MORSE_option_t *options, STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + STARPU_EXECUTE_ON_NODE, execution_rank, +#endif 0); } }