diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index b2ff0e2fd31d81249aa42b7a0289c092f0b41b76..5c98a6f67c80659ceb853f978d039167968be1d9 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -1,20 +1,16 @@ /** * - * @copyright (c) 2009-2016 The University of Tennessee and The University - * of Tennessee Research Foundation. - * All rights reserved. - * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - **/ - -/** + * @copyright 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * * @file codelet_ztpmqrt.c * - * MORSE codelets kernel - * MORSE is a software package provided by Univ. of Tennessee, - * Univ. of California Berkeley and Univ. of Colorado Denver + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver * * @version 0.9.0 * @author Mathieu Faverge @@ -25,54 +21,6 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void MORSE_TASK_ztpmqrt( const MORSE_option_t *options, - MORSE_enum side, MORSE_enum trans, - int M, int N, int K, int L, int ib, int nb, - const MORSE_desc_t *V, int Vm, int Vn, int ldv, - const MORSE_desc_t *T, int Tm, int Tn, int ldt, - const MORSE_desc_t *A, int Am, int An, int lda, - const MORSE_desc_t *B, int Bm, int Bn, int ldb ) -{ - struct starpu_codelet *codelet = &cl_ztpmqrt; - void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL; - - MORSE_BEGIN_ACCESS_DECLARATION; - MORSE_ACCESS_R(V, Vm, Vn); - MORSE_ACCESS_R(T, Tm, Tn); - MORSE_ACCESS_RW(A, Am, An); - MORSE_ACCESS_RW(B, Bm, Bn); - MORSE_END_ACCESS_DECLARATION; - - starpu_insert_task( - starpu_mpi_codelet(codelet), - STARPU_VALUE, &side, sizeof(MORSE_enum), - STARPU_VALUE, &trans, sizeof(MORSE_enum), - STARPU_VALUE, &M, sizeof(int), - STARPU_VALUE, &N, sizeof(int), - STARPU_VALUE, &K, sizeof(int), - STARPU_VALUE, &L, sizeof(int), - STARPU_VALUE, &ib, sizeof(int), - STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), - STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), - STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), - STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), - /* Other options */ - STARPU_SCRATCH, options->ws_worker, - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, -#if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztpmqrt", -#endif - 0); - - (void)ib; (void)nb; -} - - #if !defined(CHAMELEON_SIMULATION) static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg) { @@ -150,8 +98,58 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) #endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ - /* * Codelet definition */ CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC) + +void +MORSE_TASK_ztpmqrt( const MORSE_option_t *options, + MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int ib, int nb, + const MORSE_desc_t *V, int Vm, int Vn, int ldv, + const MORSE_desc_t *T, int Tm, int Tn, int ldt, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb ) +{ + struct starpu_codelet *codelet = &cl_ztpmqrt; + void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL; + + MORSE_BEGIN_ACCESS_DECLARATION; + MORSE_ACCESS_R(V, Vm, Vn); + MORSE_ACCESS_R(T, Tm, Tn); + MORSE_ACCESS_RW(A, Am, An); + MORSE_ACCESS_RW(B, Bm, Bn); + MORSE_END_ACCESS_DECLARATION; + + starpu_insert_task( + starpu_mpi_codelet(codelet), + STARPU_VALUE, &side, sizeof(MORSE_enum), + STARPU_VALUE, &trans, sizeof(MORSE_enum), + STARPU_VALUE, &M, sizeof(int), + STARPU_VALUE, &N, sizeof(int), + STARPU_VALUE, &K, sizeof(int), + STARPU_VALUE, &L, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), + STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn), + STARPU_VALUE, &ldv, sizeof(int), + STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), + STARPU_VALUE, &ldt, sizeof(int), + STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + /* Other options */ + STARPU_SCRATCH, options->ws_worker, + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn), +#endif +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, (( L == 0 ) ? "ztsmqr" : "ztpmqrt"), +#endif + 0); + + (void)ib; (void)nb; +} diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index 1573e1ed4b82a2c4e9eaa05cacf7a575c00cc4e9..2405aec9b3676a27f3cea17c9e962b5e7174b82e 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -25,11 +25,45 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -void MORSE_TASK_ztpqrt( const MORSE_option_t *options, - int M, int N, int L, int ib, int nb, - const MORSE_desc_t *A, int Am, int An, int lda, - const MORSE_desc_t *B, int Bm, int Bn, int ldb, - const MORSE_desc_t *T, int Tm, int Tn, int ldt ) +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg) +{ + int M; + int N; + int L; + int ib; + MORSE_Complex64_t *A; + int lda; + MORSE_Complex64_t *B; + int ldb; + MORSE_Complex64_t *T; + int ldt; + MORSE_Complex64_t *WORK; + + A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib, + &lda, &ldb, &ldt ); + + CORE_ztpqrt( M, N, L, ib, + A, lda, B, ldb, T, ldt, WORK ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func) + +void +MORSE_TASK_ztpqrt( const MORSE_option_t *options, + int M, int N, int L, int ib, int nb, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb, + const MORSE_desc_t *T, int Tm, int Tn, int ldt ) { struct starpu_codelet *codelet = &cl_ztpqrt; void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL; @@ -56,6 +90,9 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options, STARPU_SCRATCH, options->ws_worker, STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn), +#endif #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "ztpqrt", #endif @@ -63,38 +100,3 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options, (void)ib; (void)nb; } - - -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg) -{ - int M; - int N; - int L; - int ib; - MORSE_Complex64_t *A; - int lda; - MORSE_Complex64_t *B; - int ldb; - MORSE_Complex64_t *T; - int ldt; - MORSE_Complex64_t *WORK; - - A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - - starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib, - &lda, &ldb, &ldt ); - - CORE_ztpqrt( M, N, L, ib, - A, lda, B, ldb, T, ldt, WORK ); -} -#endif /* !defined(CHAMELEON_SIMULATION) */ - - -/* - * Codelet definition - */ -CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func) diff --git a/runtime/starpu/codelets/codelet_ztsmqr.c b/runtime/starpu/codelets/codelet_ztsmqr.c index ef400fab496da37824ef338d9cb5f17f8c51d8f5..9d9f9877c1940c5e348cb4d7a98f8453d19299c4 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr.c +++ b/runtime/starpu/codelets/codelet_ztsmqr.c @@ -145,39 +145,12 @@ void MORSE_TASK_ztsmqr(const MORSE_option_t *options, struct starpu_codelet *codelet = &cl_ztsmqr; void (*callback)(void*) = options->profiling ? cl_ztsmqr_callback : NULL; int ldwork = side == MorseLeft ? ib : nb; - int sizeA1 = lda1*n1; - int sizeA2 = lda2*n2; - int sizeV = ldv*k; - int sizeT = ldt*n1; - int execution_rank = A2->get_rankof( A2, A2m, A2n ); - int rank_changed=0; - (void)execution_rank; - - /* force execution on the rank owning the largest data (tile) */ - int threshold; - char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD"); - if (env != NULL) - threshold = (unsigned)atoi(env); - else - threshold = 10; - if ( sizeA1 > threshold*sizeA2 ){ - execution_rank = A1->get_rankof( A1, A1m, A1n ); - rank_changed = 1; - }else if( sizeV > threshold*sizeA2 ){ - execution_rank = V->get_rankof( V, Vm, Vn ); - rank_changed = 1; - }else if( sizeT > threshold*sizeA2 ){ - execution_rank = T->get_rankof( T, Tm, Tn ); - rank_changed = 1; - } MORSE_BEGIN_ACCESS_DECLARATION; MORSE_ACCESS_RW(A1, A1m, A1n); MORSE_ACCESS_RW(A2, A2m, A2n); MORSE_ACCESS_R(V, Vm, Vn); MORSE_ACCESS_R(T, Tm, Tn); - if (rank_changed) - MORSE_RANK_CHANGED(execution_rank); MORSE_END_ACCESS_DECLARATION; starpu_insert_task( @@ -204,7 +177,7 @@ void MORSE_TASK_ztsmqr(const MORSE_option_t *options, STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, execution_rank, + STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), #endif #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "ztsmqr", diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c index a1bcada463ddc9b777cd1a6a69bdaf53324ec5f9..f063dfab9e7b47a42a65ec9dfebc7dfd4c2b01a5 100644 --- a/runtime/starpu/codelets/codelet_ztsqrt.c +++ b/runtime/starpu/codelets/codelet_ztsqrt.c @@ -111,7 +111,6 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options, MORSE_ACCESS_RW(A1, A1m, A1n); MORSE_ACCESS_RW(A2, A2m, A2n); MORSE_ACCESS_W(T, Tm, Tn); - MORSE_RANK_CHANGED(A2->get_rankof(A2, A2m, A2n)); MORSE_END_ACCESS_DECLARATION; starpu_insert_task( @@ -131,10 +130,12 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options, STARPU_VALUE, &h_work, sizeof(MORSE_starpu_ws_t *), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), +#endif #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "ztsqrt", #endif - STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), 0); } diff --git a/runtime/starpu/codelets/codelet_zttmqr.c b/runtime/starpu/codelets/codelet_zttmqr.c index 7563ba012bf8f52a88c146df13ec14b79a471d9e..1f0e21038bb595eb68b40d614cda3422c446e815 100644 --- a/runtime/starpu/codelets/codelet_zttmqr.c +++ b/runtime/starpu/codelets/codelet_zttmqr.c @@ -143,39 +143,12 @@ void MORSE_TASK_zttmqr(const MORSE_option_t *options, struct starpu_codelet *codelet = &cl_zttmqr; void (*callback)(void*) = options->profiling ? cl_zttmqr_callback : NULL; int ldwork = side == MorseLeft ? ib : nb; - int sizeA1 = lda1*n1; - int sizeA2 = lda2*n2; - int sizeV = ldv*k; - int sizeT = ldt*n1; - int execution_rank = A2->get_rankof( A2, A2m, A2n ); - int rank_changed=0; - (void)execution_rank; - - /* force execution on the rank owning the largest data (tile) */ - int threshold; - char* env = getenv("MORSE_COMM_FACTOR_THRESHOLD"); - if (env != NULL) - threshold = (unsigned)atoi(env); - else - threshold = 10; - if ( sizeA1 > threshold*sizeA2 ){ - execution_rank = A1->get_rankof( A1, A1m, A1n ); - rank_changed = 1; - }else if( sizeV > threshold*sizeA2 ){ - execution_rank = V->get_rankof( V, Vm, Vn ); - rank_changed = 1; - }else if( sizeT > threshold*sizeA2 ){ - execution_rank = T->get_rankof( T, Tm, Tn ); - rank_changed = 1; - } MORSE_BEGIN_ACCESS_DECLARATION; MORSE_ACCESS_RW(A1, A1m, A1n); MORSE_ACCESS_RW(A2, A2m, A2n); MORSE_ACCESS_R(V, Vm, Vn); MORSE_ACCESS_R(T, Tm, Tn); - if (rank_changed) - MORSE_RANK_CHANGED(execution_rank); MORSE_END_ACCESS_DECLARATION; starpu_insert_task( @@ -202,7 +175,7 @@ void MORSE_TASK_zttmqr(const MORSE_option_t *options, STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_USE_MPI) - STARPU_EXECUTE_ON_NODE, execution_rank, + STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), #endif #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zttmqr", diff --git a/runtime/starpu/codelets/codelet_zttqrt.c b/runtime/starpu/codelets/codelet_zttqrt.c index 8d03e589351f80c26e360d72e68497663fb1f2f3..3a51cdb2342a6b8b72af3afe6800a18d92306af5 100644 --- a/runtime/starpu/codelets/codelet_zttqrt.c +++ b/runtime/starpu/codelets/codelet_zttqrt.c @@ -137,6 +137,9 @@ void MORSE_TASK_zttqrt(const MORSE_option_t *options, STARPU_SCRATCH, options->ws_worker, STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + STARPU_EXECUTE_ON_NODE, A2->get_rankof(A2, A2m, A2n), +#endif #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zttqrt", #endif