From a372581fe7c7a026559aa9969b8445995668bcdd Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Tue, 16 Mar 2021 11:31:18 +0100 Subject: [PATCH] Backport StarPU codelet restructuration from recursive --- runtime/starpu/codelets/codelet_zgemm.c | 137 +++++++++++++--------- runtime/starpu/codelets/codelet_zherk.c | 118 +++++++++++-------- runtime/starpu/codelets/codelet_zlacpy.c | 95 +++++++++------ runtime/starpu/codelets/codelet_zlascal.c | 73 ++++++++---- runtime/starpu/codelets/codelet_zlaset.c | 78 +++++++----- runtime/starpu/codelets/codelet_zlauum.c | 69 +++++++---- runtime/starpu/codelets/codelet_zplghe.c | 74 +++++++----- runtime/starpu/codelets/codelet_zplgsy.c | 79 ++++++++----- runtime/starpu/codelets/codelet_zplrnt.c | 71 +++++++---- runtime/starpu/codelets/codelet_zpotrf.c | 90 ++++++++------ runtime/starpu/codelets/codelet_zsyrk.c | 124 ++++++++++++-------- runtime/starpu/codelets/codelet_ztradd.c | 82 ++++++++----- runtime/starpu/codelets/codelet_ztrmm.c | 126 +++++++++++--------- runtime/starpu/codelets/codelet_ztrsm.c | 119 +++++++++++-------- runtime/starpu/codelets/codelet_ztrtri.c | 87 ++++++++------ 15 files changed, 855 insertions(+), 567 deletions(-) diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 9a3b9ea3f..0567065f3 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -25,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zgemm_args_s { cham_trans_t transA; cham_trans_t transB; int m; @@ -38,47 +36,54 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) CHAM_tile_t *tileB; CHAMELEON_Complex64_t beta; CHAM_tile_t *tileC; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zgemm_cpu_func( void *descr[], void *cl_arg ) +{ + struct cl_zgemm_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); tileC = cti_interface_get(descr[2]); - starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta); - TCORE_zgemm( transA, transB, - m, n, k, - alpha, tileA, tileB, - beta, tileC ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zgemm( clargs.transA, clargs.transB, + clargs.m, clargs.n, clargs.k, + clargs.alpha, tileA, tileB, + clargs.beta, tileC ); } #ifdef CHAMELEON_USE_CUDA -static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) +static void +cl_zgemm_cuda_func( void *descr[], void *_cl_arg ) { - cham_trans_t transA; - cham_trans_t transB; - int m; - int n; - int k; - cuDoubleComplex alpha; + struct cl_zgemm_args_s clargs; CHAM_tile_t *tileA; CHAM_tile_t *tileB; - cuDoubleComplex beta; CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); tileC = cti_interface_get(descr[2]); - starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta); + starpu_codelet_unpack_args( _cl_arg, &clargs ); RUNTIME_getStream( stream ); CUDA_zgemm( - transA, transB, - m, n, k, - &alpha, tileA->mat, tileA->ld, - tileB->mat, tileB->ld, - &beta, tileC->mat, tileC->ld, - stream); + clargs.transA, clargs.transB, + clargs.m, clargs.n, clargs.k, + (cuDoubleComplex*)&(clargs.alpha), + tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + (cuDoubleComplex*)&(clargs.beta), + tileC->mat, tileC->ld, + stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -92,56 +97,72 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zgemm(const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) +CODELETS( zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC ) + +void INSERT_TASK_zgemm( const RUNTIME_option_t *options, + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, C, Cm, Cn ); } - (void)nb; - struct starpu_codelet *codelet = &cl_zgemm; - void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; - + struct cl_zgemm_args_s clargs = { + .transA = transA, + .transB = transB, + .m = m, + .n = n, + .k = k, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .tileB = B->get_blktile( B, Bm, Bn ), + .beta = beta, + .tileC = C->get_blktile( C, Cm, Cn ) + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid, accessC; + char *cl_name = "zgemm"; + + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(B, Bm, Bn); CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zgemm_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Reduce the C access if needed */ + accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &transB, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zgemm, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zgemm_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zgemm", + STARPU_NAME, cl_name, #endif - 0); + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 1b9d39818..b224473cb 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -25,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zherk_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zherk_args_s { cham_uplo_t uplo; cham_trans_t trans; int n; @@ -36,41 +34,46 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg) CHAM_tile_t *tileA; double beta; CHAM_tile_t *tileC; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zherk_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zherk_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileC = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); - TCORE_zherk(uplo, trans, - n, k, - alpha, tileA, - beta, tileC); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zherk( clargs.uplo, clargs.trans, clargs.n, clargs.k, + clargs.alpha, tileA, clargs.beta, tileC ); } -#ifdef CHAMELEON_USE_CUDA -static void cl_zherk_cuda_func(void *descr[], void *cl_arg) +#if defined(CHAMELEON_USE_CUDA) +static void +cl_zherk_cuda_func(void *descr[], void *cl_arg) { - cham_uplo_t uplo; - cham_trans_t trans; - int n; - int k; - double alpha; + struct cl_zherk_args_s clargs; CHAM_tile_t *tileA; - double beta; CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileC = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); + starpu_codelet_unpack_args( cl_arg, &clargs ); RUNTIME_getStream(stream); CUDA_zherk( - uplo, trans, n, k, - &alpha, tileA->mat, tileA->ld, - &beta, tileC->mat, tileC->ld, - stream); + clargs.uplo, clargs.trans, clargs.n, clargs.k, + (cuDoubleComplex*)&(clargs.alpha), + tileA->mat, tileA->ld, + (cuDoubleComplex*)&(clargs.beta), + tileC->mat, tileC->ld, + stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -78,52 +81,73 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS(zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC) +CODELETS( zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC ) -void INSERT_TASK_zherk(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - double alpha, const CHAM_desc_t *A, int Am, int An, - double beta, const CHAM_desc_t *C, int Cm, int Cn) +void INSERT_TASK_zherk( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + double alpha, const CHAM_desc_t *A, int Am, int An, + double beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { return INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn ); } - (void)nb; - struct starpu_codelet *codelet = &cl_zherk; - void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + struct cl_zherk_args_s clargs = { + .uplo = uplo, + .trans = trans, + .n = n, + .k = k, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .beta = beta, + .tileC = C->get_blktile( C, Cm, Cn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid, accessC; + char *cl_name = "zherk"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zherk_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Reduce the C access if needed */ + accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(double), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &beta, sizeof(double), - accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zherk, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zherk_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zherk", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index bec2d9228..3fd038bde 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -12,8 +12,6 @@ * @brief Chameleon zlacpy StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge @@ -27,70 +25,93 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zlacpy_args_s { cham_uplo_t uplo; - int M; - int N; + int m; + int n; int displA; int displB; CHAM_tile_t *tileA; CHAM_tile_t *tileB; - CHAMELEON_Complex64_t *A; - CHAMELEON_Complex64_t *B; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zlacpy_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zlacpy_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &displB); - - assert( tileA->format & CHAMELEON_TILE_FULLRANK ); - assert( tileB->format & CHAMELEON_TILE_FULLRANK ); - - A = tileA->mat; - B = tileB->mat; - CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + assert( clargs.displA == 0 ); + assert( clargs.displB == 0 ); + /* A = tileA->mat; */ + /* B = tileB->mat; */ + /* CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); */ + TCORE_zlacpy( clargs.uplo, clargs.m, clargs.n, tileA, tileB ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlacpy, cl_zlacpy_cpu_func) +CODELETS_CPU( zlacpy, cl_zlacpy_cpu_func ) void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, int displA, const CHAM_desc_t *A, int Am, int An, int displB, const CHAM_desc_t *B, int Bm, int Bn ) { - (void)nb; - struct starpu_codelet *codelet = &cl_zlacpy; - void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zlacpy_args_s clargs = { + .uplo = uplo, + .m = m, + .n = n, + .displA = displA, + .displB = displB, + .tileA = A->get_blktile( A, Am, An ), + .tileB = B->get_blktile( B, Bm, Bn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zlacpy"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R( A, Am, An ); - CHAMELEON_ACCESS_W( B, Bm, Bn ); + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_W(B, Bm, Bn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zlacpy_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(cham_uplo_t), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &displA, sizeof(int), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &displB, sizeof(int), - STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zlacpy, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zlacpy_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlacpy", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index bd6823a08..eb03a8685 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -21,33 +21,38 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zlascal_args_s { cham_uplo_t uplo; - int M; - int N; + int m; + int n; CHAMELEON_Complex64_t alpha; CHAM_tile_t *tileA; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zlascal_cpu_func( void *descr[], void *cl_arg ) +{ + struct cl_zlascal_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha); - TCORE_zlascal(uplo, M, N, alpha, tileA); - return; + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zlascal( clargs.uplo, clargs.m, clargs.n, clargs.alpha, tileA ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlascal, cl_zlascal_cpu_func) +CODELETS_CPU( zlascal, cl_zlascal_cpu_func ) void INSERT_TASK_zlascal( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An) + const CHAM_desc_t *A, int Am, int An ) { if ( alpha == 0. ) { return INSERT_TASK_zlaset( options, uplo, m, n, @@ -57,28 +62,46 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options, return; } - (void)nb; - struct starpu_codelet *codelet = &cl_zlascal; - void (*callback)(void*) = options->profiling ? cl_zlascal_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zlascal_args_s clargs = { + .uplo = uplo, + .m = m, + .n = n, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zlascal"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zlascal_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zlascal, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zlascal_args_s), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlascal", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c index c7661bdb8..0636686e3 100644 --- a/runtime/starpu/codelets/codelet_zlaset.c +++ b/runtime/starpu/codelets/codelet_zlaset.c @@ -12,8 +12,6 @@ * @brief Chameleon zlaset StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Mathieu Faverge * @author Emmanuel Agullo @@ -26,56 +24,78 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zlaset_args_s { cham_uplo_t uplo; - int M; - int N; + int m; + int n; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t beta; CHAM_tile_t *tileA; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zlaset_cpu_func( void *descr[], void *cl_arg ) +{ + struct cl_zlaset_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta); - TCORE_zlaset(uplo, M, N, alpha, beta, tileA); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zlaset( clargs.uplo, clargs.m, clargs.n, clargs.alpha, clargs.beta, tileA ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlaset, cl_zlaset_cpu_func) +CODELETS_CPU( zlaset, cl_zlaset_cpu_func ) -void INSERT_TASK_zlaset(const RUNTIME_option_t *options, - cham_uplo_t uplo, int M, int N, - CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, - const CHAM_desc_t *A, int Am, int An) +void INSERT_TASK_zlaset( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, + CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, + const CHAM_desc_t *A, int Am, int An ) { + struct cl_zlaset_args_s clargs = { + .uplo = uplo, + .m = m, + .n = n, + .alpha = alpha, + .beta = beta, + .tileA = A->get_blktile( A, Am, An ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zlaset"; - struct starpu_codelet *codelet = &cl_zlaset; - void (*callback)(void*) = options->profiling ? cl_zlaset_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zlaset_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &M, sizeof(int), - STARPU_VALUE, &N, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + &cl_zlaset, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zlaset_args_s), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlaset", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); } diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index 0f3f1911b..9ff3fc6e9 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -12,8 +12,6 @@ * @brief Chameleon zlauum StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge @@ -27,55 +25,74 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +struct cl_zlauum_args_s { + cham_uplo_t uplo; + int n; + CHAM_tile_t *tileA; +}; + #if !defined(CHAMELEON_SIMULATION) -static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) +static void +cl_zlauum_cpu_func(void *descr[], void *cl_arg) { - cham_uplo_t uplo; - int N; + struct cl_zlauum_args_s clargs; CHAM_tile_t *tileA; + int info = 0; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &N); - TCORE_zlauum(uplo, N, tileA); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zlauum( clargs.uplo, clargs.n, tileA ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlauum, cl_zlauum_cpu_func) +CODELETS_CPU( zlauum, cl_zlauum_cpu_func ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ void INSERT_TASK_zlauum( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, const CHAM_desc_t *A, int Am, int An ) { - (void)nb; - struct starpu_codelet *codelet = &cl_zlauum; - void (*callback)(void*) = options->profiling ? cl_zlauum_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zlauum_args_s clargs = { + .uplo = uplo, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zlauum"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zlauum_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zlauum, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zlauum_args_s), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlauum", + STARPU_NAME, cl_name, #endif - 0); + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c index 87c071e44..4c04611cc 100644 --- a/runtime/starpu/codelets/codelet_zplghe.c +++ b/runtime/starpu/codelets/codelet_zplghe.c @@ -12,8 +12,6 @@ * @brief Chameleon zplghe StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Piotr Luszczek * @author Pierre Lemarinier * @author Mathieu Faverge @@ -27,11 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zplghe_args_s { double bump; int m; int n; @@ -40,47 +34,73 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) int m0; int n0; unsigned long long int seed; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zplghe_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed ); - TCORE_zplghe( bump, m, n, tileA, bigM, m0, n0, seed ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zplghe( clargs.bump, clargs.m, clargs.n, tileA, + clargs.bigM, clargs.m0, clargs.n0, clargs.seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zplghe, cl_zplghe_cpu_func) +CODELETS_CPU( zplghe, cl_zplghe_cpu_func ) void INSERT_TASK_zplghe( const RUNTIME_option_t *options, double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int bigM, int m0, int n0, unsigned long long int seed ) { - struct starpu_codelet *codelet = &cl_zplghe; - void (*callback)(void*) = options->profiling ? cl_zplghe_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zplghe_args_s clargs = { + .bump = bump, + .m = m, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .bigM = bigM, + .m0 = m0, + .n0 = n0, + .seed = seed, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zplghe"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zplghe_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &bump, sizeof(double), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &bigM, sizeof(int), - STARPU_VALUE, &m0, sizeof(int), - STARPU_VALUE, &n0, sizeof(int), - STARPU_VALUE, &seed, sizeof(unsigned long long int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zplghe, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zplghe_args_s), + STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zplghe", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + } diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c index 748e78d33..57aec1bf2 100644 --- a/runtime/starpu/codelets/codelet_zplgsy.c +++ b/runtime/starpu/codelets/codelet_zplgsy.c @@ -12,8 +12,6 @@ * @brief Chameleon zplgsy StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Piotr Luszczek * @author Pierre Lemarinier * @author Mathieu Faverge @@ -27,11 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zplgsy_args_s { CHAMELEON_Complex64_t bump; int m; int n; @@ -40,48 +34,73 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) int m0; int n0; unsigned long long int seed; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zplgsy_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed ); - TCORE_zplgsy( bump, m, n, tileA, bigM, m0, n0, seed ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zplgsy( clargs.bump, clargs.m, clargs.n, tileA, + clargs.bigM, clargs.m0, clargs.n0, clargs.seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zplgsy, cl_zplgsy_cpu_func) +CODELETS_CPU( zplgsy, cl_zplgsy_cpu_func ) void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, - CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, - int bigM, int m0, int n0, unsigned long long int seed ) + CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, + int bigM, int m0, int n0, unsigned long long int seed ) { + struct cl_zplgsy_args_s clargs = { + .bump = bump, + .m = m, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .bigM = bigM, + .m0 = m0, + .n0 = n0, + .seed = seed, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zplgsy"; - struct starpu_codelet *codelet = &cl_zplgsy; - void (*callback)(void*) = options->profiling ? cl_zplgsy_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zplgsy_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &bump, sizeof(CHAMELEON_Complex64_t), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &bigM, sizeof(int), - STARPU_VALUE, &m0, sizeof(int), - STARPU_VALUE, &n0, sizeof(int), - STARPU_VALUE, &seed, sizeof(unsigned long long int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zplgsy, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zplgsy_args_s), + STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zplgsy", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + } diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c index 1376e950c..a4eef0c8a 100644 --- a/runtime/starpu/codelets/codelet_zplrnt.c +++ b/runtime/starpu/codelets/codelet_zplrnt.c @@ -12,8 +12,6 @@ * @brief Chameleon zplrnt StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Piotr Luszczek * @author Pierre Lemarinier * @author Mathieu Faverge @@ -27,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zplrnt_args_s { int m; int n; CHAM_tile_t *tileA; @@ -37,47 +33,72 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) int m0; int n0; unsigned long long int seed; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zplrnt_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zplrnt_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &bigM, &m0, &n0, &seed ); - TCORE_zplrnt( m, n, tileA, bigM, m0, n0, seed ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zplrnt( clargs.m, clargs.n, tileA, + clargs.bigM, clargs.m0, clargs.n0, clargs.seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zplrnt, cl_zplrnt_cpu_func) +CODELETS_CPU( zplrnt, cl_zplrnt_cpu_func ) void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, int m, int n, const CHAM_desc_t *A, int Am, int An, int bigM, int m0, int n0, unsigned long long int seed ) { + struct cl_zplrnt_args_s clargs = { + .m = m, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .bigM = bigM, + .m0 = m0, + .n0 = n0, + .seed = seed, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zplrnt"; - struct starpu_codelet *codelet = &cl_zplrnt; - void (*callback)(void*) = options->profiling ? cl_zplrnt_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zplrnt_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &bigM, sizeof(int), - STARPU_VALUE, &m0, sizeof(int), - STARPU_VALUE, &n0, sizeof(int), - STARPU_VALUE, &seed, sizeof(unsigned long long int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zplrnt, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zplrnt_args_s), + STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zplrnt", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); } diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index 2337b00fd..93f4f640d 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -12,8 +12,6 @@ * @brief Chameleon zpotrf StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge @@ -27,24 +25,30 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zpotrf_args_s { cham_uplo_t uplo; int n; CHAM_tile_t *tileA; int iinfo; RUNTIME_sequence_t *sequence; RUNTIME_request_t *request; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zpotrf_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zpotrf_args_s clargs; + CHAM_tile_t *tileA; int info = 0; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &iinfo, &sequence, &request); - TCORE_zpotrf(uplo, n, tileA, &info); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zpotrf( clargs.uplo, clargs.n, tileA, &info ); - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info ); } } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -52,46 +56,54 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -#if defined(CHAMELEON_SIMULATION) && defined(CHAMELEON_SIMULATION_EXTENDED) -CODELETS( zpotrf, cl_zpotrf_cpu_func, cl_zpotrf_cuda_func, STARPU_CUDA_ASYNC ) -#else CODELETS_CPU( zpotrf, cl_zpotrf_cpu_func ) -#endif -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zpotrf(const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - int iinfo) +void INSERT_TASK_zpotrf( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + int iinfo ) { - (void)nb; - struct starpu_codelet *codelet = &cl_zpotrf; - void (*callback)(void*) = options->profiling ? cl_zpotrf_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zpotrf_args_s clargs = { + .uplo = uplo, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .iinfo = iinfo, + .sequence = options->sequence, + .request = options->request, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zpotrf"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zpotrf_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &iinfo, sizeof(int), - STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), - STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), - /* STARPU_SCRATCH, options->ws_worker, */ - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zpotrf, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zpotrf_args_s), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zpotrf", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 9552eee9c..0cba41cab 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -25,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zsyrk_args_s { cham_uplo_t uplo; cham_trans_t trans; int n; @@ -36,41 +34,46 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) CHAM_tile_t *tileA; CHAMELEON_Complex64_t beta; CHAM_tile_t *tileC; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zsyrk_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zsyrk_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileC = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); - TCORE_zsyrk(uplo, trans, - n, k, - alpha, tileA, - beta, tileC); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zsyrk( clargs.uplo, clargs.trans, clargs.n, clargs.k, + clargs.alpha, tileA, clargs.beta, tileC ); } -#ifdef CHAMELEON_USE_CUDA -static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) +#if defined(CHAMELEON_USE_CUDA) +static void +cl_zsyrk_cuda_func(void *descr[], void *cl_arg) { - cham_uplo_t uplo; - cham_trans_t trans; - int n; - int k; - cuDoubleComplex alpha; + struct cl_zsyrk_args_s clargs; CHAM_tile_t *tileA; - cuDoubleComplex beta; CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileC = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); + starpu_codelet_unpack_args( cl_arg, &clargs ); RUNTIME_getStream(stream); CUDA_zsyrk( - uplo, trans, n, k, - &alpha, tileA->mat, tileA->ld, - &beta, tileC->mat, tileC->ld, - stream); + clargs.uplo, clargs.trans, clargs.n, clargs.k, + (cuDoubleComplex*)&(clargs.alpha), + tileA->mat, tileA->ld, + (cuDoubleComplex*)&(clargs.beta), + tileC->mat, tileC->ld, + stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -78,57 +81,74 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS(zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC) +CODELETS( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) +void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { return INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn ); } - (void)nb; - struct starpu_codelet *codelet = &cl_zsyrk; - void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + struct cl_zsyrk_args_s clargs = { + .uplo = uplo, + .trans = trans, + .n = n, + .k = k, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .beta = beta, + .tileC = C->get_blktile( C, Cm, Cn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid, accessC; + char *cl_name = "zsyrk"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zsyrk_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Reduce the C access if needed */ + accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zsyrk, + + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zsyrk_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zsyrk", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index 689cb6254..2cbaa3aef 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -21,30 +21,38 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_ztradd_args_s { cham_uplo_t uplo; cham_trans_t trans; - int M; - int N; + int m; + int n; CHAMELEON_Complex64_t alpha; CHAM_tile_t *tileA; CHAMELEON_Complex64_t beta; CHAM_tile_t *tileB; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_ztradd_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_ztradd_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &beta); - TCORE_ztradd(uplo, trans, M, N, alpha, tileA, beta, tileB); - return; + + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_ztradd( clargs.uplo, clargs.trans, clargs.m, clargs.n, + clargs.alpha, tileA, clargs.beta, tileB ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(ztradd, cl_ztradd_cpu_func) +CODELETS_CPU( ztradd, cl_ztradd_cpu_func ) void INSERT_TASK_ztradd( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, @@ -56,34 +64,54 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, beta, B, Bm, Bn ); } - struct starpu_codelet *codelet = &cl_ztradd; - void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW; + struct cl_ztradd_args_s clargs = { + .uplo = uplo, + .trans = trans, + .m = m, + .n = n, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .beta = beta, + .tileB = B->get_blktile( B, Bm, Bn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid, accessB; + char *cl_name = "ztradd"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_ztradd_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Reduce the B access if needed */ + accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_ztradd, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_ztradd_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztradd", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index 354e5f454..099e5ca8a 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -25,51 +25,53 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_ztrmm_args_s { cham_side_t side; cham_uplo_t uplo; cham_trans_t transA; cham_diag_t diag; - int M; - int N; + int m; + int n; CHAMELEON_Complex64_t alpha; CHAM_tile_t *tileA; CHAM_tile_t *tileB; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_ztrmm_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_ztrmm_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha); - TCORE_ztrmm(side, uplo, - transA, diag, - M, N, - alpha, tileA, - tileB); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_ztrmm( clargs.side, clargs.uplo, clargs.transA, clargs.diag, + clargs.m, clargs.n, clargs.alpha, tileA, tileB ); } #ifdef CHAMELEON_USE_CUDA -static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) +static void +cl_ztrmm_cuda_func(void *descr[], void *cl_arg) { - cham_side_t side; - cham_uplo_t uplo; - cham_trans_t transA; - cham_diag_t diag; - int M; - int N; - cuDoubleComplex alpha; + struct cl_ztrmm_args_s clargs; CHAM_tile_t *tileA; CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha); + + starpu_codelet_unpack_args( cl_arg, &clargs ); RUNTIME_getStream(stream); CUDA_ztrmm( - side, uplo, transA, diag, M, N, &alpha, + clargs.side, clargs.uplo, clargs.transA, clargs.diag, + clargs.m, clargs.n, + (cuDoubleComplex*)&(clargs.alpha), tileA->mat, tileA->ld, tileB->mat, tileB->ld, stream ); @@ -80,58 +82,66 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ - /* * Codelet definition */ -CODELETS(ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS( ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { - if ( alpha == 0. ) { - return INSERT_TASK_zlaset( options, ChamUpperLower, m, n, - alpha, alpha, B, Bm, Bn ); - } - - (void)nb; - struct starpu_codelet *codelet = &cl_ztrmm; - void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_ztrmm_args_s clargs = { + .side = side, + .uplo = uplo, + .transA = transA, + .diag = diag, + .m = m, + .n = n, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .tileB = B->get_blktile( B, Bm, Bn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "ztrmm"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_ztrmm_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_ztrmm, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_ztrmm_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrmm", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 694f0a99b..e7ad01a43 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -12,8 +12,6 @@ * @brief Chameleon ztrsm StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge @@ -27,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_ztrsm_args_s { cham_side_t side; cham_uplo_t uplo; cham_trans_t transA; @@ -39,38 +35,43 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) CHAMELEON_Complex64_t alpha; CHAM_tile_t *tileA; CHAM_tile_t *tileB; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_ztrsm_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_ztrsm_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha); - TCORE_ztrsm(side, uplo, - transA, diag, - m, n, - alpha, tileA, - tileB); + + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_ztrsm( clargs.side, clargs.uplo, clargs.transA, clargs.diag, + clargs.m, clargs.n, clargs.alpha, tileA, tileB ); } #ifdef CHAMELEON_USE_CUDA -static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) +static void +cl_ztrsm_cuda_func(void *descr[], void *cl_arg) { - cham_side_t side; - cham_uplo_t uplo; - cham_trans_t transA; - cham_diag_t diag; - int m; - int n; - cuDoubleComplex alpha; + struct cl_ztrsm_args_s clargs; CHAM_tile_t *tileA; CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha); + + starpu_codelet_unpack_args( cl_arg, &clargs ); RUNTIME_getStream(stream); CUDA_ztrsm( - side, uplo, transA, diag, m, n, &alpha, + clargs.side, clargs.uplo, clargs.transA, clargs.diag, + clargs.m, clargs.n, + (cuDoubleComplex*)&(clargs.alpha), tileA->mat, tileA->ld, tileB->mat, tileB->ld, stream ); @@ -81,52 +82,66 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS(ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { - (void)nb; - struct starpu_codelet *codelet = &cl_ztrsm; - void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_ztrsm_args_s clargs = { + .side = side, + .uplo = uplo, + .transA = transA, + .diag = diag, + .m = m, + .n = n, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .tileB = B->get_blktile( B, Bm, Bn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "ztrsm"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_ztrsm_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_ztrsm, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_ztrsm_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrsm", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index 01b14eef4..9b2b0e5e3 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -12,8 +12,6 @@ * @brief Chameleon ztrtri StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge @@ -27,24 +25,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_ztrtri_args_s { cham_uplo_t uplo; cham_diag_t diag; - int N; + int n; CHAM_tile_t *tileA; int iinfo; RUNTIME_sequence_t *sequence; RUNTIME_request_t *request; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_ztrtri_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_ztrtri_args_s clargs; + CHAM_tile_t *tileA; int info = 0; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &iinfo, &sequence, &request); - TCORE_ztrtri(uplo, diag, N, tileA, &info); - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_ztrtri( clargs.uplo, clargs.diag, clargs.n, tileA, &info ); + + if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info ); } } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -52,43 +57,55 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztrtri, cl_ztrtri_cpu_func) +CODELETS_CPU( ztrtri, cl_ztrtri_cpu_func ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_diag_t diag, - int n, int nb, + cham_uplo_t uplo, cham_diag_t diag, int n, int nb, const CHAM_desc_t *A, int Am, int An, int iinfo ) { - (void)nb; - struct starpu_codelet *codelet = &cl_ztrtri; - void (*callback)(void*) = options->profiling ? cl_ztrtri_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_ztrtri_args_s clargs = { + .uplo = uplo, + .diag = diag, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .iinfo = iinfo, + .sequence = options->sequence, + .request = options->request, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "ztrtri"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_ztrtri_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &iinfo, sizeof(int), - STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), - STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_ztrtri, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_ztrtri_args_s), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrtri", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } -- GitLab