diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 9a3b9ea3f127a9f7656afa8e27e08ecc9c976aab..0567065f3ceb12f9abf475429111f2df0cbae312 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -25,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zgemm_args_s { cham_trans_t transA; cham_trans_t transB; int m; @@ -38,47 +36,54 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) CHAM_tile_t *tileB; CHAMELEON_Complex64_t beta; CHAM_tile_t *tileC; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zgemm_cpu_func( void *descr[], void *cl_arg ) +{ + struct cl_zgemm_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); tileC = cti_interface_get(descr[2]); - starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta); - TCORE_zgemm( transA, transB, - m, n, k, - alpha, tileA, tileB, - beta, tileC ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zgemm( clargs.transA, clargs.transB, + clargs.m, clargs.n, clargs.k, + clargs.alpha, tileA, tileB, + clargs.beta, tileC ); } #ifdef CHAMELEON_USE_CUDA -static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) +static void +cl_zgemm_cuda_func( void *descr[], void *_cl_arg ) { - cham_trans_t transA; - cham_trans_t transB; - int m; - int n; - int k; - cuDoubleComplex alpha; + struct cl_zgemm_args_s clargs; CHAM_tile_t *tileA; CHAM_tile_t *tileB; - cuDoubleComplex beta; CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); tileC = cti_interface_get(descr[2]); - starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta); + starpu_codelet_unpack_args( _cl_arg, &clargs ); RUNTIME_getStream( stream ); CUDA_zgemm( - transA, transB, - m, n, k, - &alpha, tileA->mat, tileA->ld, - tileB->mat, tileB->ld, - &beta, tileC->mat, tileC->ld, - stream); + clargs.transA, clargs.transB, + clargs.m, clargs.n, clargs.k, + (cuDoubleComplex*)&(clargs.alpha), + tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + (cuDoubleComplex*)&(clargs.beta), + tileC->mat, tileC->ld, + stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -92,56 +97,72 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) - -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zgemm(const RUNTIME_option_t *options, - cham_trans_t transA, cham_trans_t transB, - int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) +CODELETS( zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC ) + +void INSERT_TASK_zgemm( const RUNTIME_option_t *options, + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, C, Cm, Cn ); } - (void)nb; - struct starpu_codelet *codelet = &cl_zgemm; - void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; - + struct cl_zgemm_args_s clargs = { + .transA = transA, + .transB = transB, + .m = m, + .n = n, + .k = k, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .tileB = B->get_blktile( B, Bm, Bn ), + .beta = beta, + .tileC = C->get_blktile( C, Cm, Cn ) + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid, accessC; + char *cl_name = "zgemm"; + + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(B, Bm, Bn); CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zgemm_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Reduce the C access if needed */ + accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &transB, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zgemm, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zgemm_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zgemm", + STARPU_NAME, cl_name, #endif - 0); + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 1b9d39818e5c3e45e0ae02df5dace718f160a0e7..b224473cbfa7de6d234012c033178c827bf1eeff 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -25,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zherk_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zherk_args_s { cham_uplo_t uplo; cham_trans_t trans; int n; @@ -36,41 +34,46 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg) CHAM_tile_t *tileA; double beta; CHAM_tile_t *tileC; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zherk_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zherk_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileC = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); - TCORE_zherk(uplo, trans, - n, k, - alpha, tileA, - beta, tileC); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zherk( clargs.uplo, clargs.trans, clargs.n, clargs.k, + clargs.alpha, tileA, clargs.beta, tileC ); } -#ifdef CHAMELEON_USE_CUDA -static void cl_zherk_cuda_func(void *descr[], void *cl_arg) +#if defined(CHAMELEON_USE_CUDA) +static void +cl_zherk_cuda_func(void *descr[], void *cl_arg) { - cham_uplo_t uplo; - cham_trans_t trans; - int n; - int k; - double alpha; + struct cl_zherk_args_s clargs; CHAM_tile_t *tileA; - double beta; CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileC = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); + starpu_codelet_unpack_args( cl_arg, &clargs ); RUNTIME_getStream(stream); CUDA_zherk( - uplo, trans, n, k, - &alpha, tileA->mat, tileA->ld, - &beta, tileC->mat, tileC->ld, - stream); + clargs.uplo, clargs.trans, clargs.n, clargs.k, + (cuDoubleComplex*)&(clargs.alpha), + tileA->mat, tileA->ld, + (cuDoubleComplex*)&(clargs.beta), + tileC->mat, tileC->ld, + stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -78,52 +81,73 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS(zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC) +CODELETS( zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC ) -void INSERT_TASK_zherk(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - double alpha, const CHAM_desc_t *A, int Am, int An, - double beta, const CHAM_desc_t *C, int Cm, int Cn) +void INSERT_TASK_zherk( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + double alpha, const CHAM_desc_t *A, int Am, int An, + double beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { return INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn ); } - (void)nb; - struct starpu_codelet *codelet = &cl_zherk; - void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + struct cl_zherk_args_s clargs = { + .uplo = uplo, + .trans = trans, + .n = n, + .k = k, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .beta = beta, + .tileC = C->get_blktile( C, Cm, Cn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid, accessC; + char *cl_name = "zherk"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zherk_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Reduce the C access if needed */ + accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(double), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &beta, sizeof(double), - accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zherk, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zherk_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zherk", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index bec2d92288ad496a8e34d746ea7513f63c6c4a0c..3fd038bded4bd94643ae36e354a40453d4511747 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -12,8 +12,6 @@ * @brief Chameleon zlacpy StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge @@ -27,70 +25,93 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zlacpy_args_s { cham_uplo_t uplo; - int M; - int N; + int m; + int n; int displA; int displB; CHAM_tile_t *tileA; CHAM_tile_t *tileB; - CHAMELEON_Complex64_t *A; - CHAMELEON_Complex64_t *B; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zlacpy_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zlacpy_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &displB); - - assert( tileA->format & CHAMELEON_TILE_FULLRANK ); - assert( tileB->format & CHAMELEON_TILE_FULLRANK ); - - A = tileA->mat; - B = tileB->mat; - CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + assert( clargs.displA == 0 ); + assert( clargs.displB == 0 ); + /* A = tileA->mat; */ + /* B = tileB->mat; */ + /* CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); */ + TCORE_zlacpy( clargs.uplo, clargs.m, clargs.n, tileA, tileB ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlacpy, cl_zlacpy_cpu_func) +CODELETS_CPU( zlacpy, cl_zlacpy_cpu_func ) void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, int displA, const CHAM_desc_t *A, int Am, int An, int displB, const CHAM_desc_t *B, int Bm, int Bn ) { - (void)nb; - struct starpu_codelet *codelet = &cl_zlacpy; - void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zlacpy_args_s clargs = { + .uplo = uplo, + .m = m, + .n = n, + .displA = displA, + .displB = displB, + .tileA = A->get_blktile( A, Am, An ), + .tileB = B->get_blktile( B, Bm, Bn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zlacpy"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R( A, Am, An ); - CHAMELEON_ACCESS_W( B, Bm, Bn ); + CHAMELEON_ACCESS_R(A, Am, An); + CHAMELEON_ACCESS_W(B, Bm, Bn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zlacpy_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(cham_uplo_t), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &displA, sizeof(int), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &displB, sizeof(int), - STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zlacpy, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zlacpy_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlacpy", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index bd6823a08d8c3608141aeb9d145ffc671b6afad2..eb03a8685b19e0df448ccac2b2461de6e3250a54 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -21,33 +21,38 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zlascal_args_s { cham_uplo_t uplo; - int M; - int N; + int m; + int n; CHAMELEON_Complex64_t alpha; CHAM_tile_t *tileA; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zlascal_cpu_func( void *descr[], void *cl_arg ) +{ + struct cl_zlascal_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha); - TCORE_zlascal(uplo, M, N, alpha, tileA); - return; + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zlascal( clargs.uplo, clargs.m, clargs.n, clargs.alpha, tileA ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlascal, cl_zlascal_cpu_func) +CODELETS_CPU( zlascal, cl_zlascal_cpu_func ) void INSERT_TASK_zlascal( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An) + const CHAM_desc_t *A, int Am, int An ) { if ( alpha == 0. ) { return INSERT_TASK_zlaset( options, uplo, m, n, @@ -57,28 +62,46 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options, return; } - (void)nb; - struct starpu_codelet *codelet = &cl_zlascal; - void (*callback)(void*) = options->profiling ? cl_zlascal_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zlascal_args_s clargs = { + .uplo = uplo, + .m = m, + .n = n, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zlascal"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zlascal_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zlascal, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zlascal_args_s), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlascal", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c index c7661bdb8278687f6d4cea4496eedd504d0df897..0636686e3be44a17947b741109eb1d9450fdf841 100644 --- a/runtime/starpu/codelets/codelet_zlaset.c +++ b/runtime/starpu/codelets/codelet_zlaset.c @@ -12,8 +12,6 @@ * @brief Chameleon zlaset StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Mathieu Faverge * @author Emmanuel Agullo @@ -26,56 +24,78 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zlaset_args_s { cham_uplo_t uplo; - int M; - int N; + int m; + int n; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t beta; CHAM_tile_t *tileA; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zlaset_cpu_func( void *descr[], void *cl_arg ) +{ + struct cl_zlaset_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta); - TCORE_zlaset(uplo, M, N, alpha, beta, tileA); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zlaset( clargs.uplo, clargs.m, clargs.n, clargs.alpha, clargs.beta, tileA ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlaset, cl_zlaset_cpu_func) +CODELETS_CPU( zlaset, cl_zlaset_cpu_func ) -void INSERT_TASK_zlaset(const RUNTIME_option_t *options, - cham_uplo_t uplo, int M, int N, - CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, - const CHAM_desc_t *A, int Am, int An) +void INSERT_TASK_zlaset( const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, + CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, + const CHAM_desc_t *A, int Am, int An ) { + struct cl_zlaset_args_s clargs = { + .uplo = uplo, + .m = m, + .n = n, + .alpha = alpha, + .beta = beta, + .tileA = A->get_blktile( A, Am, An ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zlaset"; - struct starpu_codelet *codelet = &cl_zlaset; - void (*callback)(void*) = options->profiling ? cl_zlaset_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zlaset_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &M, sizeof(int), - STARPU_VALUE, &N, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), + &cl_zlaset, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zlaset_args_s), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlaset", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); } diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index 0f3f1911bec1e1f83b684c398b82d67b0ff3fbcb..9ff3fc6e92bfa7fb42cb52ce9f83fa2b8b262d6e 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -12,8 +12,6 @@ * @brief Chameleon zlauum StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge @@ -27,55 +25,74 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" +struct cl_zlauum_args_s { + cham_uplo_t uplo; + int n; + CHAM_tile_t *tileA; +}; + #if !defined(CHAMELEON_SIMULATION) -static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) +static void +cl_zlauum_cpu_func(void *descr[], void *cl_arg) { - cham_uplo_t uplo; - int N; + struct cl_zlauum_args_s clargs; CHAM_tile_t *tileA; + int info = 0; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &N); - TCORE_zlauum(uplo, N, tileA); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zlauum( clargs.uplo, clargs.n, tileA ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zlauum, cl_zlauum_cpu_func) +CODELETS_CPU( zlauum, cl_zlauum_cpu_func ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ void INSERT_TASK_zlauum( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, const CHAM_desc_t *A, int Am, int An ) { - (void)nb; - struct starpu_codelet *codelet = &cl_zlauum; - void (*callback)(void*) = options->profiling ? cl_zlauum_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zlauum_args_s clargs = { + .uplo = uplo, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zlauum"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zlauum_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zlauum, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zlauum_args_s), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlauum", + STARPU_NAME, cl_name, #endif - 0); + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c index 87c071e44a71d4f48e3226b14e7e8c81d0fc13be..4c04611ccbbf97a8bffe43516bcf7979189551d2 100644 --- a/runtime/starpu/codelets/codelet_zplghe.c +++ b/runtime/starpu/codelets/codelet_zplghe.c @@ -12,8 +12,6 @@ * @brief Chameleon zplghe StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Piotr Luszczek * @author Pierre Lemarinier * @author Mathieu Faverge @@ -27,11 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zplghe_args_s { double bump; int m; int n; @@ -40,47 +34,73 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) int m0; int n0; unsigned long long int seed; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zplghe_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed ); - TCORE_zplghe( bump, m, n, tileA, bigM, m0, n0, seed ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zplghe( clargs.bump, clargs.m, clargs.n, tileA, + clargs.bigM, clargs.m0, clargs.n0, clargs.seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zplghe, cl_zplghe_cpu_func) +CODELETS_CPU( zplghe, cl_zplghe_cpu_func ) void INSERT_TASK_zplghe( const RUNTIME_option_t *options, double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int bigM, int m0, int n0, unsigned long long int seed ) { - struct starpu_codelet *codelet = &cl_zplghe; - void (*callback)(void*) = options->profiling ? cl_zplghe_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zplghe_args_s clargs = { + .bump = bump, + .m = m, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .bigM = bigM, + .m0 = m0, + .n0 = n0, + .seed = seed, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zplghe"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zplghe_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &bump, sizeof(double), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &bigM, sizeof(int), - STARPU_VALUE, &m0, sizeof(int), - STARPU_VALUE, &n0, sizeof(int), - STARPU_VALUE, &seed, sizeof(unsigned long long int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zplghe, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zplghe_args_s), + STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zplghe", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + } diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c index 748e78d3310277cbcd0da4d30c9d563afb1a66d1..57aec1bf28967835817f9ddbef05800b57b93aa8 100644 --- a/runtime/starpu/codelets/codelet_zplgsy.c +++ b/runtime/starpu/codelets/codelet_zplgsy.c @@ -12,8 +12,6 @@ * @brief Chameleon zplgsy StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Piotr Luszczek * @author Pierre Lemarinier * @author Mathieu Faverge @@ -27,11 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -/* cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */ - -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zplgsy_args_s { CHAMELEON_Complex64_t bump; int m; int n; @@ -40,48 +34,73 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) int m0; int n0; unsigned long long int seed; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zplgsy_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed ); - TCORE_zplgsy( bump, m, n, tileA, bigM, m0, n0, seed ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zplgsy( clargs.bump, clargs.m, clargs.n, tileA, + clargs.bigM, clargs.m0, clargs.n0, clargs.seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zplgsy, cl_zplgsy_cpu_func) +CODELETS_CPU( zplgsy, cl_zplgsy_cpu_func ) void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, - CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, - int bigM, int m0, int n0, unsigned long long int seed ) + CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, + int bigM, int m0, int n0, unsigned long long int seed ) { + struct cl_zplgsy_args_s clargs = { + .bump = bump, + .m = m, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .bigM = bigM, + .m0 = m0, + .n0 = n0, + .seed = seed, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zplgsy"; - struct starpu_codelet *codelet = &cl_zplgsy; - void (*callback)(void*) = options->profiling ? cl_zplgsy_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zplgsy_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &bump, sizeof(CHAMELEON_Complex64_t), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &bigM, sizeof(int), - STARPU_VALUE, &m0, sizeof(int), - STARPU_VALUE, &n0, sizeof(int), - STARPU_VALUE, &seed, sizeof(unsigned long long int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zplgsy, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zplgsy_args_s), + STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zplgsy", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + } diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c index 1376e950c222a2c130b9ac9745c059d631face37..a4eef0c8a1df265dc9afd8d9c896811cf4634e52 100644 --- a/runtime/starpu/codelets/codelet_zplrnt.c +++ b/runtime/starpu/codelets/codelet_zplrnt.c @@ -12,8 +12,6 @@ * @brief Chameleon zplrnt StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Piotr Luszczek * @author Pierre Lemarinier * @author Mathieu Faverge @@ -27,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zplrnt_args_s { int m; int n; CHAM_tile_t *tileA; @@ -37,47 +33,72 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) int m0; int n0; unsigned long long int seed; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zplrnt_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zplrnt_args_s clargs; + CHAM_tile_t *tileA; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &bigM, &m0, &n0, &seed ); - TCORE_zplrnt( m, n, tileA, bigM, m0, n0, seed ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zplrnt( clargs.m, clargs.n, tileA, + clargs.bigM, clargs.m0, clargs.n0, clargs.seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(zplrnt, cl_zplrnt_cpu_func) +CODELETS_CPU( zplrnt, cl_zplrnt_cpu_func ) void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, int m, int n, const CHAM_desc_t *A, int Am, int An, int bigM, int m0, int n0, unsigned long long int seed ) { + struct cl_zplrnt_args_s clargs = { + .m = m, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .bigM = bigM, + .m0 = m0, + .n0 = n0, + .seed = seed, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zplrnt"; - struct starpu_codelet *codelet = &cl_zplrnt; - void (*callback)(void*) = options->profiling ? cl_zplrnt_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zplrnt_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &bigM, sizeof(int), - STARPU_VALUE, &m0, sizeof(int), - STARPU_VALUE, &n0, sizeof(int), - STARPU_VALUE, &seed, sizeof(unsigned long long int), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zplrnt, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zplrnt_args_s), + STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zplrnt", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); } diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index 2337b00fde7207c893e935e250deb2d1cafc0cb6..93f4f640d72b22660bd18dcf7067ddff008a7dec 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -12,8 +12,6 @@ * @brief Chameleon zpotrf StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge @@ -27,24 +25,30 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zpotrf_args_s { cham_uplo_t uplo; int n; CHAM_tile_t *tileA; int iinfo; RUNTIME_sequence_t *sequence; RUNTIME_request_t *request; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zpotrf_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zpotrf_args_s clargs; + CHAM_tile_t *tileA; int info = 0; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &iinfo, &sequence, &request); - TCORE_zpotrf(uplo, n, tileA, &info); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zpotrf( clargs.uplo, clargs.n, tileA, &info ); - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info ); } } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -52,46 +56,54 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -#if defined(CHAMELEON_SIMULATION) && defined(CHAMELEON_SIMULATION_EXTENDED) -CODELETS( zpotrf, cl_zpotrf_cpu_func, cl_zpotrf_cuda_func, STARPU_CUDA_ASYNC ) -#else CODELETS_CPU( zpotrf, cl_zpotrf_cpu_func ) -#endif -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zpotrf(const RUNTIME_option_t *options, - cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - int iinfo) +void INSERT_TASK_zpotrf( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + int iinfo ) { - (void)nb; - struct starpu_codelet *codelet = &cl_zpotrf; - void (*callback)(void*) = options->profiling ? cl_zpotrf_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_zpotrf_args_s clargs = { + .uplo = uplo, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .iinfo = iinfo, + .sequence = options->sequence, + .request = options->request, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "zpotrf"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zpotrf_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &iinfo, sizeof(int), - STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), - STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), - /* STARPU_SCRATCH, options->ws_worker, */ - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zpotrf, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zpotrf_args_s), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zpotrf", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 9552eee9ca40f3982c39b2140e9d5a9a2fe1b7df..0cba41cab3ab660e5dada013b4614cb179059b34 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -25,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_zsyrk_args_s { cham_uplo_t uplo; cham_trans_t trans; int n; @@ -36,41 +34,46 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) CHAM_tile_t *tileA; CHAMELEON_Complex64_t beta; CHAM_tile_t *tileC; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_zsyrk_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_zsyrk_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileC = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); - TCORE_zsyrk(uplo, trans, - n, k, - alpha, tileA, - beta, tileC); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_zsyrk( clargs.uplo, clargs.trans, clargs.n, clargs.k, + clargs.alpha, tileA, clargs.beta, tileC ); } -#ifdef CHAMELEON_USE_CUDA -static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) +#if defined(CHAMELEON_USE_CUDA) +static void +cl_zsyrk_cuda_func(void *descr[], void *cl_arg) { - cham_uplo_t uplo; - cham_trans_t trans; - int n; - int k; - cuDoubleComplex alpha; + struct cl_zsyrk_args_s clargs; CHAM_tile_t *tileA; - cuDoubleComplex beta; CHAM_tile_t *tileC; tileA = cti_interface_get(descr[0]); tileC = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); + starpu_codelet_unpack_args( cl_arg, &clargs ); RUNTIME_getStream(stream); CUDA_zsyrk( - uplo, trans, n, k, - &alpha, tileA->mat, tileA->ld, - &beta, tileC->mat, tileC->ld, - stream); + clargs.uplo, clargs.trans, clargs.n, clargs.k, + (cuDoubleComplex*)&(clargs.alpha), + tileA->mat, tileA->ld, + (cuDoubleComplex*)&(clargs.beta), + tileC->mat, tileC->ld, + stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -78,57 +81,74 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS(zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC) +CODELETS( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) +void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { return INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn ); } - (void)nb; - struct starpu_codelet *codelet = &cl_zsyrk; - void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + struct cl_zsyrk_args_s clargs = { + .uplo = uplo, + .trans = trans, + .n = n, + .k = k, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .beta = beta, + .tileC = C->get_blktile( C, Cm, Cn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid, accessC; + char *cl_name = "zsyrk"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_zsyrk_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Reduce the C access if needed */ + accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &k, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_zsyrk, + + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_zsyrk_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zsyrk", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index 689cb6254ed2b8956505108665977176761c8278..2cbaa3aeff6fdc406006f8bfbaeccce107267dd8 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -21,30 +21,38 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_ztradd_args_s { cham_uplo_t uplo; cham_trans_t trans; - int M; - int N; + int m; + int n; CHAMELEON_Complex64_t alpha; CHAM_tile_t *tileA; CHAMELEON_Complex64_t beta; CHAM_tile_t *tileB; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_ztradd_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_ztradd_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &beta); - TCORE_ztradd(uplo, trans, M, N, alpha, tileA, beta, tileB); - return; + + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_ztradd( clargs.uplo, clargs.trans, clargs.m, clargs.n, + clargs.alpha, tileA, clargs.beta, tileB ); } #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS_CPU(ztradd, cl_ztradd_cpu_func) +CODELETS_CPU( ztradd, cl_ztradd_cpu_func ) void INSERT_TASK_ztradd( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, @@ -56,34 +64,54 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, beta, B, Bm, Bn ); } - struct starpu_codelet *codelet = &cl_ztradd; - void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW; + struct cl_ztradd_args_s clargs = { + .uplo = uplo, + .trans = trans, + .m = m, + .n = n, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .beta = beta, + .tileB = B->get_blktile( B, Bm, Bn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid, accessB; + char *cl_name = "ztradd"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_ztradd_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Reduce the B access if needed */ + accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &trans, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_ztradd, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_ztradd_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztradd", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index 354e5f454d84d9268ae4df9eed5791a231d0c942..099e5ca8a987135094c2c675d3756f7328c94df1 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -25,51 +25,53 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_ztrmm_args_s { cham_side_t side; cham_uplo_t uplo; cham_trans_t transA; cham_diag_t diag; - int M; - int N; + int m; + int n; CHAMELEON_Complex64_t alpha; CHAM_tile_t *tileA; CHAM_tile_t *tileB; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_ztrmm_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_ztrmm_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha); - TCORE_ztrmm(side, uplo, - transA, diag, - M, N, - alpha, tileA, - tileB); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_ztrmm( clargs.side, clargs.uplo, clargs.transA, clargs.diag, + clargs.m, clargs.n, clargs.alpha, tileA, tileB ); } #ifdef CHAMELEON_USE_CUDA -static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) +static void +cl_ztrmm_cuda_func(void *descr[], void *cl_arg) { - cham_side_t side; - cham_uplo_t uplo; - cham_trans_t transA; - cham_diag_t diag; - int M; - int N; - cuDoubleComplex alpha; + struct cl_ztrmm_args_s clargs; CHAM_tile_t *tileA; CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha); + + starpu_codelet_unpack_args( cl_arg, &clargs ); RUNTIME_getStream(stream); CUDA_ztrmm( - side, uplo, transA, diag, M, N, &alpha, + clargs.side, clargs.uplo, clargs.transA, clargs.diag, + clargs.m, clargs.n, + (cuDoubleComplex*)&(clargs.alpha), tileA->mat, tileA->ld, tileB->mat, tileB->ld, stream ); @@ -80,58 +82,66 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ - /* * Codelet definition */ -CODELETS(ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS( ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { - if ( alpha == 0. ) { - return INSERT_TASK_zlaset( options, ChamUpperLower, m, n, - alpha, alpha, B, Bm, Bn ); - } - - (void)nb; - struct starpu_codelet *codelet = &cl_ztrmm; - void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_ztrmm_args_s clargs = { + .side = side, + .uplo = uplo, + .transA = transA, + .diag = diag, + .m = m, + .n = n, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .tileB = B->get_blktile( B, Bm, Bn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "ztrmm"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_ztrmm_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_ztrmm, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_ztrmm_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrmm", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 694f0a99bbd5b7638a9c81829e740d402d92375a..e7ad01a43e6745bc49ecaee676d74cb3f6180f49 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -12,8 +12,6 @@ * @brief Chameleon ztrsm StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Hatem Ltaief * @author Jakub Kurzak * @author Mathieu Faverge @@ -27,9 +25,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_ztrsm_args_s { cham_side_t side; cham_uplo_t uplo; cham_trans_t transA; @@ -39,38 +35,43 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) CHAMELEON_Complex64_t alpha; CHAM_tile_t *tileA; CHAM_tile_t *tileB; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_ztrsm_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_ztrsm_args_s clargs; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha); - TCORE_ztrsm(side, uplo, - transA, diag, - m, n, - alpha, tileA, - tileB); + + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_ztrsm( clargs.side, clargs.uplo, clargs.transA, clargs.diag, + clargs.m, clargs.n, clargs.alpha, tileA, tileB ); } #ifdef CHAMELEON_USE_CUDA -static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) +static void +cl_ztrsm_cuda_func(void *descr[], void *cl_arg) { - cham_side_t side; - cham_uplo_t uplo; - cham_trans_t transA; - cham_diag_t diag; - int m; - int n; - cuDoubleComplex alpha; + struct cl_ztrsm_args_s clargs; CHAM_tile_t *tileA; CHAM_tile_t *tileB; tileA = cti_interface_get(descr[0]); tileB = cti_interface_get(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha); + + starpu_codelet_unpack_args( cl_arg, &clargs ); RUNTIME_getStream(stream); CUDA_ztrsm( - side, uplo, transA, diag, m, n, &alpha, + clargs.side, clargs.uplo, clargs.transA, clargs.diag, + clargs.m, clargs.n, + (cuDoubleComplex*)&(clargs.alpha), tileA->mat, tileA->ld, tileB->mat, tileB->ld, stream ); @@ -81,52 +82,66 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) return; } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS(ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ -void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { - (void)nb; - struct starpu_codelet *codelet = &cl_ztrsm; - void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_ztrsm_args_s clargs = { + .side = side, + .uplo = uplo, + .transA = transA, + .diag = diag, + .m = m, + .n = n, + .alpha = alpha, + .tileA = A->get_blktile( A, Am, An ), + .tileB = B->get_blktile( B, Bm, Bn ), + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "ztrsm"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_ztrsm_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &side, sizeof(int), - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &transA, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_ztrsm, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_ztrsm_args_s), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrsm", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index 01b14eef48612375cd0042ddda0f3da191163e91..9b2b0e5e3ded41d29fbfb272d95eada319dade35 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -12,8 +12,6 @@ * @brief Chameleon ztrtri StarPU codelet * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge @@ -27,24 +25,31 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -#if !defined(CHAMELEON_SIMULATION) -static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) -{ +struct cl_ztrtri_args_s { cham_uplo_t uplo; cham_diag_t diag; - int N; + int n; CHAM_tile_t *tileA; int iinfo; RUNTIME_sequence_t *sequence; RUNTIME_request_t *request; +}; + +#if !defined(CHAMELEON_SIMULATION) +static void +cl_ztrtri_cpu_func(void *descr[], void *cl_arg) +{ + struct cl_ztrtri_args_s clargs; + CHAM_tile_t *tileA; int info = 0; tileA = cti_interface_get(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &iinfo, &sequence, &request); - TCORE_ztrtri(uplo, diag, N, tileA, &info); - if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { - RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); + starpu_codelet_unpack_args( cl_arg, &clargs ); + TCORE_ztrtri( clargs.uplo, clargs.diag, clargs.n, tileA, &info ); + + if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { + RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info ); } } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -52,43 +57,55 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztrtri, cl_ztrtri_cpu_func) +CODELETS_CPU( ztrtri, cl_ztrtri_cpu_func ) -/** - * - * @ingroup INSERT_TASK_Complex64_t - * - */ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, - cham_uplo_t uplo, cham_diag_t diag, - int n, int nb, + cham_uplo_t uplo, cham_diag_t diag, int n, int nb, const CHAM_desc_t *A, int Am, int An, int iinfo ) { - (void)nb; - struct starpu_codelet *codelet = &cl_ztrtri; - void (*callback)(void*) = options->profiling ? cl_ztrtri_callback : NULL; - starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); - int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + struct cl_ztrtri_args_s clargs = { + .uplo = uplo, + .diag = diag, + .n = n, + .tileA = A->get_blktile( A, Am, An ), + .iinfo = iinfo, + .sequence = options->sequence, + .request = options->request, + }; + void (*callback)(void*); + RUNTIME_request_t *request = options->request; + starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); + int workerid; + char *cl_name = "ztrtri"; + /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_END_ACCESS_DECLARATION; + /* Callback fro profiling information */ + callback = options->profiling ? cl_ztrtri_callback : NULL; + + /* Fix the worker id */ + workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + + /* Insert the task */ rt_starpu_insert_task( - codelet, - STARPU_VALUE, &uplo, sizeof(int), - STARPU_VALUE, &diag, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &iinfo, sizeof(int), - STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), - STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), - STARPU_PRIORITY, options->priority, - STARPU_CALLBACK, callback, + &cl_ztrtri, + /* Task codelet arguments */ + STARPU_VALUE, &clargs, sizeof(struct cl_ztrtri_args_s), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + + /* Common task arguments */ + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "ztrtri", + STARPU_NAME, cl_name, #endif - 0); + + 0 ); + + (void)nb; }