/**
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 * @author Hatem Ltaief
 * @author Jakub Kurzak
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @author Gwenole Lucas
 * @date 2024-10-18
 * @precisions normal z -> c d s
 *
 */
#include "chameleon_starpu_internal.h"
/**
 * Scalar arguments of one zsyrk task.
 *
 * INSERT_TASK_zsyrk() heap-allocates one instance per executed task and
 * fills every field from its own parameters of the same name; the CPU,
 * CUDA and HIP codelet functions receive it back through cl_arg and
 * forward the fields, in declaration order, to TCORE_zsyrk / CUDA_zsyrk /
 * HIP_zsyrk (alpha is passed before the A tile, beta before the C tile).
 *
 * NOTE(review): the fields presumably carry the usual BLAS xSYRK meaning
 * (triangle selector, transposition of A, order of C, inner dimension,
 * and the two scaling factors) - confirm against the kernel documentation.
 */
struct cl_zsyrk_args_s {
cham_uplo_t uplo;
cham_trans_t trans;
int n;
int k;
CHAMELEON_Complex64_t alpha;
CHAMELEON_Complex64_t beta;
};
#if !defined(CHAMELEON_SIMULATION)
/**
 * CPU implementation of the zsyrk codelet.
 *
 * @param[in] descr
 *          Data interfaces handed over by the runtime: descr[0] is the
 *          A tile, descr[1] is the C tile.
 *
 * @param[in] cl_arg
 *          Pointer to the struct cl_zsyrk_args_s filled by
 *          INSERT_TASK_zsyrk().
 */
static void
cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
{
    struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg;
    CHAM_tile_t *tileA;
    CHAM_tile_t *tileC;

    tileA = cti_interface_get(descr[0]);
    tileC = cti_interface_get(descr[1]);

    /* This kernel only handles tiles tagged as double-complex data */
    assert( tileA->flttype == ChamComplexDouble );
    assert( tileC->flttype == ChamComplexDouble );

    TCORE_zsyrk( clargs->uplo, clargs->trans, clargs->n, clargs->k,
                 clargs->alpha, tileA, clargs->beta, tileC );
}
#if defined(CHAMELEON_USE_CUDA)
static void
cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
cublasHandle_t handle = starpu_cublas_get_local_handle();
struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg;

Mathieu Faverge
committed
CHAM_tile_t *tileA;
CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]);

PRUVOST Florent
committed
CUDA_zsyrk(
clargs->uplo, clargs->trans, clargs->n, clargs->k,
(cuDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld,
(cuDoubleComplex*)&(clargs->beta),
tileC->mat, tileC->ld,
#endif /* defined(CHAMELEON_USE_CUDA) */
#if defined(CHAMELEON_USE_HIP)
/**
 * HIP implementation of the zsyrk codelet.
 *
 * @param[in] descr
 *          Data interfaces handed over by the runtime: descr[0] is the
 *          A tile, descr[1] is the C tile.
 *
 * @param[in] cl_arg
 *          Pointer to the struct cl_zsyrk_args_s filled by
 *          INSERT_TASK_zsyrk().
 */
static void
cl_zsyrk_hip_func(void *descr[], void *cl_arg)
{
    hipblasHandle_t handle = starpu_hipblas_get_local_handle();
    struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg;
    CHAM_tile_t *tileA;
    CHAM_tile_t *tileC;

    tileA = cti_interface_get(descr[0]);
    tileC = cti_interface_get(descr[1]);

    /*
     * Same argument layout as the CUDA variant: the A tile (pointer and
     * leading dimension) sits between alpha and beta.
     */
    HIP_zsyrk(
        clargs->uplo, clargs->trans, clargs->n, clargs->k,
        (hipblasDoubleComplex*)&(clargs->alpha),
        tileA->mat, tileA->ld,
        (hipblasDoubleComplex*)&(clargs->beta),
        tileC->mat, tileC->ld,
        handle );
}
#endif /* defined(CHAMELEON_USE_HIP) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
/*
 * The codelet is declared with the CPU function plus one accelerator
 * function: the HIP one when CHAMELEON_USE_HIP is defined, the CUDA one
 * otherwise.
 */
#if defined(CHAMELEON_USE_HIP)
CODELETS_GPU( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_hip_func, STARPU_HIP_ASYNC )
#else
CODELETS( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC )
#endif
#if defined(CHAMELEON_STARPU_USE_INSERT)
/**
 * Submit a zsyrk task on tiles A(Am, An) and C(Cm, Cn) through
 * rt_starpu_insert_task().
 *
 * When alpha is zero the A tile is not involved at all, so the call is
 * redirected to INSERT_TASK_zlascal() on the n-by-n C tile with beta.
 *
 * @param[in] options  Runtime options; priority, workerid, parallel and
 *                     profiling are read here.
 * @param[in] uplo     Stored in the codelet arguments (and given to zlascal).
 * @param[in] trans    Stored in the codelet arguments.
 * @param[in] n        Stored in the codelet arguments.
 * @param[in] k        Stored in the codelet arguments.
 * @param[in] nb       Only forwarded to INSERT_TASK_zlascal(); unused otherwise.
 * @param[in] alpha    Scalar applied with A; zero triggers the zlascal shortcut.
 * @param[in] A        Descriptor of A; tile (Am, An) is accessed read-only.
 * @param[in] Am       Row index of the A tile.
 * @param[in] An       Column index of the A tile.
 * @param[in] beta     Scalar applied to C; zero downgrades the C access to
 *                     write-only.
 * @param[in] C        Descriptor of C; tile (Cm, Cn) is the updated tile.
 * @param[in] Cm       Row index of the C tile.
 * @param[in] Cn       Column index of the C tile.
 */
void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn )
{
    if ( alpha == 0. ) {
        INSERT_TASK_zlascal( options, uplo, n, n, nb,
                             beta, C, Cm, Cn );
        return;
    }

    void (*callback)(void*);
    struct cl_zsyrk_args_s *clargs = NULL;
    int                     exec = 0;
    const char             *cl_name = "zsyrk";
    int                     accessC;

    /* Handle cache */
    CHAMELEON_BEGIN_ACCESS_DECLARATION;
    CHAMELEON_ACCESS_R(A, Am, An);
    CHAMELEON_ACCESS_RW(C, Cm, Cn);
    exec = __chameleon_need_exec;
    /*
     * NOTE(review): no closing macro for the access-declaration section is
     * visible here - confirm whether CHAMELEON_BEGIN_ACCESS_DECLARATION
     * requires a matching end macro at this point.
     */

    /* The codelet arguments are only allocated when the task is executed */
    if ( exec ) {
        clargs = malloc( sizeof( struct cl_zsyrk_args_s ) );
        clargs->uplo  = uplo;
        clargs->trans = trans;
        clargs->n     = n;
        clargs->k     = k;
        clargs->alpha = alpha;
        clargs->beta  = beta;
    }

    /* Callback for profiling information */
    callback = options->profiling ? cl_zsyrk_callback : NULL;

    /* Reduce the C access if needed */
    accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;

    /* Refine the task name from the two tiles involved */
    cl_name = chameleon_codelet_name( cl_name, 2,
                                      A->get_blktile( A, Am, An ),
                                      C->get_blktile( C, Cm, Cn ) );

    /* Insert the task */
    rt_starpu_insert_task(
        &cl_zsyrk,
        /* Task codelet arguments */
        STARPU_CL_ARGS, clargs, sizeof(struct cl_zsyrk_args_s),
        STARPU_R,      RTBLKADDR(A, ChamComplexDouble, Am, An),
        accessC,       RTBLKADDR(C, ChamComplexDouble, Cm, Cn),

        /* Common task arguments */
        STARPU_PRIORITY,          options->priority,
        STARPU_CALLBACK,          callback,
        STARPU_EXECUTE_ON_WORKER, options->workerid,
        STARPU_POSSIBLY_PARALLEL, options->parallel,
        STARPU_NAME,              cl_name,
        0 );

    (void)nb;
}
#else
/**
 * Submit a zsyrk task on tiles A(Am, An) and C(Cm, Cn) by building the
 * starpu_task by hand (variant used when CHAMELEON_STARPU_USE_INSERT is
 * not defined).
 *
 * When alpha is zero the A tile is not involved at all, so the call is
 * redirected to INSERT_TASK_zlascal() on the n-by-n C tile with beta.
 *
 * @param[in] options  Runtime options forwarded to the exchange helpers and
 *                     to starpu_cham_task_set_options().
 * @param[in] uplo     Stored in the codelet arguments (and given to zlascal).
 * @param[in] trans    Stored in the codelet arguments.
 * @param[in] n        Stored in the codelet arguments; used for the flops count.
 * @param[in] k        Stored in the codelet arguments; used for the flops count.
 * @param[in] nb       Only forwarded to INSERT_TASK_zlascal(); unused otherwise.
 * @param[in] alpha    Scalar applied with A; zero triggers the zlascal shortcut.
 * @param[in] A        Descriptor of A; tile (Am, An) is accessed read-only.
 * @param[in] Am       Row index of the A tile.
 * @param[in] An       Column index of the A tile.
 * @param[in] beta     Scalar applied to C; zero downgrades the C access to
 *                     write-only.
 * @param[in] C        Descriptor of C; the rank owning tile (Cm, Cn) is the
 *                     one given to starpu_cham_exchange_init_params().
 * @param[in] Cm       Row index of the C tile.
 * @param[in] Cn       Column index of the C tile.
 */
void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn )
{
    if ( alpha == 0. ) {
        INSERT_TASK_zlascal( options, uplo, n, n, nb,
                             beta, C, Cm, Cn );
        return;
    }

    /*
     * NOTE(review): params, nbdata, descrs, cl, clargs and cl_name used
     * below are not declared in this function; presumably this macro
     * declares them - confirm against its definition.
     */
    INSERT_TASK_COMMON_PARAMETERS( zsyrk, 2 );
    int accessC;

    /* Reduce the C access if needed */
    accessC = ( beta == (CHAMELEON_Complex64_t)0. ) ? STARPU_W : STARPU_RW;

    /*
     * Set the data handles and initialize exchanges if needed
     */
    starpu_cham_exchange_init_params( options, &params, C->get_rankof( C, Cm, Cn ) );
    starpu_cham_exchange_data_before_execution( options, &params, &nbdata, descrs, A, Am, An, STARPU_R );
    starpu_cham_exchange_data_before_execution( options, &params, &nbdata, descrs, C, Cm, Cn, accessC );

    /*
     * Not involved, let's return
     */
    if ( nbdata == 0 ) {
        return;
    }

    if ( params.do_execute )
    {
        int ret;
        struct starpu_task *task = starpu_task_create();
        task->cl = cl;

        /* Set codelet parameters */
        clargs = malloc( sizeof( struct cl_zsyrk_args_s ) );
        clargs->uplo  = uplo;
        clargs->trans = trans;
        clargs->n     = n;
        clargs->k     = k;
        clargs->alpha = alpha;
        clargs->beta  = beta;

        /* cl_arg_free is set so that the runtime releases clargs with the task */
        task->cl_arg      = clargs;
        task->cl_arg_size = sizeof( struct cl_zsyrk_args_s );
        task->cl_arg_free = 1;

        /* Set common parameters */
        starpu_cham_task_set_options( options, task, nbdata, descrs, cl_zsyrk_callback );

        /* Flops */
        task->flops = flops_zsyrk( k, n );

        /* Refine name */
        task->name = chameleon_codelet_name( cl_name, 2,
                                             A->get_blktile( A, Am, An ),
                                             C->get_blktile( C, Cm, Cn ) );

        ret = starpu_task_submit( task );
        if ( ret == -ENODEV ) {
            /* The task was rejected: destroy it explicitly and report */
            task->destroy = 0;
            starpu_task_destroy( task );
            chameleon_error( "INSERT_TASK_zsyrk", "Failed to submit the task to StarPU" );
            return;
        }
    }

    starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );

    (void)nb;
}
#endif