Newer
Older
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Gwenole Lucas
* @author Loris Lucido
* @author Terry Cojean
* @date 2024-10-18
* @precisions normal z -> c d s
*

Mathieu Faverge
committed
*/

Mathieu Faverge
committed
#include "chameleon_starpu_internal.h"
struct cl_ztrsm_args_s {
cham_side_t side;
cham_uplo_t uplo;
cham_trans_t transA;
cham_diag_t diag;
int m;
int n;
};
#if !defined(CHAMELEON_SIMULATION)
static void
cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
{
struct cl_ztrsm_args_s *clargs = (struct cl_ztrsm_args_s*)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;

Mathieu Faverge
committed
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
assert( tileA->flttype == ChamComplexDouble );
assert( tileB->flttype == ChamComplexDouble );
TCORE_ztrsm( clargs->side, clargs->uplo, clargs->transA, clargs->diag,
clargs->m, clargs->n, clargs->alpha, tileA, tileB );
#if defined(CHAMELEON_USE_CUDA)
static void
cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
struct cl_ztrsm_args_s *clargs = (struct cl_ztrsm_args_s*)cl_arg;
cublasHandle_t handle = starpu_cublas_get_local_handle();

Mathieu Faverge
committed
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;

Mathieu Faverge
committed
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);

PRUVOST Florent
committed
CUDA_ztrsm(
clargs->side, clargs->uplo, clargs->transA, clargs->diag,
clargs->m, clargs->n,
(cuDoubleComplex*)&(clargs->alpha),

Mathieu Faverge
committed
tileA->mat, tileA->ld,
tileB->mat, tileB->ld,
#endif /* defined(CHAMELEON_USE_CUDA) */
#if defined(CHAMELEON_USE_HIP)
static void
cl_ztrsm_hip_func(void *descr[], void *cl_arg)
{
struct cl_ztrsm_args_s *clargs = (struct cl_ztrsm_args_s*)cl_arg;
hipblasHandle_t handle = starpu_hipblas_get_local_handle();
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
HIP_ztrsm(
clargs->side, clargs->uplo, clargs->transA, clargs->diag,
clargs->m, clargs->n,
(hipblasDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld,
tileB->mat, tileB->ld,
handle );
}
#endif /* defined(CHAMELEON_USE_HIP) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
#if defined(CHAMELEON_USE_HIP)
CODELETS_GPU( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_hip_func, STARPU_HIP_ASYNC )
#else
CODELETS( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC )
#if defined(CHAMELEON_STARPU_USE_INSERT)
void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn )
void (*callback)(void*);
struct cl_ztrsm_args_s *clargs = NULL;
int exec = 0;
const char *cl_name = "ztrsm";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(B, Bm, Bn);
exec = __chameleon_need_exec;
if ( exec ) {
clargs = malloc( sizeof( struct cl_ztrsm_args_s ) );
clargs->side = side;
clargs->uplo = uplo;
clargs->transA = transA;
clargs->diag = diag;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
}
/* Callback fro profiling information */
callback = options->profiling ? cl_ztrsm_callback : NULL;

Mathieu Faverge
committed
/* Refine name */
cl_name = chameleon_codelet_name( cl_name, 2,
A->get_blktile( A, Am, An ),
B->get_blktile( B, Bm, Bn ) );
/* Insert the task */

Mathieu Faverge
committed
rt_starpu_insert_task(
&cl_ztrsm,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, sizeof(struct cl_ztrsm_args_s),
STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
STARPU_RW, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
COJEAN Terry
committed
STARPU_POSSIBLY_PARALLEL, options->parallel,
STARPU_NAME, cl_name,
0 );
(void)nb;
#else
void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn )
{
INSERT_TASK_COMMON_PARAMETERS( ztrsm, 2 );
/*
* Set the data handles and initialize exchanges if needed
*/
starpu_cham_exchange_init_params( options, ¶ms, B->get_rankof( B, Bm, Bn ) );
starpu_cham_exchange_data_before_execution( options, ¶ms, &nbdata, descrs, A, Am, An, STARPU_R );
starpu_cham_exchange_data_before_execution( options, ¶ms, &nbdata, descrs, B, Bm, Bn, STARPU_RW );
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
/*
* Not involved, let's return
*/
if ( nbdata == 0 ) {
return;
}
if ( params.do_execute )
{
int ret;
struct starpu_task *task = starpu_task_create();
task->cl = cl;
/* Set codelet parameters */
clargs = malloc( sizeof( struct cl_ztrsm_args_s ) );
clargs->side = side;
clargs->uplo = uplo;
clargs->transA = transA;
clargs->diag = diag;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
task->cl_arg = clargs;
task->cl_arg_size = sizeof( struct cl_ztrsm_args_s );
task->cl_arg_free = 1;
/* Set common parameters */
starpu_cham_task_set_options( options, task, nbdata, descrs, cl_ztrsm_callback );
/* Flops */
task->flops = flops_ztrsm( side, m, n );
/* Refine name */
task->name = chameleon_codelet_name( cl_name, 2,
A->get_blktile( A, Am, An ),
B->get_blktile( B, Bm, Bn ) );
ret = starpu_task_submit( task );
if ( ret == -ENODEV ) {
task->destroy = 0;
starpu_task_destroy( task );
chameleon_error( "INSERT_TASK_ztrsm", "Failed to submit the task to StarPU" );
return;
}
}
starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs );
(void)nb;
}
#endif