Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 684dc885 authored by Mathieu Faverge
Browse files

Merge branch 'starpu/codelet_refactor' into 'master'

StarPU: refactor GPU codelets

See merge request !267
parents b647c9c2 fba5d132
No related branches found
No related tags found
1 merge request: !267 StarPU: refactor GPU codelets
Showing
with 331 additions and 276 deletions
......@@ -366,15 +366,6 @@ if(NOT CHAMELEON_SIMULATION)
endif()
if (CUDA_LIBRARIES)
set_target_properties(CUDA::CUDA PROPERTIES INTERFACE_LINK_LIBRARIES "${CUDA_LIBRARIES}")
set(CMAKE_REQUIRED_INCLUDES "${CUDA_INCLUDE_DIRS}")
set(CMAKE_REQUIRED_LIBRARIES "${CUDA_LIBRARIES}")
if(CUDA_VERSION VERSION_LESS "4.0")
set(CUDA_HAVE_PEER_DEVICE_MEMORY_ACCESS 0)
else()
check_function_exists(cuDeviceCanAccessPeer CUDA_HAVE_PEER_DEVICE_MEMORY_ACCESS)
endif()
unset(CMAKE_REQUIRED_INCLUDES)
unset(CMAKE_REQUIRED_LIBRARIES)
# Add cublas if found
if (CUDA_CUBLAS_LIBRARIES)
set_target_properties(CUDA::CUBLAS PROPERTIES INTERFACE_LINK_LIBRARIES "${CUDA_CUBLAS_LIBRARIES}")
......
......@@ -233,33 +233,41 @@ inline static int chameleon_desc_islocal( const CHAM_desc_t *A, int m, int n )
* CHAMELEON_ACCESS_RW(C, Cm, Cn)
* CHAMELEON_END_ACCESS_DECLARATION
*/
#define CHAMELEON_BEGIN_ACCESS_DECLARATION { \
unsigned __chameleon_need_submit = 0; \
#define CHAMELEON_BEGIN_ACCESS_DECLARATION { \
unsigned __chameleon_need_exec = 0; \
unsigned __chameleon_need_submit = 0; \
RUNTIME_BEGIN_ACCESS_DECLARATION
#define CHAMELEON_ACCESS_R(A, Am, An) do { \
if (chameleon_desc_islocal(A, Am, An)) __chameleon_need_submit = 1; \
RUNTIME_ACCESS_R(A, Am, An); \
} while(0)
#define CHAMELEON_ACCESS_W(A, Am, An) do { \
if (chameleon_desc_islocal(A, Am, An)) __chameleon_need_submit = 1; \
RUNTIME_ACCESS_W(A, Am, An); \
} while(0)
#define CHAMELEON_ACCESS_RW(A, Am, An) do { \
if (chameleon_desc_islocal(A, Am, An)) __chameleon_need_submit = 1; \
RUNTIME_ACCESS_RW(A, Am, An); \
} while(0)
#define CHAMELEON_RANK_CHANGED(rank) do {\
__chameleon_need_submit = 1; \
RUNTIME_RANK_CHANGED(rank); \
} while (0)
#define CHAMELEON_END_ACCESS_DECLARATION \
RUNTIME_END_ACCESS_DECLARATION; \
if (!__chameleon_need_submit) return; \
#define CHAMELEON_ACCESS_R(A, Am, An) do { \
if (chameleon_desc_islocal(A, Am, An)) __chameleon_need_submit = 1; \
RUNTIME_ACCESS_R(A, Am, An); \
} while(0)
#define CHAMELEON_ACCESS_W(A, Am, An) do { \
if (chameleon_desc_islocal(A, Am, An)) { \
__chameleon_need_exec = 1; \
__chameleon_need_submit = 1; \
} \
RUNTIME_ACCESS_W(A, Am, An); \
} while(0)
#define CHAMELEON_ACCESS_RW(A, Am, An) do { \
if (chameleon_desc_islocal(A, Am, An)) { \
__chameleon_need_exec = 1; \
__chameleon_need_submit = 1; \
} \
RUNTIME_ACCESS_RW(A, Am, An); \
} while(0)
#define CHAMELEON_RANK_CHANGED(rank) do { \
__chameleon_need_submit = 1; \
RUNTIME_RANK_CHANGED(rank); \
} while (0)
#define CHAMELEON_END_ACCESS_DECLARATION \
RUNTIME_END_ACCESS_DECLARATION; \
if (!__chameleon_need_submit) return; \
(void)__chameleon_need_exec; \
}
#ifdef __cplusplus
......
......@@ -31,7 +31,7 @@ struct cl_zcesca_args_s {
#if !defined(CHAMELEON_SIMULATION)
static void cl_zcesca_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zcesca_args_s clargs;
struct cl_zcesca_args_s *clargs = (struct cl_zcesca_args_s *)cl_arg;
CHAM_tile_t *Gi;
CHAM_tile_t *Gj;
CHAM_tile_t *G;
......@@ -46,9 +46,8 @@ static void cl_zcesca_cpu_func(void *descr[], void *cl_arg)
Dj = cti_interface_get(descr[4]);
A = cti_interface_get(descr[5]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zcesca( clargs.center, clargs.scale, clargs.axis,
clargs.m, clargs.n, clargs.mt, clargs.nt,
TCORE_zcesca( clargs->center, clargs->scale, clargs->axis,
clargs->m, clargs->n, clargs->mt, clargs->nt,
Gi, Gj, G, Di, Dj, A );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -68,19 +67,12 @@ void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
const CHAM_desc_t *Dj, int Djm, int Djn,
CHAM_desc_t *A, int Am, int An )
{
struct cl_zcesca_args_s clargs = {
.center = center,
.scale = scale,
.axis = axis,
.m = m,
.n = n,
.mt = mt,
.nt = nt
};
struct cl_zcesca_args_s *clargs = NULL;
struct starpu_codelet *codelet = &cl_zcesca;
void (*callback)(void*) = options->profiling ? cl_zcesca_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int exec = 0;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(Gi, Gim, Gin);
......@@ -89,11 +81,23 @@ void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
CHAMELEON_ACCESS_R(Di, Dim, Din);
CHAMELEON_ACCESS_R(Dj, Djm, Djn);
CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zcesca_args_s ) );
clargs->center = center;
clargs->scale = scale;
clargs->axis = axis;
clargs->m = m;
clargs->n = n;
clargs->mt = mt;
clargs->nt = nt;
}
rt_starpu_insert_task(
codelet,
STARPU_VALUE, &clargs, sizeof(struct cl_zcesca_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zcesca_args_s),
STARPU_R, RTBLKADDR(Gi, CHAMELEON_Complex64_t, Gim, Gin),
STARPU_R, RTBLKADDR(Gj, CHAMELEON_Complex64_t, Gjm, Gjn),
STARPU_R, RTBLKADDR(G, CHAMELEON_Complex64_t, Gm, Gn),
......
......@@ -19,6 +19,7 @@
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @author Florent Pruvost
* @author Gwenole Lucas
* @date 2021-03-16
* @precisions normal z -> c d s
*
......@@ -43,7 +44,7 @@ struct cl_zgemm_args_s {
static void
cl_zgemm_cpu_func( void *descr[], void *cl_arg )
{
struct cl_zgemm_args_s clargs;
struct cl_zgemm_args_s *clargs = (struct cl_zgemm_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
CHAM_tile_t *tileC;
......@@ -52,18 +53,17 @@ cl_zgemm_cpu_func( void *descr[], void *cl_arg )
tileB = cti_interface_get(descr[1]);
tileC = cti_interface_get(descr[2]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zgemm( clargs.transA, clargs.transB,
clargs.m, clargs.n, clargs.k,
clargs.alpha, tileA, tileB,
clargs.beta, tileC );
TCORE_zgemm( clargs->transA, clargs->transB,
clargs->m, clargs->n, clargs->k,
clargs->alpha, tileA, tileB,
clargs->beta, tileC );
}
#ifdef CHAMELEON_USE_CUDA
static void
cl_zgemm_cuda_func( void *descr[], void *_cl_arg )
cl_zgemm_cuda_func( void *descr[], void *cl_arg )
{
struct cl_zgemm_args_s clargs;
struct cl_zgemm_args_s *clargs = (struct cl_zgemm_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
CHAM_tile_t *tileC;
......@@ -72,17 +72,19 @@ cl_zgemm_cuda_func( void *descr[], void *_cl_arg )
tileB = cti_interface_get(descr[1]);
tileC = cti_interface_get(descr[2]);
starpu_codelet_unpack_args( _cl_arg, &clargs );
RUNTIME_getStream( stream );
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
assert( tileB->format & CHAMELEON_TILE_FULLRANK );
assert( tileC->format & CHAMELEON_TILE_FULLRANK );
CUDA_zgemm(
clargs.transA, clargs.transB,
clargs.m, clargs.n, clargs.k,
(cuDoubleComplex*)&(clargs.alpha),
clargs->transA, clargs->transB,
clargs->m, clargs->n, clargs->k,
(cuDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld,
tileB->mat, tileB->ld,
(cuDoubleComplex*)&(clargs.beta),
(cuDoubleComplex*)&(clargs->beta),
tileC->mat, tileC->ld,
stream );
......@@ -112,22 +114,12 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
beta, C, Cm, Cn );
}
struct cl_zgemm_args_s clargs = {
.transA = transA,
.transB = transB,
.m = m,
.n = n,
.k = k,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.tileB = B->get_blktile( B, Bm, Bn ),
.beta = beta,
.tileC = C->get_blktile( C, Cm, Cn )
};
struct cl_zgemm_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid, accessC;
int exec = 0;
char *cl_name = "zgemm";
/* Handle cache */
......@@ -135,8 +127,23 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_R(B, Bm, Bn);
CHAMELEON_ACCESS_RW(C, Cm, Cn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zgemm_args_s ) );
clargs->transA = transA;
clargs->transB = transB;
clargs->m = m;
clargs->n = n;
clargs->k = k;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
clargs->beta = beta;
clargs->tileC = C->get_blktile( C, Cm, Cn );
}
/* Callback for profiling information */
callback = options->profiling ? cl_zgemm_callback : NULL;
......@@ -150,10 +157,12 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zgemm,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zgemm_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgemm_args_s),
/* Task handles */
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
......
......@@ -43,7 +43,6 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
tileD = cti_interface_get(descr[1]);
tileA = cti_interface_get(descr[2]);
starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV);
TCORE_zgessm(m, n, k, ib, IPIV, tileD, tileA);
}
......
......@@ -27,15 +27,14 @@ struct cl_zgesum_args_s {
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgesum_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zgesum_args_s clargs;
struct cl_zgesum_args_s *clargs = (struct cl_zgesum_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileW;
tileA = cti_interface_get(descr[0]);
tileW = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zgesum( clargs.storev, clargs.m, clargs.n, tileA, tileW );
TCORE_zgesum( clargs->storev, clargs->m, clargs->n, tileA, tileW );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -49,24 +48,29 @@ void INSERT_TASK_zgesum( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *SUMS, int SUMSm, int SUMSn )
{
struct cl_zgesum_args_s clargs = {
.storev = storev,
.m = m,
.n = n
};
struct cl_zgesum_args_s *clargs = NULL;
struct starpu_codelet *codelet = &cl_zgesum;
void (*callback)(void*) = options->profiling ? cl_zgesum_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int exec = 0;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(SUMS, SUMSm, SUMSn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zgesum_args_s ) );
clargs->storev = storev;
clargs->m = m;
clargs->n = n;
}
rt_starpu_insert_task(
codelet,
STARPU_VALUE, &clargs, sizeof(struct cl_zgesum_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgesum_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_RW, RTBLKADDR(SUMS, CHAMELEON_Complex64_t, SUMSm, SUMSn),
STARPU_PRIORITY, options->priority,
......
......@@ -35,7 +35,6 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N);
TCORE_zhe2ge(uplo, M, N, tileA, tileB);
}
......
......@@ -19,6 +19,7 @@
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @author Florent Pruvost
* @author Gwenole Lucas
* @date 2021-03-16
* @precisions normal z -> c
*
......@@ -41,38 +42,35 @@ struct cl_zherk_args_s {
static void
cl_zherk_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zherk_args_s clargs;
struct cl_zherk_args_s *clargs = (struct cl_zherk_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zherk( clargs.uplo, clargs.trans, clargs.n, clargs.k,
clargs.alpha, tileA, clargs.beta, tileC );
TCORE_zherk( clargs->uplo, clargs->trans, clargs->n, clargs->k,
clargs->alpha, tileA, clargs->beta, tileC );
}
#if defined(CHAMELEON_USE_CUDA)
static void
cl_zherk_cuda_func(void *descr[], void *cl_arg)
{
struct cl_zherk_args_s clargs;
struct cl_zherk_args_s *clargs = (struct cl_zherk_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
RUNTIME_getStream(stream);
CUDA_zherk(
clargs.uplo, clargs.trans, clargs.n, clargs.k,
(cuDoubleComplex*)&(clargs.alpha),
clargs->uplo, clargs->trans, clargs->n, clargs->k,
&(clargs->alpha),
tileA->mat, tileA->ld,
(cuDoubleComplex*)&(clargs.beta),
&(clargs->beta),
tileC->mat, tileC->ld,
stream );
......@@ -101,28 +99,33 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options,
beta, C, Cm, Cn );
}
struct cl_zherk_args_s clargs = {
.uplo = uplo,
.trans = trans,
.n = n,
.k = k,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.beta = beta,
.tileC = C->get_blktile( C, Cm, Cn ),
};
struct cl_zherk_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid, accessC;
int exec = 0;
char *cl_name = "zherk";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(C, Cm, Cn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zherk_args_s ) );
clargs->uplo = uplo;
clargs->trans = trans;
clargs->n = n;
clargs->k = k;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->beta = beta;
clargs->tileC = C->get_blktile( C, Cm, Cn );
}
/* Callback for profiling information */
callback = options->profiling ? cl_zherk_callback : NULL;
......@@ -136,7 +139,7 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zherk,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zherk_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zherk_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
......
......@@ -41,20 +41,19 @@ struct cl_zlacpy_args_s {
static void
cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zlacpy_args_s clargs;
struct cl_zlacpy_args_s *clargs = (struct cl_zlacpy_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
assert( clargs.displA == 0 );
assert( clargs.displB == 0 );
assert( clargs->displA == 0 );
assert( clargs->displB == 0 );
/* A = tileA->mat; */
/* B = tileB->mat; */
/* CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); */
TCORE_zlacpy( clargs.uplo, clargs.m, clargs.n, tileA, tileB );
TCORE_zlacpy( clargs->uplo, clargs->m, clargs->n, tileA, tileB );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -68,27 +67,32 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
int displA, const CHAM_desc_t *A, int Am, int An,
int displB, const CHAM_desc_t *B, int Bm, int Bn )
{
struct cl_zlacpy_args_s clargs = {
.uplo = uplo,
.m = m,
.n = n,
.displA = displA,
.displB = displB,
.tileA = A->get_blktile( A, Am, An ),
.tileB = B->get_blktile( B, Bm, Bn ),
};
struct cl_zlacpy_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "zlacpy";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_W(B, Bm, Bn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlacpy_args_s ) );
clargs->uplo = uplo;
clargs->m = m;
clargs->n = n;
clargs->displA = displA;
clargs->displB = displB;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
}
/* Callback for profiling information */
callback = options->profiling ? cl_zlacpy_callback : NULL;
......@@ -99,7 +103,7 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zlacpy,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zlacpy_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zlacpy_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
......
......@@ -38,7 +38,6 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args(cl_arg, &m, &n);
TCORE_zlag2c( m, n, tileA, tileB);
}
......@@ -96,7 +95,6 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args(cl_arg, &m, &n);
TCORE_clag2z( m, n, tileA, tileB);
}
......
......@@ -36,13 +36,12 @@ struct cl_zlascal_args_s {
static void
cl_zlascal_cpu_func( void *descr[], void *cl_arg )
{
struct cl_zlascal_args_s clargs;
struct cl_zlascal_args_s *clargs = (struct cl_zlascal_args_s *)cl_arg;
CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zlascal( clargs.uplo, clargs.m, clargs.n, clargs.alpha, tileA );
TCORE_zlascal( clargs->uplo, clargs->m, clargs->n, clargs->alpha, tileA );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -65,24 +64,29 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
return;
}
struct cl_zlascal_args_s clargs = {
.uplo = uplo,
.m = m,
.n = n,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
};
struct cl_zlascal_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "zlascal";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlascal_args_s ) );
clargs->uplo = uplo;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
}
/* Callback for profiling information */
callback = options->profiling ? cl_zlascal_callback : NULL;
......@@ -93,7 +97,7 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zlascal,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zlascal_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zlascal_args_s),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */
......
......@@ -39,13 +39,12 @@ struct cl_zlaset_args_s {
static void
cl_zlaset_cpu_func( void *descr[], void *cl_arg )
{
struct cl_zlaset_args_s clargs;
struct cl_zlaset_args_s *clargs = (struct cl_zlaset_args_s *)cl_arg;
CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zlaset( clargs.uplo, clargs.m, clargs.n, clargs.alpha, clargs.beta, tileA );
TCORE_zlaset( clargs->uplo, clargs->m, clargs->n, clargs->alpha, clargs->beta, tileA );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -59,25 +58,30 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
const CHAM_desc_t *A, int Am, int An )
{
struct cl_zlaset_args_s clargs = {
.uplo = uplo,
.m = m,
.n = n,
.alpha = alpha,
.beta = beta,
.tileA = A->get_blktile( A, Am, An ),
};
struct cl_zlaset_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "zlaset";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_W(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlaset_args_s ) );
clargs->uplo = uplo;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->beta = beta;
clargs->tileA = A->get_blktile( A, Am, An );
}
/* Callback for profiling information */
callback = options->profiling ? cl_zlaset_callback : NULL;
......@@ -88,7 +92,7 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zlaset,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zlaset_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zlaset_args_s),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */
......
......@@ -37,13 +37,12 @@ struct cl_zlauum_args_s {
static void
cl_zlauum_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zlauum_args_s clargs;
struct cl_zlauum_args_s *clargs = (struct cl_zlauum_args_s *)cl_arg;
CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zlauum( clargs.uplo, clargs.n, tileA );
TCORE_zlauum( clargs->uplo, clargs->n, tileA );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -56,22 +55,27 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n, int nb,
const CHAM_desc_t *A, int Am, int An )
{
struct cl_zlauum_args_s clargs = {
.uplo = uplo,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
};
struct cl_zlauum_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "zlauum";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlauum_args_s ) );
clargs->uplo = uplo;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
}
/* Callback for profiling information */
callback = options->profiling ? cl_zlauum_callback : NULL;
......@@ -82,7 +86,7 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zlauum,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zlauum_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zlauum_args_s),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */
......
......@@ -41,14 +41,13 @@ struct cl_zplghe_args_s {
#if !defined(CHAMELEON_SIMULATION)
static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zplghe_args_s clargs;
struct cl_zplghe_args_s *clargs = (struct cl_zplghe_args_s *)cl_arg;
CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zplghe( clargs.bump, clargs.m, clargs.n, tileA,
clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
TCORE_zplghe( clargs->bump, clargs->m, clargs->n, tileA,
clargs->bigM, clargs->m0, clargs->n0, clargs->seed );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -61,27 +60,32 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
int bigM, int m0, int n0, unsigned long long int seed )
{
struct cl_zplghe_args_s clargs = {
.bump = bump,
.m = m,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.bigM = bigM,
.m0 = m0,
.n0 = n0,
.seed = seed,
};
struct cl_zplghe_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "zplghe";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_W(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zplghe_args_s ) );
clargs->bump = bump;
clargs->m = m;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->bigM = bigM;
clargs->m0 = m0;
clargs->n0 = n0;
clargs->seed = seed;
}
/* Callback for profiling information */
callback = options->profiling ? cl_zplghe_callback : NULL;
......@@ -92,7 +96,7 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zplghe,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zplghe_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zplghe_args_s),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */
......
......@@ -41,14 +41,13 @@ struct cl_zplgsy_args_s {
#if !defined(CHAMELEON_SIMULATION)
static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zplgsy_args_s clargs;
struct cl_zplgsy_args_s *clargs = (struct cl_zplgsy_args_s *)cl_arg;
CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zplgsy( clargs.bump, clargs.m, clargs.n, tileA,
clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
TCORE_zplgsy( clargs->bump, clargs->m, clargs->n, tileA,
clargs->bigM, clargs->m0, clargs->n0, clargs->seed );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -61,27 +60,32 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
int bigM, int m0, int n0, unsigned long long int seed )
{
struct cl_zplgsy_args_s clargs = {
.bump = bump,
.m = m,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.bigM = bigM,
.m0 = m0,
.n0 = n0,
.seed = seed,
};
struct cl_zplgsy_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "zplgsy";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_W(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zplgsy_args_s ) );
clargs->bump = bump;
clargs->m = m;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->bigM = bigM;
clargs->m0 = m0;
clargs->n0 = n0;
clargs->seed = seed;
}
/* Callback for profiling information */
callback = options->profiling ? cl_zplgsy_callback : NULL;
......@@ -92,7 +96,7 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zplgsy,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zplgsy_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zplgsy_args_s),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */
......
......@@ -41,14 +41,13 @@ struct cl_zplrnt_args_s {
static void
cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zplrnt_args_s clargs;
struct cl_zplrnt_args_s *clargs = (struct cl_zplrnt_args_s *)cl_arg;
CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zplrnt( clargs.m, clargs.n, tileA,
clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
TCORE_zplrnt( clargs->m, clargs->n, tileA,
clargs->bigM, clargs->m0, clargs->n0, clargs->seed );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -61,26 +60,31 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
int m, int n, const CHAM_desc_t *A, int Am, int An,
int bigM, int m0, int n0, unsigned long long int seed )
{
struct cl_zplrnt_args_s clargs = {
.m = m,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.bigM = bigM,
.m0 = m0,
.n0 = n0,
.seed = seed,
};
struct cl_zplrnt_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "zplrnt";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_W(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zplrnt_args_s ) );
clargs->m = m;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->bigM = bigM;
clargs->m0 = m0;
clargs->n0 = n0;
clargs->seed = seed;
}
/* Callback for profiling information */
callback = options->profiling ? cl_zplrnt_callback : NULL;
......@@ -91,7 +95,7 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zplrnt,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zplrnt_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zplrnt_args_s),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */
......
......@@ -40,17 +40,16 @@ struct cl_zpotrf_args_s {
static void
cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zpotrf_args_s clargs;
struct cl_zpotrf_args_s *clargs = (struct cl_zpotrf_args_s *)cl_arg;
CHAM_tile_t *tileA;
int info = 0;
tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zpotrf( clargs.uplo, clargs.n, tileA, &info );
TCORE_zpotrf( clargs->uplo, clargs->n, tileA, &info );
if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info );
if ( (clargs->sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
RUNTIME_sequence_flush( NULL, clargs->sequence, clargs->request, clargs->iinfo+info );
}
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -65,25 +64,30 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An,
int iinfo )
{
struct cl_zpotrf_args_s clargs = {
.uplo = uplo,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.iinfo = iinfo,
.sequence = options->sequence,
.request = options->request,
};
struct cl_zpotrf_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "zpotrf";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zpotrf_args_s ) );
clargs->uplo = uplo;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->iinfo = iinfo;
clargs->sequence = options->sequence;
clargs->request = options->request;
}
/* Callback for profiling information */
callback = options->profiling ? cl_zpotrf_callback : NULL;
......@@ -94,7 +98,7 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_zpotrf,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zpotrf_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zpotrf_args_s),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */
......
......@@ -19,6 +19,7 @@
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @author Florent Pruvost
* @author Gwenole Lucas
* @date 2021-03-16
* @precisions normal z -> c d s
*
......@@ -41,38 +42,35 @@ struct cl_zsyrk_args_s {
static void
cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
{
struct cl_zsyrk_args_s clargs;
struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_zsyrk( clargs.uplo, clargs.trans, clargs.n, clargs.k,
clargs.alpha, tileA, clargs.beta, tileC );
TCORE_zsyrk( clargs->uplo, clargs->trans, clargs->n, clargs->k,
clargs->alpha, tileA, clargs->beta, tileC );
}
#if defined(CHAMELEON_USE_CUDA)
static void
cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
{
struct cl_zsyrk_args_s clargs;
struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
RUNTIME_getStream(stream);
CUDA_zsyrk(
clargs.uplo, clargs.trans, clargs.n, clargs.k,
(cuDoubleComplex*)&(clargs.alpha),
clargs->uplo, clargs->trans, clargs->n, clargs->k,
(cuDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld,
(cuDoubleComplex*)&(clargs.beta),
(cuDoubleComplex*)&(clargs->beta),
tileC->mat, tileC->ld,
stream );
......@@ -101,28 +99,33 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
beta, C, Cm, Cn );
}
struct cl_zsyrk_args_s clargs = {
.uplo = uplo,
.trans = trans,
.n = n,
.k = k,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.beta = beta,
.tileC = C->get_blktile( C, Cm, Cn ),
};
struct cl_zsyrk_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid, accessC;
int exec = 0;
char *cl_name = "zsyrk";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(C, Cm, Cn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zsyrk_args_s ) );
clargs->uplo = uplo;
clargs->trans = trans;
clargs->n = n;
clargs->k = k;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->beta = beta;
clargs->tileC = C->get_blktile( C, Cm, Cn );
}
/* Callback for profiling information */
callback = options->profiling ? cl_zsyrk_callback : NULL;
......@@ -135,9 +138,8 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
/* Insert the task */
rt_starpu_insert_task(
&cl_zsyrk,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zsyrk_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_zsyrk_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
......
......@@ -38,16 +38,15 @@ struct cl_ztradd_args_s {
static void
cl_ztradd_cpu_func(void *descr[], void *cl_arg)
{
struct cl_ztradd_args_s clargs;
struct cl_ztradd_args_s *clargs = (struct cl_ztradd_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_ztradd( clargs.uplo, clargs.trans, clargs.m, clargs.n,
clargs.alpha, tileA, clargs.beta, tileB );
TCORE_ztradd( clargs->uplo, clargs->trans, clargs->m, clargs->n,
clargs->alpha, tileA, clargs->beta, tileB );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -66,28 +65,33 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
beta, B, Bm, Bn );
}
struct cl_ztradd_args_s clargs = {
.uplo = uplo,
.trans = trans,
.m = m,
.n = n,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.beta = beta,
.tileB = B->get_blktile( B, Bm, Bn ),
};
struct cl_ztradd_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid, accessB;
int exec = 0;
char *cl_name = "ztradd";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(B, Bm, Bn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_ztradd_args_s ) );
clargs->uplo = uplo;
clargs->trans = trans;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->beta = beta;
clargs->tileB = B->get_blktile( B, Bm, Bn );
}
/* Callback for profiling information */
callback = options->profiling ? cl_ztradd_callback : NULL;
......@@ -101,7 +105,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_ztradd,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_ztradd_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_ztradd_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
......
......@@ -42,37 +42,34 @@ struct cl_ztrmm_args_s {
static void
cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
{
struct cl_ztrmm_args_s clargs;
struct cl_ztrmm_args_s *clargs = (struct cl_ztrmm_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
TCORE_ztrmm( clargs.side, clargs.uplo, clargs.transA, clargs.diag,
clargs.m, clargs.n, clargs.alpha, tileA, tileB );
TCORE_ztrmm( clargs->side, clargs->uplo, clargs->transA, clargs->diag,
clargs->m, clargs->n, clargs->alpha, tileA, tileB );
}
#ifdef CHAMELEON_USE_CUDA
static void
cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
{
struct cl_ztrmm_args_s clargs;
struct cl_ztrmm_args_s *clargs = (struct cl_ztrmm_args_s *)cl_arg;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
RUNTIME_getStream(stream);
CUDA_ztrmm(
clargs.side, clargs.uplo, clargs.transA, clargs.diag,
clargs.m, clargs.n,
(cuDoubleComplex*)&(clargs.alpha),
clargs->side, clargs->uplo, clargs->transA, clargs->diag,
clargs->m, clargs->n,
(cuDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld,
tileB->mat, tileB->ld,
stream );
......@@ -97,29 +94,34 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn )
{
struct cl_ztrmm_args_s clargs = {
.side = side,
.uplo = uplo,
.transA = transA,
.diag = diag,
.m = m,
.n = n,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.tileB = B->get_blktile( B, Bm, Bn ),
};
struct cl_ztrmm_args_s *clargs = NULL;
void (*callback)(void*);
RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid;
int exec = 0;
char *cl_name = "ztrmm";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(B, Bm, Bn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_ztrmm_args_s ) );
clargs->side = side;
clargs->uplo = uplo;
clargs->transA = transA;
clargs->diag = diag;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
}
/* Callback for profiling information */
callback = options->profiling ? cl_ztrmm_callback : NULL;
......@@ -130,7 +132,7 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
rt_starpu_insert_task(
&cl_ztrmm,
/* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_ztrmm_args_s),
STARPU_CL_ARGS, clargs, sizeof(struct cl_ztrmm_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment