Mentions légales du service

Skip to content
Snippets Groups Projects
Commit fba5d132 authored by Mathieu Faverge
Browse files

Switch to codelet argument allocation within chameleon

parent 3b0b9c96
No related branches found
No related tags found
1 merge request: !267 StarPU: refactor GPU codelets
Showing
with 342 additions and 279 deletions
...@@ -31,7 +31,7 @@ struct cl_zcesca_args_s { ...@@ -31,7 +31,7 @@ struct cl_zcesca_args_s {
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zcesca_cpu_func(void *descr[], void *cl_arg) static void cl_zcesca_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zcesca_args_s clargs; struct cl_zcesca_args_s *clargs = (struct cl_zcesca_args_s *)cl_arg;
CHAM_tile_t *Gi; CHAM_tile_t *Gi;
CHAM_tile_t *Gj; CHAM_tile_t *Gj;
CHAM_tile_t *G; CHAM_tile_t *G;
...@@ -46,9 +46,8 @@ static void cl_zcesca_cpu_func(void *descr[], void *cl_arg) ...@@ -46,9 +46,8 @@ static void cl_zcesca_cpu_func(void *descr[], void *cl_arg)
Dj = cti_interface_get(descr[4]); Dj = cti_interface_get(descr[4]);
A = cti_interface_get(descr[5]); A = cti_interface_get(descr[5]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zcesca( clargs->center, clargs->scale, clargs->axis,
TCORE_zcesca( clargs.center, clargs.scale, clargs.axis, clargs->m, clargs->n, clargs->mt, clargs->nt,
clargs.m, clargs.n, clargs.mt, clargs.nt,
Gi, Gj, G, Di, Dj, A ); Gi, Gj, G, Di, Dj, A );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -68,19 +67,12 @@ void INSERT_TASK_zcesca( const RUNTIME_option_t *options, ...@@ -68,19 +67,12 @@ void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
const CHAM_desc_t *Dj, int Djm, int Djn, const CHAM_desc_t *Dj, int Djm, int Djn,
CHAM_desc_t *A, int Am, int An ) CHAM_desc_t *A, int Am, int An )
{ {
struct cl_zcesca_args_s clargs = { struct cl_zcesca_args_s *clargs = NULL;
.center = center,
.scale = scale,
.axis = axis,
.m = m,
.n = n,
.mt = mt,
.nt = nt
};
struct starpu_codelet *codelet = &cl_zcesca; struct starpu_codelet *codelet = &cl_zcesca;
void (*callback)(void*) = options->profiling ? cl_zcesca_callback : NULL; void (*callback)(void*) = options->profiling ? cl_zcesca_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int exec = 0;
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(Gi, Gim, Gin); CHAMELEON_ACCESS_R(Gi, Gim, Gin);
...@@ -89,11 +81,23 @@ void INSERT_TASK_zcesca( const RUNTIME_option_t *options, ...@@ -89,11 +81,23 @@ void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
CHAMELEON_ACCESS_R(Di, Dim, Din); CHAMELEON_ACCESS_R(Di, Dim, Din);
CHAMELEON_ACCESS_R(Dj, Djm, Djn); CHAMELEON_ACCESS_R(Dj, Djm, Djn);
CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zcesca_args_s ) );
clargs->center = center;
clargs->scale = scale;
clargs->axis = axis;
clargs->m = m;
clargs->n = n;
clargs->mt = mt;
clargs->nt = nt;
}
rt_starpu_insert_task( rt_starpu_insert_task(
codelet, codelet,
STARPU_VALUE, &clargs, sizeof(struct cl_zcesca_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zcesca_args_s),
STARPU_R, RTBLKADDR(Gi, CHAMELEON_Complex64_t, Gim, Gin), STARPU_R, RTBLKADDR(Gi, CHAMELEON_Complex64_t, Gim, Gin),
STARPU_R, RTBLKADDR(Gj, CHAMELEON_Complex64_t, Gjm, Gjn), STARPU_R, RTBLKADDR(Gj, CHAMELEON_Complex64_t, Gjm, Gjn),
STARPU_R, RTBLKADDR(G, CHAMELEON_Complex64_t, Gm, Gn), STARPU_R, RTBLKADDR(G, CHAMELEON_Complex64_t, Gm, Gn),
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis * @author Lucas Barros de Assis
* @author Florent Pruvost * @author Florent Pruvost
* @author Gwenole Lucas
* @date 2021-03-16 * @date 2021-03-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -43,7 +44,7 @@ struct cl_zgemm_args_s { ...@@ -43,7 +44,7 @@ struct cl_zgemm_args_s {
static void static void
cl_zgemm_cpu_func( void *descr[], void *cl_arg ) cl_zgemm_cpu_func( void *descr[], void *cl_arg )
{ {
struct cl_zgemm_args_s clargs; struct cl_zgemm_args_s *clargs = (struct cl_zgemm_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
CHAM_tile_t *tileC; CHAM_tile_t *tileC;
...@@ -52,18 +53,17 @@ cl_zgemm_cpu_func( void *descr[], void *cl_arg ) ...@@ -52,18 +53,17 @@ cl_zgemm_cpu_func( void *descr[], void *cl_arg )
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
tileC = cti_interface_get(descr[2]); tileC = cti_interface_get(descr[2]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zgemm( clargs->transA, clargs->transB,
TCORE_zgemm( clargs.transA, clargs.transB, clargs->m, clargs->n, clargs->k,
clargs.m, clargs.n, clargs.k, clargs->alpha, tileA, tileB,
clargs.alpha, tileA, tileB, clargs->beta, tileC );
clargs.beta, tileC );
} }
#ifdef CHAMELEON_USE_CUDA #ifdef CHAMELEON_USE_CUDA
static void static void
cl_zgemm_cuda_func( void *descr[], void *_cl_arg ) cl_zgemm_cuda_func( void *descr[], void *cl_arg )
{ {
struct cl_zgemm_args_s clargs; struct cl_zgemm_args_s *clargs = (struct cl_zgemm_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
CHAM_tile_t *tileC; CHAM_tile_t *tileC;
...@@ -72,17 +72,19 @@ cl_zgemm_cuda_func( void *descr[], void *_cl_arg ) ...@@ -72,17 +72,19 @@ cl_zgemm_cuda_func( void *descr[], void *_cl_arg )
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
tileC = cti_interface_get(descr[2]); tileC = cti_interface_get(descr[2]);
starpu_codelet_unpack_args( _cl_arg, &clargs );
RUNTIME_getStream( stream ); RUNTIME_getStream( stream );
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
assert( tileB->format & CHAMELEON_TILE_FULLRANK );
assert( tileC->format & CHAMELEON_TILE_FULLRANK );
CUDA_zgemm( CUDA_zgemm(
clargs.transA, clargs.transB, clargs->transA, clargs->transB,
clargs.m, clargs.n, clargs.k, clargs->m, clargs->n, clargs->k,
(cuDoubleComplex*)&(clargs.alpha), (cuDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld, tileA->mat, tileA->ld,
tileB->mat, tileB->ld, tileB->mat, tileB->ld,
(cuDoubleComplex*)&(clargs.beta), (cuDoubleComplex*)&(clargs->beta),
tileC->mat, tileC->ld, tileC->mat, tileC->ld,
stream ); stream );
...@@ -112,22 +114,12 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options, ...@@ -112,22 +114,12 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
beta, C, Cm, Cn ); beta, C, Cm, Cn );
} }
struct cl_zgemm_args_s clargs = { struct cl_zgemm_args_s *clargs = NULL;
.transA = transA,
.transB = transB,
.m = m,
.n = n,
.k = k,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.tileB = B->get_blktile( B, Bm, Bn ),
.beta = beta,
.tileC = C->get_blktile( C, Cm, Cn )
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid, accessC; int workerid, accessC;
int exec = 0;
char *cl_name = "zgemm"; char *cl_name = "zgemm";
/* Handle cache */ /* Handle cache */
...@@ -135,8 +127,23 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options, ...@@ -135,8 +127,23 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_R(B, Bm, Bn); CHAMELEON_ACCESS_R(B, Bm, Bn);
CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_ACCESS_RW(C, Cm, Cn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zgemm_args_s ) );
clargs->transA = transA;
clargs->transB = transB;
clargs->m = m;
clargs->n = n;
clargs->k = k;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
clargs->beta = beta;
clargs->tileC = C->get_blktile( C, Cm, Cn );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zgemm_callback : NULL; callback = options->profiling ? cl_zgemm_callback : NULL;
...@@ -150,10 +157,12 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options, ...@@ -150,10 +157,12 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zgemm, &cl_zgemm,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zgemm_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zgemm_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), /* Task handles */
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
/* Common task arguments */ /* Common task arguments */
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
......
...@@ -43,7 +43,6 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg) ...@@ -43,7 +43,6 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
tileD = cti_interface_get(descr[1]); tileD = cti_interface_get(descr[1]);
tileA = cti_interface_get(descr[2]); tileA = cti_interface_get(descr[2]);
starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV); starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV);
TCORE_zgessm(m, n, k, ib, IPIV, tileD, tileA); TCORE_zgessm(m, n, k, ib, IPIV, tileD, tileA);
} }
......
...@@ -27,15 +27,14 @@ struct cl_zgesum_args_s { ...@@ -27,15 +27,14 @@ struct cl_zgesum_args_s {
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zgesum_cpu_func(void *descr[], void *cl_arg) static void cl_zgesum_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zgesum_args_s clargs; struct cl_zgesum_args_s *clargs = (struct cl_zgesum_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileW; CHAM_tile_t *tileW;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileW = cti_interface_get(descr[1]); tileW = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zgesum( clargs->storev, clargs->m, clargs->n, tileA, tileW );
TCORE_zgesum( clargs.storev, clargs.m, clargs.n, tileA, tileW );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -49,24 +48,29 @@ void INSERT_TASK_zgesum( const RUNTIME_option_t *options, ...@@ -49,24 +48,29 @@ void INSERT_TASK_zgesum( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *SUMS, int SUMSm, int SUMSn ) const CHAM_desc_t *SUMS, int SUMSm, int SUMSn )
{ {
struct cl_zgesum_args_s clargs = { struct cl_zgesum_args_s *clargs = NULL;
.storev = storev,
.m = m,
.n = n
};
struct starpu_codelet *codelet = &cl_zgesum; struct starpu_codelet *codelet = &cl_zgesum;
void (*callback)(void*) = options->profiling ? cl_zgesum_callback : NULL; void (*callback)(void*) = options->profiling ? cl_zgesum_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int exec = 0;
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(SUMS, SUMSm, SUMSn); CHAMELEON_ACCESS_RW(SUMS, SUMSm, SUMSn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zgesum_args_s ) );
clargs->storev = storev;
clargs->m = m;
clargs->n = n;
}
rt_starpu_insert_task( rt_starpu_insert_task(
codelet, codelet,
STARPU_VALUE, &clargs, sizeof(struct cl_zgesum_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zgesum_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_RW, RTBLKADDR(SUMS, CHAMELEON_Complex64_t, SUMSm, SUMSn), STARPU_RW, RTBLKADDR(SUMS, CHAMELEON_Complex64_t, SUMSm, SUMSn),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
......
...@@ -35,7 +35,6 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg) ...@@ -35,7 +35,6 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N); starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N);
TCORE_zhe2ge(uplo, M, N, tileA, tileB); TCORE_zhe2ge(uplo, M, N, tileA, tileB);
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis * @author Lucas Barros de Assis
* @author Florent Pruvost * @author Florent Pruvost
* @author Gwenole Lucas
* @date 2021-03-16 * @date 2021-03-16
* @precisions normal z -> c * @precisions normal z -> c
* *
...@@ -41,38 +42,35 @@ struct cl_zherk_args_s { ...@@ -41,38 +42,35 @@ struct cl_zherk_args_s {
static void static void
cl_zherk_cpu_func(void *descr[], void *cl_arg) cl_zherk_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zherk_args_s clargs; struct cl_zherk_args_s *clargs = (struct cl_zherk_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileC; CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]); tileC = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zherk( clargs->uplo, clargs->trans, clargs->n, clargs->k,
TCORE_zherk( clargs.uplo, clargs.trans, clargs.n, clargs.k, clargs->alpha, tileA, clargs->beta, tileC );
clargs.alpha, tileA, clargs.beta, tileC );
} }
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
static void static void
cl_zherk_cuda_func(void *descr[], void *cl_arg) cl_zherk_cuda_func(void *descr[], void *cl_arg)
{ {
struct cl_zherk_args_s clargs; struct cl_zherk_args_s *clargs = (struct cl_zherk_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileC; CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]); tileC = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
RUNTIME_getStream(stream); RUNTIME_getStream(stream);
CUDA_zherk( CUDA_zherk(
clargs.uplo, clargs.trans, clargs.n, clargs.k, clargs->uplo, clargs->trans, clargs->n, clargs->k,
(cuDoubleComplex*)&(clargs.alpha), &(clargs->alpha),
tileA->mat, tileA->ld, tileA->mat, tileA->ld,
(cuDoubleComplex*)&(clargs.beta), &(clargs->beta),
tileC->mat, tileC->ld, tileC->mat, tileC->ld,
stream ); stream );
...@@ -101,28 +99,33 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options, ...@@ -101,28 +99,33 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options,
beta, C, Cm, Cn ); beta, C, Cm, Cn );
} }
struct cl_zherk_args_s clargs = { struct cl_zherk_args_s *clargs = NULL;
.uplo = uplo,
.trans = trans,
.n = n,
.k = k,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.beta = beta,
.tileC = C->get_blktile( C, Cm, Cn ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid, accessC; int workerid, accessC;
int exec = 0;
char *cl_name = "zherk"; char *cl_name = "zherk";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_ACCESS_RW(C, Cm, Cn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zherk_args_s ) );
clargs->uplo = uplo;
clargs->trans = trans;
clargs->n = n;
clargs->k = k;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->beta = beta;
clargs->tileC = C->get_blktile( C, Cm, Cn );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zherk_callback : NULL; callback = options->profiling ? cl_zherk_callback : NULL;
...@@ -136,7 +139,7 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options, ...@@ -136,7 +139,7 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zherk, &cl_zherk,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zherk_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zherk_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
......
...@@ -41,20 +41,19 @@ struct cl_zlacpy_args_s { ...@@ -41,20 +41,19 @@ struct cl_zlacpy_args_s {
static void static void
cl_zlacpy_cpu_func(void *descr[], void *cl_arg) cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zlacpy_args_s clargs; struct cl_zlacpy_args_s *clargs = (struct cl_zlacpy_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs ); assert( clargs->displA == 0 );
assert( clargs.displA == 0 ); assert( clargs->displB == 0 );
assert( clargs.displB == 0 );
/* A = tileA->mat; */ /* A = tileA->mat; */
/* B = tileB->mat; */ /* B = tileB->mat; */
/* CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); */ /* CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); */
TCORE_zlacpy( clargs.uplo, clargs.m, clargs.n, tileA, tileB ); TCORE_zlacpy( clargs->uplo, clargs->m, clargs->n, tileA, tileB );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -68,27 +67,32 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, ...@@ -68,27 +67,32 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
int displA, const CHAM_desc_t *A, int Am, int An, int displA, const CHAM_desc_t *A, int Am, int An,
int displB, const CHAM_desc_t *B, int Bm, int Bn ) int displB, const CHAM_desc_t *B, int Bm, int Bn )
{ {
struct cl_zlacpy_args_s clargs = { struct cl_zlacpy_args_s *clargs = NULL;
.uplo = uplo,
.m = m,
.n = n,
.displA = displA,
.displB = displB,
.tileA = A->get_blktile( A, Am, An ),
.tileB = B->get_blktile( B, Bm, Bn ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "zlacpy"; char *cl_name = "zlacpy";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_W(B, Bm, Bn); CHAMELEON_ACCESS_W(B, Bm, Bn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlacpy_args_s ) );
clargs->uplo = uplo;
clargs->m = m;
clargs->n = n;
clargs->displA = displA;
clargs->displB = displB;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zlacpy_callback : NULL; callback = options->profiling ? cl_zlacpy_callback : NULL;
...@@ -99,7 +103,7 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, ...@@ -99,7 +103,7 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zlacpy, &cl_zlacpy,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zlacpy_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zlacpy_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
......
...@@ -38,7 +38,6 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) ...@@ -38,7 +38,6 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args(cl_arg, &m, &n); starpu_codelet_unpack_args(cl_arg, &m, &n);
TCORE_zlag2c( m, n, tileA, tileB); TCORE_zlag2c( m, n, tileA, tileB);
} }
...@@ -96,7 +95,6 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg) ...@@ -96,7 +95,6 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args(cl_arg, &m, &n); starpu_codelet_unpack_args(cl_arg, &m, &n);
TCORE_clag2z( m, n, tileA, tileB); TCORE_clag2z( m, n, tileA, tileB);
} }
......
...@@ -36,13 +36,12 @@ struct cl_zlascal_args_s { ...@@ -36,13 +36,12 @@ struct cl_zlascal_args_s {
static void static void
cl_zlascal_cpu_func( void *descr[], void *cl_arg ) cl_zlascal_cpu_func( void *descr[], void *cl_arg )
{ {
struct cl_zlascal_args_s clargs; struct cl_zlascal_args_s *clargs = (struct cl_zlascal_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zlascal( clargs->uplo, clargs->m, clargs->n, clargs->alpha, tileA );
TCORE_zlascal( clargs.uplo, clargs.m, clargs.n, clargs.alpha, tileA );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -65,24 +64,29 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options, ...@@ -65,24 +64,29 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
return; return;
} }
struct cl_zlascal_args_s clargs = { struct cl_zlascal_args_s *clargs = NULL;
.uplo = uplo,
.m = m,
.n = n,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "zlascal"; char *cl_name = "zlascal";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlascal_args_s ) );
clargs->uplo = uplo;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zlascal_callback : NULL; callback = options->profiling ? cl_zlascal_callback : NULL;
...@@ -93,7 +97,7 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options, ...@@ -93,7 +97,7 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zlascal, &cl_zlascal,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zlascal_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zlascal_args_s),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */ /* Common task arguments */
......
...@@ -39,13 +39,12 @@ struct cl_zlaset_args_s { ...@@ -39,13 +39,12 @@ struct cl_zlaset_args_s {
static void static void
cl_zlaset_cpu_func( void *descr[], void *cl_arg ) cl_zlaset_cpu_func( void *descr[], void *cl_arg )
{ {
struct cl_zlaset_args_s clargs; struct cl_zlaset_args_s *clargs = (struct cl_zlaset_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zlaset( clargs->uplo, clargs->m, clargs->n, clargs->alpha, clargs->beta, tileA );
TCORE_zlaset( clargs.uplo, clargs.m, clargs.n, clargs.alpha, clargs.beta, tileA );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -59,25 +58,30 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options, ...@@ -59,25 +58,30 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
const CHAM_desc_t *A, int Am, int An ) const CHAM_desc_t *A, int Am, int An )
{ {
struct cl_zlaset_args_s clargs = { struct cl_zlaset_args_s *clargs = NULL;
.uplo = uplo,
.m = m,
.n = n,
.alpha = alpha,
.beta = beta,
.tileA = A->get_blktile( A, Am, An ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "zlaset"; char *cl_name = "zlaset";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_ACCESS_W(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlaset_args_s ) );
clargs->uplo = uplo;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->beta = beta;
clargs->tileA = A->get_blktile( A, Am, An );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zlaset_callback : NULL; callback = options->profiling ? cl_zlaset_callback : NULL;
...@@ -88,7 +92,7 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options, ...@@ -88,7 +92,7 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zlaset, &cl_zlaset,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zlaset_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zlaset_args_s),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */ /* Common task arguments */
......
...@@ -37,13 +37,12 @@ struct cl_zlauum_args_s { ...@@ -37,13 +37,12 @@ struct cl_zlauum_args_s {
static void static void
cl_zlauum_cpu_func(void *descr[], void *cl_arg) cl_zlauum_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zlauum_args_s clargs; struct cl_zlauum_args_s *clargs = (struct cl_zlauum_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zlauum( clargs->uplo, clargs->n, tileA );
TCORE_zlauum( clargs.uplo, clargs.n, tileA );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -56,22 +55,27 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options, ...@@ -56,22 +55,27 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
cham_uplo_t uplo, int n, int nb, cham_uplo_t uplo, int n, int nb,
const CHAM_desc_t *A, int Am, int An ) const CHAM_desc_t *A, int Am, int An )
{ {
struct cl_zlauum_args_s clargs = { struct cl_zlauum_args_s *clargs = NULL;
.uplo = uplo,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "zlauum"; char *cl_name = "zlauum";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlauum_args_s ) );
clargs->uplo = uplo;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zlauum_callback : NULL; callback = options->profiling ? cl_zlauum_callback : NULL;
...@@ -82,7 +86,7 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options, ...@@ -82,7 +86,7 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zlauum, &cl_zlauum,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zlauum_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zlauum_args_s),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */ /* Common task arguments */
......
...@@ -41,14 +41,13 @@ struct cl_zplghe_args_s { ...@@ -41,14 +41,13 @@ struct cl_zplghe_args_s {
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zplghe_args_s clargs; struct cl_zplghe_args_s *clargs = (struct cl_zplghe_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zplghe( clargs->bump, clargs->m, clargs->n, tileA,
TCORE_zplghe( clargs.bump, clargs.m, clargs.n, tileA, clargs->bigM, clargs->m0, clargs->n0, clargs->seed );
clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -61,27 +60,32 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options, ...@@ -61,27 +60,32 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
int bigM, int m0, int n0, unsigned long long int seed ) int bigM, int m0, int n0, unsigned long long int seed )
{ {
struct cl_zplghe_args_s clargs = { struct cl_zplghe_args_s *clargs = NULL;
.bump = bump,
.m = m,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.bigM = bigM,
.m0 = m0,
.n0 = n0,
.seed = seed,
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "zplghe"; char *cl_name = "zplghe";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_ACCESS_W(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zplghe_args_s ) );
clargs->bump = bump;
clargs->m = m;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->bigM = bigM;
clargs->m0 = m0;
clargs->n0 = n0;
clargs->seed = seed;
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zplghe_callback : NULL; callback = options->profiling ? cl_zplghe_callback : NULL;
...@@ -92,7 +96,7 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options, ...@@ -92,7 +96,7 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zplghe, &cl_zplghe,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zplghe_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zplghe_args_s),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */ /* Common task arguments */
......
...@@ -41,14 +41,13 @@ struct cl_zplgsy_args_s { ...@@ -41,14 +41,13 @@ struct cl_zplgsy_args_s {
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zplgsy_args_s clargs; struct cl_zplgsy_args_s *clargs = (struct cl_zplgsy_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zplgsy( clargs->bump, clargs->m, clargs->n, tileA,
TCORE_zplgsy( clargs.bump, clargs.m, clargs.n, tileA, clargs->bigM, clargs->m0, clargs->n0, clargs->seed );
clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -61,27 +60,32 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, ...@@ -61,27 +60,32 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
int bigM, int m0, int n0, unsigned long long int seed ) int bigM, int m0, int n0, unsigned long long int seed )
{ {
struct cl_zplgsy_args_s clargs = { struct cl_zplgsy_args_s *clargs = NULL;
.bump = bump,
.m = m,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.bigM = bigM,
.m0 = m0,
.n0 = n0,
.seed = seed,
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "zplgsy"; char *cl_name = "zplgsy";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_ACCESS_W(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zplgsy_args_s ) );
clargs->bump = bump;
clargs->m = m;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->bigM = bigM;
clargs->m0 = m0;
clargs->n0 = n0;
clargs->seed = seed;
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zplgsy_callback : NULL; callback = options->profiling ? cl_zplgsy_callback : NULL;
...@@ -92,7 +96,7 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, ...@@ -92,7 +96,7 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zplgsy, &cl_zplgsy,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zplgsy_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zplgsy_args_s),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */ /* Common task arguments */
......
...@@ -41,14 +41,13 @@ struct cl_zplrnt_args_s { ...@@ -41,14 +41,13 @@ struct cl_zplrnt_args_s {
static void static void
cl_zplrnt_cpu_func(void *descr[], void *cl_arg) cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zplrnt_args_s clargs; struct cl_zplrnt_args_s *clargs = (struct cl_zplrnt_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zplrnt( clargs->m, clargs->n, tileA,
TCORE_zplrnt( clargs.m, clargs.n, tileA, clargs->bigM, clargs->m0, clargs->n0, clargs->seed );
clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -61,26 +60,31 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, ...@@ -61,26 +60,31 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
int m, int n, const CHAM_desc_t *A, int Am, int An, int m, int n, const CHAM_desc_t *A, int Am, int An,
int bigM, int m0, int n0, unsigned long long int seed ) int bigM, int m0, int n0, unsigned long long int seed )
{ {
struct cl_zplrnt_args_s clargs = { struct cl_zplrnt_args_s *clargs = NULL;
.m = m,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.bigM = bigM,
.m0 = m0,
.n0 = n0,
.seed = seed,
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "zplrnt"; char *cl_name = "zplrnt";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_W(A, Am, An); CHAMELEON_ACCESS_W(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zplrnt_args_s ) );
clargs->m = m;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->bigM = bigM;
clargs->m0 = m0;
clargs->n0 = n0;
clargs->seed = seed;
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zplrnt_callback : NULL; callback = options->profiling ? cl_zplrnt_callback : NULL;
...@@ -91,7 +95,7 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, ...@@ -91,7 +95,7 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zplrnt, &cl_zplrnt,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zplrnt_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zplrnt_args_s),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */ /* Common task arguments */
......
...@@ -40,17 +40,16 @@ struct cl_zpotrf_args_s { ...@@ -40,17 +40,16 @@ struct cl_zpotrf_args_s {
static void static void
cl_zpotrf_cpu_func(void *descr[], void *cl_arg) cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zpotrf_args_s clargs; struct cl_zpotrf_args_s *clargs = (struct cl_zpotrf_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
int info = 0; int info = 0;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zpotrf( clargs->uplo, clargs->n, tileA, &info );
TCORE_zpotrf( clargs.uplo, clargs.n, tileA, &info );
if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { if ( (clargs->sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info ); RUNTIME_sequence_flush( NULL, clargs->sequence, clargs->request, clargs->iinfo+info );
} }
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -65,25 +64,30 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options, ...@@ -65,25 +64,30 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *A, int Am, int An,
int iinfo ) int iinfo )
{ {
struct cl_zpotrf_args_s clargs = { struct cl_zpotrf_args_s *clargs = NULL;
.uplo = uplo,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.iinfo = iinfo,
.sequence = options->sequence,
.request = options->request,
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "zpotrf"; char *cl_name = "zpotrf";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zpotrf_args_s ) );
clargs->uplo = uplo;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->iinfo = iinfo;
clargs->sequence = options->sequence;
clargs->request = options->request;
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zpotrf_callback : NULL; callback = options->profiling ? cl_zpotrf_callback : NULL;
...@@ -94,7 +98,7 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options, ...@@ -94,7 +98,7 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zpotrf, &cl_zpotrf,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zpotrf_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zpotrf_args_s),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */ /* Common task arguments */
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis * @author Lucas Barros de Assis
* @author Florent Pruvost * @author Florent Pruvost
* @author Gwenole Lucas
* @date 2021-03-16 * @date 2021-03-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -41,38 +42,35 @@ struct cl_zsyrk_args_s { ...@@ -41,38 +42,35 @@ struct cl_zsyrk_args_s {
static void static void
cl_zsyrk_cpu_func(void *descr[], void *cl_arg) cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_zsyrk_args_s clargs; struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileC; CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]); tileC = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_zsyrk( clargs->uplo, clargs->trans, clargs->n, clargs->k,
TCORE_zsyrk( clargs.uplo, clargs.trans, clargs.n, clargs.k, clargs->alpha, tileA, clargs->beta, tileC );
clargs.alpha, tileA, clargs.beta, tileC );
} }
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
static void static void
cl_zsyrk_cuda_func(void *descr[], void *cl_arg) cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
{ {
struct cl_zsyrk_args_s clargs; struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileC; CHAM_tile_t *tileC;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileC = cti_interface_get(descr[1]); tileC = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
RUNTIME_getStream(stream); RUNTIME_getStream(stream);
CUDA_zsyrk( CUDA_zsyrk(
clargs.uplo, clargs.trans, clargs.n, clargs.k, clargs->uplo, clargs->trans, clargs->n, clargs->k,
(cuDoubleComplex*)&(clargs.alpha), (cuDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld, tileA->mat, tileA->ld,
(cuDoubleComplex*)&(clargs.beta), (cuDoubleComplex*)&(clargs->beta),
tileC->mat, tileC->ld, tileC->mat, tileC->ld,
stream ); stream );
...@@ -101,28 +99,33 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, ...@@ -101,28 +99,33 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
beta, C, Cm, Cn ); beta, C, Cm, Cn );
} }
struct cl_zsyrk_args_s clargs = { struct cl_zsyrk_args_s *clargs = NULL;
.uplo = uplo,
.trans = trans,
.n = n,
.k = k,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.beta = beta,
.tileC = C->get_blktile( C, Cm, Cn ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid, accessC; int workerid, accessC;
int exec = 0;
char *cl_name = "zsyrk"; char *cl_name = "zsyrk";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(C, Cm, Cn); CHAMELEON_ACCESS_RW(C, Cm, Cn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zsyrk_args_s ) );
clargs->uplo = uplo;
clargs->trans = trans;
clargs->n = n;
clargs->k = k;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->beta = beta;
clargs->tileC = C->get_blktile( C, Cm, Cn );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_zsyrk_callback : NULL; callback = options->profiling ? cl_zsyrk_callback : NULL;
...@@ -135,9 +138,8 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, ...@@ -135,9 +138,8 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zsyrk, &cl_zsyrk,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_zsyrk_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_zsyrk_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
......
...@@ -38,16 +38,15 @@ struct cl_ztradd_args_s { ...@@ -38,16 +38,15 @@ struct cl_ztradd_args_s {
static void static void
cl_ztradd_cpu_func(void *descr[], void *cl_arg) cl_ztradd_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_ztradd_args_s clargs; struct cl_ztradd_args_s *clargs = (struct cl_ztradd_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_ztradd( clargs->uplo, clargs->trans, clargs->m, clargs->n,
TCORE_ztradd( clargs.uplo, clargs.trans, clargs.m, clargs.n, clargs->alpha, tileA, clargs->beta, tileB );
clargs.alpha, tileA, clargs.beta, tileB );
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -66,28 +65,33 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, ...@@ -66,28 +65,33 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
beta, B, Bm, Bn ); beta, B, Bm, Bn );
} }
struct cl_ztradd_args_s clargs = { struct cl_ztradd_args_s *clargs = NULL;
.uplo = uplo,
.trans = trans,
.m = m,
.n = n,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.beta = beta,
.tileB = B->get_blktile( B, Bm, Bn ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid, accessB; int workerid, accessB;
int exec = 0;
char *cl_name = "ztradd"; char *cl_name = "ztradd";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_ACCESS_RW(B, Bm, Bn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_ztradd_args_s ) );
clargs->uplo = uplo;
clargs->trans = trans;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->beta = beta;
clargs->tileB = B->get_blktile( B, Bm, Bn );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_ztradd_callback : NULL; callback = options->profiling ? cl_ztradd_callback : NULL;
...@@ -101,7 +105,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, ...@@ -101,7 +105,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_ztradd, &cl_ztradd,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_ztradd_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_ztradd_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
......
...@@ -42,37 +42,34 @@ struct cl_ztrmm_args_s { ...@@ -42,37 +42,34 @@ struct cl_ztrmm_args_s {
static void static void
cl_ztrmm_cpu_func(void *descr[], void *cl_arg) cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_ztrmm_args_s clargs; struct cl_ztrmm_args_s *clargs = (struct cl_ztrmm_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_ztrmm( clargs->side, clargs->uplo, clargs->transA, clargs->diag,
TCORE_ztrmm( clargs.side, clargs.uplo, clargs.transA, clargs.diag, clargs->m, clargs->n, clargs->alpha, tileA, tileB );
clargs.m, clargs.n, clargs.alpha, tileA, tileB );
} }
#ifdef CHAMELEON_USE_CUDA #ifdef CHAMELEON_USE_CUDA
static void static void
cl_ztrmm_cuda_func(void *descr[], void *cl_arg) cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
{ {
struct cl_ztrmm_args_s clargs; struct cl_ztrmm_args_s *clargs = (struct cl_ztrmm_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
RUNTIME_getStream(stream); RUNTIME_getStream(stream);
CUDA_ztrmm( CUDA_ztrmm(
clargs.side, clargs.uplo, clargs.transA, clargs.diag, clargs->side, clargs->uplo, clargs->transA, clargs->diag,
clargs.m, clargs.n, clargs->m, clargs->n,
(cuDoubleComplex*)&(clargs.alpha), (cuDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld, tileA->mat, tileA->ld,
tileB->mat, tileB->ld, tileB->mat, tileB->ld,
stream ); stream );
...@@ -97,29 +94,34 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, ...@@ -97,29 +94,34 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn ) const CHAM_desc_t *B, int Bm, int Bn )
{ {
struct cl_ztrmm_args_s clargs = { struct cl_ztrmm_args_s *clargs = NULL;
.side = side,
.uplo = uplo,
.transA = transA,
.diag = diag,
.m = m,
.n = n,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.tileB = B->get_blktile( B, Bm, Bn ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "ztrmm"; char *cl_name = "ztrmm";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_ACCESS_RW(B, Bm, Bn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_ztrmm_args_s ) );
clargs->side = side;
clargs->uplo = uplo;
clargs->transA = transA;
clargs->diag = diag;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_ztrmm_callback : NULL; callback = options->profiling ? cl_ztrmm_callback : NULL;
...@@ -130,7 +132,7 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, ...@@ -130,7 +132,7 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_ztrmm, &cl_ztrmm,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_ztrmm_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_ztrmm_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis * @author Lucas Barros de Assis
* @author Florent Pruvost * @author Florent Pruvost
* @author Gwenole Lucas
* @date 2021-03-16 * @date 2021-03-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -42,37 +43,34 @@ struct cl_ztrsm_args_s { ...@@ -42,37 +43,34 @@ struct cl_ztrsm_args_s {
static void static void
cl_ztrsm_cpu_func(void *descr[], void *cl_arg) cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_ztrsm_args_s clargs; struct cl_ztrsm_args_s *clargs = (struct cl_ztrsm_args_s*)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_ztrsm( clargs->side, clargs->uplo, clargs->transA, clargs->diag,
TCORE_ztrsm( clargs.side, clargs.uplo, clargs.transA, clargs.diag, clargs->m, clargs->n, clargs->alpha, tileA, tileB );
clargs.m, clargs.n, clargs.alpha, tileA, tileB );
} }
#ifdef CHAMELEON_USE_CUDA #ifdef CHAMELEON_USE_CUDA
static void static void
cl_ztrsm_cuda_func(void *descr[], void *cl_arg) cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
{ {
struct cl_ztrsm_args_s clargs; struct cl_ztrsm_args_s *clargs = (struct cl_ztrsm_args_s*)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args( cl_arg, &clargs );
RUNTIME_getStream(stream); RUNTIME_getStream(stream);
CUDA_ztrsm( CUDA_ztrsm(
clargs.side, clargs.uplo, clargs.transA, clargs.diag, clargs->side, clargs->uplo, clargs->transA, clargs->diag,
clargs.m, clargs.n, clargs->m, clargs->n,
(cuDoubleComplex*)&(clargs.alpha), (cuDoubleComplex*)&(clargs->alpha),
tileA->mat, tileA->ld, tileA->mat, tileA->ld,
tileB->mat, tileB->ld, tileB->mat, tileB->ld,
stream ); stream );
...@@ -97,29 +95,34 @@ void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, ...@@ -97,29 +95,34 @@ void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn ) const CHAM_desc_t *B, int Bm, int Bn )
{ {
struct cl_ztrsm_args_s clargs = { struct cl_ztrsm_args_s *clargs = NULL;
.side = side,
.uplo = uplo,
.transA = transA,
.diag = diag,
.m = m,
.n = n,
.alpha = alpha,
.tileA = A->get_blktile( A, Am, An ),
.tileB = B->get_blktile( B, Bm, Bn ),
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "ztrsm"; char *cl_name = "ztrsm";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(B, Bm, Bn); CHAMELEON_ACCESS_RW(B, Bm, Bn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_ztrsm_args_s ) );
clargs->side = side;
clargs->uplo = uplo;
clargs->transA = transA;
clargs->diag = diag;
clargs->m = m;
clargs->n = n;
clargs->alpha = alpha;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_ztrsm_callback : NULL; callback = options->profiling ? cl_ztrsm_callback : NULL;
...@@ -130,7 +133,7 @@ void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, ...@@ -130,7 +133,7 @@ void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_ztrsm, &cl_ztrsm,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_ztrsm_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_ztrsm_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
......
...@@ -41,17 +41,16 @@ struct cl_ztrtri_args_s { ...@@ -41,17 +41,16 @@ struct cl_ztrtri_args_s {
static void static void
cl_ztrtri_cpu_func(void *descr[], void *cl_arg) cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
{ {
struct cl_ztrtri_args_s clargs; struct cl_ztrtri_args_s *clargs = (struct cl_ztrtri_args_s *)cl_arg;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
int info = 0; int info = 0;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
starpu_codelet_unpack_args( cl_arg, &clargs ); TCORE_ztrtri( clargs->uplo, clargs->diag, clargs->n, tileA, &info );
TCORE_ztrtri( clargs.uplo, clargs.diag, clargs.n, tileA, &info );
if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { if ( (clargs->sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info ); RUNTIME_sequence_flush( NULL, clargs->sequence, clargs->request, clargs->iinfo+info );
} }
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -66,26 +65,31 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, ...@@ -66,26 +65,31 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *A, int Am, int An,
int iinfo ) int iinfo )
{ {
struct cl_ztrtri_args_s clargs = { struct cl_ztrtri_args_s *clargs = NULL;
.uplo = uplo,
.diag = diag,
.n = n,
.tileA = A->get_blktile( A, Am, An ),
.iinfo = iinfo,
.sequence = options->sequence,
.request = options->request,
};
void (*callback)(void*); void (*callback)(void*);
RUNTIME_request_t *request = options->request; RUNTIME_request_t *request = options->request;
starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt); starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
int workerid; int workerid;
int exec = 0;
char *cl_name = "ztrtri"; char *cl_name = "ztrtri";
/* Handle cache */ /* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW(A, Am, An); CHAMELEON_ACCESS_RW(A, Am, An);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION; CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_ztrtri_args_s ) );
clargs->uplo = uplo;
clargs->diag = diag;
clargs->n = n;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->iinfo = iinfo;
clargs->sequence = options->sequence;
clargs->request = options->request;
}
/* Callback for profiling information */ /* Callback for profiling information */
callback = options->profiling ? cl_ztrtri_callback : NULL; callback = options->profiling ? cl_ztrtri_callback : NULL;
...@@ -96,7 +100,7 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, ...@@ -96,7 +100,7 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_ztrtri, &cl_ztrtri,
/* Task codelet arguments */ /* Task codelet arguments */
STARPU_VALUE, &clargs, sizeof(struct cl_ztrtri_args_s), STARPU_CL_ARGS, clargs, sizeof(struct cl_ztrtri_args_s),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
/* Common task arguments */ /* Common task arguments */
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment