Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 7a5373b5 authored by LISITO Alycia, committed by Mathieu Faverge
Browse files

codelet (only starpu): added/corrected zlacpyx

parent d8f5289d
No related branches found
No related tags found
1 merge request: !309 Tile2band: Fix the lacpyx and tile2band algorithm
......@@ -367,6 +367,17 @@ TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t
CORE_zlacpy( uplo, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld );
}
/**
 * Tile-level wrapper around CORE_zlacpy that works on sub-regions of two tiles.
 *
 * The data pointers of A and B are fetched with CHAM_tile_get_ptr(), shifted
 * by displA and displB elements respectively, and handed to CORE_zlacpy
 * together with the caller-supplied LDA and LDB (the ld fields stored in the
 * tiles are not used here).
 *
 * @param[in]  uplo    Forwarded unchanged to CORE_zlacpy.
 * @param[in]  M       Forwarded unchanged to CORE_zlacpy.
 * @param[in]  N       Forwarded unchanged to CORE_zlacpy.
 * @param[in]  displA  Offset, in elements, added to the data pointer of A.
 * @param[in]  displB  Offset, in elements, added to the data pointer of B.
 * @param[in]  A       Source tile; its format must be full-rank or descriptor.
 * @param[in]  LDA     Value passed to CORE_zlacpy right after the A pointer
 *                     (presumably the leading dimension of A - confirm).
 * @param[out] B       Destination tile; same format requirement as A.
 * @param[in]  LDB     Value passed to CORE_zlacpy right after the B pointer
 *                     (presumably the leading dimension of B - confirm).
 */
void
TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB,
               const CHAM_tile_t *A, int LDA,
               CHAM_tile_t *B, int LDB )
{
    const CHAMELEON_Complex64_t *src;
    CHAMELEON_Complex64_t       *dst;

    /* Both tiles must expose a plain pointer (full-rank or descriptor format). */
    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );

    src = CHAM_tile_get_ptr( A );
    dst = CHAM_tile_get_ptr( B );

    /* Move each pointer to the requested starting element. */
    src += displA;
    dst += displB;

    CORE_zlacpy( uplo, M, N, src, LDA, dst, LDB );
}
void
TCORE_zlange( cham_normtype_t norm,
int M,
......
......@@ -263,6 +263,12 @@ TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t
return;
}
/**
 * Empty variant of TCORE_zlacpyx: same signature as the real kernel, but it
 * performs no work and touches none of its arguments.
 */
void
TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB,
               const CHAM_tile_t *A, int LDA,
               CHAM_tile_t *B, int LDB )
{
    /* Intentionally empty: every argument is ignored. */
    (void)uplo;
    (void)M;
    (void)N;
    (void)displA;
    (void)displB;
    (void)A;
    (void)LDA;
    (void)B;
    (void)LDB;
}
void
TCORE_zlange( cham_normtype_t norm,
int M,
......
......@@ -172,6 +172,7 @@ void TCORE_zher2k( int uplo, int trans, int N, int K, void *alpha, const void *A
int TCORE_zherfb( int uplo, int N, int K, int IB, int NB, const void *A, const void *T, void *C, void *WORK, int ldwork );
int TCORE_zhessq( int storev, int uplo, int N, const void *A, void *sclssq );
void TCORE_zlacpy( int uplo, int M, int N, const void *A, void *B );
void TCORE_zlacpyx( int uplo, int M, int N, int displA, int displB, const void *A, int LDA, void *B, int LDB );
void TCORE_zlange( int norm, int M, int N, const void *A, double *work, double *normA );
void TCORE_zlanhe( int norm, int uplo, int N, const void *A, double *work, double *normA );
void TCORE_zlansy( int norm, int uplo, int N, const void *A, double *work, double *normA );
......
......@@ -43,6 +43,7 @@ int TCORE_zherfb( cham_uplo_t uplo, int N, int K, int IB, int NB, const CHAM_ti
int TCORE_zhessq( cham_store_t storev, cham_uplo_t uplo, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq );
#endif
void TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
void TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB, const CHAM_tile_t *A, int LDA, CHAM_tile_t *B, int LDB );
void TCORE_zlange( cham_normtype_t norm, int M, int N, const CHAM_tile_t *A, double *work, double *normA );
#if defined(PRECISION_z) || defined(PRECISION_c)
void TCORE_zlanhe( cham_normtype_t norm, cham_uplo_t uplo, int N, const CHAM_tile_t *A, double *work, double *normA );
......
......@@ -50,6 +50,7 @@ CHAMELEON_CL_CB(zher2k, cti_handle_get_m(task->handles[0]), cti_handle_ge
CHAMELEON_CL_CB(zherk, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, ( 1.+ M)*M*N)
#endif
CHAMELEON_CL_CB(zlacpy, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlacpyx, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlange, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlaset, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlaset2, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
......
......@@ -33,6 +33,8 @@ struct cl_zlacpy_args_s {
int n;
int displA;
int displB;
int lda;
int ldb;
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
};
......@@ -50,17 +52,32 @@ cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
assert( clargs->displA == 0 );
assert( clargs->displB == 0 );
/* A = tileA->mat; */
/* B = tileB->mat; */
/* CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); */
CHAMELEON_Complex64_t *A = tileA->mat;
CHAMELEON_Complex64_t *B = tileB->mat;
// CORE_zlacpy( clargs->uplo, clargs->m, clargs->n, A + clargs->displA, tileA->ld, B + clargs->displB, tileB->ld );
TCORE_zlacpy( clargs->uplo, clargs->m, clargs->n, tileA, tileB );
}
/**
 * CPU implementation of the zlacpyx codelet.
 *
 * Unpacks the codelet arguments and calls TCORE_zlacpyx on the two tiles
 * obtained from the task buffers.
 *
 * @param[in] descr   Task buffers: descr[0] yields the tile passed as A,
 *                    descr[1] the tile passed as B.
 * @param[in] cl_arg  Pointer to a struct cl_zlacpy_args_s holding uplo, m, n,
 *                    displA, displB, lda and ldb.
 */
static void
cl_zlacpyx_cpu_func(void *descr[], void *cl_arg)
{
    struct cl_zlacpy_args_s *args = cl_arg;
    CHAM_tile_t             *src  = cti_interface_get(descr[0]);
    CHAM_tile_t             *dst  = cti_interface_get(descr[1]);

    /* Offsets and leading dimensions come from the argument structure,
     * not from the tiles themselves. */
    TCORE_zlacpyx( args->uplo, args->m, args->n,
                   args->displA, args->displB,
                   src, args->lda,
                   dst, args->ldb );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS_CPU( zlacpy, cl_zlacpy_cpu_func )
CODELETS_CPU( zlacpy, cl_zlacpy_cpu_func )
CODELETS_CPU( zlacpyx, cl_zlacpyx_cpu_func )
void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n, int nb,
......@@ -70,7 +87,7 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
struct cl_zlacpy_args_s *clargs = NULL;
void (*callback)(void*);
int exec = 0;
char *cl_name = "zlacpy";
char *cl_name = "zlacpyx";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
......@@ -88,14 +105,16 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
clargs->displB = displB;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
clargs->lda = clargs->tileA->ld;
clargs->ldb = clargs->tileB->ld;
}
/* Callback for profiling information */
callback = options->profiling ? cl_zlacpy_callback : NULL;
callback = options->profiling ? cl_zlacpyx_callback : NULL;
/* Insert the task */
rt_starpu_insert_task(
&cl_zlacpy,
&cl_zlacpyx,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, sizeof(struct cl_zlacpy_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
......@@ -119,7 +138,51 @@ void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn )
{
INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
0, A, Am, An,
0, B, Bm, Bn );
struct cl_zlacpy_args_s *clargs = NULL;
void (*callback)(void*);
int exec = 0;
char *cl_name = "zlacpy";
/* Handle cache */
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_W(B, Bm, Bn);
exec = __chameleon_need_exec;
CHAMELEON_END_ACCESS_DECLARATION;
if ( exec ) {
clargs = malloc( sizeof( struct cl_zlacpy_args_s ) );
clargs->uplo = uplo;
clargs->m = m;
clargs->n = n;
clargs->displA = 0;
clargs->displB = 0;
clargs->tileA = A->get_blktile( A, Am, An );
clargs->tileB = B->get_blktile( B, Bm, Bn );
clargs->lda = clargs->tileA->ld;
clargs->ldb = clargs->tileB->ld;
}
/* Callback for profiling information */
callback = options->profiling ? cl_zlacpy_callback : NULL;
/* Insert the task */
rt_starpu_insert_task(
&cl_zlacpy,
/* Task codelet arguments */
STARPU_CL_ARGS, clargs, sizeof(struct cl_zlacpy_args_s),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
/* Common task arguments */
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, cl_name,
#endif
0 );
(void)nb;
}
......@@ -94,6 +94,7 @@ CODELETS_HEADER(zhe2ge);
CODELETS_HEADER(zlascal);
CODELETS_HEADER(ztradd);
CODELETS_HEADER(zlacpy);
CODELETS_HEADER(zlacpyx);
CODELETS_HEADER(zlange);
CODELETS_HEADER(zlange_max);
CODELETS_HEADER(zlansy);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment