Mentions légales du service

Skip to content
Snippets Groups Projects
Commit e428d838 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Merge branch 'starpu/fix_inout' into 'master'

Reduce data access type whenever possible

This merge request reduces the data access mode of task arguments whenever possible (for example from read-write to write-only), so that more cases can benefit from on-the-fly allocation.

For example, in a gemm `alpha * A * B + beta * C` with `beta = 0`, `C` is used only as output, and not as inout. This merge request also changes all the codelets to use STARPU_VARIABLE_NBUFFERS.

See merge request !215
parents b9d980d0 ea4026f2
Branches
Tags
1 merge request: !215 "Reduce data access type whenever possible"
Showing
with 75 additions and 41 deletions
Subproject commit 657741dbab25d4008c4dfc2ebdf34a3f43bf00e6
Subproject commit be970c0169f847f9a61993d2e6a7cd49a409399e
......@@ -44,7 +44,7 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
CODELETS_CPU(dzasum, cl_dzasum_cpu_func)
void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
cham_store_t storev, cham_uplo_t uplo, int M, int N,
......
......@@ -39,7 +39,7 @@ static void cl_map_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(map, 1, cl_map_cpu_func)
CODELETS_CPU(map, cl_map_cpu_func)
void INSERT_TASK_map( const RUNTIME_option_t *options,
cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
......
......@@ -40,7 +40,7 @@ static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func)
CODELETS_CPU(zaxpy, cl_zaxpy_cpu_func)
void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
int M, CHAMELEON_Complex64_t alpha,
......@@ -52,6 +52,10 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
if ( alpha == 0. ) {
return;
}
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_RW(B, Bm, Bn);
......
......@@ -52,7 +52,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
CODELETS_CPU(zbuild, cl_zbuild_cpu_func)
void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An,
......
......@@ -81,9 +81,9 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
* Codelet definition
*/
#if defined(CHAMELEON_USE_CUBLAS_V2)
CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
CODELETS(zgeadd, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
#else
CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
CODELETS_CPU(zgeadd, cl_zgeadd_cpu_func)
#endif
/**
......@@ -144,10 +144,16 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn )
{
if ( alpha == 0. ) {
return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
beta, B, Bm, Bn );
}
struct starpu_codelet *codelet = &cl_zgeadd;
void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
......@@ -162,7 +168,7 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t),
STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, workerid,
......
......@@ -56,7 +56,7 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
CODELETS_CPU(zgelqt, cl_zgelqt_cpu_func)
void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
......
......@@ -94,7 +94,7 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
CODELETS(zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
/**
*
......@@ -108,11 +108,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
const CHAM_desc_t *B, int Bm, int Bn,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
{
if ( alpha == 0. ) {
return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
beta, C, Cm, Cn );
}
(void)nb;
struct starpu_codelet *codelet = &cl_zgemm;
void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
......@@ -131,7 +137,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t),
STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, workerid,
......
......@@ -57,7 +57,7 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
CODELETS_CPU(zgeqrt, cl_zgeqrt_cpu_func)
void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
......
......@@ -50,7 +50,7 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
CODELETS_CPU(zgessm, cl_zgessm_cpu_func)
void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
int m, int n, int k, int ib, int nb,
......
......@@ -43,7 +43,7 @@ static void cl_zgessq_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func)
CODELETS_CPU(zgessq, cl_zgessq_cpu_func)
void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
cham_store_t storev, int m, int n,
......
......@@ -52,7 +52,7 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func)
CODELETS_CPU(zgetrf, cl_zgetrf_cpu_func)
void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
int m, int n, int nb,
......
......@@ -56,7 +56,7 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
CODELETS_CPU(zgetrf_incpiv, cl_zgetrf_incpiv_cpu_func)
void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
......
......@@ -53,7 +53,7 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
CODELETS_CPU(zgetrf_nopiv, cl_zgetrf_nopiv_cpu_func)
void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
......
......@@ -43,7 +43,7 @@ static void cl_zgram_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zgram, 4, cl_zgram_cpu_func)
CODELETS_CPU(zgram, cl_zgram_cpu_func)
void INSERT_TASK_zgram( const RUNTIME_option_t *options,
cham_uplo_t uplo,
......@@ -51,21 +51,21 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
const CHAM_desc_t *Di, int Dim, int Din,
const CHAM_desc_t *Dj, int Djm, int Djn,
const CHAM_desc_t *D, int Dm, int Dn,
CHAM_desc_t *A, int Am, int An)
CHAM_desc_t *A, int Am, int An )
{
struct starpu_codelet *codelet = &cl_zgram;
void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL;
struct starpu_codelet *codelet = &cl_zgram;
void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(Di, Dim, Din);
CHAMELEON_ACCESS_R(Dj, Djm, Djn);
CHAMELEON_ACCESS_R(D, Dm, Dn);
CHAMELEON_ACCESS_RW(A, Am, An);
CHAMELEON_END_ACCESS_DECLARATION;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(Di, Dim, Din);
CHAMELEON_ACCESS_R(Dj, Djm, Djn);
CHAMELEON_ACCESS_R(D, Dm, Dn);
CHAMELEON_ACCESS_RW(A, Am, An);
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &uplo, sizeof(int),
STARPU_VALUE, &m, sizeof(int),
......
......@@ -42,18 +42,18 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
CODELETS_CPU(zhe2ge, cl_zhe2ge_cpu_func)
/**
*
* @ingroup INSERT_TASK_Complex64_t
*
*/
void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
cham_uplo_t uplo,
int m, int n, int mb,
const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn)
void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
cham_uplo_t uplo,
int m, int n, int mb,
const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn )
{
(void)mb;
struct starpu_codelet *codelet = &cl_zhe2ge;
......
......@@ -93,7 +93,7 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)
CODELETS(zhemm, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)
/**
*
......@@ -107,11 +107,17 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
const CHAM_desc_t *B, int Bm, int Bn,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
{
if ( alpha == 0. ) {
return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
beta, C, Cm, Cn );
}
(void)nb;
struct starpu_codelet *codelet = &cl_zhemm;
void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
......@@ -129,7 +135,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t),
STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, workerid,
......
......@@ -89,7 +89,7 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC)
CODELETS(zher2k, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC)
/**
*
......@@ -104,11 +104,17 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options,
const CHAM_desc_t *B, int Bm, int Bn,
double beta, const CHAM_desc_t *C, int Cm, int Cn )
{
if ( alpha == 0. ) {
return INSERT_TASK_zlascal( options, uplo, n, n, nb,
beta, C, Cm, Cn );
}
(void)nb;
struct starpu_codelet *codelet = &cl_zher2k;
void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
......@@ -126,7 +132,7 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options,
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &beta, sizeof(double),
STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, workerid,
......
......@@ -78,7 +78,7 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC)
CODELETS(zherfb, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC)
/**
*
......
......@@ -86,7 +86,7 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
CODELETS(zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
/**
*
......@@ -99,11 +99,17 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
double alpha, const CHAM_desc_t *A, int Am, int An,
double beta, const CHAM_desc_t *C, int Cm, int Cn)
{
if ( alpha == 0. ) {
return INSERT_TASK_zlascal( options, uplo, n, n, nb,
beta, C, Cm, Cn );
}
(void)nb;
struct starpu_codelet *codelet = &cl_zherk;
void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL;
starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
......@@ -119,7 +125,7 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
STARPU_VALUE, &alpha, sizeof(double),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &beta, sizeof(double),
STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, workerid,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment