diff --git a/compute/pmap.c b/compute/pmap.c
index 265186150fa808579aa3971a4915c895ef1d7c81..deb087d28a00d694b38397d52e9533d807ce43d1 100644
--- a/compute/pmap.c
+++ b/compute/pmap.c
@@ -39,12 +39,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
             for (m = 0; m < n; m++) {
                 INSERT_TASK_map(
                     &options,
-                    ChamUpperLower, A(m, n),
+                    ChamRW, ChamUpperLower, A(m, n),
                     op_fct, op_args );
             }
             INSERT_TASK_map(
                 &options,
-                uplo, A(n, n),
+                ChamRW, uplo, A(n, n),
                 op_fct, op_args );
         }
         break;
@@ -53,12 +53,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
         for (n = 0; n < A->nt; n++) {
             INSERT_TASK_map(
                 &options,
-                uplo, A(n, n),
+                ChamRW, uplo, A(n, n),
                 op_fct, op_args );
             for (m = n+1; m < A->mt; m++) {
                 INSERT_TASK_map(
                     &options,
-                    ChamUpperLower, A(m, n),
+                    ChamRW, ChamUpperLower, A(m, n),
                     op_fct, op_args );
             }
         }
@@ -70,7 +70,7 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
             for (n = 0; n < A->nt; n++) {
                 INSERT_TASK_map(
                     &options,
-                    uplo, A(m, n),
+                    ChamRW, uplo, A(m, n),
                     op_fct, op_args );
             }
         }
diff --git a/compute/pzlatms.c b/compute/pzlatms.c
index a96b68755ce99e1a6d3143b1b2f2286794b2d108..3881a9f9d43fdd8cc4dd65e977abf8e4cff24842 100644
--- a/compute/pzlatms.c
+++ b/compute/pzlatms.c
@@ -165,7 +165,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
     for (n = 0; n < kt; n++) {
         INSERT_TASK_map(
             &options,
-            ChamUpperLower, A(n, n),
+            ChamRW, ChamUpperLower, A(n, n),
             zlaset_diag, D );
     }
 
diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h
index d3d2b43178ad3f0dcca7532a5c3f9b233875eb54..cff1f56529ce56ce1ddf52e81bac66e8c5f219d2 100644
--- a/include/chameleon/constants.h
+++ b/include/chameleon/constants.h
@@ -183,6 +183,15 @@ typedef enum chameleon_store_e {
     ChamEltwise = 403, /**< Element by element storage */
 } cham_store_t;
 
+/**
+ * @brief Access mode a task requests on a tile; the values are bit flags, so ChamRW == (ChamR | ChamW).
+ */
+typedef enum chameleon_access_e {
+    ChamR = (1 << 0), /**< Read only */
+    ChamW = (1 << 1), /**< Write only */
+    ChamRW = (ChamR | ChamW), /**< Read-Write (both bits set) */
+} cham_access_t;
+
 /**
  * @brief Chameleon GEMM-like algorithms
  */
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index c4fa9ca9f87db2ef6427aefac5fd5a348c4df6f6..b7281f13566b0c9a4ed274d7a9be762a5c0f3cc9 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -97,7 +97,7 @@ typedef int (*cham_unary_operator_t)( const CHAM_desc_t *desc,
                                       CHAM_tile_t *data, void *op_args );
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args );
 
 #include "chameleon/tasks_z.h"
diff --git a/runtime/openmp/codelets/codelet_map.c b/runtime/openmp/codelets/codelet_map.c
index 7abf7c38e51cf97e2762106b8787014b9d086624..ae37e4ddf755ef9439b0d09f5845e0d3b2cddfa6 100644
--- a/runtime/openmp/codelets/codelet_map.c
+++ b/runtime/openmp/codelets/codelet_map.c
@@ -18,15 +18,34 @@
 #include "chameleon_openmp.h"
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
     CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
+    switch( accessA ) { /* the depend() kind is fixed per pragma, so each access mode gets its own task construct */
+    case ChamW: /* write-only: the task declares an "out" dependency on the tile */
+#pragma omp task depend( out: tileA[0] )
+        {
+            op_fct( A, uplo, Am, An, tileA, op_args );
+        }
+        break;
+
+    case ChamR: /* read-only: the task declares an "in" dependency on the tile */
+#pragma omp task depend( in: tileA[0] )
+        {
+            op_fct( A, uplo, Am, An, tileA, op_args );
+        }
+
+        break;
+
+    case ChamRW:
+    default: /* any value other than ChamR/ChamW is handled as read-write ("inout") */
 #pragma omp task depend( inout: tileA[0] )
     {
         op_fct( A, uplo, Am, An, tileA, op_args );
     }
+    }
 
     (void)options;
 }
diff --git a/runtime/parsec/codelets/codelet_map.c b/runtime/parsec/codelets/codelet_map.c
index f2c10bfe901717448229dc177ff875cf31bf3017..a5a4b7f9a90c9a36905fc2b619ffc0d92cca2054 100644
--- a/runtime/parsec/codelets/codelet_map.c
+++ b/runtime/parsec/codelets/codelet_map.c
@@ -38,18 +38,20 @@ CORE_map_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
+    int parsec_accessA = cham_to_parsec_access( accessA ); /* INPUT, OUTPUT or INOUT; OR-ed into the flags of the tile argument below */
+
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_map_parsec, options->priority, "map",
         sizeof(CHAM_desc_t*), &A, VALUE,
         sizeof(cham_uplo_t), &uplo, VALUE,
         sizeof(int), &Am, VALUE,
         sizeof(int), &An, VALUE,
-        PASSED_BY_REF, RTBLKADDR(A, void, Am, An), chameleon_parsec_get_arena_index( A ) | INOUT,
+        PASSED_BY_REF, RTBLKADDR(A, void, Am, An), chameleon_parsec_get_arena_index( A ) | parsec_accessA,
         sizeof(cham_unary_operator_t), &op_fct, VALUE,
         sizeof(void*), &op_args, VALUE,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/include/chameleon_parsec.h b/runtime/parsec/include/chameleon_parsec.h
index e0d5cd190f38f40b04951eb7ec7fa9f8a7e71a34..30518fb809779ec0ba9c5ce45701a1187fb07621 100644
--- a/runtime/parsec/include/chameleon_parsec.h
+++ b/runtime/parsec/include/chameleon_parsec.h
@@ -42,6 +42,16 @@ chameleon_parsec_get_arena_index(const CHAM_desc_t *desc) {
     return ((chameleon_parsec_desc_t *)desc->schedopt)->arena_index;
 }
 
+static inline int cham_to_parsec_access( cham_access_t accessA ) { /* ChamR -> INPUT, ChamW -> OUTPUT, anything else -> INOUT */
+    if ( accessA == ChamR ) {
+        return INPUT;
+    }
+    if ( accessA == ChamW ) {
+        return OUTPUT;
+    }
+    return INOUT; /* ChamRW, and the fallback for any unexpected value */
+}
+
 /*
  * Access to block pointer and leading dimension
  */
diff --git a/runtime/quark/codelets/codelet_map.c b/runtime/quark/codelets/codelet_map.c
index b4c2807cbe4e25b277226240b6a34195b9b281ef..f9781af957bd35cc7ce2d05ef751e3fef88cf9cb 100644
--- a/runtime/quark/codelets/codelet_map.c
+++ b/runtime/quark/codelets/codelet_map.c
@@ -32,7 +32,7 @@ void CORE_map_quark(Quark *quark)
 }
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -43,7 +43,7 @@ void INSERT_TASK_map( const RUNTIME_option_t *options,
         sizeof(cham_uplo_t), &uplo, VALUE,
         sizeof(int), &Am, VALUE,
         sizeof(int), &An, VALUE,
-        sizeof(void*), RTBLKADDR(A, void, Am, An), INOUT,
+        sizeof(void*), RTBLKADDR(A, void, Am, An), cham_to_quark_access( accessA ),
         sizeof(cham_unary_operator_t), &op_fct, VALUE,
         sizeof(void*), &op_args, VALUE,
         0);
diff --git a/runtime/quark/include/chameleon_quark.h b/runtime/quark/include/chameleon_quark.h
index 3405f2bb0150dbfabb9061b03a8e9a65ab5be40c..8e415b7c564c486fa9ffd9f4de9585adc3a9410e 100644
--- a/runtime/quark/include/chameleon_quark.h
+++ b/runtime/quark/include/chameleon_quark.h
@@ -36,6 +36,16 @@ typedef struct quark_option_s {
     Quark *quark;
 } quark_option_t;
 
+static inline int cham_to_quark_access( cham_access_t accessA ) { /* ChamR -> INPUT, ChamW -> OUTPUT, anything else -> INOUT */
+    if ( accessA == ChamR ) {
+        return INPUT;
+    }
+    if ( accessA == ChamW ) {
+        return OUTPUT;
+    }
+    return INOUT; /* ChamRW, and the fallback for any unexpected value */
+}
+
 /*
  * Access to block pointer and leading dimension
  */
diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c
index 14f4e8e7acd073f9f20089acb0f54c010e2561d0..97de57e26efa988a6e425573d637d13115f8f2d3 100644
--- a/runtime/starpu/codelets/codelet_map.c
+++ b/runtime/starpu/codelets/codelet_map.c
@@ -43,7 +43,7 @@ static void cl_map_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(map, cl_map_cpu_func)
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
 
@@ -60,7 +60,7 @@ void INSERT_TASK_map( const RUNTIME_option_t *options,
         STARPU_VALUE, &uplo, sizeof(cham_uplo_t),
         STARPU_VALUE, &Am, sizeof(int),
         STARPU_VALUE, &An, sizeof(int),
-        STARPU_RW, RTBLKADDR(A, void, Am, An),
+        cham_to_starpu_access(accessA), RTBLKADDR(A, void, Am, An),
         STARPU_VALUE, &op_fct, sizeof(cham_unary_operator_t),
         STARPU_VALUE, &op_args, sizeof(void*),
         STARPU_PRIORITY, options->priority,
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index f98a29b177a5febd622385ca75c59a467a7fe5db..22c3ca53498ab24640f6620cc76a4052a96852bd 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -111,6 +111,13 @@ typedef struct starpu_option_request_s {
 
 /**/
 
+static inline int cham_to_starpu_access( cham_access_t accessA ) { /* identity mapping: returns accessA unchanged */
+    assert( ChamR == STARPU_R ); /* the three checks guard the assumption that the Cham* values equal the StarPU ones */
+    assert( ChamW == STARPU_W );
+    assert( ChamRW == STARPU_RW );
+    return accessA; /* NOTE(review): only correct while the asserts above hold; confirm they are active in release builds */
+}
+
 /*
  * MPI Redefinitions
  */