diff --git a/compute/map.c b/compute/map.c index cef41364590a7a7f4c295cf4a4686556c6212676..1fcb883812652baa91f61db61c20542eaa6d199a 100644 --- a/compute/map.c +++ b/compute/map.c @@ -27,7 +27,16 @@ * ******************************************************************************* * - * @param[in,out] uplo + * @param[in] access + * - ChamR: A is accessed in read-only mode. + * - ChamW: A is accessed in write-only mode. + * WARNING: if the descriptor is set for allocation on the fly, the + * flush call included in this synchronous API will free all allocated + * data; prefer the asynchronous call if you want to initialize data + * before submitting another algorithm. + * - ChamRW: A is accessed in read-write mode. + * + * @param[in] uplo * - ChamUpper: Only the upper triangular part of the matrix is touched * - ChamLower: Only the lower triangular part of the matrix is touched * - ChamUpperLower: The entire the matrix is touched @@ -51,7 +60,8 @@ * @sa CHAMELEON_map_Tile_Async * */ -int CHAMELEON_map_Tile( cham_uplo_t uplo, +int CHAMELEON_map_Tile( cham_access_t access, + cham_uplo_t uplo, CHAM_desc_t *A, cham_unary_operator_t op_fct, void *op_args ) @@ -68,7 +78,7 @@ int CHAMELEON_map_Tile( cham_uplo_t uplo, } chameleon_sequence_create( chamctxt, &sequence ); - CHAMELEON_map_Tile_Async( uplo, A, op_fct, op_args, sequence, &request ); + CHAMELEON_map_Tile_Async( access, uplo, A, op_fct, op_args, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); @@ -89,6 +99,13 @@ int CHAMELEON_map_Tile( cham_uplo_t uplo, * ******************************************************************************* * + * @param[in] access + * - ChamR: A is accessed in read-only mode. + * - ChamW: A is accessed in write-only mode. + * INFO: tiles of A can be unallocated before the call if the + * descriptor is set for allocation on the fly. + * - ChamRW: A is accessed in read-write mode. 
+ * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). @@ -105,7 +122,8 @@ int CHAMELEON_map_Tile( cham_uplo_t uplo, * @sa CHAMELEON_map_Tile * */ -int CHAMELEON_map_Tile_Async( cham_uplo_t uplo, +int CHAMELEON_map_Tile_Async( cham_access_t access, + cham_uplo_t uplo, CHAM_desc_t *A, cham_unary_operator_t op_fct, void *op_args, @@ -146,7 +164,7 @@ int CHAMELEON_map_Tile_Async( cham_uplo_t uplo, return CHAMELEON_SUCCESS; } - chameleon_pmap( uplo, A, op_fct, op_args, sequence, request ); + chameleon_pmap( access, uplo, A, op_fct, op_args, sequence, request ); return CHAMELEON_SUCCESS; } diff --git a/compute/pmap.c b/compute/pmap.c index 265186150fa808579aa3971a4915c895ef1d7c81..a2ddf33a4da40d5e8c42f3a10e91c99a5a0d75ec 100644 --- a/compute/pmap.c +++ b/compute/pmap.c @@ -20,7 +20,7 @@ /** * chameleon_pmap - Generate a random matrix by tiles. */ -void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A, +void chameleon_pmap( cham_access_t access, cham_uplo_t uplo, CHAM_desc_t *A, cham_unary_operator_t op_fct, void *op_args, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { @@ -39,12 +39,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A, for (m = 0; m < n; m++) { INSERT_TASK_map( &options, - ChamUpperLower, A(m, n), + access, ChamUpperLower, A(m, n), op_fct, op_args ); } INSERT_TASK_map( &options, - uplo, A(n, n), + access, uplo, A(n, n), op_fct, op_args ); } break; @@ -53,12 +53,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A, for (n = 0; n < A->nt; n++) { INSERT_TASK_map( &options, - uplo, A(n, n), + access, uplo, A(n, n), op_fct, op_args ); for (m = n+1; m < A->mt; m++) { INSERT_TASK_map( &options, - ChamUpperLower, A(m, n), + access, ChamUpperLower, A(m, n), op_fct, op_args ); } } @@ -70,7 +70,7 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A, for (n = 0; n < A->nt; n++) { INSERT_TASK_map( &options, - uplo, A(m, n), + access, 
uplo, A(m, n), op_fct, op_args ); } } diff --git a/compute/pzlatms.c b/compute/pzlatms.c index a96b68755ce99e1a6d3143b1b2f2286794b2d108..3881a9f9d43fdd8cc4dd65e977abf8e4cff24842 100644 --- a/compute/pzlatms.c +++ b/compute/pzlatms.c @@ -165,7 +165,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym for (n = 0; n < kt; n++) { INSERT_TASK_map( &options, - ChamUpperLower, A(n, n), + ChamRW, ChamUpperLower, A(n, n), zlaset_diag, D ); } diff --git a/compute/zlacpy.c b/compute/zlacpy.c index de3ff01320540b9648996366ad6c02d1c821ac1a..272bc55d54f5fc3c7529d72af3a42375a5d168f5 100644 --- a/compute/zlacpy.c +++ b/compute/zlacpy.c @@ -280,8 +280,8 @@ int CHAMELEON_zlacpy_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t * return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); } /* Check input arguments */ - if (A->nb != A->mb) { - chameleon_error("CHAMELEON_zlacpy_Tile_Async", "only square tiles supported"); + if ((A->mb != B->mb) || (A->nb != B->nb) ){ + chameleon_error("CHAMELEON_zlacpy_Tile_Async", "only matching tile sizes supported"); return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); } /* Check input arguments */ diff --git a/compute/zprint.c b/compute/zprint.c index fe44e05d4026610e716744b722447b2e8b9dadd1..2329d1a9bbfe25b286f29857e0cfea41b9254b62 100644 --- a/compute/zprint.c +++ b/compute/zprint.c @@ -152,7 +152,7 @@ int CHAMELEON_zprint( FILE *file, const char *header, /* Call the tile interface */ zprint_runtime_id = chamctxt->scheduler; - chameleon_pmap( uplo, &descAt, zprint, &options, sequence, &request ); + chameleon_pmap( ChamR, uplo, &descAt, zprint, &options, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, @@ -216,7 +216,7 @@ int CHAMELEON_zprint_Tile( FILE *file, const char *header, chameleon_sequence_create( chamctxt, &sequence ); zprint_runtime_id = chamctxt->scheduler; - chameleon_pmap( uplo, A, 
zprint, &options, sequence, &request ); + chameleon_pmap( ChamR, uplo, A, zprint, &options, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); chameleon_sequence_wait( chamctxt, sequence ); diff --git a/control/common.h b/control/common.h index f31ec1af3bf5f0fd5af475e2630d1cd9c575bcd2..c86bf94ad82144c2cad722efee87dc948161ded8 100644 --- a/control/common.h +++ b/control/common.h @@ -102,7 +102,7 @@ extern char *chameleon_lapack_constants[]; extern "C" { #endif -void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A, +void chameleon_pmap( cham_access_t access, cham_uplo_t uplo, CHAM_desc_t *A, cham_unary_operator_t operator, void *op_args, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); @@ -127,7 +127,7 @@ static inline int chameleon_asprintf( char **strp, const char *fmt, ... ) int rc; va_start( ap, fmt ); - rc = asprintf( strp, fmt, ap ); + rc = vasprintf( strp, fmt, ap ); va_end( ap ); assert( rc != -1 ); diff --git a/include/chameleon.h b/include/chameleon.h index 8c3669af397ac25e53bdbd55c33feced0947a48e..77b6544f0af5e0e599baf8575cd8755b5e6c5f27 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -33,6 +33,10 @@ #include "chameleon/struct.h" #include "chameleon/descriptor_helpers.h" +#if defined(CHAMELEON_USE_MPI) +#include <mpi.h> +#endif + /* **************************************************************************** * CHAMELEON runtime common API */ @@ -74,11 +78,13 @@ BEGIN_C_DECLS /* **************************************************************************** * CHAMELEON functionnalities */ -int CHAMELEON_map_Tile( cham_uplo_t uplo, +int CHAMELEON_map_Tile( cham_access_t access, + cham_uplo_t uplo, CHAM_desc_t *A, cham_unary_operator_t op_fct, void *op_args ); -int CHAMELEON_map_Tile_Async( cham_uplo_t uplo, +int CHAMELEON_map_Tile_Async( cham_access_t access, + cham_uplo_t uplo, CHAM_desc_t *A, cham_unary_operator_t op_fct, void *op_args, diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index 
d3d2b43178ad3f0dcca7532a5c3f9b233875eb54..cff1f56529ce56ce1ddf52e81bac66e8c5f219d2 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -183,6 +183,15 @@ typedef enum chameleon_store_e { ChamEltwise = 403, /**< Element by element storage */ } cham_store_t; +/** + * @brief Data access types. + */ +typedef enum chameleon_access_e { + ChamR = (1 << 0), /**< Read only */ + ChamW = (1 << 1), /**< Write only */ + ChamRW = (ChamR | ChamW), /**< Read-Write */ +} cham_access_t; + /** * @brief Chameleon GEMM-like algorithms */ diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h index c4fa9ca9f87db2ef6427aefac5fd5a348c4df6f6..b7281f13566b0c9a4ed274d7a9be762a5c0f3cc9 100644 --- a/include/chameleon/tasks.h +++ b/include/chameleon/tasks.h @@ -97,7 +97,7 @@ typedef int (*cham_unary_operator_t)( const CHAM_desc_t *desc, CHAM_tile_t *data, void *op_args ); void INSERT_TASK_map( const RUNTIME_option_t *options, - cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, + cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, cham_unary_operator_t op_fct, void *op_args ); #include "chameleon/tasks_z.h" diff --git a/runtime/openmp/codelets/codelet_map.c b/runtime/openmp/codelets/codelet_map.c index 7abf7c38e51cf97e2762106b8787014b9d086624..ae37e4ddf755ef9439b0d09f5845e0d3b2cddfa6 100644 --- a/runtime/openmp/codelets/codelet_map.c +++ b/runtime/openmp/codelets/codelet_map.c @@ -18,15 +18,34 @@ #include "chameleon_openmp.h" void INSERT_TASK_map( const RUNTIME_option_t *options, - cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, + cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, cham_unary_operator_t op_fct, void *op_args ) { CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); + switch( accessA ) { + case ChamW: +#pragma omp task depend( out: tileA[0] ) + { + op_fct( A, uplo, Am, An, tileA, op_args ); + } + break; + + case ChamR: +#pragma omp task depend( in: tileA[0] ) + { + op_fct( A, 
uplo, Am, An, tileA, op_args ); + } + + break; + + case ChamRW: + default: #pragma omp task depend( inout: tileA[0] ) { op_fct( A, uplo, Am, An, tileA, op_args ); } + } (void)options; } diff --git a/runtime/parsec/codelets/codelet_map.c b/runtime/parsec/codelets/codelet_map.c index f2c10bfe901717448229dc177ff875cf31bf3017..a5a4b7f9a90c9a36905fc2b619ffc0d92cca2054 100644 --- a/runtime/parsec/codelets/codelet_map.c +++ b/runtime/parsec/codelets/codelet_map.c @@ -38,18 +38,20 @@ CORE_map_parsec( parsec_execution_stream_t *context, } void INSERT_TASK_map( const RUNTIME_option_t *options, - cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, + cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, cham_unary_operator_t op_fct, void *op_args ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); + int parsec_accessA = cham_to_parsec_access( accessA ); + parsec_dtd_taskpool_insert_task( PARSEC_dtd_taskpool, CORE_map_parsec, options->priority, "map", sizeof(CHAM_desc_t*), &A, VALUE, sizeof(cham_uplo_t), &uplo, VALUE, sizeof(int), &Am, VALUE, sizeof(int), &An, VALUE, - PASSED_BY_REF, RTBLKADDR(A, void, Am, An), chameleon_parsec_get_arena_index( A ) | INOUT, + PASSED_BY_REF, RTBLKADDR(A, void, Am, An), chameleon_parsec_get_arena_index( A ) | parsec_accessA, sizeof(cham_unary_operator_t), &op_fct, VALUE, sizeof(void*), &op_args, VALUE, PARSEC_DTD_ARG_END ); diff --git a/runtime/parsec/include/chameleon_parsec.h b/runtime/parsec/include/chameleon_parsec.h index e0d5cd190f38f40b04951eb7ec7fa9f8a7e71a34..30518fb809779ec0ba9c5ce45701a1187fb07621 100644 --- a/runtime/parsec/include/chameleon_parsec.h +++ b/runtime/parsec/include/chameleon_parsec.h @@ -42,6 +42,16 @@ chameleon_parsec_get_arena_index(const CHAM_desc_t *desc) { return ((chameleon_parsec_desc_t *)desc->schedopt)->arena_index; } +static inline int cham_to_parsec_access( cham_access_t accessA ) { + if ( accessA == ChamR ) { + return INPUT; + } + if ( 
accessA == ChamW ) { + return OUTPUT; + } + return INOUT; +} + /* * Access to block pointer and leading dimension */ diff --git a/runtime/quark/codelets/codelet_map.c b/runtime/quark/codelets/codelet_map.c index b4c2807cbe4e25b277226240b6a34195b9b281ef..f9781af957bd35cc7ce2d05ef751e3fef88cf9cb 100644 --- a/runtime/quark/codelets/codelet_map.c +++ b/runtime/quark/codelets/codelet_map.c @@ -32,7 +32,7 @@ void CORE_map_quark(Quark *quark) } void INSERT_TASK_map( const RUNTIME_option_t *options, - cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, + cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, cham_unary_operator_t op_fct, void *op_args ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); @@ -43,7 +43,7 @@ void INSERT_TASK_map( const RUNTIME_option_t *options, sizeof(cham_uplo_t), &uplo, VALUE, sizeof(int), &Am, VALUE, sizeof(int), &An, VALUE, - sizeof(void*), RTBLKADDR(A, void, Am, An), INOUT, + sizeof(void*), RTBLKADDR(A, void, Am, An), cham_to_quark_access( accessA ), sizeof(cham_unary_operator_t), &op_fct, VALUE, sizeof(void*), &op_args, VALUE, 0); diff --git a/runtime/quark/include/chameleon_quark.h b/runtime/quark/include/chameleon_quark.h index 3405f2bb0150dbfabb9061b03a8e9a65ab5be40c..8e415b7c564c486fa9ffd9f4de9585adc3a9410e 100644 --- a/runtime/quark/include/chameleon_quark.h +++ b/runtime/quark/include/chameleon_quark.h @@ -36,6 +36,16 @@ typedef struct quark_option_s { Quark *quark; } quark_option_t; +static inline int cham_to_quark_access( cham_access_t accessA ) { + if ( accessA == ChamR ) { + return INPUT; + } + if ( accessA == ChamW ) { + return OUTPUT; + } + return INOUT; +} + /* * Access to block pointer and leading dimension */ diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c index 14f4e8e7acd073f9f20089acb0f54c010e2561d0..97de57e26efa988a6e425573d637d13115f8f2d3 100644 --- a/runtime/starpu/codelets/codelet_map.c +++ b/runtime/starpu/codelets/codelet_map.c @@ 
-43,7 +43,7 @@ static void cl_map_cpu_func(void *descr[], void *cl_arg) CODELETS_CPU(map, cl_map_cpu_func) void INSERT_TASK_map( const RUNTIME_option_t *options, - cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, + cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, cham_unary_operator_t op_fct, void *op_args ) { @@ -60,7 +60,7 @@ void INSERT_TASK_map( const RUNTIME_option_t *options, STARPU_VALUE, &uplo, sizeof(cham_uplo_t), STARPU_VALUE, &Am, sizeof(int), STARPU_VALUE, &An, sizeof(int), - STARPU_RW, RTBLKADDR(A, void, Am, An), + cham_to_starpu_access(accessA), RTBLKADDR(A, void, Am, An), STARPU_VALUE, &op_fct, sizeof(cham_unary_operator_t), STARPU_VALUE, &op_args, sizeof(void*), STARPU_PRIORITY, options->priority, diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index f98a29b177a5febd622385ca75c59a467a7fe5db..22c3ca53498ab24640f6620cc76a4052a96852bd 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in @@ -111,6 +111,13 @@ typedef struct starpu_option_request_s { /**/ +static inline int cham_to_starpu_access( cham_access_t accessA ) { + assert( ChamR == STARPU_R ); + assert( ChamW == STARPU_W ); + assert( ChamRW == STARPU_RW ); + return accessA; +} + /* * MPI Redefinitions */ diff --git a/testing/test_fembem b/testing/test_fembem index 906d73c7abb0821e8df787f05fab2d503a2e76ff..a0056374bf0163ba878d842cbc81999fece362b4 160000 --- a/testing/test_fembem +++ b/testing/test_fembem @@ -1 +1 @@ -Subproject commit 906d73c7abb0821e8df787f05fab2d503a2e76ff +Subproject commit a0056374bf0163ba878d842cbc81999fece362b4 diff --git a/testing/values.c b/testing/values.c index 5d53e80b3d25dbe72d4d0a527e3698b0fe9ccbb6..5e4bdda1408b6dce022f0083dec90c9fe703555c 100644 --- a/testing/values.c +++ b/testing/values.c @@ -729,7 +729,11 @@ testing_salea() long testing_ialea() { - return random(); + long r = random(); +#if defined(CHAMELEON_USE_MPI) 
+ MPI_Bcast( &r, 1, MPI_LONG, 0, MPI_COMM_WORLD ); +#endif + return r; } /**