From 05baffd0215ff503861da3350db8b881882df5d9 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Mon, 24 Sep 2018 23:58:03 +0200
Subject: [PATCH] Add a map function

---
 compute/CMakeLists.txt                    |   4 +-
 compute/map.c                             | 262 ++++++++++++++++++++++
 compute/pmap.c                            |  80 ++++++++
 control/common.h                          |   4 +
 include/chameleon.h                       |  14 ++
 include/chameleon/tasks.h                 |   8 +
 runtime/starpu/CMakeLists.txt             |   1 +
 runtime/starpu/codelets/codelet_map.c     |  69 +++++++
 runtime/starpu/include/runtime_codelets.h |   5 +-
 9 files changed, 442 insertions(+), 5 deletions(-)
 create mode 100644 compute/map.c
 create mode 100644 compute/pmap.c
 create mode 100644 runtime/starpu/codelets/codelet_map.c

diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 9e5eb0cb4..3f1dc31dd 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -59,7 +59,9 @@ set(CHAMELEON_CONTROL
   ../control/tile.c
   ../control/chameleon_f77.c
   ../control/chameleon_mf77.c
-# ../control/chameleonwinthread.c
+  # ../control/chameleonwinthread.c
+  map.c
+  pmap.c
   )
 
 set(flags_to_add "")
diff --git a/compute/map.c b/compute/map.c
new file mode 100644
index 000000000..51cff7d61
--- /dev/null
+++ b/compute/map.c
@@ -0,0 +1,262 @@
+/**
+ *
+ * @file map.c
+ *
+ * @copyright 2018-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon map wrappers
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2018-09-24
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ *  Apply a user-defined operator to every tile of a matrix stored in LAPACK
+ *  layout. The matrix is converted to the tile layout, processed by
+ *  CHAMELEON_map_Tile_Async(), and converted back before returning.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Part of the matrix to process: ChamUpper, ChamLower, or
+ *          ChamUpperLower for the full matrix.
+ *
+ * @param[in] M
+ *          The number of rows of A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of A. N >= 0.
+ *
+ * @param[in,out] A
+ *          The LDA-by-N matrix A, converted with the complex64 (z) routines.
+ *          Each tile is handed to op_fct, which may modify it in place.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[in] op_fct
+ *          The operator called on each tile.
+ *
+ * @param[in,out] op_args
+ *          Opaque pointer forwarded unchanged to every call of op_fct.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON has not been initialized
+ * @retval <0 if -i, the i-th argument had an illegal value
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_map_Tile
+ * @sa CHAMELEON_map_Tile_Async
+ *
+ */
+int CHAMELEON_map( cham_uplo_t uplo, int M, int N,
+                   CHAMELEON_Complex64_t *A, int LDA,
+                   cham_unary_operator_t op_fct, void *op_args )
+{
+    int NB;
+    int status;
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t descAl, descAt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_map", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments: -i reports the i-th argument, uplo being the 1st */
+    if (M < 0) {
+        chameleon_error("CHAMELEON_map", "illegal value of M");
+        return -2;
+    }
+    if (N < 0) {
+        chameleon_error("CHAMELEON_map", "illegal value of N");
+        return -3;
+    }
+    if (LDA < chameleon_max(1, M)) {
+        chameleon_error("CHAMELEON_map", "illegal value of LDA");
+        return -5;
+    }
+    /* Quick return */
+    if (chameleon_min(M, N) == 0)
+        return CHAMELEON_SUCCESS;
+
+    /* Tune NB depending on M & N */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if (status != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_map", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NB */
+    NB = CHAMELEON_NB;
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, uplo,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+
+    /* Call the tile interface */
+    CHAMELEON_map_Tile_Async( uplo, &descAt, op_fct, op_args, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInout, uplo, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Tile
+ *
+ *  Apply a user-defined operator to every tile of a matrix. Tile equivalent
+ *  of CHAMELEON_map(). Operates on matrices stored by tiles. All matrices are
+ *  passed through descriptors. All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Part of the matrix to process: ChamUpper, ChamLower, or
+ *          ChamUpperLower for the full matrix.
+ *
+ * @param[in,out] A
+ *          Descriptor of the matrix. Each tile is handed to op_fct, which may
+ *          modify it in place.
+ *
+ * @param[in] op_fct
+ *          The operator called on each tile.
+ *
+ * @param[in,out] op_args
+ *          Opaque pointer forwarded unchanged to every call of op_fct.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON has not been initialized
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_map
+ * @sa CHAMELEON_map_Tile_Async
+ *
+ */
+int CHAMELEON_map_Tile( cham_uplo_t uplo,
+                        CHAM_desc_t *A,
+                        cham_unary_operator_t op_fct,
+                        void *op_args )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    int status;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_map_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_map_Tile_Async( uplo, A, op_fct, op_args, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Tile_Async
+ *
+ *  Apply a user-defined operator to every tile of a matrix.
+ *  Non-blocking equivalent of CHAMELEON_map_Tile(). May return before the
+ *  computation is finished. Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in]     uplo     Part of the matrix to process (see chameleon_pmap()).
+ * @param[in,out] A        Descriptor of the matrix op_fct is applied to.
+ * @param[in]     op_fct   The operator called on each tile.
+ * @param[in,out] op_args  Opaque pointer forwarded unchanged to op_fct.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_map
+ * @sa CHAMELEON_map_Tile
+ *
+ */
+int CHAMELEON_map_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A,
+                              cham_unary_operator_t op_fct, void *op_args,
+                              RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_map_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        chameleon_fatal_error("CHAMELEON_map_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        chameleon_fatal_error("CHAMELEON_map_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status: refuse to submit into a sequence already in error */
+    if (sequence->status != CHAMELEON_SUCCESS) {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+    request->status = CHAMELEON_SUCCESS;
+
+    /* Check descriptors for correctness */
+    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_map_Tile_Async", "invalid descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return */
+    if (chameleon_min( A->m, A->n ) == 0)
+        return CHAMELEON_SUCCESS;
+
+    chameleon_pmap( uplo, A, op_fct, op_args, sequence, request );
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/compute/pmap.c b/compute/pmap.c
new file mode 100644
index 000000000..7e0e5b514
--- /dev/null
+++ b/compute/pmap.c
@@ -0,0 +1,80 @@
+/**
+ *
+ * @file pmap.c
+ *
+ * @copyright 2018-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon map parallel algorithm
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2018-09-24
+ *
+ */
+#include "control/common.h"
+
+#define A(m, n) A,  m,  n
+/**
+ *  chameleon_pmap - Submit one map task per tile of A selected by uplo.
+ *
+ *  With ChamUpper or ChamLower, diagonal tiles are submitted with uplo and
+ *  the other selected tiles with ChamUpperLower.
+ *  Nothing is submitted when the sequence is already in error.
+ */
+void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
+                     cham_unary_operator_t op_fct, void *op_args,
+                     RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_option_t options;
+    int m, n;
+
+    chamctxt = chameleon_context_self();
+    if (sequence->status != CHAMELEON_SUCCESS) {
+        return;
+    }
+    RUNTIME_options_init(&options, chamctxt, sequence, request);
+
+    switch( uplo ) {
+    case ChamUpper:
+        for (n = 0; n < A->nt; n++) {
+            /* Stop at the last tile row: A may have fewer tile rows than columns */
+            for (m = 0; m < chameleon_min(n, A->mt); m++) {
+                INSERT_TASK_map( &options, ChamUpperLower, A(m, n),
+                                 op_fct, op_args );
+            }
+            if (n < A->mt) {
+                INSERT_TASK_map( &options, uplo, A(n, n),
+                                 op_fct, op_args );
+            }
+        }
+        break;
+
+    case ChamLower:
+        /* Tile columns past the last tile row hold nothing of the lower part */
+        for (n = 0; n < chameleon_min(A->mt, A->nt); n++) {
+            INSERT_TASK_map( &options, uplo, A(n, n),
+                             op_fct, op_args );
+            for (m = n+1; m < A->mt; m++) {
+                INSERT_TASK_map( &options, ChamUpperLower, A(m, n),
+                                 op_fct, op_args );
+            }
+        }
+        break;
+
+    case ChamUpperLower:
+    default:
+        /* Any other value of uplo is processed as the full matrix */
+        for (m = 0; m < A->mt; m++) {
+            for (n = 0; n < A->nt; n++) {
+                INSERT_TASK_map( &options, uplo, A(m, n),
+                                 op_fct, op_args );
+            }
+        }
+    }
+
+    RUNTIME_options_finalize(&options, chamctxt);
+}
diff --git a/control/common.h b/control/common.h
index be9c0826b..63b51d814 100644
--- a/control/common.h
+++ b/control/common.h
@@ -108,6 +108,10 @@ extern char *chameleon_lapack_constants[];
 extern "C" {
 #endif
 
+void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
+                     cham_unary_operator_t op_fct, void *op_args,
+                     RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+
 #include "control/compute_s.h"
 #include "control/compute_d.h"
 #include "control/compute_c.h"
diff --git a/include/chameleon.h b/include/chameleon.h
index 3ad90b5da..ddc898f67 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -54,6 +54,20 @@
 /* ****************************************************************************
  * CHAMELEON functionnalities
  */
+/* Apply a user-defined operator to the tiles of a matrix. The parameter is
+ * named op_fct because "operator" is a reserved word in C++. */
+int CHAMELEON_map( cham_uplo_t uplo, int M, int N,
+                   CHAMELEON_Complex64_t *A, int LDA,
+                   cham_unary_operator_t op_fct, void *op_args );
+int CHAMELEON_map_Tile( cham_uplo_t uplo,
+                        CHAM_desc_t *A,
+                        cham_unary_operator_t op_fct,
+                        void *op_args );
+int CHAMELEON_map_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A,
+                              cham_unary_operator_t op_fct, void *op_args,
+                              RUNTIME_sequence_t *sequence,
+                              RUNTIME_request_t *request );
+
 #include "chameleon/chameleon_z.h"
 #include "chameleon/chameleon_c.h"
 #include "chameleon/chameleon_d.h"
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index 924166c2c..31ffc09e5 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -86,6 +86,14 @@ typedef enum chameleon_tasktype_e {
     TASK_NBKERNELS
 } cham_tasktype_t;
 
+/* Operator run by a map task on tile (m, n) of desc, whose content is data. */
+typedef int (*cham_unary_operator_t)( const CHAM_desc_t *desc, cham_uplo_t uplo,
+                                      int m, int n, void *data, void *op_args );
+
+void INSERT_TASK_map( const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_unary_operator_t op_fct, void *op_args );
+
 #include "chameleon/tasks_z.h"
 #include "chameleon/tasks_d.h"
 #include "chameleon/tasks_c.h"
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index 8b1352614..9ee2f96c5 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -116,6 +116,7 @@ precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
   TARGETDIR "codelets")
 
 set(RUNTIME_SRCS
+  codelets/codelet_map.c
   ${RUNTIME_COMMON}
   ${RUNTIME_SRCS_GENERATED}
   )
diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c
new file mode 100644
index 000000000..f6883c5f0
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_map.c
@@ -0,0 +1,69 @@
+/**
+ *
+ * @file codelet_map.c
+ *
+ * @copyright 2018-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon map StarPU codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2018-09-24
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+
+#if !defined(CHAMELEON_SIMULATION)
+/* CPU kernel: unpack the task arguments and run the user operator on the tile */
+static void cl_map_cpu_func(void *descr[], void *cl_arg)
+{
+    const CHAM_desc_t *desc;
+    cham_uplo_t uplo;
+    int m, n;
+    void *data;
+    cham_unary_operator_t op_fct;
+    void *op_args;
+
+    data = (void *)STARPU_MATRIX_GET_PTR(descr[0]);
+    /* Same order as the STARPU_VALUE arguments packed by INSERT_TASK_map() */
+    starpu_codelet_unpack_args(cl_arg, &desc, &uplo, &m, &n, &op_fct, &op_args );
+    op_fct( desc, uplo, m, n, data, op_args );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/* Codelet definition */
+CODELETS_CPU(map, 1, cl_map_cpu_func)
+
+/* Submit a task running op_fct on tile (Am, An) of A, accessed in RW mode.
+ * The value arguments must stay in the order cl_map_cpu_func() unpacks them. */
+void INSERT_TASK_map( const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_unary_operator_t op_fct, void *op_args )
+{
+    struct starpu_codelet *codelet = &cl_map;
+    void (*callback)(void*) = options->profiling ? cl_map_callback : NULL;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &A,                      sizeof(CHAM_desc_t*),
+        STARPU_VALUE,    &uplo,                   sizeof(cham_uplo_t),
+        STARPU_VALUE,    &Am,                     sizeof(int),
+        STARPU_VALUE,    &An,                     sizeof(int),
+        STARPU_RW,       RTBLKADDR(A, void, Am, An),
+        STARPU_VALUE,    &op_fct,                 sizeof(cham_unary_operator_t),
+        STARPU_VALUE,    &op_args,                sizeof(void*),
+        STARPU_PRIORITY, options->priority,
+        STARPU_CALLBACK, callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "map",
+#endif
+        0);
+}
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index f0213ff00..9c9d73bf4 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -117,9 +117,6 @@
 #define CCODELETS_HEADER(name)  CODELETS_HEADER(c##name)
 #define ZCODELETS_HEADER(name)  CODELETS_HEADER(z##name)
 
-#define SCODELETS_CPU_HEADER(name)  CODELETS_CPU_HEADER(s##name)
-#define DCODELETS_CPU_HEADER(name)  CODELETS_CPU_HEADER(d##name)
-#define CCODELETS_CPU_HEADER(name)  CODELETS_CPU_HEADER(c##name)
-#define ZCODELETS_CPU_HEADER(name)  CODELETS_CPU_HEADER(z##name)
+CODELETS_HEADER(map);
 
 #endif /* _CODELETS_H_ */
-- 
GitLab