From 05baffd0215ff503861da3350db8b881882df5d9 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Mon, 24 Sep 2018 23:58:03 +0200
Subject: [PATCH] Add a map function

---
 compute/CMakeLists.txt                    |   4 +-
 compute/map.c                             | 236 ++++++++++++++++++++++
 compute/pmap.c                            |  80 ++++++++
 control/common.h                          |   4 +
 include/chameleon.h                       |  14 ++
 include/chameleon/tasks.h                 |   8 +
 runtime/starpu/CMakeLists.txt             |   1 +
 runtime/starpu/codelets/codelet_map.c     |  69 +++++++
 runtime/starpu/include/runtime_codelets.h |   5 +-
 9 files changed, 416 insertions(+), 5 deletions(-)
 create mode 100644 compute/map.c
 create mode 100644 compute/pmap.c
 create mode 100644 runtime/starpu/codelets/codelet_map.c

diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 9e5eb0cb4..3f1dc31dd 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -59,7 +59,9 @@ set(CHAMELEON_CONTROL
     ../control/tile.c
     ../control/chameleon_f77.c
     ../control/chameleon_mf77.c
-#    ../control/chameleonwinthread.c
+    #    ../control/chameleonwinthread.c
+    map.c
+    pmap.c
    )
 
 set(flags_to_add "")
diff --git a/compute/map.c b/compute/map.c
new file mode 100644
index 000000000..51cff7d61
--- /dev/null
+++ b/compute/map.c
@@ -0,0 +1,236 @@
+/**
+ *
+ * @file map.c
+ *
+ * @copyright 2018-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon map wrappers
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2018-09-24
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * Apply a user-provided operator to every tile of a LAPACK-layout matrix.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Part of A to visit: ChamUpper, ChamLower or ChamUpperLower.
+ *
+ * @param[in] M
+ *          The number of rows of A. M >= 0.
+ * @param[in] N
+ *          The number of columns of A. N >= 0.
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix handed tile by tile to operator.
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[in] operator
+ *          Callback invoked once per visited tile.
+ * @param[in] op_args
+ *          Opaque pointer forwarded unchanged to operator.
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ *
+ * @sa CHAMELEON_map_Tile, CHAMELEON_map_Tile_Async
+ */
+int CHAMELEON_map( cham_uplo_t uplo, int M, int N,
+                   CHAMELEON_Complex64_t *A, int LDA,
+                   cham_unary_operator_t operator, void *op_args )
+{
+    int NB;
+    int status;
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t descAl, descAt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_map", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments: -i reports the i-th parameter (uplo is the 1st) */
+    if (M < 0) {
+        chameleon_error("CHAMELEON_map", "illegal value of M");
+        return -2;
+    }
+    if (N < 0) {
+        chameleon_error("CHAMELEON_map", "illegal value of N");
+        return -3;
+    }
+    if (LDA < chameleon_max(1, M)) {
+        chameleon_error("CHAMELEON_map", "illegal value of LDA");
+        return -5;
+    }
+    /* Quick return */
+    if (chameleon_min(M, N) == 0)
+        return CHAMELEON_SUCCESS;
+
+    /* Tune the tile size NB for an M-by-N problem */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if (status != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_map", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Read the tile size and open the sequence that tracks the submitted tasks */
+    NB = CHAMELEON_NB;
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, uplo,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+
+    /* Call the tile interface */
+    CHAMELEON_map_Tile_Async( uplo, &descAt, operator, op_args, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInout, uplo, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Tile
+ *
+ *  Apply a user-provided operator to the tiles of A.  Tile equivalent of
+ *  CHAMELEON_map().  Operates on matrices stored by tiles.  All dimensions
+ *  are taken from the descriptor.  Blocks until the submitted work is done.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Part of A to visit: ChamUpper, ChamLower or ChamUpperLower.
+ * @param[in,out] A
+ *          Descriptor of the tiled matrix whose tiles are passed to operator.
+ * @param[in] operator
+ *          Callback invoked once per visited tile.
+ * @param[in] op_args
+ *          Opaque pointer forwarded unchanged to operator.
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if no CHAMELEON context is active
+ * @sa CHAMELEON_map, CHAMELEON_map_Tile_Async
+ */
+int CHAMELEON_map_Tile( cham_uplo_t           uplo,
+                        CHAM_desc_t          *A,
+                        cham_unary_operator_t operator,
+                        void                 *op_args )
+{
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request = RUNTIME_REQUEST_INITIALIZER;
+    int status;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_map_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_map_Tile_Async( uplo, A, operator, op_args, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Tile_Async
+ *
+ *  Apply a user-provided operator to the tiles of A.  Non-blocking equivalent
+ *  of CHAMELEON_map_Tile().  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Part of A to visit: ChamUpper, ChamLower or ChamUpperLower.
+ * @param[in,out] A
+ *          Descriptor of the tiled matrix whose tiles are passed to operator.
+ * @param[in] operator
+ *          Callback invoked once per visited tile.
+ * @param[in] op_args
+ *          Opaque pointer forwarded unchanged to operator.
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @sa CHAMELEON_map, CHAMELEON_map_Tile
+ */
+int CHAMELEON_map_Tile_Async( cham_uplo_t           uplo,
+                              CHAM_desc_t          *A,
+                              cham_unary_operator_t operator,
+                              void                 *op_args,
+                              RUNTIME_sequence_t   *sequence,
+                              RUNTIME_request_t    *request )
+{
+    CHAM_context_t *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_map_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        chameleon_fatal_error("CHAMELEON_map_Tile", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        chameleon_fatal_error("CHAMELEON_map_Tile", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == CHAMELEON_SUCCESS) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_map_Tile", "invalid descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return */
+    if (chameleon_min( A->m, A->n ) == 0)
+        return CHAMELEON_SUCCESS;
+
+    chameleon_pmap( uplo, A, operator, op_args, sequence, request );
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/compute/pmap.c b/compute/pmap.c
new file mode 100644
index 000000000..7e0e5b514
--- /dev/null
+++ b/compute/pmap.c
@@ -0,0 +1,80 @@
+/**
+ *
+ * @file pmap.c
+ *
+ * @copyright 2018-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon map parallel algorithm
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2018-09-24
+ *
+ */
+#include "control/common.h"
+
+#define A(m, n) A,  m,  n
+/**
+ *  chameleon_pmap - Submit one map task per tile of the uplo part of A.
+ */
+void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
+                     cham_unary_operator_t operator, void *op_args,
+                     RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_option_t options;
+    int m, n;
+
+    chamctxt = chameleon_context_self();
+    if (sequence->status != CHAMELEON_SUCCESS)
+        return;
+    RUNTIME_options_init(&options, chamctxt, sequence, request);
+
+    switch( uplo ) {
+    case ChamUpper: /* Rectangular A: rows stop at mt, diagonal only if n < mt */
+        for (n = 0; n < A->nt; n++) {
+            for (m = 0; m < chameleon_min(n, A->mt); m++) {
+                INSERT_TASK_map(
+                    &options,
+                    ChamUpperLower, A(m, n),
+                    operator, op_args );
+            }
+            if (n < A->mt) {
+                INSERT_TASK_map(
+                    &options, uplo, A(n, n), operator, op_args );
+            }
+        }
+        break;
+
+    case ChamLower: /* Columns n >= mt hold no tile on or below the diagonal */
+        for (n = 0; n < chameleon_min(A->mt, A->nt); n++) {
+            INSERT_TASK_map(
+                &options,
+                uplo, A(n, n),
+                operator, op_args );
+            for (m = n+1; m < A->mt; m++) {
+                INSERT_TASK_map(
+                    &options,
+                    ChamUpperLower, A(m, n),
+                    operator, op_args );
+            }
+        }
+        break;
+
+    case ChamUpperLower:
+    default:
+        for (m = 0; m < A->mt; m++) {
+            for (n = 0; n < A->nt; n++) {
+                INSERT_TASK_map(
+                    &options,
+                    uplo, A(m, n),
+                    operator, op_args );
+            }
+        }
+    }
+
+    RUNTIME_options_finalize(&options, chamctxt);
+}
diff --git a/control/common.h b/control/common.h
index be9c0826b..63b51d814 100644
--- a/control/common.h
+++ b/control/common.h
@@ -108,6 +108,10 @@ extern char *chameleon_lapack_constants[];
 extern "C" {
 #endif
 
+void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
+                     cham_unary_operator_t operator, void *op_args,
+                     RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+
 #include "control/compute_s.h"
 #include "control/compute_d.h"
 #include "control/compute_c.h"
diff --git a/include/chameleon.h b/include/chameleon.h
index 3ad90b5da..ddc898f67 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -54,6 +54,20 @@
 /* ****************************************************************************
  * CHAMELEON functionnalities
  */
+int CHAMELEON_map( cham_uplo_t uplo, int M, int N,
+                   CHAMELEON_Complex64_t *A, int LDA,
+                   cham_unary_operator_t operator, void *op_args );
+int CHAMELEON_map_Tile( cham_uplo_t           uplo,
+                        CHAM_desc_t          *A,
+                        cham_unary_operator_t operator,
+                        void                 *op_args );
+int CHAMELEON_map_Tile_Async( cham_uplo_t           uplo,
+                              CHAM_desc_t          *A,
+                              cham_unary_operator_t operator,
+                              void                 *op_args,
+                              RUNTIME_sequence_t   *sequence,
+                              RUNTIME_request_t    *request );
+
 #include "chameleon/chameleon_z.h"
 #include "chameleon/chameleon_c.h"
 #include "chameleon/chameleon_d.h"
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index 924166c2c..31ffc09e5 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -86,6 +86,14 @@ typedef enum chameleon_tasktype_e {
   TASK_NBKERNELS
 } cham_tasktype_t;
 
+typedef int (*cham_unary_operator_t)( const CHAM_desc_t *desc,
+                                      cham_uplo_t uplo, int m, int n,
+                                      void *data, void *op_args );
+
+void INSERT_TASK_map( const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_unary_operator_t operator, void *op_args );
+
 #include "chameleon/tasks_z.h"
 #include "chameleon/tasks_d.h"
 #include "chameleon/tasks_c.h"
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index 8b1352614..9ee2f96c5 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -116,6 +116,7 @@ precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
   TARGETDIR "codelets")
 
 set(RUNTIME_SRCS
+  codelets/codelet_map.c
   ${RUNTIME_COMMON}
   ${RUNTIME_SRCS_GENERATED}
   )
diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c
new file mode 100644
index 000000000..f6883c5f0
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_map.c
@@ -0,0 +1,69 @@
+/**
+ *
+ * @file codelet_map.c
+ *
+ * @copyright 2018-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon map StarPU codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2018-09-24
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_map_cpu_func(void *descr[], void *cl_arg) /* CPU body of the map codelet */
+{
+    const CHAM_desc_t *desc;
+    cham_uplo_t uplo;
+    int m;
+    int n;
+    void *data;
+    cham_unary_operator_t operator;
+    void *op_args;
+    /* descr[0] is the only buffer of the task; cl_arg holds the values packed by INSERT_TASK_map */
+    data = (void *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &desc, &uplo, &m, &n, &operator, &op_args ); /* same order as packed */
+    operator( desc, uplo, m, n, data, op_args ); /* the operator's int result is discarded */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(map, 1, cl_map_cpu_func)
+
+void INSERT_TASK_map( const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
+                      cham_unary_operator_t operator, void *op_args )
+{
+    /* Submit one task running the map codelet on tile (Am, An) of A, accessed read-write. */
+    struct starpu_codelet *codelet = &cl_map;
+    void (*callback)(void*) = options->profiling ? cl_map_callback : NULL; /* callback only when profiling */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_END_ACCESS_DECLARATION;
+    /* STARPU_VALUE entries are listed in the order cl_map_cpu_func unpacks them. */
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &A,                      sizeof(CHAM_desc_t*),
+        STARPU_VALUE,    &uplo,                   sizeof(cham_uplo_t),
+        STARPU_VALUE,    &Am,                     sizeof(int),
+        STARPU_VALUE,    &An,                     sizeof(int),
+        STARPU_RW,        RTBLKADDR(A, void, Am, An),
+        STARPU_VALUE,   &operator,                sizeof(cham_unary_operator_t),
+        STARPU_VALUE,   &op_args,                 sizeof(void*), /* copies the pointer, not the pointee */
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "map",
+#endif
+        0);
+}
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index f0213ff00..9c9d73bf4 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -117,9 +117,6 @@
 #define CCODELETS_HEADER(name)                CODELETS_HEADER(c##name)
 #define ZCODELETS_HEADER(name)                CODELETS_HEADER(z##name)
 
-#define SCODELETS_CPU_HEADER(name)        CODELETS_CPU_HEADER(s##name)
-#define DCODELETS_CPU_HEADER(name)        CODELETS_CPU_HEADER(d##name)
-#define CCODELETS_CPU_HEADER(name)        CODELETS_CPU_HEADER(c##name)
-#define ZCODELETS_CPU_HEADER(name)        CODELETS_CPU_HEADER(z##name)
+CODELETS_HEADER(map);
 
 #endif /* _CODELETS_H_ */
-- 
GitLab