From dc7256178d921542070e1a35e978e82de955be80 Mon Sep 17 00:00:00 2001
From: Florent Pruvost <florent.pruvost@inria.fr>
Date: Fri, 7 Mar 2025 14:54:35 +0100
Subject: [PATCH] benchs: add scripts to upload/download data on the
 elasticsearch database server

---
 scripts/add_result.py | 209 ++++++++++++++++++++++++++++++++++++++++++
 scripts/database.sh   |   7 +-
 scripts/get_result.py |  65 +++++++++++++
 3 files changed, 278 insertions(+), 3 deletions(-)
 create mode 100755 scripts/add_result.py
 create mode 100755 scripts/get_result.py

diff --git a/scripts/add_result.py b/scripts/add_result.py
new file mode 100755
index 000000000..87b68fa45
--- /dev/null
+++ b/scripts/add_result.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+from typing import Any, Dict, List, Union
+import click
+import csv
+from elasticsearch import Elasticsearch
+
+
+Row = Dict[str, Union[str, float]]
+
+
+def open_csv(filename: str) -> List[Dict[str, str]]:
+    """
+    Open a csv file and return its rows as a list of dictionaries.
+    The first row holds the column titles, used as dictionary keys.
+    """
+    # newline="" is what the csv module documentation asks for: the reader
+    # then handles line endings itself, so a quoted field that spans
+    # several lines keeps its original content.
+    with open(filename, newline="") as csv_data:
+        reader = csv.DictReader(csv_data)
+        titles = reader.fieldnames
+        # Copy only the titled columns: the key DictReader adds for surplus
+        # fields of an over-long row is left out.
+        return [
+            {title: row[title] for title in titles}
+            for row in reader
+        ]
+
+def format_entry_accuracy(row: Row) -> Dict[str, Any]:
+    """Build the elasticsearch document for one row of the accuracy CSV.
+
+    gitcommit, gitcommitdate and hostname are kept as strings; ndim,
+    kernel_type, interp_type, tree_height and interp_order are converted
+    to int and error to float. The row is only read, so the caller's
+    dictionary is left untouched.
+
+    Raises KeyError when a column is missing and ValueError when a text
+    value cannot be parsed as a number.
+    """
+    # row[...] rather than row.pop(...): formatting must not consume the row.
+    return {
+        "Commit_date": str(row['gitcommitdate']),
+        "Commit_sha": str(row['gitcommit']),
+        "Hostname": str(row['hostname']),
+        "Ndim": int(row['ndim']),
+        "Kernel_type": int(row['kernel_type']),
+        "Interp_type": int(row['interp_type']),
+        "Tree_height": int(row['tree_height']),
+        "Interp_order": int(row['interp_order']),
+        "Error": float(row['error'])
+    }
+
+def format_entry_timeseq(row: Row) -> Dict[str, Any]:
+    """Build the elasticsearch document for one row of the timeseq CSV.
+
+    Column conversions:
+      - gitcommit, gitcommitdate, hostname: kept as strings;
+      - ndim, kernel_type, interp_type, nrun, tree_height, interp_order,
+        size: converted to int;
+      - timefar_avg, timenear_avg, timefull_avg: converted to float.
+
+    The keys of the returned dictionary are the document field names
+    (for instance gitcommit is stored as Commit_sha). The row is only
+    read, so the caller's dictionary is left untouched.
+
+    Raises KeyError when a column is missing and ValueError when a text
+    value cannot be parsed as a number.
+    """
+    # row[...] rather than row.pop(...): formatting must not consume the row.
+    return {
+        "Commit_date": str(row['gitcommitdate']),
+        "Commit_sha": str(row['gitcommit']),
+        "Hostname": str(row['hostname']),
+        "Ndim": int(row['ndim']),
+        "Kernel_type": int(row['kernel_type']),
+        "Interp_type": int(row['interp_type']),
+        "Nrun": int(row['nrun']),
+        "Tree_height": int(row['tree_height']),
+        "Interp_order": int(row['interp_order']),
+        "Size": int(row['size']),
+        "Timefar_avg": float(row['timefar_avg']),
+        "Timenear_avg": float(row['timenear_avg']),
+        "Timefull_avg": float(row['timefull_avg'])
+    }
+
+def format_entry_timeomp(row: Row) -> Dict[str, Any]:
+    """Build the elasticsearch document for one row of the timeomp CSV.
+
+    Column conversions:
+      - gitcommit, gitcommitdate, hostname: kept as strings;
+      - ndim, kernel_type, interp_type, nrun, tree_height, interp_order,
+        size, nthread, groupsize: converted to int;
+      - timefull_avg: converted to float.
+
+    The keys of the returned dictionary are the document field names
+    (for instance gitcommit is stored as Commit_sha). The row is only
+    read, so the caller's dictionary is left untouched.
+
+    Raises KeyError when a column is missing and ValueError when a text
+    value cannot be parsed as a number.
+    """
+    # row[...] rather than row.pop(...): formatting must not consume the row.
+    return {
+        "Commit_date": str(row['gitcommitdate']),
+        "Commit_sha": str(row['gitcommit']),
+        "Hostname": str(row['hostname']),
+        "Ndim": int(row['ndim']),
+        "Kernel_type": int(row['kernel_type']),
+        "Interp_type": int(row['interp_type']),
+        "Nrun": int(row['nrun']),
+        "Tree_height": int(row['tree_height']),
+        "Interp_order": int(row['interp_order']),
+        "Size": int(row['size']),
+        "Nthread": int(row['nthread']),
+        "Groupsize": int(row['groupsize']),
+        "Timefull_avg": float(row['timefull_avg'])
+    }
+
+@click.command()
+@click.option("-e", "--elastic-url", default="http://localhost:9200", help="elasticsearch instance url")
+@click.option("-t", "--team", required=True, help="team name")
+@click.option("-p", "--project", required=True, help="project name")
+@click.option("-n", "--name", required=True, help="Table name")
+@click.argument("csv-files", nargs=-1)
+def main(
+    elastic_url: str,
+    team: str,
+    project: str,
+    name: str,
+    csv_files: str,
+):
+    """Add a result to an elasticsearch database."""
+    es = Elasticsearch(elastic_url)
+    info = es.info()
+    print("Elasticsearch version:", info["version"]["number"])
+
+    es_index = (team + "-" + project + "-" + name).lower()  # elasticsearch index names are lowercase
+    if not es.indices.exists(index=es_index):
+        es.indices.create(index=es_index)
+
+    mapping_input_accuracy = {
+        "properties": {
+            "Commit_sha": {"type": "keyword"},
+            "Commit_date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss Z"},
+            "Hostname": {"type": "keyword"},
+            "Ndim": {"type": "integer"},
+            "Kernel_type": {"type": "integer"},
+            "Interp_type": {"type": "integer"},
+            "Tree_height": {"type": "integer"},
+            "Interp_order": {"type": "integer"},
+            "Error": {"type": "float"}
+        }
+    }
+    mapping_input_timeseq = {
+        "properties": {
+            "Commit_sha": {"type": "keyword"},
+            "Commit_date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss Z"},
+            "Hostname": {"type": "keyword"},
+            "Ndim": {"type": "integer"},
+            "Kernel_type": {"type": "integer"},
+            "Interp_type": {"type": "integer"},
+            "Nrun": {"type": "integer"},
+            "Tree_height": {"type": "integer"},
+            "Interp_order": {"type": "integer"},
+            "Size": {"type": "integer"},
+            "Timefar_avg": {"type": "float"},
+            "Timenear_avg": {"type": "float"},
+            "Timefull_avg": {"type": "float"}
+        }
+    }
+    mapping_input_timeomp = {
+        "properties": {
+            "Commit_sha": {"type": "keyword"},
+            "Commit_date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss Z"},
+            "Hostname": {"type": "keyword"},
+            "Ndim": {"type": "integer"},
+            "Kernel_type": {"type": "integer"},
+            "Interp_type": {"type": "integer"},
+            "Nrun": {"type": "integer"},
+            "Tree_height": {"type": "integer"},
+            "Interp_order": {"type": "integer"},
+            "Size": {"type": "integer"},
+            "Nthread": {"type": "integer"},
+            "Groupsize": {"type": "integer"},
+            "Timefull_avg": {"type": "float"}
+        }
+    }
+
+    if name == "accuracy":
+        es.indices.put_mapping(index=es_index, body=mapping_input_accuracy)
+    elif name == "timeseq":
+        es.indices.put_mapping(index=es_index, body=mapping_input_timeseq)
+    elif name == "timeomp":
+        es.indices.put_mapping(index=es_index, body=mapping_input_timeomp)
+
+    # Pick the row formatter once; as before, any other name uses timeomp.
+    formatters = {"accuracy": format_entry_accuracy, "timeseq": format_entry_timeseq}
+    format_entry = formatters.get(name, format_entry_timeomp)
+
+    # csv_files is the tuple of paths collected by click (nargs=-1). Every row
+    # is formatted before the first upload, so a malformed CSV uploads nothing.
+    requests = [format_entry(row) for file in csv_files for row in open_csv(file)]
+    for request in requests:
+        # es_index is already lowercase: same index as the one created above.
+        es.index(index=es_index, body=request)
+
+
+if __name__ == "__main__":
+    main()  # click reads the options and the CSV paths from the command line
diff --git a/scripts/database.sh b/scripts/database.sh
index ddf0fde3e..f2e95b87a 100755
--- a/scripts/database.sh
+++ b/scripts/database.sh
@@ -32,6 +32,7 @@ if [[ ! -z "${CI_JOB_TOKEN}" ]]; then
   curl --header "JOB-TOKEN: $CI_JOB_TOKEN" --upload-file ./scalfmm.sqlite3 "https://gitlab.inria.fr/api/v4/projects/$CI_PROJECT_ID/packages/generic/benchmark/latest/scalfmm.sqlite3"
 fi
 
-# send results to the elasticsearch server
-#ls guix.json
-#python3 script/add_result.py -e https://elasticsearch.bordeaux.inria.fr -t concace -p "scalfmm" scalfmm.csv
+# upload each result table to the elasticsearch server (one CSV file per table)
+for table in accuracy timeseq timeomp; do
+  python3 ./scripts/add_result.py -e https://elasticsearch.bordeaux.inria.fr -t concace -p scalfmm -n "$table" "scalfmm_${table}.csv"
+done
diff --git a/scripts/get_result.py b/scripts/get_result.py
new file mode 100755
index 000000000..023080ced
--- /dev/null
+++ b/scripts/get_result.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+import pandas
+import click
+import csv
+from elasticsearch import Elasticsearch
+
+@click.command()
+@click.option("-e", "--elastic-url", default="http://localhost:9200", help="elasticsearch instance url")
+@click.option("-t", "--team", required=True, help="team name")
+@click.option("-p", "--project", required=True, help="project name")
+@click.option("-n", "--name", required=True, help="Table name")
+@click.option("-c", "--commit", required=True, help="project commit")
+def main(
+    elastic_url: str,
+    team: str,
+    project: str,
+    name: str,
+    commit: str
+):
+    """Get a result from an elasticsearch database, e.g.
+    https://elasticsearch.bordeaux.inria.fr.
+
+    The documents of index TEAM-PROJECT-NAME whose Commit_sha equals COMMIT
+    are written to scalfmm_NAME.csv with lower-case column titles. At most
+    1000 documents are fetched; the command fails when none matches.
+    """
+    es = Elasticsearch(elastic_url)
+    # add_result.py indexes into the lower-cased name: query the same index.
+    es_index = (team + "-" + project + "-" + name).lower()
+
+    # Select the documents of the requested commit; "size" caps the hit count.
+    search_param = {
+      "query": {
+        "bool": {
+          "must": [
+            {"term": {"Commit_sha": {"value": commit}}}
+          ]
+        }
+      },
+      "size": 1000
+    }
+    response = es.search(index=es_index, body=search_param)
+    records = [doc["_source"] for doc in response["hits"]["hits"]]
+    if not records:
+        # Without this guard the conversions below fail with an obscure KeyError.
+        raise click.ClickException(
+            "no result in index " + es_index + " for commit " + commit
+        )
+
+    # One row per document; the commit columns are not exported.
+    docs = pandas.DataFrame(records).drop(columns=['Commit_date', 'Commit_sha'])
+
+    if name == 'accuracy':
+        docs = docs.astype({"Hostname": str, "Ndim": int, "Kernel_type": int, "Interp_type": int, "Tree_height": int, "Interp_order": int, "Error": float})
+    elif name == 'timeseq':
+        docs = docs.astype({"Hostname": str, "Ndim": int, "Kernel_type": int, "Interp_type": int, "Tree_height": int, "Interp_order": int, "Size": int, "Timefar_avg": float, "Timenear_avg": float, "Timefull_avg": float})
+    elif name == 'timeomp':
+        docs = docs.astype({"Hostname": str, "Ndim": int, "Kernel_type": int, "Interp_type": int, "Nrun": int, "Tree_height": int, "Interp_order": int, "Size": int, "Nthread": int, "Groupsize": int, "Timefull_avg": float})
+
+    docs = docs.rename(columns=str.lower)
+    # sep is passed by keyword: pandas deprecates positional to_csv arguments.
+    docs.to_csv("scalfmm_" + name + ".csv", sep=",", index=False)
+
+if __name__ == "__main__":
+    main()  # click reads the options from the command line
-- 
GitLab