From dc7256178d921542070e1a35e978e82de955be80 Mon Sep 17 00:00:00 2001
From: Florent Pruvost <florent.pruvost@inria.fr>
Date: Fri, 7 Mar 2025 14:54:35 +0100
Subject: [PATCH] benchs: add scripts to upload/download data on the elasticsearch database server

---
 scripts/add_result.py | 138 ++++++++++++++++++++++++++++++++++++++++++
 scripts/database.sh   |   7 +-
 scripts/get_result.py |  93 ++++++++++++++++++++++++++++
 3 files changed, 235 insertions(+), 3 deletions(-)
 create mode 100755 scripts/add_result.py
 create mode 100755 scripts/get_result.py

diff --git a/scripts/add_result.py b/scripts/add_result.py
new file mode 100755
index 000000000..87b68fa45
--- /dev/null
+++ b/scripts/add_result.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+"""Upload scalfmm benchmark results stored in csv files to elasticsearch."""
+from typing import Any, Callable, Dict, List, Tuple, Union
+import click
+import csv
+from elasticsearch import Elasticsearch
+
+
+Row = Dict[str, Union[str, float]]
+# (csv column, elasticsearch field, python converter, elasticsearch mapping)
+Field = Tuple[str, str, Callable[[Any], Any], Dict[str, str]]
+
+_KEYWORD = {"type": "keyword"}
+_INTEGER = {"type": "integer"}
+_FLOAT = {"type": "float"}
+_DATE = {"type": "date", "format": "yyyy-MM-dd HH:mm:ss Z"}
+
+# Fields shared by every table.
+COMMON_FIELDS: List[Field] = [
+    ("gitcommitdate", "Commit_date", str, _DATE),
+    ("gitcommit", "Commit_sha", str, _KEYWORD),
+    ("hostname", "Hostname", str, _KEYWORD),
+    ("ndim", "Ndim", int, _INTEGER),
+    ("kernel_type", "Kernel_type", int, _INTEGER),
+    ("interp_type", "Interp_type", int, _INTEGER),
+]
+
+# Layout of the documents of each table; the single source of truth for both
+# the csv conversion and the elasticsearch mapping.
+TABLES: Dict[str, List[Field]] = {
+    "accuracy": COMMON_FIELDS + [
+        ("tree_height", "Tree_height", int, _INTEGER),
+        ("interp_order", "Interp_order", int, _INTEGER),
+        ("error", "Error", float, _FLOAT),
+    ],
+    "timeseq": COMMON_FIELDS + [
+        ("nrun", "Nrun", int, _INTEGER),
+        ("tree_height", "Tree_height", int, _INTEGER),
+        ("interp_order", "Interp_order", int, _INTEGER),
+        ("size", "Size", int, _INTEGER),
+        ("timefar_avg", "Timefar_avg", float, _FLOAT),
+        ("timenear_avg", "Timenear_avg", float, _FLOAT),
+        ("timefull_avg", "Timefull_avg", float, _FLOAT),
+    ],
+    "timeomp": COMMON_FIELDS + [
+        ("nrun", "Nrun", int, _INTEGER),
+        ("tree_height", "Tree_height", int, _INTEGER),
+        ("interp_order", "Interp_order", int, _INTEGER),
+        ("size", "Size", int, _INTEGER),
+        ("nthread", "Nthread", int, _INTEGER),
+        ("groupsize", "Groupsize", int, _INTEGER),
+        ("timefull_avg", "Timefull_avg", float, _FLOAT),
+    ],
+}
+
+
+def open_csv(filename: str) -> List[Dict[str, str]]:
+    """
+    Open a csv file and return its rows as a list of dictionaries.
+    First row is titles.
+    """
+    # newline="" lets the csv module handle line endings itself
+    with open(filename, newline="") as csv_data:
+        reader = csv.DictReader(csv_data)
+        titles = reader.fieldnames or []
+        return [{title: row[title] for title in titles} for row in reader]
+
+
+def format_entry(row: Row, fields: List[Field]) -> Dict[str, Any]:
+    """
+    Convert a csv row into an elasticsearch document.
+    Raise KeyError on a missing column, ValueError on an invalid value.
+    """
+    return {
+        es_field: convert(row[column])
+        for column, es_field, convert, _ in fields
+    }
+
+
+def format_entry_accuracy(row: Row) -> Dict[str, Any]:
+    """Format a row of the accuracy table."""
+    return format_entry(row, TABLES["accuracy"])
+
+
+def format_entry_timeseq(row: Row) -> Dict[str, Any]:
+    """Format a row of the timeseq table."""
+    return format_entry(row, TABLES["timeseq"])
+
+
+def format_entry_timeomp(row: Row) -> Dict[str, Any]:
+    """Format a row of the timeomp table."""
+    return format_entry(row, TABLES["timeomp"])
+
+
+@click.command()
+@click.option("-e", "--elastic-url", default="http://localhost:9200", help="elasticsearch instance url")
+@click.option("-t", "--team", required=True, help="team name")
+@click.option("-p", "--project", required=True, help="project name")
+@click.option("-n", "--name", required=True, type=click.Choice(list(TABLES)), help="Table name")
+@click.argument("csv-files", nargs=-1)
+def main(
+    elastic_url: str,
+    team: str,
+    project: str,
+    name: str,
+    csv_files: Tuple[str, ...],
+):
+    """Add a result to an elasticsearch database."""
+    fields = TABLES[name]
+    # Convert every row before touching the server so that a malformed csv
+    # file is reported before the index is created or modified.
+    requests = [
+        format_entry(row, fields)
+        for file in csv_files
+        for row in open_csv(file)
+    ]
+
+    es = Elasticsearch(elastic_url)
+    info = es.info()
+    print("Elasticsearch version:", info["version"]["number"])
+
+    # elasticsearch only accepts lowercase index names: normalize it once so
+    # that creation, mapping and indexing all target the same index.
+    es_index = (team + "-" + project + "-" + name).lower()
+    if not es.indices.exists(index=es_index):
+        es.indices.create(index=es_index)
+
+    mapping = {
+        "properties": {es_field: es_type for _, es_field, _, es_type in fields}
+    }
+    es.indices.put_mapping(index=es_index, body=mapping)
+
+    for request in requests:
+        es.index(index=es_index, body=request)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/database.sh b/scripts/database.sh
index ddf0fde3e..f2e95b87a 100755
--- a/scripts/database.sh
+++ b/scripts/database.sh
@@ -32,6 +32,7 @@ if [[ ! -z "${CI_JOB_TOKEN}" ]]; then
   curl --header "JOB-TOKEN: $CI_JOB_TOKEN" --upload-file ./scalfmm.sqlite3 "https://gitlab.inria.fr/api/v4/projects/$CI_PROJECT_ID/packages/generic/benchmark/latest/scalfmm.sqlite3"
 fi
 
-# send results to the elasticsearch server
-#ls guix.json
-#python3 script/add_result.py -e https://elasticsearch.bordeaux.inria.fr -t concace -p "scalfmm" scalfmm.csv
+# upload results to the elasticsearch server
+python3 ./scripts/add_result.py -e https://elasticsearch.bordeaux.inria.fr -t concace -p scalfmm -n accuracy scalfmm_accuracy.csv
+python3 ./scripts/add_result.py -e https://elasticsearch.bordeaux.inria.fr -t concace -p scalfmm -n timeseq scalfmm_timeseq.csv
+python3 ./scripts/add_result.py -e https://elasticsearch.bordeaux.inria.fr -t concace -p scalfmm -n timeomp scalfmm_timeomp.csv
diff --git a/scripts/get_result.py b/scripts/get_result.py
new file mode 100755
index 000000000..023080ced
--- /dev/null
+++ b/scripts/get_result.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+"""Download scalfmm benchmark results from elasticsearch into a csv file."""
+import pandas
+import click
+import csv
+from elasticsearch import Elasticsearch
+
+# Column types shared by every table.
+_COMMON = {
+    "Hostname": str,
+    "Ndim": int,
+    "Kernel_type": int,
+    "Interp_type": int,
+    "Tree_height": int,
+    "Interp_order": int,
+}
+# Column types of each known table.
+DTYPES = {
+    "accuracy": {**_COMMON, "Error": float},
+    "timeseq": {
+        **_COMMON,
+        "Nrun": int,
+        "Size": int,
+        "Timefar_avg": float,
+        "Timenear_avg": float,
+        "Timefull_avg": float,
+    },
+    "timeomp": {
+        **_COMMON,
+        "Nrun": int,
+        "Size": int,
+        "Nthread": int,
+        "Groupsize": int,
+        "Timefull_avg": float,
+    },
+}
+
+
+@click.command()
+@click.option("-e", "--elastic-url", default="http://localhost:9200", help="elasticsearch instance url")
+@click.option("-t", "--team", required=True, help="team name")
+@click.option("-p", "--project", required=True, help="project name")
+@click.option("-n", "--name", required=True, help="Table name")
+@click.option("-c", "--commit", required=True, help="project commit")
+def main(
+    elastic_url: str,
+    team: str,
+    project: str,
+    name: str,
+    commit: str
+):
+    """Get a result from an elasticsearch database, e.g.
+    https://elasticsearch.bordeaux.inria.fr."""
+    es = Elasticsearch(elastic_url)
+    # elasticsearch index names are lowercase (see add_result.py)
+    es_index = (team + "-" + project + "-" + name).lower()
+
+    search_param = {
+        "query": {
+            "bool": {
+                "must": [
+                    {"term": {"Commit_sha": {"value": commit}}}
+                ]
+            }
+        },
+        # at most 1000 documents are returned, extra ones are ignored
+        "size": 1000
+    }
+    response = es.search(index=es_index, body=search_param)
+    elastic_docs = response["hits"]["hits"]
+    if not elastic_docs:
+        raise click.ClickException(
+            "no result for commit " + commit + " in index " + es_index
+        )
+
+    # one row per document, indexed by the document _id
+    docs = pandas.DataFrame(
+        [doc["_source"] for doc in elastic_docs],
+        index=[doc["_id"] for doc in elastic_docs],
+    )
+    docs = docs.drop(columns=["Commit_date", "Commit_sha"])
+
+    # unknown tables are exported without any type conversion
+    dtypes = DTYPES.get(name)
+    if dtypes is not None:
+        docs = docs.astype(dtypes)
+
+    docs = docs.rename(columns=str.lower)
+    docs.to_csv("scalfmm_" + name + ".csv", sep=",", index=False)
+
+
+if __name__ == "__main__":
+    main()
-- 
GitLab