diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a94e71fa98db8018923dfad10e096f5caacec630..520725253313ae44dbb126b2706402260cb95a4e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,3 @@ -image: hpclib/hiepacs - stages: - build - test @@ -12,6 +10,7 @@ before_script: - mkdir -p build-$VERSION .build_script_template: &build_script + image: hpclib/hiepacs stage: build tags: ["large"] script: @@ -28,8 +27,11 @@ before_script: - (cd build-$VERSION && make install | tee -a ../chameleon_${VERSION}.log && rm install/ -r) + except: + - schedules .test_script_template: &test_script + image: hpclib/hiepacs stage: test tags: ["large"] script: @@ -43,6 +45,8 @@ before_script: -D ExperimentalSubmit | tee -a ../chameleon_${VERSION}.log") - lcov --directory build-$VERSION --capture --output-file ./chameleon_${VERSION}.lcov + except: + - schedules build_openmp: <<: *build_script @@ -124,6 +128,7 @@ test_openmp_branches: except: - master - /^ci-.*$/ + - schedules test_openmp_master: <<: *test_script @@ -162,6 +167,7 @@ test_parsec_branches: except: - master - /^ci-.*$/ + - schedules test_parsec_master: <<: *test_script @@ -200,6 +206,7 @@ test_quark_branches: except: - master - /^ci-.*$/ + - schedules test_quark_master: <<: *test_script @@ -238,6 +245,7 @@ test_starpu_branches: except: - master - /^ci-.*$/ + - schedules test_starpu_master: <<: *test_script @@ -277,6 +285,7 @@ test_starpu_simgrid_branches: except: - master - /^ci-.*$/ + - schedules test_starpu_simgrid_master: <<: *test_script @@ -296,8 +305,22 @@ test_starpu_simgrid_master: only: - master@solverstack/chameleon - /^ci-.*$/ + except: + - schedules + +bench_plafrim: + stage: test + tags: ['plafrim'] + variables: + BUILD_OPTIONS: "-DCHAMELEON_USE_MPI=ON -DCMAKE_BUILD_TYPE=Release" + VERSION: miriel + script: + - (cd tools/bench/plafrim/miriel && ./run.sh) + only: + - schedules coverage: + image: hpclib/hiepacs stage: analyse dependencies: - test_starpu_branches @@ -313,13 +336,13 @@ coverage: -a 
chameleon_openmp.lcov -o chameleon.lcov - lcov --summary chameleon.lcov - only: - - branches except: - master - /^ci-.*$/ + - schedules .coverity: + image: hpclib/hiepacs stage: analyse tags: ["large"] dependencies: [] @@ -343,8 +366,11 @@ coverage: https://scan.coverity.com/builds?project=Chameleon only: - master@solverstack/chameleon + except: + - schedules sonarqube: + image: hpclib/hiepacs stage: analyse tags: ["large"] dependencies: @@ -370,8 +396,11 @@ sonarqube: only: - master@solverstack/chameleon - /^ci-.*$/ + except: + - schedules pages: + image: hpclib/hiepacs stage: deploy dependencies: [] artifacts: @@ -391,8 +420,11 @@ pages: only: - master@solverstack/chameleon - /^ci-.*$/ + except: + - schedules release: + image: hpclib/hiepacs stage: deploy dependencies: [] artifacts: @@ -402,7 +434,9 @@ release: - chameleon-*.tar.gz variables: VERSION: release - only: - - /^release-.*$/ script: - ./tools/release.sh + only: + - /^release-.*$/ + except: + - schedules diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake index ade499661b58c71fe0586c2bbb98ea9725a88c52..e4d8db74b5c96ea8351d117d11677e5ed0d7a3d9 160000 --- a/cmake_modules/morse_cmake +++ b/cmake_modules/morse_cmake @@ -1 +1 @@ -Subproject commit ade499661b58c71fe0586c2bbb98ea9725a88c52 +Subproject commit e4d8db74b5c96ea8351d117d11677e5ed0d7a3d9 diff --git a/tools/bench/README.org b/tools/bench/README.org new file mode 100644 index 0000000000000000000000000000000000000000..7b6bf95db77c850f149baea7b2ac6fdf3a616187 --- /dev/null +++ b/tools/bench/README.org @@ -0,0 +1,36 @@ +#+TITLE: Chameleon benchmarks +#+LANGUAGE: en +#+OPTIONS: H:3 num:t \n:nil @:t ::t |:t _:nil ^:nil -:t f:t *:t <:t +#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil pri:nil tags:not-in-toc html-style:nil + +The scripts located here are used to analyze Chameleon performances in +a systematic way. 
The workflow is as follows: + +* Gitlab-ci + We use a /schedule/ job that will be triggered with a chosen + frequency; the schedule job frequency can be tuned in the CI/CD part + of the web interface. +* Guix + [[https://guix.gnu.org/][Guix]] is responsible for building an isolated and reproducible + environment to build and execute Chameleon, Jube and some Python + scripts. We can use it on PlaFRIM because Guix and [[https://gitlab.inria.fr/guix-hpc/guix-hpc-non-free][Guix-HPC]] are + installed. +* Jube + [[https://apps.fz-juelich.de/jsc/jube/jube2/docu/index.html][Jube]] is used to drive the execution with different parameter spaces + and to parse the results into CSV files. Apart from the execution + parameters such as the problem sizes, the number of resources used, + and so on, we also save in the database the commit date of Chameleon + and the commit ids of Chameleon and of the Guix channels to properly + identify the software versions. +* Elasticsearch + [[https://www.elastic.co/fr/][Elasticsearch]] is the database framework. The server is + https://elasticsearch.bordeaux.inria.fr. It is only accessible from + Inria's networks for now. +* Kibana + [[https://www.elastic.co/fr/][Kibana]] is a web server used to visualize performance on graphs. It + looks for data imported into the Elasticsearch database. We want to be + able to analyze the performance for each commit for which the + scheduled job has been performed and to monitor performance over + time/commits. The Kibana server is hosted at + https://kibana.bordeaux.inria.fr. It is only accessible from Inria's + networks for now. 
diff --git a/tools/bench/jube/add_result.py b/tools/bench/jube/add_result.py
new file mode 100755
index 0000000000000000000000000000000000000000..f5e4502cbcb6f18d16b299c1213b5ab73aab890d
--- /dev/null
+++ b/tools/bench/jube/add_result.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+"""Upload Chameleon benchmark results (jube CSV tables) to Elasticsearch."""
+
+from typing import Any, Dict, List, Tuple, Union
+from copy import deepcopy
+import json
+import click
+import csv
+import time
+from git import Commit, Repo
+from elasticsearch import Elasticsearch
+
+
+Row = Dict[str, Union[str, float]]
+
+# Guix channels whose commit id is stored with every result.
+GUIX_CHANNELS = ("guix", "guix-hpc", "guix-hpc-non-free")
+
+
+def open_csv(filename: str) -> List[Dict[str, str]]:
+    """
+    Read a CSV file and return its rows as a list of dictionaries.
+
+    The first row of the file provides the column titles used as keys.
+    """
+    with open(filename, newline="") as csv_data:
+        reader = csv.DictReader(csv_data)
+        # fieldnames is None for an empty file; there are no rows then.
+        titles = reader.fieldnames or []
+        return [{title: row[title] for title in titles} for row in reader]
+
+
+def format_entry(
+    row: Row,
+    commit_chameleon: Commit,
+    commit_guix: str,
+    commit_guix_hpc: str,
+    commit_guix_hpcnonfree: str,
+) -> Dict[str, Any]:
+    """
+    Build the Elasticsearch document for one benchmark result.
+
+    row holds one line of the result table (columns hostname, algorithm,
+    precision, NMPI, P, Q, NTHREAD, NGPU, M, N, K, CPUTIME, GFLOPS and
+    STDDEV) and is left unmodified.  commit_chameleon is the benchmarked
+    commit; the other arguments are the commit ids of the guix channels.
+
+    Raises KeyError if a column is missing and ValueError if a numeric
+    column cannot be converted.
+    """
+    commit_date = time.strftime(
+        "%Y-%m-%d %H:%M:%S", time.gmtime(commit_chameleon.committed_date)
+    )
+    return {
+        "Commit_date_chameleon": commit_date,
+        "Commit_sha_chameleon": str(commit_chameleon.hexsha),
+        "Commit_sha_guix": commit_guix,
+        "Commit_sha_guix_hpc": commit_guix_hpc,
+        "Commit_sha_guix_hpcnonfree": commit_guix_hpcnonfree,
+        "Hostname": str(row["hostname"]),
+        "Algorithm": str(row["algorithm"]),
+        "Precision": str(row["precision"]),
+        "Nmpi": int(row["NMPI"]),
+        "P": int(row["P"]),
+        "Q": int(row["Q"]),
+        "Nthread": int(row["NTHREAD"]),
+        "Ngpu": int(row["NGPU"]),
+        "M": int(row["M"]),
+        "N": int(row["N"]),
+        "K": int(row["K"]),
+        "Cputime": float(row["CPUTIME"]),
+        "Gflops": float(row["GFLOPS"]),
+        "Stddev": float(row["STDDEV"]),
+    }
+
+
+@click.command()
+@click.option("-d", "--directory", default=".", help="git working directory")
+@click.option("-e", "--elastic-url", default="http://localhost:9200", help="elasticsearch instance url")
+@click.option("-t", "--team", required=True, help="team name")
+@click.option("-p", "--project", required=True, help="project name")
+@click.option("-h", "--host", required=True, help="host name")
+@click.argument("csv-files", nargs=-1)
+def main(
+    directory: str,
+    elastic_url: str,
+    team: str,
+    project: str,
+    host: str,
+    csv_files: Tuple[str, ...],
+):
+    """Add a result to an elasticsearch database."""
+    # NOTE: host is accepted on the command line but not used; the "Hostname"
+    # field of each document is taken from the CSV rows.
+    es = Elasticsearch(elastic_url)
+    # Lowercase the index name once so that the existence check, the mapping
+    # and the indexing calls all address the same index.
+    es_index = (team + "_" + project + "_" + "perf").lower()
+    if not es.indices.exists(index=es_index):
+        es.indices.create(index=es_index)
+
+    mapping_input = {
+        "result": {
+            "properties": {
+                "Commit_date_chameleon": {"type": "date", "format": "yyyy-MM-dd' 'HH:mm:ss"},
+                "Commit_sha_chameleon": {"type": "keyword"},
+                "Commit_sha_guix": {"type": "keyword"},
+                "Commit_sha_guix_hpc": {"type": "keyword"},
+                "Commit_sha_guix_hpcnonfree": {"type": "keyword"},
+                "Hostname": {"type": "keyword"},
+                "Algorithm": {"type": "keyword"},
+                "Precision": {"type": "keyword"},
+                "Nmpi": {"type": "integer"},
+                "P": {"type": "integer"},
+                "Q": {"type": "integer"},
+                "Nthread": {"type": "integer"},
+                "Ngpu": {"type": "integer"},
+                "M": {"type": "integer"},
+                "N": {"type": "integer"},
+                "K": {"type": "integer"},
+                "Cputime": {"type": "float"},
+                "Gflops": {"type": "float"},
+                "Stddev": {"type": "float"}
+            }
+        }
+    }
+    es.indices.put_mapping(index=es_index, doc_type="result", body=mapping_input)
+
+    repo = Repo(directory, search_parent_directories=True)
+    commit_chameleon = repo.head.commit
+
+    # collect guix commits info
+    with open('guix.json') as f:
+        guix_describe = json.load(f)
+    guix_commits = {
+        channel["name"]: channel["commit"]
+        for channel in guix_describe
+        if channel["name"] in GUIX_CHANNELS
+    }
+    missing = [name for name in GUIX_CHANNELS if name not in guix_commits]
+    if missing:
+        # Fail with a clear message instead of a NameError further down.
+        raise click.ClickException(
+            "guix.json has no entry for channel(s): " + ", ".join(missing)
+        )
+
+    # Format every row before sending anything, so that a malformed CSV file
+    # aborts the run without leaving a partial upload behind.
+    requests = [
+        format_entry(
+            row,
+            commit_chameleon,
+            guix_commits["guix"],
+            guix_commits["guix-hpc"],
+            guix_commits["guix-hpc-non-free"],
+        )
+        for file in csv_files
+        for row in open_csv(file)
+    ]
+    for request in requests:
+        es.index(index=es_index, doc_type="result", body=request)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/bench/jube/paths.xml b/tools/bench/jube/paths.xml
new file mode 100644
index 0000000000000000000000000000000000000000..b92cad88a8aa3cb975df3c4784c2b2cf73f910d1
--- /dev/null
+++ b/tools/bench/jube/paths.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<jube>
+  <parameterset name="paths">
+    <parameter name="BIN_DIR" type="string">{{CHAMELEON_DIR}}</parameter>
+  </parameterset>
+</jube>
diff --git a/tools/bench/jube/patterns.xml b/tools/bench/jube/patterns.xml
new file mode 100644
index 0000000000000000000000000000000000000000..05c8f976db026952d3aa8e6da79aa2dc51e62191
--- /dev/null
+++ b/tools/bench/jube/patterns.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<jube>
+  <!-- Regex pattern -->
+  <patternset name="chameleon">
+    <pattern name="NMPI" type="int"># Nb mpi: *(\d+)</pattern>
+    <pattern name="P" type="int"># PxQ: *(\d+)x\d+</pattern>
+    <pattern name="Q" type="int"># PxQ: *\d+x(\d+)</pattern>
+    <pattern name="NTHREAD" type="int"># Nb threads: *(\d+)</pattern>
+    <pattern name="NGPU" type="int"># Nb GPUs: *(\d+)</pattern>
+    <pattern name="M" type="int">^ *(\d+) *\d+ *\d+ *\d+\.?\d+ *\d+\.?\d+ \+\- *\d+\.?\d+ *</pattern>
+    <pattern name="N" type="int">^ *\d+ *(\d+) *\d+ *\d+\.?\d+ *\d+\.?\d+ \+\- *\d+\.?\d+ *</pattern>
+ <pattern name="K" type="int">^ *\d+ *\d+ *(\d+) *\d+\.?\d+ *\d+\.?\d+ \+\- *\d+\.?\d+ *</pattern> + <pattern name="CPUTIME" type="float">^ *\d+ *\d+ *\d+ *(\d+\.?\d+) *\d+\.?\d+ \+\- *\d+\.?\d+ *</pattern> + <pattern name="GFLOPS" type="float">^ *\d+ *\d+ *\d+ *\d+\.?\d+ *(\d+\.?\d+) \+\- *\d+\.?\d+ *</pattern> + <pattern name="STDDEV" type="float">^ *\d+ *\d+ *\d+ *\d+\.?\d+ *\d+\.?\d+ \+\- *(\d+\.?\d+) *</pattern> + </patternset> +</jube> diff --git a/tools/bench/jube/requirements.txt b/tools/bench/jube/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..81ce03361a7196b0f7667e071de801b1601c05cb --- /dev/null +++ b/tools/bench/jube/requirements.txt @@ -0,0 +1,4 @@ +click==7.0 +gitpython==2.1.11 +elasticsearch==6.3.1 +certifi diff --git a/tools/bench/plafrim/miriel/chameleon.xml b/tools/bench/plafrim/miriel/chameleon.xml new file mode 100644 index 0000000000000000000000000000000000000000..98d369ff2fd038eea344e1a350d8ee6fa5994103 --- /dev/null +++ b/tools/bench/plafrim/miriel/chameleon.xml @@ -0,0 +1,112 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jube> + <benchmark name="miriel" outpath="results/miriel"> + <comment>benchmark chameleon on host plafrim miriel</comment> + + <parameterset name="param_gemm"> + <parameter name="hostname" type="string">miriel</parameter> + <parameter name="algorithm" type="string">gemm</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >22</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> + <parameter name="m" mode="python" type="int" 
>[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="k" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + </parameterset> + + <parameterset name="param_potrf"> + <parameter name="hostname" type="string">miriel</parameter> + <parameter name="algorithm" type="string">potrf</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >22</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> + <parameter name="m" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="k" type="int" >1</parameter> + </parameterset> + + <parameterset name="param_geqrf"> + <parameter name="hostname" type="string">miriel</parameter> + <parameter name="algorithm" type="string">geqrf_hqr</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" 
>22</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> + <parameter name="m" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="k" type="int" >1</parameter> + </parameterset> + + <!-- Operation --> + <step name="run_gemm" tag="gemm"> + <use>param_gemm</use> + <use from="../../jube/paths.xml">paths</use> + <do>STARPU_SILENT=1 mpiexec -np $nmpi $BIN_DIR/timing/time_${precision}${algorithm}_tile -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b --niter=3</do> + </step> + <step name="run_potrf" tag="potrf"> + <use>param_potrf</use> + <use from="../../jube/paths.xml">paths</use> + <do>STARPU_SILENT=1 mpiexec -np $nmpi $BIN_DIR/timing/time_${precision}${algorithm}_tile -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b --niter=3</do> + </step> + <step name="run_geqrf_hqr" tag="geqrf"> + <use>param_geqrf</use> + <use from="../../jube/paths.xml">paths</use> + <do>STARPU_SILENT=1 mpiexec -np $nmpi $BIN_DIR/timing/time_${precision}${algorithm}_tile -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b --niter=3</do> + </step> + + <!-- Analyse --> + <analyser name="analyse"> + <!-- use a pattern set --> + <use from="../../jube/patterns.xml">chameleon</use> + <analyse step="run_gemm" tag="gemm"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + <analyse step="run_potrf" tag="potrf"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + <analyse step="run_geqrf_hqr" tag="geqrf"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + </analyser> + + + <!-- Create result table --> + <result> + <use>analyse</use> <!-- use existing analyser --> + <!--<table name="result" style="csv" 
sort="number">--> + <table name="result" style="csv"> + <column>hostname</column> + <column>algorithm</column> + <column>precision</column> + <column>NMPI</column> + <column>P</column> + <column>Q</column> + <column>NTHREAD</column> + <column>NGPU</column> + <column>M</column> + <column>N</column> + <column>K</column> + <column>CPUTIME</column> + <column>GFLOPS</column> + <column>STDDEV</column> + </table> + </result> + </benchmark> +</jube> diff --git a/tools/bench/plafrim/miriel/chameleon_guix.sh b/tools/bench/plafrim/miriel/chameleon_guix.sh new file mode 100755 index 0000000000000000000000000000000000000000..a31d935ee83b87dbe533306febd66a62b3f0c4aa --- /dev/null +++ b/tools/bench/plafrim/miriel/chameleon_guix.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -x + +# Configure and Build Chameleon +echo $VERSION +cd ../../../../build-$VERSION +CHAMELEON_DIR=`pwd` +cmake $BUILD_OPTIONS .. +make -j5 +cd - + +# Define where to find the build directory for jube +sed 's@{{CHAMELEON_DIR}}@'"${CHAMELEON_DIR}"'@g' -i ../../jube/paths.xml + +# Execute jube benchmarks +jube run chameleon.xml --tag gemm potrf geqrf +# jube analysis +jube analyse results/$VERSION/ +# jube report +jube result results/$VERSION/ -i last > chameleon.csv + +# send results to the elasticsearch server +export PYTHONPATH=$GUIX_ENVIRONMENT/lib/python3.7/site-packages +python3 ../../jube/add_result.py -e https://elasticsearch.bordeaux.inria.fr -t hiepacs -p "chameleon" -h $VERSION chameleon.csv diff --git a/tools/bench/plafrim/miriel/chameleon_guix.sl b/tools/bench/plafrim/miriel/chameleon_guix.sl new file mode 100644 index 0000000000000000000000000000000000000000..8c3dd9d6e85e5238d4fcd10ad329fd10bd33ef2b --- /dev/null +++ b/tools/bench/plafrim/miriel/chameleon_guix.sl @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +#SBATCH --exclusive +#SBATCH --ntasks-per-node=1 +#SBATCH --threads-per-core=1 + +echo "######################### Chameleon benchmarks #########################" +echo "HOSTNAME $HOSTNAME" +echo "USERNAME 
$USERNAME" +echo "GIT REPO $CI_REPOSITORY_URL" +echo "GIT BRANCH $CI_COMMIT_REF_NAME" +echo "GIT COMMIT $CI_COMMIT_SHA" + +# to avoid a lock during fetching chameleon branch in parallel +export XDG_CACHE_HOME=/tmp/guix-$$ + +# save guix commits +guix describe --format=json > guix.json + +# Submit jobs +exec guix environment --pure --preserve=SLURM --preserve=VERSION --preserve=BUILD_OPTIONS chameleon --with-input=openblas=mkl --ad-hoc slurm jube python python-click python-gitpython python-elasticsearch python-certifi sed coreutils grep gawk openssh perl hwloc openmpi starpu mkl -- /bin/bash --norc chameleon_guix.sh + +echo "####################### End Chameleon benchmarks #######################" + +# clean tmp +rm -rf /tmp/guix-$$ diff --git a/tools/bench/plafrim/miriel/run.sh b/tools/bench/plafrim/miriel/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c5e21926e3aa1a00f5389fd017e9f4814645a97d --- /dev/null +++ b/tools/bench/plafrim/miriel/run.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +echo "######################### Chameleon benchmarks #########################" +echo "HOSTNAME $HOSTNAME" +echo "USERNAME $USERNAME" +echo "GIT REPO $CI_REPOSITORY_URL" +echo "GIT BRANCH $CI_COMMIT_REF_NAME" +echo "GIT COMMIT $CI_COMMIT_SHA" + +# Parameters of the Slurm jobs +TIME=01:00:00 +PART=court +CONS=MirielIB +EXCL= +NP=9 +JOBSLIM=1 + +function wait_completion { + # Wait for completion of jobs + echo "JOB_LIST $JOB_LIST" + while [ "$ITER" -ge "$JOBSLIM" ] + do + for JOB in $JOB_LIST + do + IS_JOB_IN_QUEUE=`squeue |grep "$JOB"` + if [[ -z "$IS_JOB_IN_QUEUE" ]] + then + ITER=$[ITER-1] + JOB_LIST=`echo $JOB_LIST | sed "s#$JOB##"` + echo "JOB $JOB finished" + else + echo "$IS_JOB_IN_QUEUE" + fi + done + sleep 30 + done +} + + +# Submit jobs +ITER=0 +JOB_ID=`JOB_NAME=chameleon_bench\_$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS --exclude=$EXCL 
chameleon_guix.sl | sed "s#Submitted batch job ##"` +if [[ -n "$JOB_ID" ]] +then + JOB_LIST="$JOB_LIST $JOB_ID" + ITER=$[ITER+1] +fi + +# Wait for completion of jobs +wait_completion + +# Print results +cat chameleon_bench\_$NP.out + +echo "####################### End Chameleon benchmarks #######################" + +exit 0 diff --git a/tools/bench/plafrim/sirocco/chameleon.xml b/tools/bench/plafrim/sirocco/chameleon.xml new file mode 100644 index 0000000000000000000000000000000000000000..341d035fb0dbdecfd4dcf8a3460cb43eb013ff28 --- /dev/null +++ b/tools/bench/plafrim/sirocco/chameleon.xml @@ -0,0 +1,112 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jube> + <benchmark name="sirocco" outpath="results/sirocco"> + <comment>benchmark chameleon on host plafrim sirocco</comment> + + <parameterset name="param_gemm"> + <parameter name="hostname" type="string">sirocco</parameter> + <parameter name="algorithm" type="string">gemm</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >29</parameter> + <parameter name="ngpu" type="int" >2</parameter> + <parameter name="b" type="int" >1600</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> + <parameter name="m" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="k" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + </parameterset> + + <parameterset name="param_potrf"> + <parameter name="hostname" 
type="string">sirocco</parameter> + <parameter name="algorithm" type="string">potrf</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >29</parameter> + <parameter name="ngpu" type="int" >2</parameter> + <parameter name="b" type="int" >1600</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> + <parameter name="m" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="k" type="int" >1</parameter> + </parameterset> + + <parameterset name="param_geqrf"> + <parameter name="hostname" type="string">sirocco</parameter> + <parameter name="algorithm" type="string">geqrf_hqr</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >29</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >1600</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> + <parameter name="m" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >[${p}*${b}, ${p}*5*${b}, ${p}*10*${b}, ${p}*20*${b}, ${p}*50*${b}][$i_mn]</parameter> + 
<parameter name="k" type="int" >1</parameter> + </parameterset> + + <!-- Operation --> + <step name="run_gemm" tag="gemm"> + <use>param_gemm</use> + <use from="../../jube/paths.xml">paths</use> + <do>STARPU_SILENT=1 mpiexec -np $nmpi $BIN_DIR/timing/time_${precision}${algorithm}_tile -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b --niter=3</do> + </step> + <step name="run_potrf" tag="potrf"> + <use>param_potrf</use> + <use from="../../jube/paths.xml">paths</use> + <do>STARPU_SILENT=1 mpiexec -np $nmpi $BIN_DIR/timing/time_${precision}${algorithm}_tile -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b --niter=3</do> + </step> + <step name="run_geqrf_hqr" tag="geqrf"> + <use>param_geqrf</use> + <use from="../../jube/paths.xml">paths</use> + <do>STARPU_SILENT=1 mpiexec -np $nmpi $BIN_DIR/timing/time_${precision}${algorithm}_tile -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b --niter=3</do> + </step> + + <!-- Analyse --> + <analyser name="analyse"> + <!-- use a pattern set --> + <use from="../../jube/patterns.xml">chameleon</use> + <analyse step="run_gemm" tag="gemm"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + <analyse step="run_potrf" tag="potrf"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + <analyse step="run_geqrf_hqr" tag="geqrf"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + </analyser> + + + <!-- Create result table --> + <result> + <use>analyse</use> <!-- use existing analyser --> + <!--<table name="result" style="csv" sort="number">--> + <table name="result" style="csv"> + <column>hostname</column> + <column>algorithm</column> + <column>precision</column> + <column>NMPI</column> + <column>P</column> + <column>Q</column> + <column>NTHREAD</column> + <column>NGPU</column> + <column>M</column> + <column>N</column> + <column>K</column> + <column>CPUTIME</column> + <column>GFLOPS</column> + <column>STDDEV</column> + </table> + </result> + </benchmark> +</jube> diff --git 
a/tools/bench/plafrim/sirocco/chameleon_guix.sh b/tools/bench/plafrim/sirocco/chameleon_guix.sh new file mode 100755 index 0000000000000000000000000000000000000000..a31d935ee83b87dbe533306febd66a62b3f0c4aa --- /dev/null +++ b/tools/bench/plafrim/sirocco/chameleon_guix.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -x + +# Configure and Build Chameleon +echo $VERSION +cd ../../../../build-$VERSION +CHAMELEON_DIR=`pwd` +cmake $BUILD_OPTIONS .. +make -j5 +cd - + +# Define where to find the build directory for jube +sed 's@{{CHAMELEON_DIR}}@'"${CHAMELEON_DIR}"'@g' -i ../../jube/paths.xml + +# Execute jube benchmarks +jube run chameleon.xml --tag gemm potrf geqrf +# jube analysis +jube analyse results/$VERSION/ +# jube report +jube result results/$VERSION/ -i last > chameleon.csv + +# send results to the elasticsearch server +export PYTHONPATH=$GUIX_ENVIRONMENT/lib/python3.7/site-packages +python3 ../../jube/add_result.py -e https://elasticsearch.bordeaux.inria.fr -t hiepacs -p "chameleon" -h $VERSION chameleon.csv diff --git a/tools/bench/plafrim/sirocco/chameleon_guix.sl b/tools/bench/plafrim/sirocco/chameleon_guix.sl new file mode 100644 index 0000000000000000000000000000000000000000..8c3dd9d6e85e5238d4fcd10ad329fd10bd33ef2b --- /dev/null +++ b/tools/bench/plafrim/sirocco/chameleon_guix.sl @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +#SBATCH --exclusive +#SBATCH --ntasks-per-node=1 +#SBATCH --threads-per-core=1 + +echo "######################### Chameleon benchmarks #########################" +echo "HOSTNAME $HOSTNAME" +echo "USERNAME $USERNAME" +echo "GIT REPO $CI_REPOSITORY_URL" +echo "GIT BRANCH $CI_COMMIT_REF_NAME" +echo "GIT COMMIT $CI_COMMIT_SHA" + +# to avoid a lock during fetching chameleon branch in parallel +export XDG_CACHE_HOME=/tmp/guix-$$ + +# save guix commits +guix describe --format=json > guix.json + +# Submit jobs +exec guix environment --pure --preserve=SLURM --preserve=VERSION --preserve=BUILD_OPTIONS chameleon --with-input=openblas=mkl --ad-hoc slurm jube 
python python-click python-gitpython python-elasticsearch python-certifi sed coreutils grep gawk openssh perl hwloc openmpi starpu mkl -- /bin/bash --norc chameleon_guix.sh + +echo "####################### End Chameleon benchmarks #######################" + +# clean tmp +rm -rf /tmp/guix-$$ diff --git a/tools/bench/plafrim/sirocco/run.sh b/tools/bench/plafrim/sirocco/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..5c3f5e3c2de3fb4e019ad99c237f66ba64991e3a --- /dev/null +++ b/tools/bench/plafrim/sirocco/run.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +echo "######################### Chameleon benchmarks #########################" +echo "HOSTNAME $HOSTNAME" +echo "USERNAME $USERNAME" +echo "GIT REPO $CI_REPOSITORY_URL" +echo "GIT BRANCH $CI_COMMIT_REF_NAME" +echo "GIT COMMIT $CI_COMMIT_SHA" + +# Parameters of the Slurm jobs +TIME=01:00:00 +PART=court_sirocco +CONS=Skylake +EXCL= +NP=1 +JOBSLIM=1 + +function wait_completion { + # Wait for completion of jobs + echo "JOB_LIST $JOB_LIST" + while [ "$ITER" -ge "$JOBSLIM" ] + do + for JOB in $JOB_LIST + do + IS_JOB_IN_QUEUE=`squeue |grep "$JOB"` + if [[ -z "$IS_JOB_IN_QUEUE" ]] + then + ITER=$[ITER-1] + JOB_LIST=`echo $JOB_LIST | sed "s#$JOB##"` + echo "JOB $JOB finished" + else + echo "$IS_JOB_IN_QUEUE" + fi + done + sleep 30 + done +} + + +# Submit jobs +ITER=0 +JOB_ID=`JOB_NAME=chameleon_bench\_$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS chameleon_guix.sl | sed "s#Submitted batch job ##"` +if [[ -n "$JOB_ID" ]] +then + JOB_LIST="$JOB_LIST $JOB_ID" + ITER=$[ITER+1] +fi + +# Wait for completion of jobs +wait_completion + +# Print results +cat chameleon_bench\_$NP.out + +echo "####################### End Chameleon benchmarks #######################" + +exit 0