From 17168e6f0bdf92f00cd26674031d265da492e071 Mon Sep 17 00:00:00 2001 From: PRUVOST Florent <florent.pruvost@inria.fr> Date: Fri, 28 Feb 2020 11:21:36 +0100 Subject: [PATCH] Update to new kibana server and scripts factorization --- .gitlab/bench_plafrim.yml | 67 ++++++----- .gitlab/common.yml | 20 ++++ tools/bench/chameleon_guix.sh | 13 +-- tools/bench/chameleon_guix_nmad.sl | 22 ---- tools/bench/chameleon_guix_openmpi.sl | 22 ---- tools/bench/jube/add_result.py | 50 ++++---- tools/bench/plafrim/bora/chameleon_nmad.xml | 108 ------------------ .../bench/plafrim/bora/chameleon_openmpi.xml | 108 ------------------ tools/bench/plafrim/bora/run.sh | 70 ------------ tools/bench/plafrim/chameleon.xml | 56 +++++++++ tools/bench/plafrim/miriel/chameleon_nmad.xml | 108 ------------------ .../plafrim/miriel/chameleon_openmpi.xml | 108 ------------------ tools/bench/plafrim/miriel/run.sh | 70 ------------ .../plafrim/parameters/bora/parameters.xml | 54 +++++++++ .../plafrim/parameters/miriel/parameters.xml | 54 +++++++++ .../plafrim/parameters/sirocco/parameters.xml | 54 +++++++++ tools/bench/plafrim/run.sh | 83 ++++++++++++++ tools/bench/plafrim/sirocco/chameleon.xml | 108 ------------------ tools/bench/plafrim/sirocco/run.sh | 70 ------------ tools/bench/plafrim/slurm.sh | 59 ++++++++++ 20 files changed, 450 insertions(+), 854 deletions(-) delete mode 100644 tools/bench/chameleon_guix_nmad.sl delete mode 100644 tools/bench/chameleon_guix_openmpi.sl delete mode 100644 tools/bench/plafrim/bora/chameleon_nmad.xml delete mode 100644 tools/bench/plafrim/bora/chameleon_openmpi.xml delete mode 100755 tools/bench/plafrim/bora/run.sh create mode 100644 tools/bench/plafrim/chameleon.xml delete mode 100644 tools/bench/plafrim/miriel/chameleon_nmad.xml delete mode 100644 tools/bench/plafrim/miriel/chameleon_openmpi.xml delete mode 100755 tools/bench/plafrim/miriel/run.sh create mode 100644 tools/bench/plafrim/parameters/bora/parameters.xml create mode 100644 tools/bench/plafrim/parameters/miriel/parameters.xml create mode 100644 tools/bench/plafrim/parameters/sirocco/parameters.xml create mode 100755 tools/bench/plafrim/run.sh delete mode 100644 tools/bench/plafrim/sirocco/chameleon.xml delete mode 100755 tools/bench/plafrim/sirocco/run.sh create mode 100755 tools/bench/plafrim/slurm.sh diff --git a/.gitlab/bench_plafrim.yml b/.gitlab/bench_plafrim.yml index 24d9f5a7e..4c9cb4394 100644 --- a/.gitlab/bench_plafrim.yml +++ b/.gitlab/bench_plafrim.yml @@ -1,29 +1,42 @@ -bench_plafrim_bora: - stage: test - tags: ['plafrim'] - before_script: - - git submodule update --init --recursive - script: - - ./tools/bench/plafrim/bora/run.sh - only: - - schedules +--- +bench_plafrim_bora_openmpi: + variables: + NODE: bora + MPI: openmpi + SLURM_NP: 9 + JUBE_ID: "000001" + extends: .bench_plafrim_common -.bench_plafrim_miriel: - stage: test - tags: ['plafrim'] - before_script: - - git submodule update --init --recursive - script: - - ./tools/bench/plafrim/miriel/run.sh - only: - - schedules +bench_plafrim_miriel_openmpi: + variables: + NODE: miriel + MPI: openmpi + SLURM_NP: 9 + JUBE_ID: "000002" + extends: .bench_plafrim_common -.bench_plafrim_sirocco: - stage: test - tags: ['plafrim'] - before_script: - - git submodule update --init --recursive - script: - - ./tools/bench/plafrim/sirocco/run.sh - only: - - schedules \ No newline at end of file +# Nmad segfaults for now, waiting for a stable version +.bench_plafrim_bora_nmad: + variables: + NODE: bora + MPI: nmad + SLURM_NP: 9 + JUBE_ID: "000003" + extends: .bench_plafrim_common + +.bench_plafrim_miriel_nmad: + variables: + NODE: miriel + MPI: nmad + SLURM_NP: 9 + JUBE_ID: "000004" + extends: .bench_plafrim_common + +# Chameleon+CUDA in Guix is not working yet +.bench_plafrim_sirocco_openmpi: + variables: + NODE: sirocco + MPI: openmpi + SLURM_NP: 1 + JUBE_ID: "000005" + extends: .bench_plafrim_common \ No newline at end of file diff --git a/.gitlab/common.yml b/.gitlab/common.yml index 5960f8c8b..ed2298842 100644 --- a/.gitlab/common.yml +++ b/.gitlab/common.yml @@ -63,3 +63,23 @@ - (cd build-$VERSION && lcov --directory . --capture --output-file ../bis_${LOGNAME}.lcov) except: - schedules + +.bench_plafrim_common: + only: + - schedules + stage: test + tags: ['plafrim'] + before_script: + - git submodule update --init --recursive + script: + - ./tools/bench/plafrim/run.sh + artifacts: + name: "$CI_JOB_NAME" + expire_in: 1 week + paths: + - "chameleon-$NODE-$MPI-$SLURM_NP.err" + - "chameleon-$NODE-$MPI-$SLURM_NP.out" + - "build-$NODE-$MPI/chameleon.csv" + - "tools/bench/plafrim/results/$JUBE_ID" + variables: + PLATFORM: plafrim diff --git a/tools/bench/chameleon_guix.sh b/tools/bench/chameleon_guix.sh index a120649c1..5c1c641f6 100755 --- a/tools/bench/chameleon_guix.sh +++ b/tools/bench/chameleon_guix.sh @@ -2,24 +2,23 @@ set -x -# this script depends on a MPI vendor: openmpi or nmad -MPI=$1 - # Configure and Build Chameleon mkdir -p $CI_PROJECT_DIR/build-$NODE-$MPI cp $CI_PROJECT_DIR/guix.json $CI_PROJECT_DIR/build-$NODE-$MPI/ cd $CI_PROJECT_DIR/build-$NODE-$MPI rm CMake* -rf -cmake $BUILD_OPTIONS .. +cmake $CHAMELEON_BUILD_OPTIONS .. make -j20 VERBOSE=1 export CHAMELEON_BUILD=$PWD +# clean old benchmarks +jube remove --id $JUBE_ID # Execute jube benchmarks -jube run $CI_PROJECT_DIR/tools/bench/$PLATFORM/$NODE/chameleon_$MPI.xml --tag gemm potrf geqrf +jube run $CI_PROJECT_DIR/tools/bench/$PLATFORM/chameleon.xml --tag gemm potrf geqrf --include-path $CI_PROJECT_DIR/tools/bench/$PLATFORM/parameters/$NODE --id $JUBE_ID # jube analysis -jube analyse $CI_PROJECT_DIR/tools/bench/$PLATFORM/$NODE/results/ +jube analyse $CI_PROJECT_DIR/tools/bench/$PLATFORM/results --id $JUBE_ID # jube report -jube result $CI_PROJECT_DIR/tools/bench/$PLATFORM/$NODE/results/ -i last > chameleon.csv +jube result $CI_PROJECT_DIR/tools/bench/$PLATFORM/results --id $JUBE_ID > chameleon.csv # send results to the elasticsearch server export PYTHONPATH=$GUIX_ENVIRONMENT/lib/python3.7/site-packages diff --git a/tools/bench/chameleon_guix_nmad.sl b/tools/bench/chameleon_guix_nmad.sl deleted file mode 100644 index b3b952de2..000000000 --- a/tools/bench/chameleon_guix_nmad.sl +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -#SBATCH --exclusive -#SBATCH --ntasks-per-node=1 -#SBATCH --threads-per-core=1 - -echo "######################### Chameleon benchmarks #########################" - -# to avoid a lock during fetching chameleon branch in parallel -export XDG_CACHE_HOME=/tmp/guix-$$ - -# save guix commits -guix describe --format=json > guix.json - -# Submit jobs - -# Nmad version -exec guix environment --pure --preserve=^CI --preserve=^SLURM --preserve=^STARPU --preserve=^PADICO --preserve=^PIOM --preserve=PLATFORM --preserve=NODE --preserve=BUILD_OPTIONS chameleon --with-input=openblas=mkl --with-input=openmpi=nmad --ad-hoc slurm jube python python-click python-gitpython python-elasticsearch python-certifi coreutils inetutils util-linux procps grep tar sed gzip which gawk perl zlib openssh hwloc nmad starpu mkl -- /bin/bash --norc $CI_PROJECT_DIR/tools/bench/chameleon_guix.sh nmad - -echo "####################### End Chameleon benchmarks #######################" - -# clean tmp -rm -rf /tmp/guix-$$ diff --git a/tools/bench/chameleon_guix_openmpi.sl b/tools/bench/chameleon_guix_openmpi.sl deleted file mode 100644 index fd84573af..000000000 --- a/tools/bench/chameleon_guix_openmpi.sl +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -#SBATCH --exclusive -#SBATCH --ntasks-per-node=1 -#SBATCH --threads-per-core=1 - -echo "######################### Chameleon benchmarks #########################" - -# to avoid a lock during fetching chameleon branch in parallel -export XDG_CACHE_HOME=/tmp/guix-$$ - -# save guix commits -guix describe --format=json > guix.json - -# Submit jobs - -# OpenMPI version -exec guix environment --pure --preserve=^CI --preserve=^SLURM --preserve=^STARPU --preserve=PLATFORM --preserve=NODE --preserve=BUILD_OPTIONS chameleon --with-input=openblas=mkl --ad-hoc slurm jube python python-click python-gitpython python-elasticsearch python-certifi sed coreutils grep gawk openssh perl hwloc openmpi starpu mkl -- /bin/bash --norc $CI_PROJECT_DIR/tools/bench/chameleon_guix.sh openmpi - -echo "####################### End Chameleon benchmarks #######################" - -# clean tmp -rm -rf /tmp/guix-$$ diff --git a/tools/bench/jube/add_result.py b/tools/bench/jube/add_result.py index 63a68d6d2..4f27dc483 100755 --- a/tools/bench/jube/add_result.py +++ b/tools/bench/jube/add_result.py @@ -92,37 +92,35 @@ def main( ): """Add a result to an elasticsearch database.""" es = Elasticsearch(elastic_url) - es_index = team + "_" + project + "_" + "perf" + es_index = team + "-" + project + "_" + "perf" if not es.indices.exists(es_index): es.indices.create(es_index) mapping_input = { - "result": { - "properties": { - "Commit_date_chameleon": {"type": "date", "format": "yyyy-MM-dd' 'HH:mm:ss"}, - "Commit_sha_chameleon": {"type": "keyword"}, - "Commit_sha_guix": {"type": "keyword"}, - "Commit_sha_guix_hpc": {"type": "keyword"}, - "Commit_sha_guix_hpcnonfree": {"type": "keyword"}, - "Hostname": {"type": "keyword"}, - "MPIvendor": {"type": "keyword"}, - "Algorithm": {"type": "keyword"}, - "Precision": {"type": "keyword"}, - "Nmpi": {"type": "integer"}, - "P": {"type": "integer"}, - "Q": {"type": "integer"}, - "Nthread": {"type": "integer"}, - "Ngpu": {"type": "integer"}, - "M": {"type": "integer"}, - "N": {"type": "integer"}, - "K": {"type": "integer"}, - "Cputime": {"type": "float"}, - "Gflops": {"type": "float"} - } + "properties": { + "Commit_date_chameleon": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}, + "Commit_sha_chameleon": {"type": "keyword"}, + "Commit_sha_guix": {"type": "keyword"}, + "Commit_sha_guix_hpc": {"type": "keyword"}, + "Commit_sha_guix_hpcnonfree": {"type": "keyword"}, + "Hostname": {"type": "keyword"}, + "MPIvendor": {"type": "keyword"}, + "Algorithm": {"type": "keyword"}, + "Precision": {"type": "keyword"}, + "Nmpi": {"type": "integer"}, + "P": {"type": "integer"}, + "Q": {"type": "integer"}, + "Nthread": {"type": "integer"}, + "Ngpu": {"type": "integer"}, + "M": {"type": "integer"}, + "N": {"type": "integer"}, + "K": {"type": "integer"}, + "Cputime": {"type": "float"}, + "Gflops": {"type": "float"} } + } - # es.indices.put_mapping(index=es_index, doc_type="result" , body=mapping_input, include_type_name=True) - es.indices.put_mapping(index=es_index, doc_type="result" , body=mapping_input) + es.indices.put_mapping(index=es_index, body=mapping_input) repo = Repo(directory, search_parent_directories=True) commit_chameleon = repo.head.commit @@ -147,7 +145,7 @@ def main( ) ] for request in requests: - es.index(index=es_index.lower(), doc_type="result", body=request) + es.index(index=es_index.lower(), body=request) if __name__ == "__main__": diff --git a/tools/bench/plafrim/bora/chameleon_nmad.xml b/tools/bench/plafrim/bora/chameleon_nmad.xml deleted file mode 100644 index 522da430c..000000000 --- a/tools/bench/plafrim/bora/chameleon_nmad.xml +++ /dev/null @@ -1,108 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<jube> - <benchmark name="bora" outpath="results"> - <comment>benchmark chameleon on host plafrim bora</comment> - - <parameterset name="param_gemm"> - <parameter name="hostname" type="string">bora</parameter> - <parameter name="algorithm" type="string">gemm</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >34</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - </parameterset> - - <parameterset name="param_potrf"> - <parameter name="hostname" type="string">bora</parameter> - <parameter name="algorithm" type="string">potrf</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >34</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <parameterset name="param_geqrf"> - <parameter name="hostname" type="string">bora</parameter> - <parameter name="algorithm" type="string">geqrf_hqr</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >34</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <!-- Operation --> - <step name="run_gemm" tag="gemm"> - <use>param_gemm</use> - <do>mpiexec -DPIOM_DEDICATED=1 -DPIOM_DEDICATED_WAIT=1 -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_potrf" tag="potrf"> - <use>param_potrf</use> - <do>mpiexec -DPIOM_DEDICATED=1 -DPIOM_DEDICATED_WAIT=1 -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_geqrf_hqr" tag="geqrf"> - <use>param_geqrf</use> - <do>mpiexec -DPIOM_DEDICATED=1 -DPIOM_DEDICATED_WAIT=1 -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - - <!-- Analyse --> - <analyser name="analyse"> - <!-- use a pattern set --> - <use from="../../jube/patterns.xml">chameleon</use> - <analyse step="run_gemm" tag="gemm"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_potrf" tag="potrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_geqrf_hqr" tag="geqrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - </analyser> - - - <!-- Create result table --> - <result> - <use>analyse</use> <!-- use existing analyser --> - <!--<table name="result" style="csv" sort="number">--> - <table name="result" style="csv"> - <column>hostname</column> - <column>algorithm</column> - <column>precision</column> - <column>nmpi</column> - <column>p</column> - <column>q</column> - <column>nthr</column> - <column>ngpu</column> - <column>m</column> - <column>n</column> - <column>k</column> - <column>cputime</column> - <column>gflops</column> - </table> - </result> - </benchmark> -</jube> diff --git a/tools/bench/plafrim/bora/chameleon_openmpi.xml b/tools/bench/plafrim/bora/chameleon_openmpi.xml deleted file mode 100644 index fe80ac54b..000000000 --- a/tools/bench/plafrim/bora/chameleon_openmpi.xml +++ /dev/null @@ -1,108 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<jube> - <benchmark name="bora" outpath="results"> - <comment>benchmark chameleon on host plafrim bora</comment> - - <parameterset name="param_gemm"> - <parameter name="hostname" type="string">bora</parameter> - <parameter name="algorithm" type="string">gemm</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >34</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - </parameterset> - - <parameterset name="param_potrf"> - <parameter name="hostname" type="string">bora</parameter> - <parameter name="algorithm" type="string">potrf</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >34</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <parameterset name="param_geqrf"> - <parameter name="hostname" type="string">bora</parameter> - <parameter name="algorithm" type="string">geqrf_hqr</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >34</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <!-- Operation --> - <step name="run_gemm" tag="gemm"> - <use>param_gemm</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_potrf" tag="potrf"> - <use>param_potrf</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_geqrf_hqr" tag="geqrf"> - <use>param_geqrf</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - - <!-- Analyse --> - <analyser name="analyse"> - <!-- use a pattern set --> - <use from="../../jube/patterns.xml">chameleon</use> - <analyse step="run_gemm" tag="gemm"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_potrf" tag="potrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_geqrf_hqr" tag="geqrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - </analyser> - - - <!-- Create result table --> - <result> - <use>analyse</use> <!-- use existing analyser --> - <!--<table name="result" style="csv" sort="number">--> - <table name="result" style="csv"> - <column>hostname</column> - <column>algorithm</column> - <column>precision</column> - <column>nmpi</column> - <column>p</column> - <column>q</column> - <column>nthr</column> - <column>ngpu</column> - <column>m</column> - <column>n</column> - <column>k</column> - <column>cputime</column> - <column>gflops</column> - </table> - </result> - </benchmark> -</jube> diff --git a/tools/bench/plafrim/bora/run.sh b/tools/bench/plafrim/bora/run.sh deleted file mode 100755 index baf9e7424..000000000 --- a/tools/bench/plafrim/bora/run.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash - -echo "######################### Chameleon benchmarks #########################" -echo "HOSTNAME $HOSTNAME" -echo "USERNAME $USERNAME" -echo "GIT REPO $CI_REPOSITORY_URL" -echo "GIT BRANCH $CI_COMMIT_REF_NAME" -echo "GIT COMMIT $CI_COMMIT_SHA" -echo "PROJECT DIR $CI_PROJECT_DIR" - -set -x - -function wait_completion { - # Wait for completion of jobs - echo "JOB_LIST $JOB_LIST" - while [ "$NJOB" -gt 0 ] - do - for JOB in $JOB_LIST - do - IS_JOB_IN_QUEUE=`squeue |grep "$JOB"` - if [[ -z "$IS_JOB_IN_QUEUE" ]] - then - NJOB=$[NJOB-1] - JOB_LIST=`echo $JOB_LIST | sed "s#$JOB##"` - echo "JOB $JOB finished" - else - echo "$IS_JOB_IN_QUEUE" - fi - done - sleep 30 - done -} - -# Parameters for scripts -export PLATFORM=plafrim -export NODE=bora -export BUILD_OPTIONS="-DCHAMELEON_USE_MPI=ON -DCMAKE_BUILD_TYPE=Release" -export STARPU_SILENT=1 -#export STARPU_LIMIT_CPU_MEM=180000 -#export STARPU_LIMIT_MAX_SUBMITTED_TASKS=16000 -#export STARPU_LIMIT_MIN_SUBMITTED_TASKS=15000 - -# Parameters of the Slurm jobs -TIME=01:00:00 -PART=routage -CONS=bora -EXCL= -NP=9 - -# Submit jobs -NJOB=0 -MPI_LIST="openmpi nmad" -#MPI_LIST="nmad" -for MPI in $MPI_LIST -do - JOB_ID=`JOB_NAME=chameleon\_$MPI\_$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS --exclude=$EXCL $CI_PROJECT_DIR/tools/bench/chameleon_guix\_$MPI.sl | sed "s#Submitted batch job ##"` - #JOB_ID=`JOB_NAME=chameleon\_$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS --exclude=$EXCL chameleon_guix.sl | sed "s#Submitted batch job ##"` - if [[ -n "$JOB_ID" ]] - then - JOB_LIST="$JOB_LIST $JOB_ID" - NJOB=$[NJOB+1] - fi -done - -# Wait for completion of jobs -wait_completion - -echo "####################### End Chameleon benchmarks #######################" - -exit 0 diff --git a/tools/bench/plafrim/chameleon.xml b/tools/bench/plafrim/chameleon.xml new file mode 100644 index 000000000..1fd85508b --- /dev/null +++ b/tools/bench/plafrim/chameleon.xml @@ -0,0 +1,56 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jube> + <benchmark name="plafrim" outpath="results"> + <comment>benchmark chameleon on plafrim</comment> + + <!-- Operation --> + <step name="run_gemm" tag="gemm"> + <use from= "parameters.xml">param_gemm</use> + <do>$command</do> + </step> + <step name="run_potrf" tag="potrf"> + <use from= "parameters.xml">param_potrf</use> + <do>$command</do> + </step> + <step name="run_geqrf_hqr" tag="geqrf"> + <use from= "parameters.xml">param_geqrf</use> + <do>$command</do> + </step> + + <!-- Analyse --> + <analyser name="analyse"> + <!-- use a pattern set --> + <use from="../jube/patterns.xml">chameleon</use> + <analyse step="run_gemm" tag="gemm"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + <analyse step="run_potrf" tag="potrf"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + <analyse step="run_geqrf_hqr" tag="geqrf"> + <file>stdout</file> <!-- file which should be scanned --> + </analyse> + </analyser> + + <!-- Create result table --> + <result> + <use>analyse</use> <!-- use existing analyser --> + <!--<table name="result" style="csv" sort="number">--> + <table name="result" style="csv"> + <column>hostname</column> + <column>algorithm</column> + <column>precision</column> + <column>nmpi</column> + <column>p</column> + <column>q</column> + <column>nthr</column> + <column>ngpu</column> + <column>m</column> + <column>n</column> + <column>k</column> + <column>cputime</column> + <column>gflops</column> + </table> + </result> + </benchmark> +</jube> diff --git a/tools/bench/plafrim/miriel/chameleon_nmad.xml b/tools/bench/plafrim/miriel/chameleon_nmad.xml deleted file mode 100644 index 28be9ef0b..000000000 --- a/tools/bench/plafrim/miriel/chameleon_nmad.xml +++ /dev/null @@ -1,108 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<jube> - <benchmark name="miriel" outpath="results/miriel"> - <comment>benchmark chameleon on host plafrim miriel</comment> - - <parameterset name="param_gemm"> - <parameter name="hostname" type="string">miriel</parameter> - <parameter name="algorithm" type="string">gemm</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >22</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - </parameterset> - - <parameterset name="param_potrf"> - <parameter name="hostname" type="string">miriel</parameter> - <parameter name="algorithm" type="string">potrf</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >22</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <parameterset name="param_geqrf"> - <parameter name="hostname" type="string">miriel</parameter> - <parameter name="algorithm" type="string">geqrf_hqr</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >22</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <!-- Operation --> - <step name="run_gemm" tag="gemm"> - <use>param_gemm</use> - <do>mpiexec -DPIOM_DEDICATED=1 -DPIOM_DEDICATED_WAIT=1 -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_potrf" tag="potrf"> - <use>param_potrf</use> - <do>mpiexec -DPIOM_DEDICATED=1 -DPIOM_DEDICATED_WAIT=1 -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_geqrf_hqr" tag="geqrf"> - <use>param_geqrf</use> - <do>mpiexec -DPIOM_DEDICATED=1 -DPIOM_DEDICATED_WAIT=1 -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - - <!-- Analyse --> - <analyser name="analyse"> - <!-- use a pattern set --> - <use from="../../jube/patterns.xml">chameleon</use> - <analyse step="run_gemm" tag="gemm"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_potrf" tag="potrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_geqrf_hqr" tag="geqrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - </analyser> - - - <!-- Create result table --> - <result> - <use>analyse</use> <!-- use existing analyser --> - <!--<table name="result" style="csv" sort="number">--> - <table name="result" style="csv"> - <column>hostname</column> - <column>algorithm</column> - <column>precision</column> - <column>nmpi</column> - <column>p</column> - <column>q</column> - <column>nthr</column> - <column>ngpu</column> - <column>m</column> - <column>n</column> - <column>k</column> - <column>cputime</column> - <column>gflops</column> - </table> - </result> - </benchmark> -</jube> diff --git a/tools/bench/plafrim/miriel/chameleon_openmpi.xml b/tools/bench/plafrim/miriel/chameleon_openmpi.xml deleted file mode 100644 index d6b70cc84..000000000 --- a/tools/bench/plafrim/miriel/chameleon_openmpi.xml +++ /dev/null @@ -1,108 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<jube> - <benchmark name="miriel" outpath="results/miriel"> - <comment>benchmark chameleon on host plafrim miriel</comment> - - <parameterset name="param_gemm"> - <parameter name="hostname" type="string">miriel</parameter> - <parameter name="algorithm" type="string">gemm</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >22</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - </parameterset> - - <parameterset name="param_potrf"> - <parameter name="hostname" type="string">miriel</parameter> - <parameter name="algorithm" type="string">potrf</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >22</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <parameterset name="param_geqrf"> - <parameter name="hostname" type="string">miriel</parameter> - <parameter name="algorithm" type="string">geqrf_hqr</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0, 1, 2</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >22</parameter> - <parameter name="ngpu" type="int" >0</parameter> - <parameter name="b" type="int" >320</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <!-- Operation --> - <step name="run_gemm" tag="gemm"> - <use>param_gemm</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_potrf" tag="potrf"> - <use>param_potrf</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_geqrf_hqr" tag="geqrf"> - <use>param_geqrf</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - - <!-- Analyse --> - <analyser name="analyse"> - <!-- use a pattern set --> - <use from="../../jube/patterns.xml">chameleon</use> - <analyse step="run_gemm" tag="gemm"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_potrf" tag="potrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_geqrf_hqr" tag="geqrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - </analyser> - - - <!-- Create result table --> - <result> - <use>analyse</use> <!-- use existing analyser --> - <!--<table name="result" style="csv" sort="number">--> - <table name="result" style="csv"> - <column>hostname</column> - <column>algorithm</column> - <column>precision</column> - <column>nmpi</column> - <column>p</column> - <column>q</column> - <column>nthr</column> - <column>ngpu</column> - <column>m</column> - <column>n</column> - <column>k</column> - <column>cputime</column> - <column>gflops</column> - </table> - </result> - </benchmark> -</jube> diff --git a/tools/bench/plafrim/miriel/run.sh b/tools/bench/plafrim/miriel/run.sh deleted file mode 100755 index d37974b03..000000000 --- a/tools/bench/plafrim/miriel/run.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash - -echo "######################### Chameleon benchmarks #########################" -echo "HOSTNAME $HOSTNAME" -echo "USERNAME $USERNAME" -echo "GIT REPO $CI_REPOSITORY_URL" -echo "GIT BRANCH $CI_COMMIT_REF_NAME" -echo "GIT COMMIT $CI_COMMIT_SHA" -echo "PROJECT DIR $CI_PROJECT_DIR" - -set -x - -function wait_completion { - # Wait for completion of jobs - echo "JOB_LIST $JOB_LIST" - while [ "$NJOB" -gt 0 ] - do - for JOB in $JOB_LIST - do - IS_JOB_IN_QUEUE=`squeue |grep "$JOB"` - if [[ -z "$IS_JOB_IN_QUEUE" ]] - then - NJOB=$[NJOB-1] - JOB_LIST=`echo $JOB_LIST | sed "s#$JOB##"` - echo "JOB $JOB finished" - else - echo "$IS_JOB_IN_QUEUE" - fi - done - sleep 30 - done -} - -# Parameters for scripts -export PLATFORM=plafrim -export NODE=miriel -export BUILD_OPTIONS="-DCHAMELEON_USE_MPI=ON -DCMAKE_BUILD_TYPE=Release" -export STARPU_SILENT=1 -export STARPU_LIMIT_CPU_MEM=120000 -export STARPU_LIMIT_MAX_SUBMITTED_TASKS=16000 -export STARPU_LIMIT_MIN_SUBMITTED_TASKS=15000 - -# Parameters of the Slurm jobs -TIME=01:00:00 -PART=routage -CONS="miriel,omnipath" -EXCL= -NP=9 - -# Submit jobs -NJOB=0 -MPI_LIST="openmpi nmad" -#MPI_LIST="nmad" -for MPI in $MPI_LIST -do - JOB_ID=`JOB_NAME=chameleon\_$MPI\_$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS --exclude=$EXCL $CI_PROJECT_DIR/tools/bench/chameleon_guix\_$MPI.sl | sed "s#Submitted batch job ##"` - #JOB_ID=`JOB_NAME=chameleon\_$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS --exclude=$EXCL chameleon_guix.sl | sed "s#Submitted batch job ##"` - if [[ -n "$JOB_ID" ]] - then - JOB_LIST="$JOB_LIST $JOB_ID" - NJOB=$[NJOB+1] - fi -done - -# Wait for completion of jobs -wait_completion - -echo "####################### End Chameleon benchmarks #######################" - -exit 0 diff --git a/tools/bench/plafrim/parameters/bora/parameters.xml b/tools/bench/plafrim/parameters/bora/parameters.xml new file mode 100644 index 000000000..e7e4f1486 --- /dev/null +++ b/tools/bench/plafrim/parameters/bora/parameters.xml @@ -0,0 +1,54 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jube> + <parameterset name="param_gemm"> + <parameter name="hostname" type="string">bora</parameter> + <parameter name="algorithm" type="string">gemm</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >34</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4, 5</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*2*${b}, ${nmpi}*4*${b}, ${nmpi}*8*${b}, ${nmpi}*16*${b}, ${nmpi}*32*${b}][$i_mn]</parameter> + <parameter name="k" mode="python" type="int" >${m}</parameter> + <parameter name="n" mode="python" type="int" >${m}</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> + <parameterset name="param_potrf"> + <parameter name="hostname" type="string">bora</parameter> + <parameter name="algorithm" type="string">potrf</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >34</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4, 5</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*2*${b}, ${nmpi}*4*${b}, ${nmpi}*8*${b}, ${nmpi}*16*${b}, ${nmpi}*32*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >${m}</parameter> + <parameter name="k" type="int" >1</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> + <parameterset name="param_geqrf"> + <parameter name="hostname" type="string">bora</parameter> + <parameter name="algorithm" type="string">geqrf_hqr</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >34</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4, 5</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*2*${b}, ${nmpi}*4*${b}, ${nmpi}*8*${b}, ${nmpi}*16*${b}, ${nmpi}*32*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >${m}</parameter> + <parameter name="k" type="int" >1</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> +</jube> diff --git a/tools/bench/plafrim/parameters/miriel/parameters.xml b/tools/bench/plafrim/parameters/miriel/parameters.xml new file mode 100644 index 000000000..00622ac1a --- /dev/null +++ b/tools/bench/plafrim/parameters/miriel/parameters.xml @@ -0,0 +1,54 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jube> + <parameterset name="param_gemm"> + <parameter name="hostname" type="string">miriel</parameter> + <parameter name="algorithm" type="string">gemm</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >22</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4, 5</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*2*${b}, ${nmpi}*4*${b}, ${nmpi}*8*${b}, ${nmpi}*16*${b}, ${nmpi}*24*${b}][$i_mn]</parameter> + <parameter name="k" mode="python" type="int" >${m}</parameter> + <parameter name="n" mode="python" type="int" >${m}</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> + <parameterset name="param_potrf"> + <parameter name="hostname" type="string">miriel</parameter> + <parameter name="algorithm" type="string">potrf</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >22</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4, 5</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*2*${b}, ${nmpi}*4*${b}, ${nmpi}*8*${b}, ${nmpi}*16*${b}, ${nmpi}*24*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >${m}</parameter> + <parameter name="k" type="int" >1</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> + <parameterset name="param_geqrf"> + <parameter name="hostname" type="string">miriel</parameter> + <parameter name="algorithm" type="string">geqrf_hqr</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0, 1, 2</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >22</parameter> + <parameter name="ngpu" type="int" >0</parameter> + <parameter name="b" type="int" >320</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4, 5</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*2*${b}, ${nmpi}*4*${b}, ${nmpi}*8*${b}, ${nmpi}*16*${b}, ${nmpi}*24*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >${m}</parameter> + <parameter name="k" type="int" >1</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> +</jube> diff --git a/tools/bench/plafrim/parameters/sirocco/parameters.xml b/tools/bench/plafrim/parameters/sirocco/parameters.xml new file mode 100644 index 000000000..873195b22 --- /dev/null +++ b/tools/bench/plafrim/parameters/sirocco/parameters.xml @@ -0,0 +1,54 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jube> + <parameterset name="param_gemm"> + <parameter name="hostname" type="string">sirocco</parameter> + <parameter name="algorithm" type="string">gemm</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >29</parameter> + <parameter name="ngpu" type="int" >2</parameter> + <parameter name="b" type="int" >1600</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> + <parameter name="k" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> + <parameterset name="param_potrf"> + <parameter name="hostname" type="string">sirocco</parameter> + <parameter name="algorithm" type="string">potrf</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >29</parameter> + <parameter name="ngpu" type="int" >2</parameter> + <parameter name="b" type="int" >1600</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4, 5</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*2*${b}, ${nmpi}*4*${b}, ${nmpi}*8*${b}, ${nmpi}*16*${b}, ${nmpi}*32*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >${m}</parameter> + <parameter name="k" type="int" >1</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> + <parameterset name="param_geqrf"> + <parameter name="hostname" type="string">sirocco</parameter> + <parameter name="algorithm" type="string">geqrf_hqr</parameter> + <parameter name="precision" type="string">s, d</parameter> + <parameter name="i_pq" type="int" >0</parameter> + <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> + <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> + <parameter name="nthr" type="int" >29</parameter> + <parameter name="ngpu" type="int" >2</parameter> + <parameter name="b" type="int" >1600</parameter> + <parameter name="i_mn" type="int" >0, 1, 2, 3, 4, 5</parameter> + <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*2*${b}, ${nmpi}*4*${b}, ${nmpi}*8*${b}, ${nmpi}*16*${b}, ${nmpi}*32*${b}][$i_mn]</parameter> + <parameter name="n" mode="python" type="int" >${m}</parameter> + <parameter name="k" type="int" >1</parameter> + <parameter name="command" type="string">mpiexec $MPI_OPTIONS -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</parameter> + </parameterset> +</jube> diff --git a/tools/bench/plafrim/run.sh b/tools/bench/plafrim/run.sh new file mode 100755 index 000000000..e33126df5 --- /dev/null +++ b/tools/bench/plafrim/run.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +echo "######################### Chameleon benchmarks #########################" + +set -x + +# to avoid a lock during fetching chameleon branch in parallel +export XDG_CACHE_HOME=/tmp/guix-$$ + +# save guix commits +guix describe --format=json > guix.json + +# define env var depending on the node type +export STARPU_SILENT=1 +export STARPU_CALIBRATE=0 +export STARPU_COMM_STATS=1 +export STARPU_WORKER_STATS=1 +if [ $NODE = "bora" ] +then + export SLURM_CONSTRAINTS="bora,omnipath" + export CHAMELEON_BUILD_OPTIONS="-DCHAMELEON_USE_MPI=ON -DCMAKE_BUILD_TYPE=Release" + export STARPU_HOSTNAME="bora" +elif [ $NODE = "miriel" ] +then + export SLURM_CONSTRAINTS="miriel,infinipath" + export CHAMELEON_BUILD_OPTIONS="-DCHAMELEON_USE_MPI=ON -DCMAKE_BUILD_TYPE=Release" + export STARPU_HOSTNAME="miriel" +elif [ $NODE = "sirocco" ] +then + export SLURM_CONSTRAINTS="sirocco,omnipath,v100" + export CHAMELEON_BUILD_OPTIONS="-DCHAMELEON_USE_MPI=ON -DCHAMELEON_USE_CUDA=ON -DCMAKE_BUILD_TYPE=Release" + export STARPU_HOSTNAME="sirocco" +else + echo "$0: Please set the NODE environnement variable to bora or miriel or sirocco." + exit -1 +fi + +# define env var and guix rule to use depending on the mpi vendor +GUIX_ENV="chameleon --with-input=openblas=mkl" +if [ $NODE = "sirocco" ] +then + GUIX_ENV="chameleon-cuda --with-input=openblas=mkl" +fi +if [ $MPI = "openmpi" ] +then + export MPI_OPTIONS="" + if [ $NODE = "miriel" ] + then + export MPI_OPTIONS="--mca mtl psm" + fi + GUIX_ENV_MPI="" + GUIX_ADHOC_MPI="openssh openmpi" +elif [ $MPI = "nmad" ] +then + export MPI_OPTIONS="-DPIOM_DEDICATED=1 -DPIOM_DEDICATED_WAIT=1" + GUIX_ENV_MPI="--with-input=openmpi=nmad" + GUIX_ADHOC_MPI="which gzip zlib tar inetutils util-linux procps openssh nmad" +else + echo "$0: Please set the MPI environnement variable to openmpi or nmad." + exit -1 +fi +GUIX_ADHOC="slurm jube python python-click python-gitpython python-elasticsearch python-certifi sed coreutils grep gawk perl" +GUIX_RULE="$GUIX_ENV $GUIX_ENV_MPI --ad-hoc $GUIX_ADHOC $GUIX_ADHOC_MPI" + +# Submit jobs + +# OpenMPI version +exec guix environment --pure \ + --preserve=PLATFORM \ + --preserve=NODE \ + --preserve=^CI \ + --preserve=^SLURM \ + --preserve=^JUBE \ + --preserve=^MPI \ + --preserve=^STARPU \ + --preserve=^CHAMELEON \ + $GUIX_RULE \ + -- /bin/bash --norc ./tools/bench/plafrim/slurm.sh + +echo "####################### End Chameleon benchmarks #######################" + +# clean tmp +rm -rf /tmp/guix-$$ \ No newline at end of file diff --git a/tools/bench/plafrim/sirocco/chameleon.xml b/tools/bench/plafrim/sirocco/chameleon.xml deleted file mode 100644 index fda2c21c7..000000000 --- a/tools/bench/plafrim/sirocco/chameleon.xml +++ /dev/null @@ -1,108 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<jube> - <benchmark name="sirocco" outpath="results/sirocco"> - <comment>benchmark chameleon on host plafrim sirocco</comment> - - <parameterset name="param_gemm"> - <parameter name="hostname" type="string">sirocco</parameter> - <parameter name="algorithm" type="string">gemm</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >29</parameter> - <parameter name="ngpu" type="int" >2</parameter> - <parameter name="b" type="int" >1600</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - </parameterset> - - <parameterset name="param_potrf"> - <parameter name="hostname" type="string">sirocco</parameter> - <parameter name="algorithm" type="string">potrf</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >29</parameter> - <parameter name="ngpu" type="int" >2</parameter> - <parameter name="b" type="int" >1600</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <parameterset name="param_geqrf"> - <parameter name="hostname" type="string">sirocco</parameter> - <parameter name="algorithm" type="string">geqrf_hqr</parameter> - <parameter name="precision" type="string">s, d</parameter> - <parameter name="i_pq" type="int" >0</parameter> - <parameter name="p" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="q" mode="python" type="int" >[1, 2, 3][$i_pq]</parameter> - <parameter name="nmpi" mode="python" type="int" >[1, 4, 9][$i_pq]</parameter> - <parameter name="nthr" type="int" >29</parameter> - <parameter name="ngpu" type="int" >2</parameter> - <parameter name="b" type="int" >1600</parameter> - <parameter name="i_mn" type="int" >0, 1, 2, 3, 4</parameter> - <parameter name="m" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="n" mode="python" type="int" >[${nmpi}*${b}, ${nmpi}*5*${b}, ${nmpi}*10*${b}, ${nmpi}*20*${b}, ${nmpi}*40*${b}][$i_mn]</parameter> - <parameter name="k" type="int" >1</parameter> - </parameterset> - - <!-- Operation --> - <step name="run_gemm" tag="gemm"> - <use>param_gemm</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_potrf" tag="potrf"> - <use>param_potrf</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - <step name="run_geqrf_hqr" tag="geqrf"> - <use>param_geqrf</use> - <do>mpiexec -np $nmpi $CHAMELEON_BUILD/new-testing/${precision}new-testing -o ${algorithm} -P $p -t $nthr -g $ngpu -m $m -n $n -k $k -b $b</do> - </step> - - <!-- Analyse --> - <analyser name="analyse"> - <!-- use a pattern set --> - <use from="../../jube/patterns.xml">chameleon</use> - <analyse step="run_gemm" tag="gemm"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_potrf" tag="potrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - <analyse step="run_geqrf_hqr" tag="geqrf"> - <file>stdout</file> <!-- file which should be scanned --> - </analyse> - </analyser> - - - <!-- Create result table --> - <result> - <use>analyse</use> <!-- use existing analyser --> - <!--<table name="result" style="csv" sort="number">--> - <table name="result" style="csv"> - <column>hostname</column> - <column>algorithm</column> - <column>precision</column> - <column>nmpi</column> - <column>p</column> - <column>q</column> - <column>nthr</column> - <column>ngpu</column> - <column>m</column> - <column>n</column> - <column>k</column> - <column>cputime</column> - <column>gflops</column> - </table> - </result> - </benchmark> -</jube> diff --git a/tools/bench/plafrim/sirocco/run.sh b/tools/bench/plafrim/sirocco/run.sh deleted file mode 100755 index e082b5a79..000000000 --- a/tools/bench/plafrim/sirocco/run.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash - -echo "######################### Chameleon benchmarks #########################" -echo "HOSTNAME $HOSTNAME" -echo "USERNAME $USERNAME" -echo "GIT REPO $CI_REPOSITORY_URL" -echo "GIT BRANCH $CI_COMMIT_REF_NAME" -echo "GIT COMMIT $CI_COMMIT_SHA" -echo "PROJECT DIR $CI_PROJECT_DIR" - -set -x - -function wait_completion { - # Wait for completion of jobs - echo "JOB_LIST $JOB_LIST" - while [ "$NJOB" -gt 0 ] - do - for JOB in $JOB_LIST - do - IS_JOB_IN_QUEUE=`squeue |grep "$JOB"` - if [[ -z "$IS_JOB_IN_QUEUE" ]] - then - NJOB=$[NJOB-1] - JOB_LIST=`echo $JOB_LIST | sed "s#$JOB##"` - echo "JOB $JOB finished" - else - echo "$IS_JOB_IN_QUEUE" - fi - done - sleep 30 - done -} - -# Parameters for scripts -export PLATFORM=plafrim -export NODE=v100 -export BUILD_OPTIONS="-DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=ON -DCMAKE_BUILD_TYPE=Release" -export STARPU_SILENT=1 -export STARPU_LIMIT_CPU_MEM=370000 -export STARPU_LIMIT_MAX_SUBMITTED_TASKS=16000 -export STARPU_LIMIT_MIN_SUBMITTED_TASKS=15000 - -# Parameters of the Slurm jobs -TIME=01:00:00 -PART=routage -CONS=v100 -EXCL= -NP=1 - -# Submit jobs -NJOB=0 -#MPI_LIST="openmpi nmad" -MPI_LIST="openmpi" -for MPI in $MPI_LIST -do - JOB_ID=`JOB_NAME=chameleon\_$MPI\_$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS --exclude=$EXCL $CI_PROJECT_DIR/tools/bench/chameleon_guix\_$MPI\_cuda.sl | sed "s#Submitted batch job ##"` - #JOB_ID=`JOB_NAME=chameleon\_$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS --exclude=$EXCL chameleon_guix.sl | sed "s#Submitted batch job ##"` - if [[ -n "$JOB_ID" ]] - then - JOB_LIST="$JOB_LIST $JOB_ID" - NJOB=$[NJOB+1] - fi -done - -# Wait for completion of jobs -wait_completion - -echo "####################### End Chameleon benchmarks #######################" - -exit 0 diff --git a/tools/bench/plafrim/slurm.sh b/tools/bench/plafrim/slurm.sh new file mode 100755 index 000000000..f64c4ff69 --- /dev/null +++ b/tools/bench/plafrim/slurm.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +echo "######################### Chameleon benchmarks #########################" + +# Check the environment +echo $PLATFORM +echo $NODE +env |grep ^CI +env |grep ^SLURM +env |grep ^JUBE_ID +env |grep ^MPI +env |grep ^STARPU +env |grep ^CHAMELEON + +set -x + +function wait_completion { + # Wait for completion of jobs + echo "JOB_LIST $JOB_LIST" + while [ "$NJOB" -gt 0 ] + do + for JOB in $JOB_LIST + do + IS_JOB_IN_QUEUE=`squeue |grep "$JOB"` + if [[ -z "$IS_JOB_IN_QUEUE" ]] + then + NJOB=$[NJOB-1] + JOB_LIST=`echo $JOB_LIST | sed "s#$JOB##"` + echo "JOB $JOB finished" + else + echo "$IS_JOB_IN_QUEUE" + fi + done + sleep 30 + done +} + +# Parameters of the Slurm jobs +TIME=02:00:00 +PART=routage +NP=$SLURM_NP +CONS=$SLURM_CONSTRAINTS +EXCL= + +# Submit jobs +NJOB=0 +JOB_ID=`JOB_NAME=chameleon\-$NODE\-$MPI\-$NP && sbatch --job-name="$JOB_NAME" --output="$JOB_NAME.out" --error="$JOB_NAME.err" --nodes=$NP --time=$TIME --partition=$PART --constraint=$CONS --exclude=$EXCL --exclusive --ntasks-per-node=1 --threads-per-core=1 $CI_PROJECT_DIR/tools/bench/chameleon_guix.sh | sed "s#Submitted batch job ##"` +if [[ -n "$JOB_ID" ]] +then + JOB_LIST="$JOB_LIST $JOB_ID" + NJOB=$[NJOB+1] +fi + +# Wait for completion of jobs +wait_completion + +echo "####################### End Chameleon benchmarks #######################" + +exit 0 -- GitLab