From dcb38b66c2d6de1d84675dc1069d990fb05ee9b6 Mon Sep 17 00:00:00 2001 From: Florent Pruvost <florent.pruvost@inria.fr> Date: Wed, 18 Dec 2024 08:41:11 +0100 Subject: [PATCH] ci: add test_starpu_cuda --- .gitlab-ci.yml | 1 + .gitlab/build.sh | 2 +- .gitlab/build.yml | 26 +++++++++++++------ .gitlab/common.yml | 1 + .gitlab/sbatch.sh | 29 +++++++++++++++++++++ .gitlab/test.sh | 1 - .gitlab/test_starpu.yml | 8 +++--- .gitlab/test_starpu_plafrim.yml | 46 +++++++++++++++++++++++++++++++++ 8 files changed, 101 insertions(+), 13 deletions(-) create mode 100755 .gitlab/sbatch.sh create mode 100644 .gitlab/test_starpu_plafrim.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e01e26e32..270076d74 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -14,6 +14,7 @@ include: - .gitlab/docker.yml - .gitlab/build.yml - .gitlab/test_starpu.yml + - .gitlab/test_starpu_plafrim.yml - .gitlab/test_starpu_simgrid.yml - .gitlab/test_parsec.yml - .gitlab/test_quark.yml diff --git a/.gitlab/build.sh b/.gitlab/build.sh index 0d17fd144..e631b37f2 100755 --- a/.gitlab/build.sh +++ b/.gitlab/build.sh @@ -59,7 +59,7 @@ case $SYSTEM in esac # Compile -eval '${SCAN}cmake --build build-${VERSION} -j 4 > /dev/null' +eval '${SCAN}cmake --build build-${VERSION} -j ${CMAKE_BUILD_PARALLEL_LEVEL} > /dev/null' # Install cmake --install build-${VERSION} diff --git a/.gitlab/build.yml b/.gitlab/build.yml index e5d674332..2d449df9d 100644 --- a/.gitlab/build.yml +++ b/.gitlab/build.yml @@ -2,6 +2,8 @@ .build_script_common: stage: build extends: .only-master-mr + variables: + CMAKE_BUILD_PARALLEL_LEVEL: 4 artifacts: name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" expire_in: 1 week @@ -10,6 +12,8 @@ .build_script_linux: extends: .build_script_common + variables: + SYSTEM: linux script: - bash .gitlab/build.sh | tee ${LOGNAME}.log @@ -56,29 +60,31 @@ build_starpu: .build_script_guix: tags: ['plafrim'] extends: .build_script_common + variables: + SYSTEM: guix + CMAKE_BUILD_PARALLEL_LEVEL: 20 script: - guix 
describe -f channels - - guix shell --pure --preserve=SYSTEM --preserve=VERSION --preserve=LOGNAME --preserve=BUILD_OPTIONS - -D chameleon-${GUIX_CHAMELEON_VARIANT} coreutils bash-minimal -- + - guix shell --pure --preserve=SYSTEM --preserve=VERSION --preserve=LOGNAME --preserve=BUILD_OPTIONS --preserve=CMAKE_BUILD_PARALLEL_LEVEL + -D chameleon-${GPU_BACKEND} ${BLAS} coreutils bash-minimal -- bash .gitlab/build.sh | tee ${LOGNAME}.log build_starpu_cuda: extends: .build_script_guix variables: - SYSTEM: guix VERSION: starpu_cuda - GUIX_CHAMELEON_VARIANT: cuda + GPU_BACKEND: cuda + BLAS: "--with-input=openblas=intel-oneapi-mkl" LOGNAME: "chameleon-build-${SYSTEM}-${VERSION}" - BUILD_OPTIONS: "-DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=ON -DBLA_VENDOR=OpenBLAS" + BUILD_OPTIONS: "-DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=OFF -DBLA_VENDOR=Intel10_64lp_seq" build_starpu_hip: extends: .build_script_guix variables: - SYSTEM: guix VERSION: starpu_hip - GUIX_CHAMELEON_VARIANT: hip + GPU_BACKEND: hip LOGNAME: "chameleon-build-${SYSTEM}-${VERSION}" - BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_ROC=ON -DCHAMELEON_USE_MPI=ON -DBLA_VENDOR=OpenBLAS -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran" + BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_ROC=ON -DCHAMELEON_USE_MPI=OFF -DBLA_VENDOR=OpenBLAS -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran" build_starpu_simgrid: extends: .build_script_linux @@ -97,11 +103,13 @@ build_starpu_macosx: SYSTEM: macosx VERSION: starpu LOGNAME: "chameleon-build-${SYSTEM}-${VERSION}" + CMAKE_BUILD_PARALLEL_LEVEL: 4 script: - bash .gitlab/build.sh | tee ${LOGNAME}.log artifacts: name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" expire_in: 1 week + when: always paths: - ${LOGNAME}.log cache: @@ -119,11 +127,13 @@ build_starpu_macosx: MSYSTEM: UCRT64 VERSION: starpu LOGNAME: "chameleon-build-${SYSTEM}-${VERSION}" + CMAKE_BUILD_PARALLEL_LEVEL: 4 script: - bash -lc .gitlab/build.sh | tee "$env:LOGNAME.log" artifacts: name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" 
expire_in: 1 week + when: always paths: - ${LOGNAME}.log cache: diff --git a/.gitlab/common.yml b/.gitlab/common.yml index 9e679f1c4..f4e30eb4b 100644 --- a/.gitlab/common.yml +++ b/.gitlab/common.yml @@ -37,5 +37,6 @@ variables: name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" expire_in: 1 week untracked: true + when: always reports: junit: ${LOGNAME}-junit.xml diff --git a/.gitlab/sbatch.sh b/.gitlab/sbatch.sh new file mode 100755 index 000000000..d202d0686 --- /dev/null +++ b/.gitlab/sbatch.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# sbatch.sh : submit slurm jobs and wait for completion before exiting +set -x + +if [ $# -gt 0 ] +then + JOB_NAME=$1 +fi +JOB_NAME=${JOB_NAME:-chameleon} + +# to get kernels executed on both CPUs and GPUs +export STARPU_SCHED=random + +# execution commands +sbatch --wait \ + --job-name="$JOB_NAME" \ + --output="$JOB_NAME.out" \ + --nodes=1 \ + --exclusive --ntasks-per-node=1 --threads-per-core=1 \ + --constraint="$SLURM_CONSTRAINTS" \ + --time=01:00:00 \ + $(dirname "$0")/test.sh +# get the error code from the last command: sbatch --wait ... +err=$? 
+ +cat $JOB_NAME.out + +# exit with the error code from the sbatch command +exit $err diff --git a/.gitlab/test.sh b/.gitlab/test.sh index 0a4a8639f..27ba5484b 100755 --- a/.gitlab/test.sh +++ b/.gitlab/test.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash - set -e set -x diff --git a/.gitlab/test_starpu.yml b/.gitlab/test_starpu.yml index efc3bd783..1adc4997f 100644 --- a/.gitlab/test_starpu.yml +++ b/.gitlab/test_starpu.yml @@ -1,5 +1,5 @@ --- -.test_starpu: +.test_script_starpu: extends: .test_script_linux needs: [build_starpu] variables: @@ -9,7 +9,7 @@ TESTS_RESTRICTION: "-R _${CATEGORY}_${PRECISION}|example -E sytrf|sysv" test_starpu_master: - extends: .test_starpu + extends: .test_script_starpu rules: - if: ($CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH || $CI_COMMIT_BRANCH =~ /^ci-.*$/) && $CI_PIPELINE_SOURCE != "schedule" parallel: @@ -18,7 +18,7 @@ test_starpu_master: CATEGORY: [shm, mpi] test_starpu_mr: - extends: .test_starpu + extends: .test_script_starpu rules: - if: ($CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME !~ /^notest-.*$/) parallel: @@ -43,6 +43,7 @@ test_starpu_macosx: artifacts: name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" expire_in: 1 week + when: always paths: - ${LOGNAME}.log reports: @@ -69,6 +70,7 @@ test_starpu_macosx: artifacts: name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" expire_in: 1 week + when: always paths: - ${LOGNAME}.log reports: diff --git a/.gitlab/test_starpu_plafrim.yml b/.gitlab/test_starpu_plafrim.yml new file mode 100644 index 000000000..c5178e6ca --- /dev/null +++ b/.gitlab/test_starpu_plafrim.yml @@ -0,0 +1,46 @@ +--- +.test_script_starpu_plafrim: + tags: ['plafrim'] + stage: test + variables: + SYSTEM: guix + script: + - guix describe -f channels | tee guix-channels.scm + - guix shell --pure --preserve=SYSTEM --preserve=VERSION --preserve=LOGNAME --preserve=BUILD_OPTIONS --preserve=TESTS_RESTRICTION --preserve=SLURM_CONSTRAINTS --preserve=LD_PRELOAD + -D chameleon-${GPU_BACKEND} ${BLAS} slurm coreutils 
inetutils bash-minimal -- + bash .gitlab/sbatch.sh ${LOGNAME} | tee ${LOGNAME}.log + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + expire_in: 1 week + when: always + paths: + - guix-channels.scm + - ${LOGNAME}.log + reports: + junit: ${LOGNAME}-junit.xml + +.test_script_starpu_plafrim_cuda: + extends: .test_script_starpu_plafrim + needs: [build_starpu_cuda] + variables: + VERSION: starpu_cuda + GPU_BACKEND: cuda + BLAS: "--with-input=openblas=intel-oneapi-mkl" + LOGNAME: "chameleon-${SYSTEM}-${VERSION}" + BUILD_OPTIONS: "-DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=OFF -DBLA_VENDOR=Intel10_64lp_seq" + SLURM_CONSTRAINTS: "sirocco" + LD_PRELOAD: "/usr/lib64/libcuda.so" + +test_starpu_cuda_master: + extends: .test_script_starpu_plafrim_cuda + rules: + - if: ($CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH || $CI_COMMIT_BRANCH =~ /^ci-.*$/) && $CI_PIPELINE_SOURCE != "schedule" + variables: + TESTS_RESTRICTION: "-R test_shm_gpu" + +test_starpu_cuda_mr: + extends: .test_script_starpu_plafrim_cuda + rules: + - if: ($CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME !~ /^notest-.*$/) + variables: + TESTS_RESTRICTION: "-R test_shm_gpu_d|test_shm_gpu_c" -- GitLab