From dcb38b66c2d6de1d84675dc1069d990fb05ee9b6 Mon Sep 17 00:00:00 2001
From: Florent Pruvost <florent.pruvost@inria.fr>
Date: Wed, 18 Dec 2024 08:41:11 +0100
Subject: [PATCH] ci: add test_starpu_cuda

---
 .gitlab-ci.yml                  |  1 +
 .gitlab/build.sh                |  2 +-
 .gitlab/build.yml               | 26 +++++++++++++------
 .gitlab/common.yml              |  1 +
 .gitlab/sbatch.sh               | 29 +++++++++++++++++++++
 .gitlab/test.sh                 |  1 -
 .gitlab/test_starpu.yml         |  8 +++---
 .gitlab/test_starpu_plafrim.yml | 46 +++++++++++++++++++++++++++++++++
 8 files changed, 101 insertions(+), 13 deletions(-)
 create mode 100755 .gitlab/sbatch.sh
 create mode 100644 .gitlab/test_starpu_plafrim.yml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e01e26e32..270076d74 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,6 +14,7 @@ include:
   - .gitlab/docker.yml
   - .gitlab/build.yml
   - .gitlab/test_starpu.yml
+  - .gitlab/test_starpu_plafrim.yml
   - .gitlab/test_starpu_simgrid.yml
   - .gitlab/test_parsec.yml
   - .gitlab/test_quark.yml
diff --git a/.gitlab/build.sh b/.gitlab/build.sh
index 0d17fd144..e631b37f2 100755
--- a/.gitlab/build.sh
+++ b/.gitlab/build.sh
@@ -59,7 +59,7 @@ case $SYSTEM in
 esac
 
 # Compile
-eval '${SCAN}cmake --build build-${VERSION} -j 4 > /dev/null'
+eval '${SCAN}cmake --build build-${VERSION} -j ${CMAKE_BUILD_PARALLEL_LEVEL} > /dev/null'
 
 # Install
 cmake --install build-${VERSION}
diff --git a/.gitlab/build.yml b/.gitlab/build.yml
index e5d674332..2d449df9d 100644
--- a/.gitlab/build.yml
+++ b/.gitlab/build.yml
@@ -2,6 +2,8 @@
 .build_script_common:
   stage: build
   extends: .only-master-mr
+  variables:
+    CMAKE_BUILD_PARALLEL_LEVEL: 4
   artifacts:
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
     expire_in: 1 week
@@ -10,6 +12,8 @@
 
 .build_script_linux:
   extends: .build_script_common
+  variables:
+    SYSTEM: linux
   script:
     - bash .gitlab/build.sh | tee ${LOGNAME}.log
 
@@ -56,29 +60,31 @@ build_starpu:
 .build_script_guix:
   tags: ['plafrim']
   extends: .build_script_common
+  variables:
+    SYSTEM: guix
+    CMAKE_BUILD_PARALLEL_LEVEL: 20
   script:
     - guix describe -f channels
-    - guix shell --pure --preserve=SYSTEM --preserve=VERSION --preserve=LOGNAME --preserve=BUILD_OPTIONS
-                 -D chameleon-${GUIX_CHAMELEON_VARIANT} coreutils bash-minimal --
+    - guix shell --pure --preserve=SYSTEM --preserve=VERSION --preserve=LOGNAME --preserve=BUILD_OPTIONS --preserve=CMAKE_BUILD_PARALLEL_LEVEL
+                 -D chameleon-${GPU_BACKEND} ${BLAS} coreutils bash-minimal --
                  bash .gitlab/build.sh | tee ${LOGNAME}.log
 
 build_starpu_cuda:
   extends: .build_script_guix
   variables:
-    SYSTEM: guix
     VERSION: starpu_cuda
-    GUIX_CHAMELEON_VARIANT: cuda
+    GPU_BACKEND: cuda
+    BLAS: "--with-input=openblas=intel-oneapi-mkl"
     LOGNAME: "chameleon-build-${SYSTEM}-${VERSION}"
-    BUILD_OPTIONS: "-DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=ON -DBLA_VENDOR=OpenBLAS"
+    BUILD_OPTIONS: "-DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=OFF -DBLA_VENDOR=Intel10_64lp_seq"
 
 build_starpu_hip:
   extends: .build_script_guix
   variables:
-    SYSTEM: guix
     VERSION: starpu_hip
-    GUIX_CHAMELEON_VARIANT: hip
+    GPU_BACKEND: hip
     LOGNAME: "chameleon-build-${SYSTEM}-${VERSION}"
-    BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_ROC=ON -DCHAMELEON_USE_MPI=ON -DBLA_VENDOR=OpenBLAS -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran"
+    BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_ROC=ON -DCHAMELEON_USE_MPI=OFF -DBLA_VENDOR=OpenBLAS -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran"
 
 build_starpu_simgrid:
   extends: .build_script_linux
@@ -97,11 +103,13 @@ build_starpu_macosx:
     SYSTEM: macosx
     VERSION: starpu
     LOGNAME: "chameleon-build-${SYSTEM}-${VERSION}"
+    CMAKE_BUILD_PARALLEL_LEVEL: 4
   script:
     - bash .gitlab/build.sh | tee ${LOGNAME}.log
   artifacts:
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
     expire_in: 1 week
+    when: always
     paths:
       - ${LOGNAME}.log
   cache:
@@ -119,11 +127,13 @@ build_starpu_macosx:
     MSYSTEM: UCRT64
     VERSION: starpu
     LOGNAME: "chameleon-build-${SYSTEM}-${VERSION}"
+    CMAKE_BUILD_PARALLEL_LEVEL: 4
   script:
     - bash -lc .gitlab/build.sh | tee "$env:LOGNAME.log"
   artifacts:
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
     expire_in: 1 week
+    when: always
     paths:
       - ${LOGNAME}.log
   cache:
diff --git a/.gitlab/common.yml b/.gitlab/common.yml
index 9e679f1c4..f4e30eb4b 100644
--- a/.gitlab/common.yml
+++ b/.gitlab/common.yml
@@ -37,5 +37,6 @@ variables:
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
     expire_in: 1 week
     untracked: true
+    when: always
     reports:
       junit: ${LOGNAME}-junit.xml
diff --git a/.gitlab/sbatch.sh b/.gitlab/sbatch.sh
new file mode 100755
index 000000000..d202d0686
--- /dev/null
+++ b/.gitlab/sbatch.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# sbatch.sh [JOB_NAME]: submit .gitlab/test.sh as a slurm job, wait for it, print its log
+set -x
+
+# job name: first argument when given, else $JOB_NAME, else "chameleon"
+if [ $# -gt 0 ]; then
+    JOB_NAME=$1
+fi
+JOB_NAME=${JOB_NAME:-chameleon}
+
+# to get kernels execution on both cpus and gpus
+export STARPU_SCHED=random
+
+# submit test.sh (located next to this script); --wait blocks until the job ends
+sbatch --wait \
+       --job-name="$JOB_NAME" \
+       --output="$JOB_NAME.out" \
+       --nodes=1 \
+       --exclusive --ntasks-per-node=1 --threads-per-core=1 \
+       --constraint="$SLURM_CONSTRAINTS" \
+       --time=01:00:00 \
+       "$(dirname "$0")/test.sh"
+# get the error code from the last command: sbatch --wait ...
+err=$?
+
+cat "$JOB_NAME.out"
+
+# exit with error code from the sbatch command
+exit $err
diff --git a/.gitlab/test.sh b/.gitlab/test.sh
index 0a4a8639f..27ba5484b 100755
--- a/.gitlab/test.sh
+++ b/.gitlab/test.sh
@@ -1,5 +1,4 @@
 #!/usr/bin/env bash
-
 set -e
 set -x
 
diff --git a/.gitlab/test_starpu.yml b/.gitlab/test_starpu.yml
index efc3bd783..1adc4997f 100644
--- a/.gitlab/test_starpu.yml
+++ b/.gitlab/test_starpu.yml
@@ -1,5 +1,5 @@
 ---
-.test_starpu:
+.test_script_starpu:
   extends: .test_script_linux
   needs: [build_starpu]
   variables:
@@ -9,7 +9,7 @@
     TESTS_RESTRICTION: "-R _${CATEGORY}_${PRECISION}|example -E sytrf|sysv"
 
 test_starpu_master:
-  extends: .test_starpu
+  extends: .test_script_starpu
   rules:
     - if: ($CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH || $CI_COMMIT_BRANCH =~ /^ci-.*$/) && $CI_PIPELINE_SOURCE != "schedule"
   parallel:
@@ -18,7 +18,7 @@ test_starpu_master:
         CATEGORY: [shm, mpi]
 
 test_starpu_mr:
-  extends: .test_starpu
+  extends: .test_script_starpu
   rules:
     - if: ($CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME !~ /^notest-.*$/)
   parallel:
@@ -43,6 +43,7 @@ test_starpu_macosx:
   artifacts:
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
     expire_in: 1 week
+    when: always
     paths:
       - ${LOGNAME}.log
     reports:
@@ -69,6 +70,7 @@ test_starpu_macosx:
   artifacts:
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
     expire_in: 1 week
+    when: always
     paths:
       - ${LOGNAME}.log
     reports:
diff --git a/.gitlab/test_starpu_plafrim.yml b/.gitlab/test_starpu_plafrim.yml
new file mode 100644
index 000000000..c5178e6ca
--- /dev/null
+++ b/.gitlab/test_starpu_plafrim.yml
@@ -0,0 +1,46 @@
+---
+.test_script_starpu_plafrim:  # hidden template extended by the plafrim test jobs
+  tags: ['plafrim']
+  stage: test
+  variables:
+    SYSTEM: guix
+  script:  # record the guix channels, then run .gitlab/sbatch.sh in a pure guix shell
+    - guix describe -f channels | tee guix-channels.scm
+    - guix shell --pure --preserve=SYSTEM --preserve=VERSION --preserve=LOGNAME --preserve=BUILD_OPTIONS --preserve=TESTS_RESTRICTION --preserve=SLURM_CONSTRAINTS --preserve=LD_PRELOAD
+                 -D chameleon-${GPU_BACKEND} ${BLAS} slurm coreutils inetutils bash-minimal --
+                 bash .gitlab/sbatch.sh ${LOGNAME} | tee ${LOGNAME}.log
+  artifacts:
+    name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
+    expire_in: 1 week
+    when: always  # keep logs and reports even when the job fails
+    paths:
+      - guix-channels.scm
+      - ${LOGNAME}.log
+    reports:
+      junit: ${LOGNAME}-junit.xml
+
+.test_script_starpu_plafrim_cuda:  # CUDA settings shared by the test_starpu_cuda_* jobs
+  extends: .test_script_starpu_plafrim
+  needs: [build_starpu_cuda]
+  variables:
+    VERSION: starpu_cuda
+    GPU_BACKEND: cuda  # selects the chameleon-cuda guix package in the template script
+    BLAS: "--with-input=openblas=intel-oneapi-mkl"  # extra guix shell option, same as build_starpu_cuda
+    LOGNAME: "chameleon-${SYSTEM}-${VERSION}"
+    BUILD_OPTIONS: "-DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=OFF -DBLA_VENDOR=Intel10_64lp_seq"
+    SLURM_CONSTRAINTS: "sirocco"  # passed to sbatch --constraint by .gitlab/sbatch.sh
+    LD_PRELOAD: "/usr/lib64/libcuda.so"  # NOTE(review): presumably exposes the host CUDA driver - confirm
+
+test_starpu_cuda_master:  # default branch and ci-* branches, except scheduled pipelines
+  extends: .test_script_starpu_plafrim_cuda
+  rules:
+    - if: ($CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH || $CI_COMMIT_BRANCH =~ /^ci-.*$/) && $CI_PIPELINE_SOURCE != "schedule"
+  variables:
+    TESTS_RESTRICTION: "-R test_shm_gpu"  # preserved into the guix shell by the template script
+
+test_starpu_cuda_mr:  # merge requests, unless the source branch name starts with notest-
+  extends: .test_script_starpu_plafrim_cuda
+  rules:
+    - if: ($CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME !~ /^notest-.*$/)
+  variables:
+    TESTS_RESTRICTION: "-R test_shm_gpu_d|test_shm_gpu_c"  # narrower pattern than the master job
-- 
GitLab