Commit 39080e1f authored by Baptiste Jonglez

Merge branch 'nvidia_ppc64'

parents ee6dcc9a d35c971f
/usr/local/cuda-7.0/lib
/usr/local/cuda-7.0/lib64
/usr/local/cuda/lib64
[Unit]
Description=NVIDIA DCGM prometheus exporter service
After=network.target
# Ensure that /dev/nvidia0 is created by first calling nvidia-smi.
# If no GPU is found, nvidia-smi will not create /dev/nvidia0 and we will not run.
Wants=nvidia-smi.service
After=nvidia-smi.service
ConditionPathExists=/dev/nvidia0
[Service]
......
[Unit]
Description=NVIDIA Persistence Daemon
Wants=syslog.target
# Ensure that /dev/nvidia0 is created by first calling nvidia-smi.
# If no GPU is found, nvidia-smi will not create /dev/nvidia0 and we will not run.
Wants=nvidia-smi.service
After=nvidia-smi.service
ConditionPathExists=/dev/nvidia0
[Service]
Type=forking
......
# Helper unit: runs nvidia-smi a single time so that the /dev/nvidiaX device
# nodes get created. Units that need a GPU pull this unit in with
# Wants=nvidia-smi.service / After=nvidia-smi.service and then test for
# /dev/nvidia0 themselves.
[Unit]
Description=Call nvidia-smi once to create /dev/nvidiaX
[Service]
# oneshot: the unit is considered started only once the command has exited,
# so units ordered After= this one wait for nvidia-smi to finish.
Type=oneshot
# Ignore the exit code: the command fails when no GPU is found
# (the leading "-" on the command path tells systemd to treat a failure as success).
ExecStart=-/usr/bin/nvidia-smi
[Install]
# Target the unit is attached to if it is ever enabled.
WantedBy=multi-user.target
# /etc/profile: system-wide .profile file for the Bourne shell (sh(1))
# and Bourne compatible shells (bash(1), ksh(1), ash(1), ...).
if [ "`id -u`" -eq 0 ]; then
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/cuda-7.0/bin"
else
PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/usr/local/cuda-7.0/bin"
fi
export PATH
if [ "$PS1" ]; then
if [ "$BASH" ] && [ "$BASH" != "/bin/sh" ]; then
# The file bash.bashrc already sets the default PS1.
# PS1='\h:\w\$ '
if [ -f /etc/bash.bashrc ]; then
. /etc/bash.bashrc
fi
else
if [ "`id -u`" -eq 0 ]; then
PS1='# '
else
PS1='$ '
fi
fi
fi
if [ -d /etc/profile.d ]; then
for i in /etc/profile.d/*.sh; do
if [ -r $i ]; then
. $i
fi
done
unset i
fi
......@@ -17,10 +17,12 @@ class env::big ( $variant = "big", $parent_parameters = {} ){
class { 'env::big::configure_postfix': }
# kvm
class { 'env::big::configure_kvm': }
if $env::deb_arch == 'amd64' {
# nvidia
# nvidia
if $env::deb_arch == 'amd64' or $env::deb_arch == 'ppc64el' {
class { 'env::big::configure_nvidia_gpu': }
# beegfs install
}
# beegfs install
if $env::deb_arch == 'amd64' {
class { 'env::big::install_beegfs': }
}
#Allow sshfs
......
......@@ -6,6 +6,8 @@ class env::big::configure_nvidia_gpu () {
include 'env::big::configure_nvidia_gpu::modules'
# Install nvidia drivers
include 'env::big::configure_nvidia_gpu::drivers'
# Install additional services (currently nvidia-smi, needed by cuda and prometheus)
include 'env::big::configure_nvidia_gpu::services'
# Install cuda
include 'env::big::configure_nvidia_gpu::cuda'
# Install nvidia ganglia plugins
......
......@@ -2,7 +2,20 @@ class env::big::configure_nvidia_gpu::cuda () {
case "${::lsbdistcodename}" {
"buster" : {
$driver_source = 'http://packages.grid5000.fr/other/cuda/cuda_10.1.168_418.67_linux.run'
case "$env::deb_arch" {
"amd64": {
$driver_source = 'http://packages.grid5000.fr/other/cuda/cuda_10.1.243_418.87.00_linux.run'
$libcuda = '/usr/lib/x86_64-linux-gnu/libcuda.so'
}
"ppc64el": {
$driver_source = 'http://packages.grid5000.fr/other/cuda/cuda_10.1.243_418.87.00_linux_ppc64le.run'
$libcuda = '/usr/lib/powerpc64le-linux-gnu/libcuda.so'
}
default: {
err "${env::deb_arch} not supported"
}
}
$opengl_packages = ['ocl-icd-libopencl1', 'opencl-headers']
exec{
......@@ -26,27 +39,7 @@ class env::big::configure_nvidia_gpu::cuda () {
"stretch" : {
$driver_source = 'http://packages.grid5000.fr/other/cuda/cuda_9.0.176_384.81_linux-run'
$opengl_packages = ['ocl-icd-libopencl1', 'opencl-headers']
exec{
'retrieve_nvidia_cuda':
command => "/usr/bin/wget -q $driver_source -O /tmp/NVIDIA-Linux_cuda.run && chmod u+x /tmp/NVIDIA-Linux_cuda.run",
timeout => 1200, # 20 min
creates => "/tmp/NVIDIA-Linux_cuda.run";
'install_nvidia_cuda':
command => "/tmp/NVIDIA-Linux_cuda.run --silent --toolkit --samples && /bin/rm /tmp/NVIDIA-Linux_cuda.run",
timeout => 2400, # 40 min
user => root,
require => File['/tmp/NVIDIA-Linux_cuda.run'];
'update_ld_conf':
command => "/sbin/ldconfig",
user => root,
refreshonly => true;
}
}
"jessie" : {
$driver_source = 'http://packages.grid5000.fr/other/cuda/cuda_9.0.176_384.81_linux-run'
$opengl_packages = ['ocl-icd-libopencl1', 'opencl-headers', 'amd-opencl-icd']
$libcuda = '/usr/lib/x86_64-linux-gnu/libcuda.so'
exec{
'retrieve_nvidia_cuda':
......@@ -74,7 +67,7 @@ class env::big::configure_nvidia_gpu::cuda () {
require => Exec['retrieve_nvidia_cuda'];
'/usr/local/cuda/lib64/libcuda.so':
ensure => 'link',
target => '/usr/lib/x86_64-linux-gnu/libcuda.so',
target => $libcuda,
require => Exec['install_nvidia_cuda'],
notify => Exec['update_ld_conf'];
'/etc/ld.so.conf.d/cuda.conf':
......@@ -82,31 +75,17 @@ class env::big::configure_nvidia_gpu::cuda () {
owner => root,
group => root,
mode => '0644',
source => 'puppet:///modules/env/big/nvidia/cuda-9.0.conf',
source => 'puppet:///modules/env/big/nvidia/cuda.conf',
notify => Exec['update_ld_conf'];
'/etc/systemd/system/nvidia-persistenced.service':
ensure => file,
owner => root,
group => root,
mode => '0644',
source => 'puppet:///modules/env/big/nvidia/nvidia-persistenced-9.0.service';
}
}
"jessie" : {
file{
'/tmp/NVIDIA-Linux_cuda.run':
ensure => file,
require => Exec['retrieve_nvidia_cuda'];
'/etc/ld.so.conf.d/cuda.conf':
ensure => file,
owner => root,
group => root,
mode => '0644',
source => 'puppet:///modules/env/big/nvidia/cuda.conf',
notify => Exec['update_ld_conf'];
'/usr/local/cuda/lib64/libcuda.so':
ensure => 'link',
target => '/usr/lib/libcuda.so';
source => 'puppet:///modules/env/big/nvidia/nvidia-persistenced.service';
'/etc/systemd/system/multi-user.target.wants/nvidia-persistenced.service':
ensure => link,
target => '/etc/systemd/system/nvidia-persistenced.service';
}
}
}
......
......@@ -2,8 +2,17 @@ class env::big::configure_nvidia_gpu::drivers () {
### This class exists for the gpuclus cluster, which requires a recent version of the nvidia driver
# May be changed to a link inside g5k if required
$driver_source = 'http://packages.grid5000.fr/other/nvidia//NVIDIA-Linux-x86_64-450.51.05.run'
case "$env::deb_arch" {
"amd64": {
$driver_source = 'http://packages.grid5000.fr/other/nvidia/NVIDIA-Linux-x86_64-450.80.02.run'
}
"ppc64el": {
$driver_source = 'http://packages.grid5000.fr/other/nvidia/NVIDIA-Linux-ppc64le-450.80.02.run'
}
default: {
err "${env::deb_arch} not supported"
}
}
package {
['module-assistant', 'dkms']:
......
# Ships the nvidia-smi systemd unit file without enabling it.
#
# The unit is meant to be started on demand: a service that declares both
# "Wants=nvidia-smi.service" and "After=nvidia-smi.service" will cause it
# to be started automatically.
class env::big::configure_nvidia_gpu::services () {

  file { '/etc/systemd/system/nvidia-smi.service':
    ensure => 'file',
    source => 'puppet:///modules/env/big/nvidia/nvidia-smi.service',
    owner  => 'root',
    group  => 'root',
    mode   => '0644',
  }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment