From 3b2666a2951e64b1aab0c730513573066b0fc82e Mon Sep 17 00:00:00 2001 From: BERARD Benjamin <benjamin.berard@inria.fr> Date: Wed, 30 Nov 2022 19:08:44 +0100 Subject: [PATCH] [grat/sirius/nvidia_fabricmanager] Add nvswitch variable and set true for grat and sirius for each gpu_devices --- .../sites/lyon/clusters/sirius/nodes/sirius-1.json | 3 +++ data/grid5000/sites/nancy/clusters/grat/nodes/grat-1.json | 8 ++++++++ input/grid5000/sites/lyon/clusters/sirius/sirius.yaml | 5 ++++- input/grid5000/sites/nancy/clusters/grat/grat.yaml | 3 +++ lib/refrepo/valid/input/schemas/schema-global.yaml | 2 +- 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/data/grid5000/sites/lyon/clusters/sirius/nodes/sirius-1.json b/data/grid5000/sites/lyon/clusters/sirius/nodes/sirius-1.json index 248d1ca2e66..2da8675e123 100644 --- a/data/grid5000/sites/lyon/clusters/sirius/nodes/sirius-1.json +++ b/data/grid5000/sites/lyon/clusters/sirius/nodes/sirius-1.json @@ -98,6 +98,9 @@ "power_default_limit": "400.00 W", "vbios_version": "92.00.36.00.04", "vendor": "Nvidia" + }, + "nvidia[0-7]": { + "nvswitch": true } }, "kavlan": { diff --git a/data/grid5000/sites/nancy/clusters/grat/nodes/grat-1.json b/data/grid5000/sites/nancy/clusters/grat/nodes/grat-1.json index 9cc3cdfdefe..682e4cc5241 100644 --- a/data/grid5000/sites/nancy/clusters/grat/nodes/grat-1.json +++ b/data/grid5000/sites/nancy/clusters/grat/nodes/grat-1.json @@ -25,6 +25,7 @@ "device": "/dev/nvidia0", "memory": 42505076736, "model": "A100-SXM4-40GB", + "nvswitch": true, "power_default_limit": "400.00 W", "vbios_version": "92.00.45.00.03", "vendor": "Nvidia" @@ -35,6 +36,7 @@ "device": "/dev/nvidia1", "memory": 42505076736, "model": "A100-SXM4-40GB", + "nvswitch": true, "power_default_limit": "400.00 W", "vbios_version": "92.00.45.00.03", "vendor": "Nvidia" @@ -45,6 +47,7 @@ "device": "/dev/nvidia2", "memory": 42505076736, "model": "A100-SXM4-40GB", + "nvswitch": true, "power_default_limit": "400.00 W", "vbios_version": "92.00.45.00.03", "vendor": "Nvidia" @@ -55,6 +58,7 @@ "device": "/dev/nvidia3", "memory": 42505076736, "model": "A100-SXM4-40GB", + "nvswitch": true, "power_default_limit": "400.00 W", "vbios_version": "92.00.45.00.03", "vendor": "Nvidia" @@ -65,6 +69,7 @@ "device": "/dev/nvidia4", "memory": 42505076736, "model": "A100-SXM4-40GB", + "nvswitch": true, "power_default_limit": "400.00 W", "vbios_version": "92.00.45.00.03", "vendor": "Nvidia" @@ -75,6 +80,7 @@ "device": "/dev/nvidia5", "memory": 42505076736, "model": "A100-SXM4-40GB", + "nvswitch": true, "power_default_limit": "400.00 W", "vbios_version": "92.00.45.00.03", "vendor": "Nvidia" @@ -85,6 +91,7 @@ "device": "/dev/nvidia6", "memory": 42505076736, "model": "A100-SXM4-40GB", + "nvswitch": true, "power_default_limit": "400.00 W", "vbios_version": "92.00.45.00.03", "vendor": "Nvidia" @@ -95,6 +102,7 @@ "device": "/dev/nvidia7", "memory": 42505076736, "model": "A100-SXM4-40GB", + "nvswitch": true, "power_default_limit": "400.00 W", "vbios_version": "92.00.45.00.03", "vendor": "Nvidia" diff --git a/input/grid5000/sites/lyon/clusters/sirius/sirius.yaml b/input/grid5000/sites/lyon/clusters/sirius/sirius.yaml index 854cb87b0c1..c33ed79d401 100644 --- a/input/grid5000/sites/lyon/clusters/sirius/sirius.yaml +++ b/input/grid5000/sites/lyon/clusters/sirius/sirius.yaml @@ -17,6 +17,9 @@ nodes: clock_speed: 2100000000 software: standard-environment: debian11-x64-std + gpu_devices: + nvidia[0-7]: + nvswitch: true network_adapters: bmc: interface: Ethernet @@ -53,4 +56,4 @@ nodes: id: disk5 pci-0000:ca:00.0-nvme-1: interface: NVME - id: disk4 + id: disk4 \ No newline at end of file diff --git a/input/grid5000/sites/nancy/clusters/grat/grat.yaml b/input/grid5000/sites/nancy/clusters/grat/grat.yaml index ce9fb95f087..fd2fbce41ec 100644 --- a/input/grid5000/sites/nancy/clusters/grat/grat.yaml +++ b/input/grid5000/sites/nancy/clusters/grat/grat.yaml @@ -17,6 +17,9 @@ nodes: clock_speed: 2100000000 software: standard-environment: debian11-x64-std + gpu_devices: + nvidia[0-7]: + nvswitch: true management_tools: bmc_vendor_tool: ipmitool network_adapters: diff --git a/lib/refrepo/valid/input/schemas/schema-global.yaml b/lib/refrepo/valid/input/schemas/schema-global.yaml index b3816ed5289..075883fba49 100644 --- a/lib/refrepo/valid/input/schemas/schema-global.yaml +++ b/lib/refrepo/valid/input/schemas/schema-global.yaml @@ -7,4 +7,4 @@ ipv4: required_hash ipv6: required_hash software: required_hash disk_vendor_model_mapping: required_hash -management_tools: required_hash +management_tools: required_hash \ No newline at end of file -- GitLab