From cee3c34af059343cd4dafb4b520b7094b6095ae5 Mon Sep 17 00:00:00 2001 From: Lancelot Doan <lancelot.doan@inria.fr> Date: Tue, 27 May 2025 09:06:48 +0200 Subject: [PATCH] [Sophia][Esterel33] Regen g5k checks --- .../sophia/clusters/esterel33/esterel33.json | 2 +- .../clusters/esterel33/nodes/esterel33-1.json | 21 +++++++++++++++++-- .../clusters/esterel33/nodes/esterel33-1.yaml | 12 +++++++++-- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json b/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json index f7c15265f2f..7cdc546f009 100644 --- a/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json +++ b/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json @@ -80,7 +80,7 @@ ], "model": "Dell PowerEdge R7525", "nodes_count": 1, - "nodes_description": "2 CPUs AMD EPYC 7282, 16 cores/CPU, 2 GPUs A40, 256GB RAM, 446GB SSD, 2679GB SSD, 1 x 1Gb Ethernet, 1 x 40Gb InfiniBand", + "nodes_description": "2 CPUs AMD EPYC 7282, 16 cores/CPU, 3 GPUs A40, 256GB RAM, 446GB SSD, 2679GB SSD, 1 x 1Gb Ethernet, 1 x 20Gb InfiniBand", "priority": 202205, "queues": [ "admin", diff --git a/data/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.json b/data/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.json index cd1fe56cb52..a562380da23 100644 --- a/data/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.json +++ b/data/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.json @@ -24,7 +24,7 @@ "nvidia0": { "compute_capability": "8.6", "cores": 10752, - "cpu_affinity": 1, + "cpu_affinity": 0, "device": "/dev/nvidia0", "memory": 48305799168, "microarchitecture": "Ampere", @@ -54,6 +54,23 @@ "power_default_limit": "300.00 W", "vbios_version": "94.02.5C.00.03", "vendor": "Nvidia" + }, + "nvidia2": { + "compute_capability": "8.6", + "cores": 10752, + "cpu_affinity": 1, + "device": "/dev/nvidia2", + "memory": 48305799168, + "microarchitecture": "Ampere", + "model": "A40", + "performance": { + "fp-16": 37420000000000, + "fp-32": 37420000000000, + "fp-64": 584600000000 + }, + "power_default_limit": "300.00 W", + "vbios_version": "94.02.5C.00.03", + "vendor": "Nvidia" } }, "main_memory": { @@ -205,7 +222,7 @@ "name": "ib0", "netmask": "255.255.240.0", "network_address": "esterel33-1-ib0.sophia.grid5000.fr", - "rate": 40000000000, + "rate": 20000000000, "vendor": "Mellanox Technologies" }, { diff --git a/input/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.yaml b/input/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.yaml index 999832d24ec..e4029587ef2 100644 --- a/input/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.yaml +++ b/input/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.yaml @@ -18,7 +18,7 @@ esterel33-1: serial: BZLF4F3 gpu_devices: nvidia0: - cpu_affinity: 1 + cpu_affinity: 0 device: "/dev/nvidia0" memory: 48305799168 model: A40 @@ -33,6 +33,14 @@ esterel33-1: power_default_limit: 300.00 W vbios_version: 94.02.5C.00.03 vendor: Nvidia + nvidia2: + cpu_affinity: 1 + device: "/dev/nvidia2" + memory: 48305799168 + model: A40 + power_default_limit: 300.00 W + vbios_version: 94.02.5C.00.03 + vendor: Nvidia main_memory: ram_size: 274877906944 memory_devices: @@ -123,7 +131,7 @@ esterel33-1: management: false model: MT27800 Family [ConnectX-5] name: ib0 - rate: 40000000000 + rate: 20000000000 vendor: Mellanox Technologies operating_system: cstate_driver: acpi_idle -- GitLab