From 80544f8cfaa8abee405d920bbe4d8c3c7bc638a1 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 1/8] [sophia][esterel26] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel26/esterel26.json | 66 +++++++++ .../clusters/esterel26/nodes/esterel26-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel26/esterel26.yaml | 41 ++++++ .../clusters/esterel26/esterel26_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel26/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel26/esterel26.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel26/esterel26_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json new file mode 100644 index 00000000000..61c5bd1e192 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + 
"node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel26", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json new file mode 100644 index 00000000000..71a2c919c50 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + 
"enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.33", + "ip6": "2001:660:4406:800:4::21", + "kavlan": false, + "mac": "50:9a:4c:6c:38:d9", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel26-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.33", + "kavlan": false, + "mac": "70:b5:e8:e1:41:24", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel26-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel26", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel26-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml 
b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml new file mode 100644 index 00000000000..a3f40bf62b8 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel26-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel26 diff --git a/input/grid5000/sites/sophia/clusters/esterel26/esterel26_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel26/esterel26_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel26/esterel26_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb new file mode 100644 index 00000000000..cd244fa8358 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel26" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6c:38:d9 + ) + mac_bmc_list = %w( +70:b5:e8:e1:41:24 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From cf37915d2ebd367951a1a12361d7d6f3f339b16c Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Thu, 20 Mar 2025 14:45:07 +0100 Subject: [PATCH 2/8] [sophia][esterel26] fix ethernet production MAC address --- .../sites/sophia/clusters/esterel26/nodes/esterel26-1.json | 4 ++-- input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json 
b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json index 71a2c919c50..889c21a4ef3 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json @@ -44,7 +44,7 @@ "ip": "172.16.131.33", "ip6": "2001:660:4406:800:4::21", "kavlan": false, - "mac": "50:9a:4c:6c:38:d9", + "mac": "70:b5:e8:e1:41:14", "management": false, "mountable": true, "mounted": true, @@ -99,7 +99,7 @@ "redfish": true, "software": { "forced-deployment-timestamp": 202007300948, - "postinstall-version": "1.2025021810", + "postinstall-version": "1.2025032008", "standard-environment": "debian11-x64-std" }, "storage_devices": [ diff --git a/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb index cd244fa8358..70c85c20238 100644 --- a/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb +++ b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb @@ -1,12 +1,12 @@ <% - # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-new-final.csv # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' cluster_name = "esterel26" nodes_number = 1 # Size of the cluster (number of nodes) # MAC addresses declaration mac_eth0_list = %w( -50:9a:4c:6c:38:d9 +70:b5:e8:e1:41:14 ) mac_bmc_list = %w( 70:b5:e8:e1:41:24 -- GitLab From c3f7ae6467103d2be79c8ce4d3aae359a84d2a32 Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Mon, 24 Mar 2025 09:13:02 +0100 Subject: [PATCH 3/8] [sophia][esterel26] Add network topology description --- .../sites/sophia/clusters/esterel26/nodes/esterel26-1.json | 4 ++-- data/grid5000/sites/sophia/network_equipments/sw-2.json | 4 ++++ data/grid5000/sites/sophia/network_equipments/swadmin-2.json | 3 +++
input/grid5000/sites/sophia/networks/sw-2.yaml | 2 ++ input/grid5000/sites/sophia/networks/swadmin-2.yaml | 3 +++ 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json index 889c21a4ef3..23fd95bddbc 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json @@ -51,8 +51,8 @@ "name": "enp1s0f0np0", "network_address": "esterel26-1.sophia.grid5000.fr", "rate": 10000000000, - "switch": null, - "switch_port": null + "switch": "sw-2", + "switch_port": "1/1/17" }, { "device": "bmc", diff --git a/data/grid5000/sites/sophia/network_equipments/sw-2.json b/data/grid5000/sites/sophia/network_equipments/sw-2.json index dc7b512fe65..1cee59a69d0 100644 --- a/data/grid5000/sites/sophia/network_equipments/sw-2.json +++ b/data/grid5000/sites/sophia/network_equipments/sw-2.json @@ -73,6 +73,10 @@ { }, { + "kind": "node", + "port": "eth0", + "snmp_name": "1/1/17", + "uid": "esterel26-1" }, { }, diff --git a/data/grid5000/sites/sophia/network_equipments/swadmin-2.json b/data/grid5000/sites/sophia/network_equipments/swadmin-2.json index 6526a12777b..f15945ad4d8 100644 --- a/data/grid5000/sites/sophia/network_equipments/swadmin-2.json +++ b/data/grid5000/sites/sophia/network_equipments/swadmin-2.json @@ -61,6 +61,9 @@ "uid": "esterel41-1-bmc" }, { + "kind": "other", + "snmp_name": "1/1/11", + "uid": "esterel26-1-bmc" }, { }, diff --git a/input/grid5000/sites/sophia/networks/sw-2.yaml b/input/grid5000/sites/sophia/networks/sw-2.yaml index 36a246d3fa0..ca5d943dae4 100644 --- a/input/grid5000/sites/sophia/networks/sw-2.yaml +++ b/input/grid5000/sites/sophia/networks/sw-2.yaml @@ -44,6 +44,8 @@ sw-2: 12: uid: gwol-south-bmc kind: other + 17: + uid: esterel26-1 33: uid: esterel10-2 port: eth1 diff --git 
a/input/grid5000/sites/sophia/networks/swadmin-2.yaml b/input/grid5000/sites/sophia/networks/swadmin-2.yaml index 5e7e41418d3..e6b52ec4453 100644 --- a/input/grid5000/sites/sophia/networks/swadmin-2.yaml +++ b/input/grid5000/sites/sophia/networks/swadmin-2.yaml @@ -44,6 +44,9 @@ swadmin-2: 10: uid: esterel41-1-bmc kind: other + 11: + uid: esterel26-1-bmc + kind: other 25: uid: sw-2 kind: switch -- GitLab From caf9e310c1936acc57d7b3435bd4ced8a8b1efb2 Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Mon, 24 Mar 2025 09:41:24 +0100 Subject: [PATCH 4/8] [sophia][esterel26] Add disk by-path configuration --- .../sites/sophia/clusters/esterel26/esterel26.json | 2 +- .../sophia/clusters/esterel26/nodes/esterel26-1.json | 12 +++++++++++- .../sites/sophia/clusters/esterel26/esterel26.yaml | 6 +++++- .../sites/sophia/clusters/esterel26/nodes.yaml.erb | 4 ++++ 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json index 61c5bd1e192..a0a6d49fc90 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json @@ -53,7 +53,7 @@ ], "model": "Cluster Model", "nodes_count": 1, - "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 2x0GB SSD, 1 x 10Gb Ethernet", "priority": 197001, "queues": [ "admin", diff --git a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json index 23fd95bddbc..d38e5111dfe 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json @@ -105,13 +105,23 @@ "storage_devices": [ { "by_id": "", - "by_path": "/dev/disk/by-path/dummy", + 
"by_path": "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0", "id": "disk0", "interface": "SAS", "model": "unknown", "size": 8, "storage": "SSD", "vendor": "Unknown" + }, + { + "by_id": "", + "by_path": "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:1:0", + "id": "disk1", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" } ], "supported_job_types": { diff --git a/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml index a3f40bf62b8..ff1b30e4c06 100644 --- a/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml +++ b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml @@ -33,7 +33,11 @@ nodes: disk0: # This field will have to be renamed later. id: disk0 interface: SAS - by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0" + disk1: + id: disk1 + interface: SAS + by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:1:0" software: standard-environment: debian11-x64-std # TODO: check that architecture is OK management_tools: diff --git a/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb index 70c85c20238..5638a493927 100644 --- a/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb +++ b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb @@ -77,4 +77,8 @@ nodes: storage: SSD # Fake data, will be replaced by g5k-checks model: unknown # Fake data, will be replaced by g5k-checks size: 8 # Fake data, will be replaced by g5k-checks + disk1: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks <% } %> -- GitLab From ddff0a691139b858bc231640740a40f147e62272 Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Mon, 24 Mar 
2025 11:15:36 +0100 Subject: [PATCH 5/8] [sophia][esterel26] g5k-check import and IB interface --- .../sophia/clusters/esterel26/esterel26.json | 2 +- .../clusters/esterel26/nodes/esterel26-1.json | 202 ++++++++++++++---- input/grid5000/ipv4.yaml | 1 + .../sophia/clusters/esterel26/esterel26.yaml | 19 +- .../sophia/clusters/esterel26/nodes.yaml.erb | 84 -------- .../clusters/esterel26/nodes/esterel26-1.yaml | 148 +++++++++++++ lib/refrepo/net_names_mapping.yaml | 5 + 7 files changed, 331 insertions(+), 130 deletions(-) delete mode 100644 input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb create mode 100644 input/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.yaml diff --git a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json index a0a6d49fc90..540da190de6 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json @@ -53,7 +53,7 @@ ], "model": "Cluster Model", "nodes_count": 1, - "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 2x0GB SSD, 1 x 10Gb Ethernet", + "nodes_description": "2 CPUs Intel Xeon Silver 4216, 16 cores/CPU, 384GB RAM, 558GB HDD, 3575GB HDD, 1 x 1Gb Ethernet, 1 x 40Gb InfiniBand", "priority": 197001, "queues": [ "admin", diff --git a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json index d38e5111dfe..215e6ccb3f4 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json @@ -1,26 +1,27 @@ { "architecture": { - "cpu_core_numbering": "contiguous", - "nb_cores": 72, - "nb_procs": 1, - "nb_threads": 72, + "cpu_core_numbering": "round-robin", + "nb_cores": 32, + "nb_procs": 2, + "nb_threads": 64, "platform_type": "x86_64" }, "bios": { - "release_date": "01/01/2000", - 
"vendor": "Unknown", - "version": 1 + "release_date": "09/14/2023", + "vendor": "Dell Inc.", + "version": "2.20.1" }, - "bmc_version": "v1", + "bmc_version": "7.00", "chassis": { "manufactured_at": "1970-01-01", - "manufacturer": "Unknown", - "name": "Unknown", + "manufacturer": "Dell Inc.", + "name": "PowerEdge T640", + "serial": "61GCH73", "warranty_end": "1970-01-01" }, "exotic": false, "main_memory": { - "ram_size": 8 + "ram_size": 412316860416 }, "management_tools": { "bmc_vendor_tool": "ipmitool", @@ -30,29 +31,145 @@ }, "memory_devices": [ { - "device": "dimm_proc 1 dimm 1", - "size": 8, + "device": "dimm_a1", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_a2", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_a3", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_a4", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_a5", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_a6", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_b1", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_b2", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_b3", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_b4", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_b5", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_b6", + "size": 34359738368, "technology": "dram" } ], "network_adapters": [ { "device": "eth0", - "driver": "mlx_core", + "driver": "bnxt_en", "enabled": true, + "firmware_version": "227.0.134.0/pkg 22.71.11.13", "interface": "Ethernet", "ip": "172.16.131.33", "ip6": "2001:660:4406:800:4::21", "kavlan": false, "mac": "70:b5:e8:e1:41:14", "management": false, + "model": "BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet Controller", "mountable": true, "mounted": true, "name": "enp1s0f0np0", "network_address": 
"esterel26-1.sophia.grid5000.fr", - "rate": 10000000000, + "rate": 1000000000, + "sriov": false, + "sriov_totalvfs": 0, "switch": "sw-2", - "switch_port": "1/1/17" + "switch_port": "1/1/17", + "vendor": "Broadcom Inc. and subsidiaries" + }, + { + "device": "eth1", + "driver": "bnxt_en", + "enabled": false, + "firmware_version": "227.0.134.0/pkg 22.71.11.13", + "interface": "Ethernet", + "kavlan": false, + "mac": "70:b5:e8:e1:41:15", + "management": false, + "model": "BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet Controller", + "mountable": false, + "mounted": false, + "name": "eno2np1", + "sriov": false, + "sriov_totalvfs": 0, + "vendor": "Broadcom Inc. and subsidiaries" + }, + { + "device": "eth2", + "driver": "mlx4_en", + "enabled": false, + "firmware_version": "2.9.1000", + "interface": "Ethernet", + "kavlan": false, + "mac": "00:02:c9:4f:f8:c9", + "management": false, + "model": "MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter IC with PCIe 2.0 x8 5.0GT/s Interface", + "mountable": false, + "mounted": false, + "name": "enp137s0d1", + "sriov": false, + "sriov_totalvfs": 0, + "vendor": "Mellanox Technologies" + }, + { + "device": "ib0", + "driver": "mlx4_core", + "enabled": true, + "firmware_version": "2.9.1000", + "guid": "0x0002c903004ff8c9", + "interface": "InfiniBand", + "ip": "172.18.131.33", + "kavlan": false, + "mac": "00:02:c9:03:00:4f:f8:c9", + "management": false, + "model": "MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter IC with PCIe 2.0 x8 5.0GT/s Interface", + "mountable": true, + "mounted": true, + "name": "ibp137s0", + "netmask": "255.255.240.0", + "network_address": "esterel26-1-ib0.sophia.grid5000.fr", + "rate": 40000000000, + "sriov": false, + "sriov_totalvfs": 0, + "vendor": "Mellanox Technologies" }, { "device": "bmc", @@ -69,32 +186,32 @@ ], "nodeset": "esterel26", "operating_system": { - "cstate_driver": "unknown", - "cstate_governor": "unknown", + "cstate_driver": "intel_idle", + 
"cstate_governor": "menu", "ht_enabled": true, - "pstate_driver": "unknwon", - "pstate_governor": "unknown", + "pstate_driver": "intel_pstate", + "pstate_governor": "performance", "turboboost_enabled": true }, "performance": { "core_flops": 128, - "node_flops": 9216 + "node_flops": 4096 }, "processor": { "cache_l1": null, - "cache_l1d": 8, - "cache_l1i": 8, - "cache_l2": 8, - "cache_l3": 8, + "cache_l1d": 32768, + "cache_l1i": 32768, + "cache_l2": 1048576, + "cache_l3": 23068672, "clock_speed": 8, "ht_capable": true, "instruction_set": "x86-64", "microarchitecture": "Haswell", - "microcode": "0xd000001", - "model": "Unknown", - "other_description": "description", - "vendor": "vendor", - "version": "vendor" + "microcode": "0x5003707", + "model": "Intel Xeon", + "other_description": "Intel(R) Xeon(R) Silver 4216 CPU @ 2.10GHz", + "vendor": "Intel", + "version": "Silver 4216" }, "redfish": true, "software": { @@ -104,24 +221,26 @@ }, "storage_devices": [ { - "by_id": "", + "by_id": "/dev/disk/by-id/wwn-0x62cea7f07146490027ecdc8d09a0a809", "by_path": "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0", + "firmware_version": 4.3, "id": "disk0", "interface": "SAS", - "model": "unknown", - "size": 8, - "storage": "SSD", - "vendor": "Unknown" + "model": "PERC H730P Adp", + "size": 599550590976, + "storage": "HDD", + "vendor": "Dell" }, { - "by_id": "", + "by_id": "/dev/disk/by-id/wwn-0x62cea7f07146490027ecdcd10db4a35d", "by_path": "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:1:0", + "firmware_version": 4.3, "id": "disk1", "interface": "SAS", - "model": "unknown", - "size": 8, - "storage": "SSD", - "vendor": "Unknown" + "model": "PERC H730P Adp", + "size": 3838627020800, + "storage": "HDD", + "vendor": "Dell" } ], "supported_job_types": { @@ -131,7 +250,8 @@ "queues": [ "admin", "testing" - ] + ], + "virtual": "ivt" }, "type": "node", "uid": "esterel26-1" diff --git a/input/grid5000/ipv4.yaml b/input/grid5000/ipv4.yaml index 4ccf9df6705..fbe687fe55c 100644 --- 
a/input/grid5000/ipv4.yaml +++ b/input/grid5000/ipv4.yaml @@ -187,6 +187,7 @@ ipv4: sophia esterel24 eth0 0 0 3 29 sophia esterel25 eth0 0 0 3 31 sophia esterel26 eth0 0 0 3 32 + sophia esterel26 ib0 0 0 3 32 sophia esterel27 eth0 0 0 3 33 sophia esterel28 eth0 0 0 3 34 sophia esterel29 eth0 0 0 3 35 diff --git a/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml index ff1b30e4c06..c3085221933 100644 --- a/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml +++ b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml @@ -29,15 +29,26 @@ nodes: enabled: true mountable: true mounted: true + eth1: + enabled: false + mountable: false + mounted: false + eth2: + enabled: false + mountable: false + mounted: false + ib0: + mounted: true + enabled: true + mountable: true + netmask: 255.255.240.0 storage_devices: - disk0: # This field will have to be renamed later. + pci-0000:19:00.0-scsi-0:2:0:0: # This field will have to be renamed later. 
id: disk0 interface: SAS - by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0" - disk1: + pci-0000:19:00.0-scsi-0:2:1:0: id: disk1 interface: SAS - by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:1:0" software: standard-environment: debian11-x64-std # TODO: check that architecture is OK management_tools: diff --git a/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb deleted file mode 100644 index 5638a493927..00000000000 --- a/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb +++ /dev/null @@ -1,84 +0,0 @@ -<% - # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-new-final.csv - # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' - - cluster_name = "esterel26" - nodes_number = 1 # Size of the cluster (number of nodes) - # MAC addresses declaration - mac_eth0_list = %w( -70:b5:e8:e1:41:14 - ) - mac_bmc_list = %w( -70:b5:e8:e1:41:24 - ) -%> ---- -nodes: -<% (1..nodes_number).each { |i| %> - <%= cluster_name %>-<%= i %>: - architecture: - nb_procs: 1 # Fake data, will be replaced by g5k-checks - nb_cores: 72 # Fake data, will be replaced by g5k-checks - nb_threads: 72 # Fake data, will be replaced by g5k-checks - platform_type: x86_64 # Fake data, will be replaced by g5k-checks - cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks - bios: - release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks - vendor: Unknown # Fake vendor, will be replaced by g5k-checks - version: 1 # Fake version, will be replaced by g5k-checks - bmc_version: v1 # Fake version, will be replaced by g5k-checks - chassis: - manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks - name: Unknown # Fake name, will be replaced by g5k-checks - main_memory: - ram_size: 8 # Fake size, will be replaced by g5k-checks - memory_devices: - dimm: - size: 8 # Fake size, will be 
replaced by g5k-checks - technology: dram # Common memory technology, will be replaced by g5k-checks - processor: - model: Unknown # Fake model name, will be replaced by g5k-checks - other_description: description # Fake description, will be replaced by g5k-checks - vendor: vendor # Fake vendor, will be replaced by g5k-checks - version: vendor # Fake version, will be replaced by g5k-checks - cache_l1d: 8 # Fake cache, will be replaced by g5k-checks - cache_l1i: 8 # Fake cache, will be replaced by g5k-checks - cache_l2: 8 # Fake cache, will be replaced by g5k-checks - cache_l3: 8 # Fake cache, will be replaced by g5k-checks - instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks - microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks - ht_capable: true # Default ht capable value, will be replaced by g5k-checks - main_memory: - ram_size: 8 # Fake ram size, will be replaced by g5k-checks - memory_devices: - dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks - size: 8 # Fake dimm size, will be replaced by g5k-checks - technology: dram # Default dimm technology, will be replaced by g5k-checks - operating_system: - cstate_driver: unknown # Fake driver, will be replaced by g5k-checks - cstate_governor: unknown # Fake governor, will be replaced by g5k-checks - ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks - pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks - pstate_governor: unknown # Fake driver, will be replaced by g5k-checks - turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks - network_adapters: - bmc: - management: true - mac: <%= mac_bmc_list[i - 1] %> - eth0: - interface: Ethernet - management: false - driver: mlx_core # Fake data, will be replaced by g5k-checks - name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks - rate: 10000000000 # Fake data, will be replaced by g5k-checks - mac: <%= 
mac_eth0_list[i - 1] %> - storage_devices: - disk0: - storage: SSD # Fake data, will be replaced by g5k-checks - model: unknown # Fake data, will be replaced by g5k-checks - size: 8 # Fake data, will be replaced by g5k-checks - disk1: - storage: SSD # Fake data, will be replaced by g5k-checks - model: unknown # Fake data, will be replaced by g5k-checks - size: 8 # Fake data, will be replaced by g5k-checks -<% } %> diff --git a/input/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.yaml b/input/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.yaml new file mode 100644 index 00000000000..5b5bd8a769b --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.yaml @@ -0,0 +1,148 @@ +# Generated by g5k-checks (g5k-checks -m api) +--- +esterel26-1: + architecture: + cpu_core_numbering: round-robin + nb_cores: 32 + nb_procs: 2 + nb_threads: 64 + platform_type: x86_64 + bios: + release_date: '09/14/2023' + vendor: Dell Inc. + version: 2.20.1 + bmc_version: '7.00' + chassis: + manufacturer: Dell Inc. 
+ name: PowerEdge T640 + serial: 61GCH73 + main_memory: + ram_size: 412316860416 + memory_devices: + dimm_a1: + size: 34359738368 + technology: dram + dimm_a2: + size: 34359738368 + technology: dram + dimm_a3: + size: 34359738368 + technology: dram + dimm_a4: + size: 34359738368 + technology: dram + dimm_a5: + size: 34359738368 + technology: dram + dimm_a6: + size: 34359738368 + technology: dram + dimm_b1: + size: 34359738368 + technology: dram + dimm_b2: + size: 34359738368 + technology: dram + dimm_b3: + size: 34359738368 + technology: dram + dimm_b4: + size: 34359738368 + technology: dram + dimm_b5: + size: 34359738368 + technology: dram + dimm_b6: + size: 34359738368 + technology: dram + network_adapters: + bmc: + ip: 172.17.131.33 + mac: 70:b5:e8:e1:41:24 + management: true + eth0: + driver: bnxt_en + firmware_version: 227.0.134.0/pkg 22.71.11.13 + interface: Ethernet + ip: 172.16.131.33 + mac: 70:b5:e8:e1:41:14 + management: false + model: BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet Controller + name: enp1s0f0np0 + rate: 1000000000 + sriov: false + sriov_totalvfs: 0 + vendor: Broadcom Inc. and subsidiaries + eth1: + driver: bnxt_en + firmware_version: 227.0.134.0/pkg 22.71.11.13 + interface: Ethernet + mac: 70:b5:e8:e1:41:15 + management: false + model: BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet Controller + name: eno2np1 + sriov: false + sriov_totalvfs: 0 + vendor: Broadcom Inc. 
and subsidiaries + eth2: + driver: mlx4_en + firmware_version: 2.9.1000 + interface: Ethernet + mac: 00:02:c9:4f:f8:c9 + management: false + model: MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter + IC with PCIe 2.0 x8 5.0GT/s Interface + name: enp137s0d1 + sriov: false + sriov_totalvfs: 0 + vendor: Mellanox Technologies + ib0: + driver: mlx4_core + firmware_version: 2.9.1000 + guid: '0x0002c903004ff8c9' + interface: InfiniBand + mac: 00:02:c9:03:00:4f:f8:c9 + management: false + model: MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter + IC with PCIe 2.0 x8 5.0GT/s Interface + name: ibp137s0 + rate: 40000000000 + sriov: false + sriov_totalvfs: 0 + vendor: Mellanox Technologies + operating_system: + cstate_driver: intel_idle + cstate_governor: menu + ht_enabled: true + pstate_driver: intel_pstate + pstate_governor: performance + turboboost_enabled: true + processor: + cache_l1d: 32768 + cache_l1i: 32768 + cache_l2: 1048576 + cache_l3: 23068672 + ht_capable: true + instruction_set: x86-64 + microcode: '0x5003707' + model: Intel Xeon + other_description: Intel(R) Xeon(R) Silver 4216 CPU @ 2.10GHz + vendor: Intel + version: Silver 4216 + storage_devices: + pci-0000:19:00.0-scsi-0:2:0:0: + by_id: "/dev/disk/by-id/wwn-0x62cea7f07146490027ecdc8d09a0a809" + by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0" + firmware_version: 4.3 + model: PERC H730P Adp + size: 599550590976 + storage: HDD + pci-0000:19:00.0-scsi-0:2:1:0: + by_id: "/dev/disk/by-id/wwn-0x62cea7f07146490027ecdcd10db4a35d" + by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:1:0" + firmware_version: 4.3 + model: PERC H730P Adp + size: 3838627020800 + storage: HDD + supported_job_types: + virtual: ivt diff --git a/lib/refrepo/net_names_mapping.yaml b/lib/refrepo/net_names_mapping.yaml index a80a7446cf3..99174f5a553 100644 --- a/lib/refrepo/net_names_mapping.yaml +++ b/lib/refrepo/net_names_mapping.yaml @@ -420,6 +420,11 @@ esterel10: enp1s0f0: eth0 
enp1s0f1: eth1 ibp130s0: ib0 +esterel26: + enp1s0f0np0: eth0 + eno2np1: eth1 + enp137s0d1: eth2 + ibp137s0: ib0 esterel41: enp1s0f0np0: eth0 ens15f1: eth1 -- GitLab From aa72ca5b2a6d6c5353a146c71bb537f5343985a3 Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Mon, 24 Mar 2025 13:54:54 +0100 Subject: [PATCH 6/8] [sophia][esterel26] Add cluster information --- .../sophia/clusters/esterel26/esterel26.json | 4 +-- .../clusters/esterel26/nodes/esterel26-1.json | 14 +++++------ input/grid5000/dell-product-data.yaml | 6 +++++ .../sophia/clusters/esterel26/esterel26.yaml | 25 ++++++++----------- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json index 540da190de6..74980ff3f01 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json @@ -1,6 +1,6 @@ { "boot_type": "uefi", - "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "created_at": "Tue, 25 Mar 2025 00:00:00 GMT", "exotic": false, "kavlan": false, "manufactured_at": "1970-01-01", @@ -51,7 +51,7 @@ } } ], - "model": "Cluster Model", + "model": "Dell PowerEdge T640", "nodes_count": 1, "nodes_description": "2 CPUs Intel Xeon Silver 4216, 16 cores/CPU, 384GB RAM, 558GB HDD, 3575GB HDD, 1 x 1Gb Ethernet, 1 x 40Gb InfiniBand", "priority": 197001, diff --git a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json index 215e6ccb3f4..d6df3f3eef9 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json @@ -24,7 +24,7 @@ "ram_size": 412316860416 }, "management_tools": { - "bmc_vendor_tool": "ipmitool", + "bmc_vendor_tool": "racadm", "ipmitool": { "retries": 5 } @@ -194,8 +194,8 @@ "turboboost_enabled": 
true }, "performance": { - "core_flops": 128, - "node_flops": 4096 + "core_flops": 33600000000, + "node_flops": 1075200000000 }, "processor": { "cache_l1": null, @@ -203,10 +203,10 @@ "cache_l1i": 32768, "cache_l2": 1048576, "cache_l3": 23068672, - "clock_speed": 8, + "clock_speed": 2100000000, "ht_capable": true, "instruction_set": "x86-64", - "microarchitecture": "Haswell", + "microarchitecture": "Cascade Lake-SP", "microcode": "0x5003707", "model": "Intel Xeon", "other_description": "Intel(R) Xeon(R) Silver 4216 CPU @ 2.10GHz", @@ -225,7 +225,7 @@ "by_path": "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0", "firmware_version": 4.3, "id": "disk0", - "interface": "SAS", + "interface": "RAID", "model": "PERC H730P Adp", "size": 599550590976, "storage": "HDD", @@ -236,7 +236,7 @@ "by_path": "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:1:0", "firmware_version": 4.3, "id": "disk1", - "interface": "SAS", + "interface": "RAID", "model": "PERC H730P Adp", "size": 3838627020800, "storage": "HDD", diff --git a/input/grid5000/dell-product-data.yaml b/input/grid5000/dell-product-data.yaml index dff68ffa4c3..f1109449749 100644 --- a/input/grid5000/dell-product-data.yaml +++ b/input/grid5000/dell-product-data.yaml @@ -2238,6 +2238,12 @@ sites: chassis: manufactured_at: 2016-05-04 warranty_end: 2021-05-04 + esterel26: + nodes: + esterel26-1: + chassis: + manufactured_at: 2020-10-30 + warranty_end: 2027-10-31 esterel4: nodes: esterel4-1: diff --git a/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml index c3085221933..548b4eb6ac0 100644 --- a/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml +++ b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml @@ -1,24 +1,21 @@ --- -model: Cluster Model # TODO: change this value. 
-created_at: 1970-01-01 # TODO: change this value +model: Dell PowerEdge T640 +created_at: 2025-03-25 kavlan: false -boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) -exotic: false # TODO: specify if 'true' or 'false' +boot_type: uefi +exotic: false queues: - admin - testing nodes: esterel26-1: - chassis: - manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. - warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. supported_job_types: deploy: true besteffort: true max_walltime: 0 processor: - microarchitecture: Haswell # TODO: replace with microarch name. - clock_speed: 8 # TODO: Replace with clock speed. + microarchitecture: Cascade Lake-SP + clock_speed: 2100000000 network_adapters: bmc: interface: Ethernet @@ -43,14 +40,14 @@ nodes: mountable: true netmask: 255.255.240.0 storage_devices: - pci-0000:19:00.0-scsi-0:2:0:0: # This field will have to be renamed later. 
+ pci-0000:19:00.0-scsi-0:2:0:0: id: disk0 - interface: SAS + interface: RAID pci-0000:19:00.0-scsi-0:2:1:0: id: disk1 - interface: SAS + interface: RAID software: - standard-environment: debian11-x64-std # TODO: check that architecture is OK + standard-environment: debian11-x64-std management_tools: - bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + bmc_vendor_tool: racadm nodeset: esterel26 -- GitLab From 34ac29d1c45c058938d8f4cf6f242b00b829d10e Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Mon, 24 Mar 2025 14:19:45 +0100 Subject: [PATCH 7/8] [sophia][esterel26] g5k-checks after firmware update --- .../sophia/clusters/esterel26/esterel26.json | 6 +++--- .../clusters/esterel26/nodes/esterel26-1.json | 20 +++++++++---------- .../clusters/esterel26/nodes/esterel26-1.yaml | 17 ++++++++-------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json index 74980ff3f01..733f18697a2 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json @@ -3,7 +3,7 @@ "created_at": "Tue, 25 Mar 2025 00:00:00 GMT", "exotic": false, "kavlan": false, - "manufactured_at": "1970-01-01", + "manufactured_at": "2020-10-30", "metrics": [ { "description": "Default subset of metrics from Prometheus Node Exporter", @@ -54,7 +54,7 @@ "model": "Dell PowerEdge T640", "nodes_count": 1, "nodes_description": "2 CPUs Intel Xeon Silver 4216, 16 cores/CPU, 384GB RAM, 558GB HDD, 3575GB HDD, 1 x 1Gb Ethernet, 1 x 40Gb InfiniBand", - "priority": 197001, + "priority": 202010, "queues": [ "admin", "testing" @@ -62,5 +62,5 @@ "redfish": true, "type": "cluster", "uid": "esterel26", - "warranty_end": "1970-01-01" + "warranty_end": "2027-10-31" } \ No newline at end of file diff --git 
a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json index d6df3f3eef9..fd4f22045de 100644 --- a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json +++ b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json @@ -7,17 +7,17 @@ "platform_type": "x86_64" }, "bios": { - "release_date": "09/14/2023", + "release_date": "01/09/2025", "vendor": "Dell Inc.", - "version": "2.20.1" + "version": "2.23.0" }, - "bmc_version": "7.00", + "bmc_version": "7.00.00.174", "chassis": { - "manufactured_at": "1970-01-01", + "manufactured_at": "2020-10-30", "manufacturer": "Dell Inc.", "name": "PowerEdge T640", "serial": "61GCH73", - "warranty_end": "1970-01-01" + "warranty_end": "2027-10-31" }, "exotic": false, "main_memory": { @@ -96,7 +96,7 @@ "device": "eth0", "driver": "bnxt_en", "enabled": true, - "firmware_version": "227.0.134.0/pkg 22.71.11.13", + "firmware_version": "231.0.153.0/pkg 23.11.16.22", "interface": "Ethernet", "ip": "172.16.131.33", "ip6": "2001:660:4406:800:4::21", @@ -119,7 +119,7 @@ "device": "eth1", "driver": "bnxt_en", "enabled": false, - "firmware_version": "227.0.134.0/pkg 22.71.11.13", + "firmware_version": "231.0.153.0/pkg 23.11.16.22", "interface": "Ethernet", "kavlan": false, "mac": "70:b5:e8:e1:41:15", @@ -163,12 +163,10 @@ "model": "MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter IC with PCIe 2.0 x8 5.0GT/s Interface", "mountable": true, "mounted": true, - "name": "ibp137s0", + "name": "ib0", "netmask": "255.255.240.0", "network_address": "esterel26-1-ib0.sophia.grid5000.fr", "rate": 40000000000, - "sriov": false, - "sriov_totalvfs": 0, "vendor": "Mellanox Technologies" }, { @@ -207,7 +205,7 @@ "ht_capable": true, "instruction_set": "x86-64", "microarchitecture": "Cascade Lake-SP", - "microcode": "0x5003707", + "microcode": "0x5003801", "model": "Intel Xeon", "other_description": "Intel(R) Xeon(R) Silver 4216 CPU 
@ 2.10GHz", "vendor": "Intel", diff --git a/input/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.yaml b/input/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.yaml index 5b5bd8a769b..0d7d8daefab 100644 --- a/input/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.yaml +++ b/input/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.yaml @@ -8,10 +8,10 @@ esterel26-1: nb_threads: 64 platform_type: x86_64 bios: - release_date: '09/14/2023' + release_date: 01/09/2025 vendor: Dell Inc. - version: 2.20.1 - bmc_version: '7.00' + version: 2.23.0 + bmc_version: 7.00.00.174 chassis: manufacturer: Dell Inc. name: PowerEdge T640 @@ -62,7 +62,7 @@ esterel26-1: management: true eth0: driver: bnxt_en - firmware_version: 227.0.134.0/pkg 22.71.11.13 + firmware_version: 231.0.153.0/pkg 23.11.16.22 interface: Ethernet ip: 172.16.131.33 mac: 70:b5:e8:e1:41:14 @@ -75,7 +75,7 @@ esterel26-1: vendor: Broadcom Inc. and subsidiaries eth1: driver: bnxt_en - firmware_version: 227.0.134.0/pkg 22.71.11.13 + firmware_version: 231.0.153.0/pkg 23.11.16.22 interface: Ethernet mac: 70:b5:e8:e1:41:15 management: false @@ -101,14 +101,13 @@ esterel26-1: firmware_version: 2.9.1000 guid: '0x0002c903004ff8c9' interface: InfiniBand + ip: 172.18.131.33 mac: 00:02:c9:03:00:4f:f8:c9 management: false model: MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter IC with PCIe 2.0 x8 5.0GT/s Interface - name: ibp137s0 + name: ib0 rate: 40000000000 - sriov: false - sriov_totalvfs: 0 vendor: Mellanox Technologies operating_system: cstate_driver: intel_idle @@ -124,7 +123,7 @@ esterel26-1: cache_l3: 23068672 ht_capable: true instruction_set: x86-64 - microcode: '0x5003707' + microcode: '0x5003801' model: Intel Xeon other_description: Intel(R) Xeon(R) Silver 4216 CPU @ 2.10GHz vendor: Intel -- GitLab From cb53be3e0ed1641ab011e4e2b4ad53575d5fe928 Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Mon, 24 Mar 2025 14:40:37 +0100 Subject: 
[PATCH 8/8] [sophia][esterel26] Remove ib0 for net_name_mapping --- lib/refrepo/net_names_mapping.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/refrepo/net_names_mapping.yaml b/lib/refrepo/net_names_mapping.yaml index 99174f5a553..6fe72981566 100644 --- a/lib/refrepo/net_names_mapping.yaml +++ b/lib/refrepo/net_names_mapping.yaml @@ -424,7 +424,6 @@ esterel26: enp1s0f0np0: eth0 eno2np1: eth1 enp137s0d1: eth2 - ibp137s0: ib0 esterel41: enp1s0f0np0: eth0 ens15f1: eth1 -- GitLab