From 73f1b74bdbeacaae611e31e15e18dec5045161f2 Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Tue, 22 Apr 2025 09:16:54 +0200 Subject: [PATCH 1/5] [lille][chiconade1] Rename chiconade into chiconade1 --- .../lille/clusters/chiconade1/chiconade1.json | 93 +++++++++++++ .../chiconade1/nodes/chiconade1-1.json | 128 ++++++++++++++++++ input/grid5000/ipv4.yaml | 1 + .../lille/clusters/chiconade1/chiconade1.yaml | 41 ++++++ .../chiconade1/chiconade1_metrics.yaml | 63 +++++++++ 5 files changed, 326 insertions(+) create mode 100644 data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json create mode 100644 data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json create mode 100644 input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml create mode 100644 input/grid5000/sites/lille/clusters/chiconade1/chiconade1_metrics.yaml diff --git a/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json b/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json new file mode 100644 index 00000000000..bb4e37e0f1a --- /dev/null +++ b/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "chiconade1", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json new file mode 100644 index 00000000000..bfcb08c99a2 --- /dev/null +++ b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.34.1", + "ip6": "2001:660:4406:200:3::1", + "kavlan": false, + "mac": "8c:84:74:99:a2:82", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chiconade1-1.lille.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.34.1", + "kavlan": false, + "mac": "7c:a6:2a:69:9a:b6", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chiconade1-1-bmc.lille.grid5000.fr" + } + ], + "nodeset": "chiconade1", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025040409", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chiconade1-1" +} \ No newline at end of file diff --git a/input/grid5000/ipv4.yaml b/input/grid5000/ipv4.yaml index 89fdc40b914..22e3f4b0e3b 100644 --- a/input/grid5000/ipv4.yaml +++ b/input/grid5000/ipv4.yaml @@ -65,6 +65,7 @@ ipv4: lille chirop eth1 0 0 1 100 lille chuc eth0 0 0 0 0 lille chuc eth1 0 0 0 100 + lille chiconade1 eth0 0 0 2 0 luxembourg petitprince eth0 0 0 1 0 luxembourg petitprince eth1 0 0 1 100 luxembourg vianden eth0 0 0 2 0 diff --git a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml new file mode 100644 index 00000000000..297930ad91f --- /dev/null +++ b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + chiconade1-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: chiconade1 diff --git a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1_metrics.yaml b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1_metrics.yaml new file mode 100644 index 00000000000..0406ead1267 --- /dev/null +++ b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From bf409cbe867b1907ceb31259bb9388026cdf6614 Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Thu, 24 Apr 2025 10:41:08 +0200 Subject: [PATCH 2/5] [lille][chiconade1] Add disk by-path --- .../sites/lille/clusters/chiconade1/nodes/chiconade1-1.json | 2 +- input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json index bfcb08c99a2..300fe76794c 100644 --- a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json +++ b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json @@ -105,7 +105,7 @@ "storage_devices": [ { "by_id": "", - "by_path": "/dev/disk/by-path/dummy", + "by_path": "/dev/disk/by-path/pci-0000:9b:00.0-nvme-1", "id": "disk0", "interface": "SAS", "model": "unknown", diff --git a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml index 297930ad91f..e621173d10a 100644 --- a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml +++ b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml @@ -33,7 +33,7 @@ nodes: disk0: # This field will have to be renamed later. id: disk0 interface: SAS - by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + by_path: "/dev/disk/by-path/pci-0000:9b:00.0-nvme-1" # this path will have to change later. software: standard-environment: debian11-x64-std # TODO: check that architecture is OK management_tools: -- GitLab From 39a4c1f4ac2de0b27cc846ccddc277fef585e9ca Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Fri, 25 Apr 2025 09:59:24 +0200 Subject: [PATCH 3/5] [lille][chiconade1] Add net name mapping and eth1 interface --- .../lille/clusters/chiconade1/nodes/chiconade1-1.json | 7 +++++++ input/grid5000/ipv4.yaml | 1 + .../sites/lille/clusters/chiconade1/chiconade1.yaml | 4 ++++ lib/refrepo/net_names_mapping.yaml | 3 +++ 4 files changed, 15 insertions(+) diff --git a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json index 300fe76794c..a4f7a21e594 100644 --- a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json +++ b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json @@ -54,6 +54,13 @@ "switch": null, "switch_port": null }, + { + "device": "eth1", + "enabled": true, + "kavlan": false, + "mounted": false, + "moutable": true + }, { "device": "bmc", "enabled": true, diff --git a/input/grid5000/ipv4.yaml b/input/grid5000/ipv4.yaml index 22e3f4b0e3b..db463772c3a 100644 --- a/input/grid5000/ipv4.yaml +++ b/input/grid5000/ipv4.yaml @@ -66,6 +66,7 @@ ipv4: lille chuc eth0 0 0 0 0 lille chuc eth1 0 0 0 100 lille chiconade1 eth0 0 0 2 0 + lille chiconade1 eth1 0 0 2 100 luxembourg petitprince eth0 0 0 1 0 luxembourg petitprince eth1 0 0 1 100 luxembourg vianden eth0 0 0 2 0 diff --git a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml index e621173d10a..ea2b3924a84 100644 --- a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml +++ b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml @@ -29,6 +29,10 @@ nodes: enabled: true mountable: true mounted: true + eth1: + enabled: true + moutable: true + mounted: false storage_devices: disk0: # This field will have to be renamed later. id: disk0 diff --git a/lib/refrepo/net_names_mapping.yaml b/lib/refrepo/net_names_mapping.yaml index 9dad791f8dc..241ab261db1 100644 --- a/lib/refrepo/net_names_mapping.yaml +++ b/lib/refrepo/net_names_mapping.yaml @@ -107,6 +107,9 @@ chirop: chuc: ens15f0np0: eth0 ens15f1np1: eth1 +chiconade1: + enp1s0f0np0: eth0 + ens14f1np1: eth1 dahu: enp24s0f0: eth0 enp24s0f1: eth1 -- GitLab From e13d572e4d0b7d866a0c1adf1815bab1746b2e8d Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Fri, 25 Apr 2025 11:02:41 +0200 Subject: [PATCH 4/5] [lille][chiconade1] g5k-checks import --- .../lille/clusters/chiconade1/chiconade1.json | 12 +- .../chiconade1/nodes/chiconade1-1.json | 137 +++++++++++++----- input/grid5000/disks.yaml | 1 + .../lille/clusters/chiconade1/chiconade1.yaml | 27 ++-- .../chiconade1/nodes/chiconade1-1.yaml | 100 +++++++++++++ 5 files changed, 217 insertions(+), 60 deletions(-) create mode 100644 input/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.yaml diff --git a/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json b/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json index bb4e37e0f1a..77be3bc5112 100644 --- a/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json +++ b/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json @@ -1,9 +1,9 @@ { "boot_type": "uefi", - "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "created_at": "Fri, 25 Apr 2025 00:00:00 GMT", "exotic": false, "kavlan": false, - "manufactured_at": "1970-01-01", + "manufactured_at": "2025-04-03", "metrics": [ { "description": "Default subset of metrics from Prometheus Node Exporter", @@ -78,10 +78,10 @@ } } ], - "model": "Cluster Model", + "model": "ProLiant DL320 Gen11", "nodes_count": 1, - "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", - "priority": 197001, + "nodes_description": "1 CPU Intel Xeon Gold 5420+, 28 cores/CPU, 1 GPU L40S, 128GB RAM, 745GB SSD, 2 x 25Gb Ethernet", + "priority": 202604, "queues": [ "admin", "testing" @@ -89,5 +89,5 @@ "redfish": true, "type": "cluster", "uid": "chiconade1", - "warranty_end": "1970-01-01" + "warranty_end": "2028-04-09" } \ No newline at end of file diff --git a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json index a4f7a21e594..5897a83bb62 100644 --- a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json +++ b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json @@ -1,26 +1,46 @@ { "architecture": { "cpu_core_numbering": "contiguous", - "nb_cores": 72, + "nb_cores": 28, "nb_procs": 1, - "nb_threads": 72, + "nb_threads": 56, "platform_type": "x86_64" }, "bios": { - "release_date": "01/01/2000", - "vendor": "Unknown", - "version": 1 + "release_date": "03/11/2025", + "vendor": "HPE", + "version": 2.48 }, - "bmc_version": "v1", + "bmc_version": "1.67", "chassis": { - "manufactured_at": "1970-01-01", - "manufacturer": "Unknown", - "name": "Unknown", - "warranty_end": "1970-01-01" + "manufactured_at": "2025-04-03", + "manufacturer": "HPE", + "name": "ProLiant DL320 Gen11", + "serial": "CZ2D2J07GW", + "warranty_end": "2028-04-09" }, "exotic": false, + "gpu_devices": { + "nvidia0": { + "compute_capability": "8.9", + "cores": 18176, + "cpu_affinity": 0, + "device": "/dev/nvidia0", + "memory": 48305799168, + "microarchitecture": "Ada Lovelace", + "model": "L40S", + "performance": { + "fp-16": 91610000000000, + "fp-32": 91610000000000, + "fp-64": 1431000000000 + }, + "power_default_limit": "350.00 W", + "vbios_version": "95.02.66.00.15", + "vendor": "Nvidia" + } + }, "main_memory": { - "ram_size": 8 + "ram_size": 137438953472 }, "management_tools": { "bmc_vendor_tool": "ipmitool", @@ -30,36 +50,72 @@ }, "memory_devices": [ { - "device": "dimm_proc 1 dimm 1", - "size": 8, + "device": "dimm_proc 1 dimm 10", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_proc 1 dimm 14", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_proc 1 dimm 3", + "size": 34359738368, + "technology": "dram" + }, + { + "device": "dimm_proc 1 dimm 7", + "size": 34359738368, "technology": "dram" } ], "network_adapters": [ { "device": "eth0", - "driver": "mlx_core", + "driver": "bnxt_en", "enabled": true, + "firmware_version": "230.0.168.0/pkg 230.1.123.0", "interface": "Ethernet", "ip": "172.16.34.1", "ip6": "2001:660:4406:200:3::1", "kavlan": false, "mac": "8c:84:74:99:a2:82", "management": false, + "model": "BCM57414 NetXtreme-E 10Gb/25Gb RDMA Ethernet Controller", "mountable": true, "mounted": true, "name": "enp1s0f0np0", "network_address": "chiconade1-1.lille.grid5000.fr", - "rate": 10000000000, + "rate": 25000000000, + "sriov": false, + "sriov_totalvfs": 0, "switch": null, - "switch_port": null + "switch_port": null, + "vendor": "Broadcom Inc. and subsidiaries" }, { "device": "eth1", + "driver": "bnxt_en", "enabled": true, + "firmware_version": "230.0.168.0/pkg 230.1.123.0", + "interface": "Ethernet", + "ip": "172.16.34.101", + "ip6": "2001:660:4406:200:3:1:0:1", "kavlan": false, + "mac": "8c:84:74:99:a2:83", + "management": false, + "model": "BCM57414 NetXtreme-E 10Gb/25Gb RDMA Ethernet Controller", + "mountable": true, "mounted": false, - "moutable": true + "name": "ens14f1np1", + "network_address": "chiconade1-1-eth1.lille.grid5000.fr", + "rate": 25000000000, + "sriov": false, + "sriov_totalvfs": 0, + "switch": null, + "switch_port": null, + "vendor": "Broadcom Inc. and subsidiaries" }, { "device": "bmc", @@ -76,32 +132,32 @@ ], "nodeset": "chiconade1", "operating_system": { - "cstate_driver": "unknown", - "cstate_governor": "unknown", + "cstate_driver": "intel_idle", + "cstate_governor": "menu", "ht_enabled": true, - "pstate_driver": "unknwon", - "pstate_governor": "unknown", + "pstate_driver": "intel_pstate", + "pstate_governor": "performance", "turboboost_enabled": true }, "performance": { - "core_flops": 128, - "node_flops": 9216 + "core_flops": 64000000000, + "node_flops": 1792000000000 }, "processor": { "cache_l1": null, - "cache_l1d": 8, - "cache_l1i": 8, - "cache_l2": 8, - "cache_l3": 8, - "clock_speed": 8, + "cache_l1d": 49152, + "cache_l1i": 32768, + "cache_l2": 2097152, + "cache_l3": 55050240, + "clock_speed": 2000000000, "ht_capable": true, "instruction_set": "x86-64", - "microarchitecture": "Haswell", - "microcode": "0xd000001", - "model": "Unknown", - "other_description": "description", - "vendor": "vendor", - "version": "vendor" + "microarchitecture": "Sapphire Rapids", + "microcode": "0x2b000620", + "model": "Intel Xeon", + "other_description": "Intel(R) Xeon(R) Gold 5420+", + "vendor": "Intel", + "version": "Gold 5420+" }, "redfish": true, "software": { @@ -111,14 +167,14 @@ }, "storage_devices": [ { - "by_id": "", + "by_id": "/dev/disk/by-id/nvme-eui.ace42e004531e419", "by_path": "/dev/disk/by-path/pci-0000:9b:00.0-nvme-1", "id": "disk0", - "interface": "SAS", - "model": "unknown", - "size": 8, + "interface": "NVME", + "model": "MO000800KXPRV", + "size": 800166076416, "storage": "SSD", - "vendor": "Unknown" + "vendor": "HP" } ], "supported_job_types": { @@ -128,7 +184,8 @@ "queues": [ "admin", "testing" - ] + ], + "virtual": "ivt" }, "type": "node", "uid": "chiconade1-1" diff --git a/input/grid5000/disks.yaml b/input/grid5000/disks.yaml index 8e7b8f25ae8..95118476df2 100644 --- a/input/grid5000/disks.yaml +++ b/input/grid5000/disks.yaml @@ -164,6 +164,7 @@ disk_vendor_model_mapping: - Dell Ent NVMe PM1735a MU 3.2TB HP: - VO001920KYDMT + - MO000800KXPRV Samsung: - SAMSUNG MZ1L2960HCJR-00A07 - SAMSUNG MZ1LB1T9HALS-00007 diff --git a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml index ea2b3924a84..bfa9c28709e 100644 --- a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml +++ b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1.yaml @@ -1,24 +1,24 @@ --- -model: Cluster Model # TODO: change this value. -created_at: 1970-01-01 # TODO: change this value +model: ProLiant DL320 Gen11 +created_at: 2025-04-25 kavlan: false -boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) -exotic: false # TODO: specify if 'true' or 'false' +boot_type: uefi +exotic: false queues: - admin - testing nodes: chiconade1-1: chassis: - manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. - warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + manufactured_at: 2025-04-03 + warranty_end: 2028-04-09 supported_job_types: deploy: true besteffort: true max_walltime: 0 processor: - microarchitecture: Haswell # TODO: replace with microarch name. - clock_speed: 8 # TODO: Replace with clock speed. + microarchitecture: Sapphire Rapids + clock_speed: 2000000000 network_adapters: bmc: interface: Ethernet @@ -31,15 +31,14 @@ nodes: mounted: true eth1: enabled: true - moutable: true + mountable: true mounted: false storage_devices: - disk0: # This field will have to be renamed later. + pci-0000:9b:00.0-nvme-1: id: disk0 - interface: SAS - by_path: "/dev/disk/by-path/pci-0000:9b:00.0-nvme-1" # this path will have to change later. + interface: NVME software: - standard-environment: debian11-x64-std # TODO: check that architecture is OK + standard-environment: debian11-x64-std management_tools: - bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + bmc_vendor_tool: ipmitool nodeset: chiconade1 diff --git a/input/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.yaml b/input/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.yaml new file mode 100644 index 00000000000..a2eb56defa0 --- /dev/null +++ b/input/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.yaml @@ -0,0 +1,100 @@ +# Generated by g5k-checks (g5k-checks -m api) +--- +chiconade1-1: + architecture: + cpu_core_numbering: contiguous + nb_cores: 28 + nb_procs: 1 + nb_threads: 56 + platform_type: x86_64 + bios: + release_date: 03/11/2025 + vendor: HPE + version: 2.48 + bmc_version: '1.67' + chassis: + manufacturer: HPE + name: ProLiant DL320 Gen11 + serial: CZ2D2J07GW + gpu_devices: + nvidia0: + cpu_affinity: 0 + device: "/dev/nvidia0" + memory: 48305799168 + model: L40S + power_default_limit: 350.00 W + vbios_version: 95.02.66.00.15 + vendor: Nvidia + main_memory: + ram_size: 137438953472 + memory_devices: + dimm_proc 1 dimm 10: + size: 34359738368 + technology: dram + dimm_proc 1 dimm 14: + size: 34359738368 + technology: dram + dimm_proc 1 dimm 3: + size: 34359738368 + technology: dram + dimm_proc 1 dimm 7: + size: 34359738368 + technology: dram + network_adapters: + bmc: + ip: 172.17.34.1 + mac: 7c:a6:2a:69:9a:b6 + management: true + eth0: + driver: bnxt_en + firmware_version: 230.0.168.0/pkg 230.1.123.0 + interface: Ethernet + ip: 172.16.34.1 + mac: 8c:84:74:99:a2:82 + management: false + model: BCM57414 NetXtreme-E 10Gb/25Gb RDMA Ethernet Controller + name: enp1s0f0np0 + rate: 25000000000 + sriov: false + sriov_totalvfs: 0 + vendor: Broadcom Inc. and subsidiaries + eth1: + driver: bnxt_en + firmware_version: 230.0.168.0/pkg 230.1.123.0 + interface: Ethernet + mac: 8c:84:74:99:a2:83 + management: false + model: BCM57414 NetXtreme-E 10Gb/25Gb RDMA Ethernet Controller + name: ens14f1np1 + rate: 25000000000 + sriov: false + sriov_totalvfs: 0 + vendor: Broadcom Inc. and subsidiaries + operating_system: + cstate_driver: intel_idle + cstate_governor: menu + ht_enabled: true + pstate_driver: intel_pstate + pstate_governor: performance + turboboost_enabled: true + processor: + cache_l1d: 49152 + cache_l1i: 32768 + cache_l2: 2097152 + cache_l3: 55050240 + ht_capable: true + instruction_set: x86-64 + microcode: '0x2b000620' + model: Intel Xeon + other_description: Intel(R) Xeon(R) Gold 5420+ + vendor: Intel + version: Gold 5420+ + storage_devices: + pci-0000:9b:00.0-nvme-1: + by_id: "/dev/disk/by-id/nvme-eui.ace42e004531e419" + by_path: "/dev/disk/by-path/pci-0000:9b:00.0-nvme-1" + model: MO000800KXPRV + size: 800166076416 + storage: SSD + supported_job_types: + virtual: ivt -- GitLab From 9d60dcadc122ea4066f28914ec80242f6cd2158a Mon Sep 17 00:00:00 2001 From: Hugo Dominois <hugo.dominois@inria.fr> Date: Fri, 25 Apr 2025 11:38:23 +0200 Subject: [PATCH 5/5] [lille][chiconade1] Add network description --- .../lille/clusters/chiconade1/nodes/chiconade1-1.json | 2 +- .../sites/lille/network_equipments/sw-chiclet-1.json | 8 ++++++++ input/grid5000/sites/lille/networks/sw-chiclet-1.yaml | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json index 5897a83bb62..34d5036eb1d 100644 --- a/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json +++ b/data/grid5000/sites/lille/clusters/chiconade1/nodes/chiconade1-1.json @@ -162,7 +162,7 @@ "redfish": true, "software": { "forced-deployment-timestamp": 202007300948, - "postinstall-version": "1.2025040409", + "postinstall-version": "1.2025042314", "standard-environment": "debian11-x64-std" }, "storage_devices": [ diff --git a/data/grid5000/sites/lille/network_equipments/sw-chiclet-1.json b/data/grid5000/sites/lille/network_equipments/sw-chiclet-1.json index 1063ed79c7a..28fa56e65fb 100644 --- a/data/grid5000/sites/lille/network_equipments/sw-chiclet-1.json +++ b/data/grid5000/sites/lille/network_equipments/sw-chiclet-1.json @@ -205,8 +205,16 @@ "uid": "chiclet-8" }, { + "kind": "node", + "port": "eth0", + "snmp_name": "Ethernet1/33", + "uid": "chiconade1-1" }, { + "kind": "node", + "port": "eth1", + "snmp_name": "Ethernet1/34", + "uid": "chiconade1-1" }, { }, diff --git a/input/grid5000/sites/lille/networks/sw-chiclet-1.yaml b/input/grid5000/sites/lille/networks/sw-chiclet-1.yaml index 73758dddf15..e2a5b114109 100644 --- a/input/grid5000/sites/lille/networks/sw-chiclet-1.yaml +++ b/input/grid5000/sites/lille/networks/sw-chiclet-1.yaml @@ -126,6 +126,10 @@ sw-chiclet-1: uid: chiclet-7-eth1 32: uid: chiclet-8-eth1 + 33: + uid: chiconade1-1-eth0 + 34: + uid: chiconade1-1-eth1 53: uid: port-channel1 kind: channel -- GitLab