From 441c42dadf4c5d5adc4079c2489e47022d1e8371 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:18:54 +0100 Subject: [PATCH 01/27] [ipv4] grenoble - Add chartreuse[1-7] and vercors[1-21] clusters --- input/grid5000/ipv4.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/input/grid5000/ipv4.yaml b/input/grid5000/ipv4.yaml index 86717b8ceee..a2d558462f2 100644 --- a/input/grid5000/ipv4.yaml +++ b/input/grid5000/ipv4.yaml @@ -30,7 +30,34 @@ ipv4: grenoble kinovis eth0 0 0 12 0 grenoble kinovis eth1 0 0 12 6 grenoble kinovis eth2 0 0 12 12 + grenoble chartreuse1 eth0 0 0 10 0 grenoble chartreuse2 eth0 0 0 10 4 + grenoble chartreuse3 eth0 0 0 10 8 + grenoble chartreuse4 eth0 0 0 10 12 + grenoble chartreuse5 eth0 0 0 10 16 + grenoble chartreuse6 eth0 0 0 10 24 + grenoble chartreuse7 eth0 0 0 10 28 + grenoble vercors1 eth0 0 0 11 0 + grenoble vercors2 eth0 0 0 11 2 + grenoble vercors3 eth0 0 0 11 4 + grenoble vercors4 eth0 0 0 11 6 + grenoble vercors5 eth0 0 0 11 9 + grenoble vercors6 eth0 0 0 11 10 + grenoble vercors7 eth0 0 0 11 12 + grenoble vercors8 eth0 0 0 11 14 + grenoble vercors9 eth0 0 0 11 16 + grenoble vercors10 eth0 0 0 11 17 + grenoble vercors11 eth0 0 0 11 19 + grenoble vercors12 eth0 0 0 11 21 + grenoble vercors13 eth0 0 0 11 22 + grenoble vercors14 eth0 0 0 11 23 + grenoble vercors15 eth0 0 0 11 28 + grenoble vercors16 eth0 0 0 11 32 + grenoble vercors17 eth0 0 0 11 34 + grenoble vercors18 eth0 0 0 11 37 + grenoble vercors19 eth0 0 0 11 39 + grenoble vercors20 eth0 0 0 11 40 + grenoble vercors21 eth0 0 0 11 43 lille chifflot eth0 0 0 4 0 lille chifflot eth1 0 0 4 100 lille chiclet eth0 0 0 7 0 -- GitLab From bc1ccaddaa2505ea7683b832bcc15eeae59eea76 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 02/27] [grenoble] Add new cluster chartreuse1 --- .../clusters/chartreuse1/chartreuse1.json | 66 
+++++++++ .../chartreuse1/nodes/chartreuse1-1.json | 128 ++++++++++++++++++ .../chartreuse1/nodes/chartreuse1-2.json | 128 ++++++++++++++++++ .../chartreuse1/nodes/chartreuse1-3.json | 128 ++++++++++++++++++ .../chartreuse1/nodes/chartreuse1-4.json | 128 ++++++++++++++++++ .../clusters/chartreuse1/chartreuse1.yaml | 41 ++++++ .../chartreuse1/chartreuse1_metrics.yaml | 42 ++++++ .../clusters/chartreuse1/nodes.yaml.erb | 86 ++++++++++++ 8 files changed, 747 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-4.json create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1_metrics.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse1/nodes.yaml.erb diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1.json b/data/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1.json new file mode 100644 index 00000000000..0f33398f4a5 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + 
"node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 4, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "chartreuse1", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-1.json b/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-1.json new file mode 100644 index 00000000000..9d223c6623f --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": 
"ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.1", + "ip6": "2001:660:4406:100:b::1", + "kavlan": false, + "mac": "00:8c:fa:fd:6f:b6", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse1-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.1", + "kavlan": false, + "mac": "00:8c:fa:f7:3d:be", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse1-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse1", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + 
"queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse1-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-2.json b/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-2.json new file mode 100644 index 00000000000..bc18a8a9532 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.2", + "ip6": "2001:660:4406:100:b::2", + "kavlan": false, + "mac": "00:8c:fa:fe:97:4c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse1-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.2", + "kavlan": false, + "mac": "00:8c:fa:f0:88:2a", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse1-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse1", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + 
"pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse1-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-3.json b/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-3.json new file mode 100644 index 00000000000..d9965c85acd --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 
1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.3", + "ip6": "2001:660:4406:100:b::3", + "kavlan": false, + "mac": "00:8c:fa:fd:7c:ac", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse1-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.3", + "kavlan": false, + "mac": "00:8c:fa:f7:58:e2", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse1-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse1", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse1-3" +} \ No newline at 
end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-4.json b/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-4.json new file mode 100644 index 00000000000..3866e584e91 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse1/nodes/chartreuse1-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.4", + "ip6": "2001:660:4406:100:b::4", + "kavlan": false, + "mac": "00:8c:fa:fe:9b:06", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse1-4.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.4", + "kavlan": false, + "mac": "00:8c:fa:f7:28:3a", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse1-4-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse1", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + 
"node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse1-4" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1.yaml new file mode 100644 index 00000000000..1626b11850b --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + chartreuse1-[1-4]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. 
+ supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: chartreuse1 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1_metrics.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1_metrics.yaml new file mode 100644 index 00000000000..3c11d0073c1 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse1/chartreuse1_metrics.yaml @@ -0,0 +1,42 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + 
description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse1/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/chartreuse1/nodes.yaml.erb new file mode 100644 index 00000000000..15e1a3bad45 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse1/nodes.yaml.erb @@ -0,0 +1,86 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "chartreuse1" + nodes_number = 4 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +00:8c:fa:fd:6f:b6 +00:8c:fa:fe:97:4c +00:8c:fa:fd:7c:ac +00:8c:fa:fe:9b:06 + ) + mac_bmc_list = %w( +00:8c:fa:f7:3d:be +00:8c:fa:f0:88:2a +00:8c:fa:f7:58:e2 +00:8c:fa:f7:28:3a + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be 
replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be 
replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 389c98a0fa6e5abb42072bec217677d3b3718f7d Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 03/27] [grenoble] Add new cluster chartreuse3 --- .../clusters/chartreuse3/chartreuse3.json | 66 +++++++++ .../chartreuse3/nodes/chartreuse3-1.json | 128 ++++++++++++++++++ .../chartreuse3/nodes/chartreuse3-2.json | 128 ++++++++++++++++++ .../chartreuse3/nodes/chartreuse3-3.json | 128 ++++++++++++++++++ .../chartreuse3/nodes/chartreuse3-4.json | 128 ++++++++++++++++++ .../clusters/chartreuse3/chartreuse3.yaml | 41 ++++++ .../chartreuse3/chartreuse3_metrics.yaml | 42 ++++++ .../clusters/chartreuse3/nodes.yaml.erb | 86 ++++++++++++ 8 files changed, 747 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-4.json create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3_metrics.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse3/nodes.yaml.erb diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3.json b/data/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3.json new file mode 100644 index 00000000000..d73c0c3c128 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 
00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 4, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "chartreuse3", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-1.json b/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-1.json new file mode 100644 index 00000000000..d4981bc34d4 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, 
+ "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.9", + "ip6": "2001:660:4406:100:b::9", + "kavlan": false, + "mac": "00:8c:fa:fd:6f:b6", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse3-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.9", + "kavlan": false, + "mac": "00:8c:fa:fd:6f:ba", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse3-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 
202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse3-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-2.json b/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-2.json new file mode 100644 index 00000000000..0f5ddc4a084 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.10", + "ip6": "2001:660:4406:100:b::a", + "kavlan": false, + "mac": "00:8c:fa:fe:97:4c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse3-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + 
"interface": "Ethernet", + "ip": "172.17.26.10", + "kavlan": false, + "mac": "00:8c:fa:fe:97:50", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse3-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse3-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-3.json b/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-3.json new file mode 100644 index 00000000000..bdb943dabe0 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + 
"vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.11", + "ip6": "2001:660:4406:100:b::b", + "kavlan": false, + "mac": "00:8c:fa:fd:7c:ac", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse3-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.11", + "kavlan": false, + "mac": "00:8c:fa:fd:7c:b0", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse3-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + 
}, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse3-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-4.json b/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-4.json new file mode 100644 index 00000000000..f96a177ffc8 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse3/nodes/chartreuse3-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.12", + "ip6": "2001:660:4406:100:b::c", + "kavlan": false, + "mac": "00:8c:fa:fe:9b:06", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse3-4.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.12", + "kavlan": false, + "mac": "00:8c:fa:fe:9b:0a", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse3-4-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse3-4" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3.yaml new file mode 100644 index 00000000000..c6dcbe9d524 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + chartreuse3-[1-4]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: chartreuse3 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3_metrics.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3_metrics.yaml new file mode 100644 index 00000000000..3c11d0073c1 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse3/chartreuse3_metrics.yaml @@ -0,0 +1,42 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse3/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/chartreuse3/nodes.yaml.erb new file mode 100644 index 00000000000..dbbe1044924 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse3/nodes.yaml.erb @@ -0,0 +1,86 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it 
might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "chartreuse3" + nodes_number = 4 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +00:8c:fa:fd:6f:b6 +00:8c:fa:fe:97:4c +00:8c:fa:fd:7c:ac +00:8c:fa:fe:9b:06 + ) + mac_bmc_list = %w( +00:8c:fa:fd:6f:ba +00:8c:fa:fe:97:50 +00:8c:fa:fd:7c:b0 +00:8c:fa:fe:9b:0a + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by 
g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From d59f4816c268c798c5e38b0bc0c40a4343724d13 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 04/27] [grenoble] Add new cluster chartreuse4 --- .../clusters/chartreuse4/chartreuse4.json | 66 +++++++++ .../chartreuse4/nodes/chartreuse4-1.json | 128 ++++++++++++++++++ .../chartreuse4/nodes/chartreuse4-2.json 
| 128 ++++++++++++++++++ .../chartreuse4/nodes/chartreuse4-3.json | 128 ++++++++++++++++++ .../chartreuse4/nodes/chartreuse4-4.json | 128 ++++++++++++++++++ .../clusters/chartreuse4/chartreuse4.yaml | 41 ++++++ .../chartreuse4/chartreuse4_metrics.yaml | 42 ++++++ .../clusters/chartreuse4/nodes.yaml.erb | 86 ++++++++++++ 8 files changed, 747 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-4.json create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4_metrics.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse4/nodes.yaml.erb diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4.json b/data/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4.json new file mode 100644 index 00000000000..3f72924f2f5 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + 
"node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 4, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "chartreuse4", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-1.json b/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-1.json new file mode 100644 index 00000000000..ca8b3de1311 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 
8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.13", + "ip6": "2001:660:4406:100:b::d", + "kavlan": false, + "mac": "3c:fd:fe:55:59:18", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse4-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.13", + "kavlan": false, + "mac": "50:9a:4c:6c:36:c1", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse4-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse4", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse4-1" +} \ No newline at end of file diff 
--git a/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-2.json b/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-2.json new file mode 100644 index 00000000000..1cfd29c3333 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.14", + "ip6": "2001:660:4406:100:b::e", + "kavlan": false, + "mac": "3c:fd:fe:55:54:38", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse4-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.14", + "kavlan": false, + "mac": "50:9a:4c:6c:2f:ad", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse4-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse4", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + 
"processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse4-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-3.json b/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-3.json new file mode 100644 index 00000000000..90d55271e24 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + 
"enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.15", + "ip6": "2001:660:4406:100:b::f", + "kavlan": false, + "mac": "3c:fd:fe:55:2c:b8", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse4-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.15", + "kavlan": false, + "mac": "50:9a:4c:6b:6b:d5", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse4-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse4", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse4-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-4.json 
b/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-4.json new file mode 100644 index 00000000000..fda73429e34 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse4/nodes/chartreuse4-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.16", + "ip6": "2001:660:4406:100:b::10", + "kavlan": false, + "mac": "3c:fd:fe:55:5a:f8", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse4-4.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.16", + "kavlan": false, + "mac": "50:9a:4c:6c:37:c3", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse4-4-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse4", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + 
"cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse4-4" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4.yaml new file mode 100644 index 00000000000..670aefbc5ef --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + chartreuse4-[1-4]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. 
+ clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: chartreuse4 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4_metrics.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4_metrics.yaml new file mode 100644 index 00000000000..3c11d0073c1 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse4/chartreuse4_metrics.yaml @@ -0,0 +1,42 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git 
a/input/grid5000/sites/grenoble/clusters/chartreuse4/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/chartreuse4/nodes.yaml.erb new file mode 100644 index 00000000000..30420ee0a17 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse4/nodes.yaml.erb @@ -0,0 +1,86 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "chartreuse4" + nodes_number = 4 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +3c:fd:fe:55:59:18 +3c:fd:fe:55:54:38 +3c:fd:fe:55:2c:b8 +3c:fd:fe:55:5a:f8 + ) + mac_bmc_list = %w( +50:9a:4c:6c:36:c1 +50:9a:4c:6c:2f:ad +50:9a:4c:6b:6b:d5 +50:9a:4c:6c:37:c3 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake 
description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> 
-- GitLab From bf2e4657cb101d67a95ae566e86ebf61bf7c3282 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 05/27] [grenoble] Add new cluster chartreuse5 --- .../clusters/chartreuse5/chartreuse5.json | 66 +++++++++ .../chartreuse5/nodes/chartreuse5-1.json | 128 ++++++++++++++++++ .../chartreuse5/nodes/chartreuse5-2.json | 128 ++++++++++++++++++ .../chartreuse5/nodes/chartreuse5-3.json | 128 ++++++++++++++++++ .../chartreuse5/nodes/chartreuse5-4.json | 128 ++++++++++++++++++ .../chartreuse5/nodes/chartreuse5-5.json | 128 ++++++++++++++++++ .../chartreuse5/nodes/chartreuse5-6.json | 128 ++++++++++++++++++ .../chartreuse5/nodes/chartreuse5-7.json | 128 ++++++++++++++++++ .../chartreuse5/nodes/chartreuse5-8.json | 128 ++++++++++++++++++ .../clusters/chartreuse5/chartreuse5.yaml | 41 ++++++ .../chartreuse5/chartreuse5_metrics.yaml | 42 ++++++ .../clusters/chartreuse5/nodes.yaml.erb | 94 +++++++++++++ 12 files changed, 1267 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-4.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-5.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-6.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-7.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-8.json create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5.yaml create mode 100644 
input/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5_metrics.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse5/nodes.yaml.erb diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5.json b/data/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5.json new file mode 100644 index 00000000000..905ff1a394b --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 8, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + 
"type": "cluster", + "uid": "chartreuse5", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-1.json b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-1.json new file mode 100644 index 00000000000..a1e7da4f9b3 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.17", + "ip6": "2001:660:4406:100:b::11", + "kavlan": false, + "mac": "34:17:eb:e6:48:bb", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse5-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.17", + "kavlan": false, + "mac": "34:17:eb:e6:48:bd", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse5-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": 
"unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse5-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-2.json b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-2.json new file mode 100644 index 00000000000..49fd253153c --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + 
"size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.18", + "ip6": "2001:660:4406:100:b::12", + "kavlan": false, + "mac": "34:17:eb:e6:4b:2e", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse5-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.18", + "kavlan": false, + "mac": "34:17:eb:e6:4b:30", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse5-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse5-2" +} \ No newline at end of file 
diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-3.json b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-3.json new file mode 100644 index 00000000000..e29f5d660d2 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.19", + "ip6": "2001:660:4406:100:b::13", + "kavlan": false, + "mac": "34:17:eb:e6:47:a1", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse5-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.19", + "kavlan": false, + "mac": "34:17:eb:e6:47:a3", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse5-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + 
}, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse5-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-4.json b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-4.json new file mode 100644 index 00000000000..526563767d8 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", 
+ "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.20", + "ip6": "2001:660:4406:100:b::14", + "kavlan": false, + "mac": "34:17:eb:e6:4a:c2", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse5-4.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.20", + "kavlan": false, + "mac": "34:17:eb:e6:4a:c4", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse5-4-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse5-4" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-5.json 
b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-5.json new file mode 100644 index 00000000000..7bd4d734a09 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-5.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.21", + "ip6": "2001:660:4406:100:b::15", + "kavlan": false, + "mac": "34:17:eb:e7:12:6f", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse5-5.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.21", + "kavlan": false, + "mac": "34:17:eb:e7:12:71", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse5-5-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + 
"cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse5-5" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-6.json b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-6.json new file mode 100644 index 00000000000..192344564a7 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-6.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.22", + 
"ip6": "2001:660:4406:100:b::16", + "kavlan": false, + "mac": "34:17:eb:e7:0b:e5", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse5-6.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.22", + "kavlan": false, + "mac": "34:17:eb:e7:0b:e7", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse5-6-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse5-6" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-7.json b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-7.json new file mode 100644 
index 00000000000..fe6b37a612d --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-7.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.23", + "ip6": "2001:660:4406:100:b::17", + "kavlan": false, + "mac": "34:17:eb:e7:11:97", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse5-7.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.23", + "kavlan": false, + "mac": "34:17:eb:e7:11:99", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse5-7-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + 
"microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse5-7" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-8.json b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-8.json new file mode 100644 index 00000000000..4f7b6969530 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse5/nodes/chartreuse5-8.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.24", + "ip6": "2001:660:4406:100:b::18", + "kavlan": false, + "mac": "34:17:eb:e7:11:9a", + "management": false, + 
"mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse5-8.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.24", + "kavlan": false, + "mac": "34:17:eb:e7:11:9c", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse5-8-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse5-8" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5.yaml new file mode 100644 index 00000000000..8f1da0d8fb6 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5.yaml @@ 
-0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + chartreuse5-[1-8]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: chartreuse5 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5_metrics.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5_metrics.yaml new file mode 100644 index 00000000000..3c11d0073c1 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse5/chartreuse5_metrics.yaml @@ -0,0 +1,42 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse5/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/chartreuse5/nodes.yaml.erb new file mode 100644 index 00000000000..0ac5257491d --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse5/nodes.yaml.erb @@ -0,0 +1,94 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it 
might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "chartreuse5" + nodes_number = 8 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +34:17:eb:e6:48:bb +34:17:eb:e6:4b:2e +34:17:eb:e6:47:a1 +34:17:eb:e6:4a:c2 +34:17:eb:e7:12:6f +34:17:eb:e7:0b:e5 +34:17:eb:e7:11:97 +34:17:eb:e7:11:9a + ) + mac_bmc_list = %w( +34:17:eb:e6:48:bd +34:17:eb:e6:4b:30 +34:17:eb:e6:47:a3 +34:17:eb:e6:4a:c4 +34:17:eb:e7:12:71 +34:17:eb:e7:0b:e7 +34:17:eb:e7:11:99 +34:17:eb:e7:11:9c + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # 
Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 24e02b89dccb6012b643e68f242403e872e4b42d Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 06/27] [grenoble] Add new cluster chartreuse6 --- 
.../clusters/chartreuse6/chartreuse6.json | 66 +++++++++ .../chartreuse6/nodes/chartreuse6-1.json | 128 ++++++++++++++++++ .../chartreuse6/nodes/chartreuse6-2.json | 128 ++++++++++++++++++ .../chartreuse6/nodes/chartreuse6-3.json | 128 ++++++++++++++++++ .../chartreuse6/nodes/chartreuse6-4.json | 128 ++++++++++++++++++ .../clusters/chartreuse6/chartreuse6.yaml | 41 ++++++ .../chartreuse6/chartreuse6_metrics.yaml | 42 ++++++ .../clusters/chartreuse6/nodes.yaml.erb | 86 ++++++++++++ 8 files changed, 747 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-4.json create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6_metrics.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/chartreuse6/nodes.yaml.erb diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6.json b/data/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6.json new file mode 100644 index 00000000000..de32f9504df --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + 
"node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 4, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "chartreuse6", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-1.json b/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-1.json new file mode 100644 index 00000000000..8542a2cc532 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": 
{ + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.25", + "ip6": "2001:660:4406:100:b::19", + "kavlan": false, + "mac": "3c:fd:fe:59:e6:88", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse6-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.25", + "kavlan": false, + "mac": "50:9a:4c:8b:ff:0e", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse6-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse6", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + 
"max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse6-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-2.json b/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-2.json new file mode 100644 index 00000000000..a9beb1907c2 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.26", + "ip6": "2001:660:4406:100:b::1a", + "kavlan": false, + "mac": "aa:bb:cc:dd:ee:00", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse6-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.26", + "kavlan": false, + "mac": "00:11:22:33:44:00", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse6-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse6", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": 
"unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse6-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-3.json b/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-3.json new file mode 100644 index 00000000000..7d97a45bf6b --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + 
"device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.27", + "ip6": "2001:660:4406:100:b::1b", + "kavlan": false, + "mac": "3c:fd:fe:59:bf:88", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse6-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.27", + "kavlan": false, + "mac": "50:9a:4c:8c:00:7c", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse6-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse6", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": 
"chartreuse6-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-4.json b/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-4.json new file mode 100644 index 00000000000..708c7e019e4 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/chartreuse6/nodes/chartreuse6-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.26.28", + "ip6": "2001:660:4406:100:b::1c", + "kavlan": false, + "mac": "3c:fd:fe:59:bc:e8", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "chartreuse6-4.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.26.28", + "kavlan": false, + "mac": "50:9a:4c:8b:fc:ce", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "chartreuse6-4-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "chartreuse6", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + 
"performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "chartreuse6-4" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6.yaml new file mode 100644 index 00000000000..5e5b12ba6e9 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + chartreuse6-[1-4]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. 
+ supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: chartreuse6 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6_metrics.yaml b/input/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6_metrics.yaml new file mode 100644 index 00000000000..3c11d0073c1 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse6/chartreuse6_metrics.yaml @@ -0,0 +1,42 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + 
description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/grenoble/clusters/chartreuse6/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/chartreuse6/nodes.yaml.erb new file mode 100644 index 00000000000..fd1f4e15943 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/chartreuse6/nodes.yaml.erb @@ -0,0 +1,86 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "chartreuse6" + nodes_number = 4 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +3c:fd:fe:59:e6:88 +aa:bb:cc:dd:ee:00 +3c:fd:fe:59:bf:88 +3c:fd:fe:59:bc:e8 + ) + mac_bmc_list = %w( +50:9a:4c:8b:ff:0e +00:11:22:33:44:00 +50:9a:4c:8c:00:7c +50:9a:4c:8b:fc:ce + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be 
replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be 
replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 28681185e8d163db45ecbc550da8e713a1fbf762 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 07/27] [grenoble] Add new cluster vercors1 --- .../clusters/vercors1/nodes/vercors1-1.json | 128 ++++++++++++++++++ .../clusters/vercors1/nodes/vercors1-2.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors1/vercors1.json | 93 +++++++++++++ .../grenoble/clusters/vercors1/nodes.yaml.erb | 82 +++++++++++ .../grenoble/clusters/vercors1/vercors1.yaml | 41 ++++++ .../clusters/vercors1/vercors1_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors1/nodes/vercors1-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors1/nodes/vercors1-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors1/vercors1.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors1/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors1/vercors1.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors1/vercors1_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors1/nodes/vercors1-1.json b/data/grid5000/sites/grenoble/clusters/vercors1/nodes/vercors1-1.json new file mode 100644 index 00000000000..eb25fcb29f3 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors1/nodes/vercors1-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": 
"1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.1", + "ip6": "2001:660:4406:100:c::1", + "kavlan": false, + "mac": "24:6e:96:2a:48:00", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors1-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.1", + "kavlan": false, + "mac": "18:fb:7b:a8:c2:df", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors1-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors1", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": 
"Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors1-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors1/nodes/vercors1-2.json b/data/grid5000/sites/grenoble/clusters/vercors1/nodes/vercors1-2.json new file mode 100644 index 00000000000..a65e3256d62 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors1/nodes/vercors1-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.2", + "ip6": "2001:660:4406:100:c::2", + "kavlan": false, + "mac": "24:6e:96:2a:42:70", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors1-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.2", + "kavlan": false, + "mac": "18:fb:7b:a8:bf:67", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors1-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors1", + "operating_system": { + "cstate_driver": "unknown", + 
"cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors1-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors1/vercors1.json b/data/grid5000/sites/grenoble/clusters/vercors1/vercors1.json new file mode 100644 index 00000000000..4f2b60a2de5 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors1/vercors1.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + 
"node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors1", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors1/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors1/nodes.yaml.erb new file mode 100644 index 00000000000..4ee2fc9942c --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors1/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated 
by 'rake mass:create SRC=mass-create-2025-03-14.csv' + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors1" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +24:6e:96:2a:48:00 +24:6e:96:2a:42:70 + ) + mac_bmc_list = %w( +18:fb:7b:a8:c2:df +18:fb:7b:a8:bf:67 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by
g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # Common value for hyper-threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors1/vercors1.yaml b/input/grid5000/sites/grenoble/clusters/vercors1/vercors1.yaml new file mode 100644 index 00000000000..dbd15a611af --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors1/vercors1.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value.
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors1-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors1 diff --git a/input/grid5000/sites/grenoble/clusters/vercors1/vercors1_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors1/vercors1_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors1/vercors1_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM 
Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 722205fa4a5aa1c189c2d1334536bedd9012b011 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 08/27] [grenoble] Add new cluster vercors10 --- .../clusters/vercors10/nodes/vercors10-1.json | 128 ++++++++++++++++++ .../clusters/vercors10/nodes/vercors10-2.json | 128 ++++++++++++++++++ .../clusters/vercors10/vercors10.json | 93 +++++++++++++ .../clusters/vercors10/nodes.yaml.erb | 82 +++++++++++ .../clusters/vercors10/vercors10.yaml | 41 ++++++ .../clusters/vercors10/vercors10_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors10/nodes/vercors10-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors10/nodes/vercors10-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors10/vercors10.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors10/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors10/vercors10.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors10/vercors10_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors10/nodes/vercors10-1.json b/data/grid5000/sites/grenoble/clusters/vercors10/nodes/vercors10-1.json new file mode 100644 index 00000000000..4db0453e9d7 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors10/nodes/vercors10-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + 
"ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.18", + "ip6": "2001:660:4406:100:c::12", + "kavlan": false, + "mac": "ec:f4:bb:d5:a7:d8", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors10-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.18", + "kavlan": false, + "mac": "00:11:22:33:44:08", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors10-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors10", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + 
"besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors10-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors10/nodes/vercors10-2.json b/data/grid5000/sites/grenoble/clusters/vercors10/nodes/vercors10-2.json new file mode 100644 index 00000000000..b2991cd7087 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors10/nodes/vercors10-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.19", + "ip6": "2001:660:4406:100:c::13", + "kavlan": false, + "mac": "ec:f4:bb:d5:a8:50", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors10-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.19", + "kavlan": false, + "mac": "00:11:22:33:44:09", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors10-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors10", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, 
+ "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors10-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors10/vercors10.json b/data/grid5000/sites/grenoble/clusters/vercors10/vercors10.json new file mode 100644 index 00000000000..78339a8550b --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors10/vercors10.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + 
"node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors10", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors10/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors10/nodes.yaml.erb new file mode 100644 index 00000000000..c93d6d54e2e --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors10/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create 
SRC=mass-create-2025-03-14.csv' + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors10" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +ec:f4:bb:d5:a7:d8 +ec:f4:bb:d5:a8:50 + ) + mac_bmc_list = %w( +00:11:22:33:44:08 +00:11:22:33:44:09 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks +
instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # Common value for hyper-threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors10/vercors10.yaml b/input/grid5000/sites/grenoble/clusters/vercors10/vercors10.yaml new file mode 100644 index 00000000000..531466f8408 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors10/vercors10.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value.
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors10-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors10 diff --git a/input/grid5000/sites/grenoble/clusters/vercors10/vercors10_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors10/vercors10_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors10/vercors10_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus 
Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 37c4b42e468bac821e16aa7a7f8282affad6cd5a Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 09/27] [grenoble] Add new cluster vercors11 --- .../clusters/vercors11/nodes/vercors11-1.json | 128 ++++++++++++++++++ .../clusters/vercors11/nodes/vercors11-2.json | 128 ++++++++++++++++++ .../clusters/vercors11/vercors11.json | 93 +++++++++++++ .../clusters/vercors11/nodes.yaml.erb | 82 +++++++++++ .../clusters/vercors11/vercors11.yaml | 41 ++++++ .../clusters/vercors11/vercors11_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors11/nodes/vercors11-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors11/nodes/vercors11-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors11/vercors11.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors11/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors11/vercors11.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors11/vercors11_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors11/nodes/vercors11-1.json b/data/grid5000/sites/grenoble/clusters/vercors11/nodes/vercors11-1.json new file mode 100644 index 00000000000..022ec5d85df --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors11/nodes/vercors11-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + 
"main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.20", + "ip6": "2001:660:4406:100:c::14", + "kavlan": false, + "mac": "ec:f4:bb:d5:a8:40", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors11-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.20", + "kavlan": false, + "mac": "00:11:22:33:44:10", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors11-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors11", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + 
"supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors11-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors11/nodes/vercors11-2.json b/data/grid5000/sites/grenoble/clusters/vercors11/nodes/vercors11-2.json new file mode 100644 index 00000000000..037c0ce6660 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors11/nodes/vercors11-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.21", + "ip6": "2001:660:4406:100:c::15", + "kavlan": false, + "mac": "ec:f4:bb:d5:b5:9a", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors11-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.21", + "kavlan": false, + "mac": "00:11:22:33:44:11", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors11-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors11", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": 
"unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors11-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors11/vercors11.json b/data/grid5000/sites/grenoble/clusters/vercors11/vercors11.json new file mode 100644 index 00000000000..f5925e90757 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors11/vercors11.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + 
"node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors11", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors11/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors11/nodes.yaml.erb new file mode 100644 index 00000000000..d81e4d8b234 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors11/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File 
generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors11" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +ec:f4:bb:d5:a8:40 +ec:f4:bb:d5:b5:9a + ) + mac_bmc_list = %w( +00:11:22:33:44:10 +00:11:22:33:44:11 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be 
replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors11/vercors11.yaml b/input/grid5000/sites/grenoble/clusters/vercors11/vercors11.yaml new file mode 100644 index 00000000000..a40be5dec77 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors11/vercors11.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors11-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors11 diff --git a/input/grid5000/sites/grenoble/clusters/vercors11/vercors11_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors11/vercors11_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors11/vercors11_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus 
Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 3d0e201fe15cd2cf2424088daeda362e752a4ff8 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 10/27] [grenoble] Add new cluster vercors12 --- .../clusters/vercors12/nodes/vercors12-1.json | 128 ++++++++++++++++++ .../clusters/vercors12/vercors12.json | 93 +++++++++++++ .../clusters/vercors12/nodes.yaml.erb | 80 +++++++++++ .../clusters/vercors12/vercors12.yaml | 41 ++++++ .../clusters/vercors12/vercors12_metrics.yaml | 63 +++++++++ 5 files changed, 405 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors12/nodes/vercors12-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors12/vercors12.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors12/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors12/vercors12.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors12/vercors12_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors12/nodes/vercors12-1.json b/data/grid5000/sites/grenoble/clusters/vercors12/nodes/vercors12-1.json new file mode 100644 index 00000000000..7f9ac1bdbb8 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors12/nodes/vercors12-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + 
"device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.22", + "ip6": "2001:660:4406:100:c::16", + "kavlan": false, + "mac": "ec:f4:bb:d5:b8:20", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors12-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.22", + "kavlan": false, + "mac": "00:11:22:33:44:12", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors12-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors12", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors12-1" +} \ 
No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors12/vercors12.json b/data/grid5000/sites/grenoble/clusters/vercors12/vercors12.json new file mode 100644 index 00000000000..36c7e9e84b1 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors12/vercors12.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + 
}, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors12", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors12/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors12/nodes.yaml.erb new file mode 100644 index 00000000000..d375937294f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors12/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors12" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +ec:f4:bb:d5:b8:20 + ) + mac_bmc_list = %w( +00:11:22:33:44:12 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, 
will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: 
false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors12/vercors12.yaml b/input/grid5000/sites/grenoble/clusters/vercors12/vercors12.yaml new file mode 100644 index 00000000000..a000933c0dc --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors12/vercors12.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors12-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors12 diff --git a/input/grid5000/sites/grenoble/clusters/vercors12/vercors12_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors12/vercors12_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors12/vercors12_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus 
Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 884249447bacbcd67b18ff95d0d5508cd7bb4bb1 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 11/27] [grenoble] Add new cluster vercors13 --- .../clusters/vercors13/nodes/vercors13-1.json | 128 ++++++++++++++++++ .../clusters/vercors13/vercors13.json | 93 +++++++++++++ .../clusters/vercors13/nodes.yaml.erb | 80 +++++++++++ .../clusters/vercors13/vercors13.yaml | 41 ++++++ .../clusters/vercors13/vercors13_metrics.yaml | 63 +++++++++ 5 files changed, 405 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors13/nodes/vercors13-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors13/vercors13.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors13/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors13/vercors13.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors13/vercors13_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors13/nodes/vercors13-1.json b/data/grid5000/sites/grenoble/clusters/vercors13/nodes/vercors13-1.json new file mode 100644 index 00000000000..0fe8a4eec75 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors13/nodes/vercors13-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + 
"device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.23", + "ip6": "2001:660:4406:100:c::17", + "kavlan": false, + "mac": "ec:f4:bb:ec:94:18", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors13-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.23", + "kavlan": false, + "mac": "00:11:22:33:44:13", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors13-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors13", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors13-1" +} \ 
No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors13/vercors13.json b/data/grid5000/sites/grenoble/clusters/vercors13/vercors13.json new file mode 100644 index 00000000000..6da5ad225f1 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors13/vercors13.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + 
}, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors13", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors13/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors13/nodes.yaml.erb new file mode 100644 index 00000000000..36912f17518 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors13/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors13" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +ec:f4:bb:ec:94:18 + ) + mac_bmc_list = %w( +00:11:22:33:44:13 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, 
will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: 
false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors13/vercors13.yaml b/input/grid5000/sites/grenoble/clusters/vercors13/vercors13.yaml new file mode 100644 index 00000000000..936a38ce6d9 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors13/vercors13.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors13-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors13 diff --git a/input/grid5000/sites/grenoble/clusters/vercors13/vercors13_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors13/vercors13_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors13/vercors13_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus 
Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From eb2ce82a0267828d40a80f352cfd93329725958a Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 12/27] [grenoble] Add new cluster vercors14 --- .../clusters/vercors14/nodes/vercors14-1.json | 128 ++++++++++++++++++ .../clusters/vercors14/nodes/vercors14-2.json | 128 ++++++++++++++++++ .../clusters/vercors14/nodes/vercors14-3.json | 128 ++++++++++++++++++ .../clusters/vercors14/nodes/vercors14-4.json | 128 ++++++++++++++++++ .../clusters/vercors14/nodes/vercors14-5.json | 128 ++++++++++++++++++ .../clusters/vercors14/vercors14.json | 93 +++++++++++++ .../clusters/vercors14/nodes.yaml.erb | 88 ++++++++++++ .../clusters/vercors14/vercors14.yaml | 41 ++++++ .../clusters/vercors14/vercors14_metrics.yaml | 63 +++++++++ 9 files changed, 925 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-4.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-5.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors14/vercors14.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors14/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors14/vercors14.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors14/vercors14_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-1.json b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-1.json new file mode 100644 index 00000000000..b6d11bdceb9 --- /dev/null +++ 
b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.24", + "ip6": "2001:660:4406:100:c::18", + "kavlan": false, + "mac": "ec:f4:bb:f0:14:60", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors14-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.24", + "kavlan": false, + "mac": "00:11:22:33:44:14", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors14-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors14", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", 
+ "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors14-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-2.json b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-2.json new file mode 100644 index 00000000000..339ca233092 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.25", + "ip6": "2001:660:4406:100:c::19", + "kavlan": false, + "mac": "ec:f4:bb:f0:12:c8", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + 
"network_address": "vercors14-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.25", + "kavlan": false, + "mac": "00:11:22:33:44:16", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors14-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors14", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors14-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-3.json b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-3.json new file mode 100644 index 00000000000..06744776144 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": 
"contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.26", + "ip6": "2001:660:4406:100:c::1a", + "kavlan": false, + "mac": "24:6e:96:07:fb:10", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors14-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.26", + "kavlan": false, + "mac": "00:11:22:33:44:17", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors14-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors14", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + 
"software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors14-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-4.json b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-4.json new file mode 100644 index 00000000000..2e9db92667c --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.27", + "ip6": "2001:660:4406:100:c::1b", + "kavlan": false, + "mac": "24:6e:96:4c:4a:04", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors14-4.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": 
"bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.27", + "kavlan": false, + "mac": "00:11:22:33:44:19", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors14-4-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors14", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors14-4" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-5.json b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-5.json new file mode 100644 index 00000000000..c7b72e36820 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors14/nodes/vercors14-5.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": 
"01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.28", + "ip6": "2001:660:4406:100:c::1c", + "kavlan": false, + "mac": "24:6e:96:75:f1:fc", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors14-5.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.28", + "kavlan": false, + "mac": "00:11:22:33:44:21", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors14-5-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors14", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": 
"debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors14-5" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors14/vercors14.json b/data/grid5000/sites/grenoble/clusters/vercors14/vercors14.json new file mode 100644 index 00000000000..60fca961f64 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors14/vercors14.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + 
"protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 5, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors14", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors14/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors14/nodes.yaml.erb new file mode 100644 index 00000000000..eb08fac93c6 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors14/nodes.yaml.erb @@ -0,0 +1,88 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors14" + nodes_number = 5 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +ec:f4:bb:f0:14:60 +ec:f4:bb:f0:12:c8 +24:6e:96:07:fb:10 +24:6e:96:4c:4a:04 +24:6e:96:75:f1:fc + ) + mac_bmc_list = %w( +00:11:22:33:44:14 +00:11:22:33:44:16 +00:11:22:33:44:17 +00:11:22:33:44:19 +00:11:22:33:44:21 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # 
Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: 
unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors14/vercors14.yaml b/input/grid5000/sites/grenoble/clusters/vercors14/vercors14.yaml new file mode 100644 index 00000000000..f5499e1288b --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors14/vercors14.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors14-[1-5]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. 
+ supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors14 diff --git a/input/grid5000/sites/grenoble/clusters/vercors14/vercors14_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors14/vercors14_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors14/vercors14_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All 
metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From ee016a9bb58bac87bf8f93de0dc37e8840d08153 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 13/27] [grenoble] Add new cluster vercors15 --- .../clusters/vercors15/nodes/vercors15-1.json | 128 ++++++++++++++++++ .../clusters/vercors15/nodes/vercors15-2.json | 128 ++++++++++++++++++ .../clusters/vercors15/nodes/vercors15-3.json | 128 ++++++++++++++++++ .../clusters/vercors15/nodes/vercors15-4.json | 128 ++++++++++++++++++ .../clusters/vercors15/vercors15.json | 93 +++++++++++++ .../clusters/vercors15/nodes.yaml.erb | 86 ++++++++++++ .../clusters/vercors15/vercors15.yaml | 41 ++++++ .../clusters/vercors15/vercors15_metrics.yaml | 63 +++++++++ 8 files changed, 795 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-4.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors15/vercors15.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors15/nodes.yaml.erb create mode 100644 
input/grid5000/sites/grenoble/clusters/vercors15/vercors15.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors15/vercors15_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-1.json b/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-1.json new file mode 100644 index 00000000000..af775d20e5d --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.29", + "ip6": "2001:660:4406:100:c::1d", + "kavlan": false, + "mac": "ec:f4:bb:f0:12:18", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors15-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.29", + "kavlan": false, + "mac": "00:11:22:33:44:15", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors15-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors15", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": 
"unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors15-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-2.json b/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-2.json new file mode 100644 index 00000000000..45ba2a52d34 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 
dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.30", + "ip6": "2001:660:4406:100:c::1e", + "kavlan": false, + "mac": "24:6e:96:07:fc:e0", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors15-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.30", + "kavlan": false, + "mac": "00:11:22:33:44:18", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors15-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors15", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors15-2" +} \ No newline at end of 
file diff --git a/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-3.json b/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-3.json new file mode 100644 index 00000000000..8126f655719 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.31", + "ip6": "2001:660:4406:100:c::1f", + "kavlan": false, + "mac": "24:6e:96:4c:4a:2c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors15-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.31", + "kavlan": false, + "mac": "00:11:22:33:44:20", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors15-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors15", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + 
"processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors15-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-4.json b/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-4.json new file mode 100644 index 00000000000..b25d3b9ddfe --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors15/nodes/vercors15-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, 
+ "interface": "Ethernet", + "ip": "172.16.27.32", + "ip6": "2001:660:4406:100:c::20", + "kavlan": false, + "mac": "24:6e:96:75:ed:74", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors15-4.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.32", + "kavlan": false, + "mac": "00:11:22:33:44:22", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors15-4-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors15", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors15-4" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors15/vercors15.json b/data/grid5000/sites/grenoble/clusters/vercors15/vercors15.json 
new file mode 100644 index 00000000000..80da737d19d --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors15/vercors15.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { 
+ "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 4, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors15", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors15/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors15/nodes.yaml.erb new file mode 100644 index 00000000000..28e86950add --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors15/nodes.yaml.erb @@ -0,0 +1,86 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors15" + nodes_number = 4 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +ec:f4:bb:f0:12:18 +24:6e:96:07:fc:e0 +24:6e:96:4c:4a:2c +24:6e:96:75:ed:74 + ) + mac_bmc_list = %w( +00:11:22:33:44:15 +00:11:22:33:44:18 +00:11:22:33:44:20 +00:11:22:33:44:22 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake 
name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced
by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors15/vercors15.yaml b/input/grid5000/sites/grenoble/clusters/vercors15/vercors15.yaml new file mode 100644 index 00000000000..2e08efbd180 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors15/vercors15.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors15-[1-4]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors15 diff --git a/input/grid5000/sites/grenoble/clusters/vercors15/vercors15_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors15/vercors15_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors15/vercors15_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus 
Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From db93e6e7f3f98229e6c8cc648477c7d453e112e7 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 14/27] [grenoble] Add new cluster vercors16 --- .../clusters/vercors16/nodes/vercors16-1.json | 128 ++++++++++++++++++ .../clusters/vercors16/nodes/vercors16-2.json | 128 ++++++++++++++++++ .../clusters/vercors16/vercors16.json | 93 +++++++++++++ .../clusters/vercors16/nodes.yaml.erb | 82 +++++++++++ .../clusters/vercors16/vercors16.yaml | 41 ++++++ .../clusters/vercors16/vercors16_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors16/nodes/vercors16-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors16/nodes/vercors16-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors16/vercors16.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors16/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors16/vercors16.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors16/vercors16_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors16/nodes/vercors16-1.json b/data/grid5000/sites/grenoble/clusters/vercors16/nodes/vercors16-1.json new file mode 100644 index 00000000000..3cb0a92a0be --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors16/nodes/vercors16-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + 
"main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.33", + "ip6": "2001:660:4406:100:c::21", + "kavlan": false, + "mac": "48:df:37:03:2a:68", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors16-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.33", + "kavlan": false, + "mac": "00:11:22:33:44:23", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors16-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors16", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + 
"supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors16-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors16/nodes/vercors16-2.json b/data/grid5000/sites/grenoble/clusters/vercors16/nodes/vercors16-2.json new file mode 100644 index 00000000000..571a1e8f626 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors16/nodes/vercors16-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.34", + "ip6": "2001:660:4406:100:c::22", + "kavlan": false, + "mac": "48:df:37:03:32:ac", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors16-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.34", + "kavlan": false, + "mac": "00:11:22:33:44:24", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors16-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors16", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": 
"unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors16-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors16/vercors16.json b/data/grid5000/sites/grenoble/clusters/vercors16/vercors16.json new file mode 100644 index 00000000000..b97c2d2dda2 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors16/vercors16.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + 
"node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors16", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors16/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors16/nodes.yaml.erb new file mode 100644 index 00000000000..c97c77ff32f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors16/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File 
generated by 'rake mass:create SRC=mass-create-2025-03-14.csv' + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors16" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +48:df:37:03:2a:68 +48:df:37:03:32:ac + ) + mac_bmc_list = %w( +00:11:22:33:44:23 +00:11:22:33:44:24 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be
replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors16/vercors16.yaml b/input/grid5000/sites/grenoble/clusters/vercors16/vercors16.yaml new file mode 100644 index 00000000000..55dc29c56fe --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors16/vercors16.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors16-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors16 diff --git a/input/grid5000/sites/grenoble/clusters/vercors16/vercors16_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors16/vercors16_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors16/vercors16_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus 
Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 44cd2585c881063daa29d860f2dad173154feff6 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 15/27] [grenoble] Add new cluster vercors17 --- .../clusters/vercors17/nodes/vercors17-1.json | 128 ++++++++++++++++++ .../clusters/vercors17/nodes/vercors17-2.json | 128 ++++++++++++++++++ .../clusters/vercors17/nodes/vercors17-3.json | 128 ++++++++++++++++++ .../clusters/vercors17/vercors17.json | 93 +++++++++++++ .../clusters/vercors17/nodes.yaml.erb | 84 ++++++++++++ .../clusters/vercors17/vercors17.yaml | 41 ++++++ .../clusters/vercors17/vercors17_metrics.yaml | 63 +++++++++ 7 files changed, 665 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors17/vercors17.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors17/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors17/vercors17.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors17/vercors17_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-1.json b/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-1.json new file mode 100644 index 00000000000..c23e061cb35 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + 
"chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.35", + "ip6": "2001:660:4406:100:c::23", + "kavlan": false, + "mac": "24:6e:96:de:36:54", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors17-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.35", + "kavlan": false, + "mac": "00:11:22:33:44:25", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors17-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors17", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": 
"/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors17-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-2.json b/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-2.json new file mode 100644 index 00000000000..1e8eaf17134 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.36", + "ip6": "2001:660:4406:100:c::24", + "kavlan": false, + "mac": "24:6e:96:de:38:fc", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors17-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.36", + "kavlan": false, + "mac": "00:11:22:33:44:27", + "management": true, + "mountable": false, + "mounted": false, + "network_address": 
"vercors17-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors17", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors17-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-3.json b/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-3.json new file mode 100644 index 00000000000..ff230716dc1 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors17/nodes/vercors17-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + 
"exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.37", + "ip6": "2001:660:4406:100:c::25", + "kavlan": false, + "mac": "24:6e:96:de:15:a8", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors17-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.37", + "kavlan": false, + "mac": "00:11:22:33:44:28", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors17-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors17", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + 
], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors17-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors17/vercors17.json b/data/grid5000/sites/grenoble/clusters/vercors17/vercors17.json new file mode 100644 index 00000000000..71cfafc4023 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors17/vercors17.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + 
"DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 3, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors17", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors17/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors17/nodes.yaml.erb new file mode 100644 index 00000000000..1039a2236fb --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors17/nodes.yaml.erb @@ -0,0 +1,84 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors17" + nodes_number = 3 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +24:6e:96:de:36:54 +24:6e:96:de:38:fc +24:6e:96:de:15:a8 + ) + mac_bmc_list = %w( +00:11:22:33:44:25 +00:11:22:33:44:27 +00:11:22:33:44:28 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be 
replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: 
unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors17/vercors17.yaml b/input/grid5000/sites/grenoble/clusters/vercors17/vercors17.yaml new file mode 100644 index 00000000000..22b49f19cdd --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors17/vercors17.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors17-[1-3]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. 
+ network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors17 diff --git a/input/grid5000/sites/grenoble/clusters/vercors17/vercors17_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors17/vercors17_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors17/vercors17_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from 
Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From f67b314a8723666d3abd8559ebfd8c28fa34cc0d Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 16/27] [grenoble] Add new cluster vercors18 --- .../clusters/vercors18/nodes/vercors18-1.json | 128 ++++++++++++++++++ .../clusters/vercors18/nodes/vercors18-2.json | 128 ++++++++++++++++++ .../clusters/vercors18/vercors18.json | 93 +++++++++++++ .../clusters/vercors18/nodes.yaml.erb | 82 +++++++++++ .../clusters/vercors18/vercors18.yaml | 41 ++++++ .../clusters/vercors18/vercors18_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors18/nodes/vercors18-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors18/nodes/vercors18-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors18/vercors18.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors18/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors18/vercors18.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors18/vercors18_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors18/nodes/vercors18-1.json b/data/grid5000/sites/grenoble/clusters/vercors18/nodes/vercors18-1.json new file mode 100644 index 00000000000..777fd2291a9 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors18/nodes/vercors18-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 
1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.38", + "ip6": "2001:660:4406:100:c::26", + "kavlan": false, + "mac": "aa:bb:cc:dd:ee:26", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors18-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.38", + "kavlan": false, + "mac": "00:11:22:33:44:26", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors18-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors18", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 
202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors18-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors18/nodes/vercors18-2.json b/data/grid5000/sites/grenoble/clusters/vercors18/nodes/vercors18-2.json new file mode 100644 index 00000000000..05ec7f9b422 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors18/nodes/vercors18-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.39", + "ip6": "2001:660:4406:100:c::27", + "kavlan": false, + "mac": "24:6e:96:de:3f:7e", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors18-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": 
"Ethernet", + "ip": "172.17.27.39", + "kavlan": false, + "mac": "00:11:22:33:44:29", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors18-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors18", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors18-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors18/vercors18.json b/data/grid5000/sites/grenoble/clusters/vercors18/vercors18.json new file mode 100644 index 00000000000..507d70e823a --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors18/vercors18.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": 
"prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors18", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git 
a/input/grid5000/sites/grenoble/clusters/vercors18/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors18/nodes.yaml.erb new file mode 100644 index 00000000000..f3db953e9f1 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors18/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors18" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +aa:bb:cc:dd:ee:26 +24:6e:96:de:3f:7e + ) + mac_bmc_list = %w( +00:11:22:33:44:26 +00:11:22:33:44:29 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced 
by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors18/vercors18.yaml 
b/input/grid5000/sites/grenoble/clusters/vercors18/vercors18.yaml new file mode 100644 index 00000000000..d3987e07bde --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors18/vercors18.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors18-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors18 diff --git a/input/grid5000/sites/grenoble/clusters/vercors18/vercors18_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors18/vercors18_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors18/vercors18_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus 
Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 212218c4c5b324ecad33445f8af6dc8df1b7ece9 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 17/27] [grenoble] Add new cluster vercors19 --- .../clusters/vercors19/nodes/vercors19-1.json | 128 ++++++++++++++++++ .../clusters/vercors19/vercors19.json | 93 +++++++++++++ .../clusters/vercors19/nodes.yaml.erb | 80 +++++++++++ .../clusters/vercors19/vercors19.yaml | 41 ++++++ .../clusters/vercors19/vercors19_metrics.yaml | 63 +++++++++ 5 files changed, 405 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors19/nodes/vercors19-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors19/vercors19.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors19/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors19/vercors19.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors19/vercors19_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors19/nodes/vercors19-1.json b/data/grid5000/sites/grenoble/clusters/vercors19/nodes/vercors19-1.json new file mode 100644 index 00000000000..a285a88b989 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors19/nodes/vercors19-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + 
"device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.40", + "ip6": "2001:660:4406:100:c::28", + "kavlan": false, + "mac": "78:ac:44:74:cf:94", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors19-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.40", + "kavlan": false, + "mac": "00:11:22:33:44:30", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors19-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors19", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors19-1" +} \ 
No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors19/vercors19.json b/data/grid5000/sites/grenoble/clusters/vercors19/vercors19.json new file mode 100644 index 00000000000..0c0faa02bc5 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors19/vercors19.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + 
}, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors19", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors19/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors19/nodes.yaml.erb new file mode 100644 index 00000000000..b2b5652a0ea --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors19/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors19" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +78:ac:44:74:cf:94 + ) + mac_bmc_list = %w( +00:11:22:33:44:30 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, 
will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: 
false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors19/vercors19.yaml b/input/grid5000/sites/grenoble/clusters/vercors19/vercors19.yaml new file mode 100644 index 00000000000..4a733fd6202 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors19/vercors19.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors19-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors19 diff --git a/input/grid5000/sites/grenoble/clusters/vercors19/vercors19_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors19/vercors19_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors19/vercors19_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus 
Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 895998d4373edb2f5bdced1ad71ff33822a3a772 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 18/27] [grenoble] Add new cluster vercors2 --- .../clusters/vercors2/nodes/vercors2-1.json | 128 ++++++++++++++++++ .../clusters/vercors2/nodes/vercors2-2.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors2/vercors2.json | 93 +++++++++++++ .../grenoble/clusters/vercors2/nodes.yaml.erb | 82 +++++++++++ .../grenoble/clusters/vercors2/vercors2.yaml | 41 ++++++ .../clusters/vercors2/vercors2_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors2/nodes/vercors2-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors2/nodes/vercors2-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors2/vercors2.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors2/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors2/vercors2.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors2/vercors2_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors2/nodes/vercors2-1.json b/data/grid5000/sites/grenoble/clusters/vercors2/nodes/vercors2-1.json new file mode 100644 index 00000000000..43a2bbee86f --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors2/nodes/vercors2-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + 
"main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.3", + "ip6": "2001:660:4406:100:c::3", + "kavlan": false, + "mac": "24:6e:96:29:5b:90", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors2-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.3", + "kavlan": false, + "mac": "00:11:22:33:44:01", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors2-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors2", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + 
"supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors2-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors2/nodes/vercors2-2.json b/data/grid5000/sites/grenoble/clusters/vercors2/nodes/vercors2-2.json new file mode 100644 index 00000000000..3589838e073 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors2/nodes/vercors2-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.4", + "ip6": "2001:660:4406:100:c::4", + "kavlan": false, + "mac": "24:6e:96:29:5b:20", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors2-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.4", + "kavlan": false, + "mac": "00:11:22:33:44:02", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors2-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors2", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + 
"ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors2-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors2/vercors2.json b/data/grid5000/sites/grenoble/clusters/vercors2/vercors2.json new file mode 100644 index 00000000000..db415c818d3 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors2/vercors2.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + 
"node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors2", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors2/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors2/nodes.yaml.erb new file mode 100644 index 00000000000..883da3e18b6 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors2/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create 
SRC=mass-create-2025-03-14.csv' + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors2" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +24:6e:96:29:5b:90 +24:6e:96:29:5b:20 + ) + mac_bmc_list = %w( +00:11:22:33:44:01 +00:11:22:33:44:02 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks +
instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors2/vercors2.yaml b/input/grid5000/sites/grenoble/clusters/vercors2/vercors2.yaml new file mode 100644 index 00000000000..0b656958228 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors2/vercors2.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value.
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors2-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors2 diff --git a/input/grid5000/sites/grenoble/clusters/vercors2/vercors2_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors2/vercors2_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors2/vercors2_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM 
Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From e13626ee60e95099791b597e6699e631131bc638 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 19/27] [grenoble] Add new cluster vercors20 --- .../clusters/vercors20/nodes/vercors20-1.json | 128 ++++++++++++++++++ .../clusters/vercors20/nodes/vercors20-2.json | 128 ++++++++++++++++++ .../clusters/vercors20/nodes/vercors20-3.json | 128 ++++++++++++++++++ .../clusters/vercors20/vercors20.json | 93 +++++++++++++ .../clusters/vercors20/nodes.yaml.erb | 84 ++++++++++++ .../clusters/vercors20/vercors20.yaml | 41 ++++++ .../clusters/vercors20/vercors20_metrics.yaml | 63 +++++++++ 7 files changed, 665 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors20/vercors20.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors20/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors20/vercors20.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors20/vercors20_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-1.json b/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-1.json new file mode 100644 index 00000000000..623f872c508 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + 
"manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.41", + "ip6": "2001:660:4406:100:c::29", + "kavlan": false, + "mac": "aa:bb:cc:dd:ee:31", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors20-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.41", + "kavlan": false, + "mac": "00:11:22:33:44:31", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors20-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors20", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + 
"id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors20-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-2.json b/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-2.json new file mode 100644 index 00000000000..686f38d20ea --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.42", + "ip6": "2001:660:4406:100:c::2a", + "kavlan": false, + "mac": "aa:bb:cc:dd:ee:32", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors20-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.42", + "kavlan": false, + "mac": "00:11:22:33:44:32", + "management": true, + "mountable": false, + "mounted": false, + "network_address": 
"vercors20-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors20", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors20-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-3.json b/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-3.json new file mode 100644 index 00000000000..fe93e19cf43 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors20/nodes/vercors20-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + 
"exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.43", + "ip6": "2001:660:4406:100:c::2b", + "kavlan": false, + "mac": "aa:bb:cc:dd:ee:33", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors20-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.43", + "kavlan": false, + "mac": "00:11:22:33:44:33", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors20-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors20", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + 
], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors20-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors20/vercors20.json b/data/grid5000/sites/grenoble/clusters/vercors20/vercors20.json new file mode 100644 index 00000000000..a3b3efd2064 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors20/vercors20.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + 
"DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 3, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors20", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors20/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors20/nodes.yaml.erb new file mode 100644 index 00000000000..9cbc138c870 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors20/nodes.yaml.erb @@ -0,0 +1,84 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors20" + nodes_number = 3 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +aa:bb:cc:dd:ee:31 +aa:bb:cc:dd:ee:32 +aa:bb:cc:dd:ee:33 + ) + mac_bmc_list = %w( +00:11:22:33:44:31 +00:11:22:33:44:32 +00:11:22:33:44:33 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be 
replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: 
unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors20/vercors20.yaml b/input/grid5000/sites/grenoble/clusters/vercors20/vercors20.yaml new file mode 100644 index 00000000000..159b2ce9e1e --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors20/vercors20.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors20-[1-3]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed.
+ network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors20 diff --git a/input/grid5000/sites/grenoble/clusters/vercors20/vercors20_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors20/vercors20_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors20/vercors20_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from 
Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From aea3b9a7cb5a6a4c5f57834955111e906fe8d898 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 20/27] [grenoble] Add new cluster vercors21 --- .../clusters/vercors21/nodes/vercors21-1.json | 128 ++++++++++++++++++ .../clusters/vercors21/vercors21.json | 93 +++++++++++++ .../clusters/vercors21/nodes.yaml.erb | 80 +++++++++++ .../clusters/vercors21/vercors21.yaml | 41 ++++++ .../clusters/vercors21/vercors21_metrics.yaml | 63 +++++++++ 5 files changed, 405 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors21/nodes/vercors21-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors21/vercors21.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors21/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors21/vercors21.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors21/vercors21_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors21/nodes/vercors21-1.json b/data/grid5000/sites/grenoble/clusters/vercors21/nodes/vercors21-1.json new file mode 100644 index 00000000000..7b1968beae9 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors21/nodes/vercors21-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": 
"v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.44", + "ip6": "2001:660:4406:100:c::2c", + "kavlan": false, + "mac": "aa:bb:cc:dd:ee:34", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors21-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.44", + "kavlan": false, + "mac": "00:11:22:33:44:34", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors21-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors21", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": 
"/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors21-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors21/vercors21.json b/data/grid5000/sites/grenoble/clusters/vercors21/vercors21.json new file mode 100644 index 00000000000..81a1ad42965 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors21/vercors21.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from 
Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors21", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors21/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors21/nodes.yaml.erb new file mode 100644 index 00000000000..7629cbe98f6 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors21/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv' + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors21" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +aa:bb:cc:dd:ee:34 + ) + mac_bmc_list = %w( +00:11:22:33:44:34 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by
g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: 
unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors21/vercors21.yaml b/input/grid5000/sites/grenoble/clusters/vercors21/vercors21.yaml new file mode 100644 index 00000000000..5b963d13f66 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors21/vercors21.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors21-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed.
+ network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors21 diff --git a/input/grid5000/sites/grenoble/clusters/vercors21/vercors21_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors21/vercors21_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors21/vercors21_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from 
Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From c6420c16895ec484476ad9734d4422e1f12322c6 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 21/27] [grenoble] Add new cluster vercors3 --- .../clusters/vercors3/nodes/vercors3-1.json | 128 ++++++++++++++++++ .../clusters/vercors3/nodes/vercors3-2.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors3/vercors3.json | 93 +++++++++++++ .../grenoble/clusters/vercors3/nodes.yaml.erb | 82 +++++++++++ .../grenoble/clusters/vercors3/vercors3.yaml | 41 ++++++ .../clusters/vercors3/vercors3_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors3/nodes/vercors3-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors3/nodes/vercors3-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors3/vercors3.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors3/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors3/vercors3.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors3/vercors3_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors3/nodes/vercors3-1.json b/data/grid5000/sites/grenoble/clusters/vercors3/nodes/vercors3-1.json new file mode 100644 index 00000000000..49bcb0976f3 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors3/nodes/vercors3-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, 
+ "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.5", + "ip6": "2001:660:4406:100:c::5", + "kavlan": false, + "mac": "00:0a:f7:bf:4d:bc", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors3-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.5", + "kavlan": false, + "mac": "58:8a:5a:ee:70:2c", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors3-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + 
"postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors3-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors3/nodes/vercors3-2.json b/data/grid5000/sites/grenoble/clusters/vercors3/nodes/vercors3-2.json new file mode 100644 index 00000000000..7869c5f6c6b --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors3/nodes/vercors3-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.6", + "ip6": "2001:660:4406:100:c::6", + "kavlan": false, + "mac": "00:0a:f7:bf:1e:46", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors3-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.6", + 
"kavlan": false, + "mac": "58:8a:5a:ee:6e:ca", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors3-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors3-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors3/vercors3.json b/data/grid5000/sites/grenoble/clusters/vercors3/vercors3.json new file mode 100644 index 00000000000..58d98027868 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors3/vercors3.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 
15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors3", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git 
a/input/grid5000/sites/grenoble/clusters/vercors3/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors3/nodes.yaml.erb new file mode 100644 index 00000000000..88df36b6e3d --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors3/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv' + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors3" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +00:0a:f7:bf:4d:bc +00:0a:f7:bf:1e:46 + ) + mac_bmc_list = %w( +58:8a:5a:ee:70:2c +58:8a:5a:ee:6e:ca + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by
g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors3/vercors3.yaml
b/input/grid5000/sites/grenoble/clusters/vercors3/vercors3.yaml new file mode 100644 index 00000000000..0d65f29daaf --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors3/vercors3.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors3-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors3 diff --git a/input/grid5000/sites/grenoble/clusters/vercors3/vercors3_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors3/vercors3_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors3/vercors3_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM 
Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From f0d1f8c6c850311a247d50778df8a646d6ae21fc Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 22/27] [grenoble] Add new cluster vercors4 --- .../clusters/vercors4/nodes/vercors4-1.json | 128 ++++++++++++++++++ .../clusters/vercors4/nodes/vercors4-2.json | 128 ++++++++++++++++++ .../clusters/vercors4/nodes/vercors4-3.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors4/vercors4.json | 93 +++++++++++++ .../grenoble/clusters/vercors4/nodes.yaml.erb | 84 ++++++++++++ .../grenoble/clusters/vercors4/vercors4.yaml | 41 ++++++ .../clusters/vercors4/vercors4_metrics.yaml | 63 +++++++++ 7 files changed, 665 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-3.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors4/vercors4.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors4/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors4/vercors4.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors4/vercors4_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-1.json b/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-1.json new file mode 100644 index 00000000000..7f842acd4f5 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + 
"manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.7", + "ip6": "2001:660:4406:100:c::7", + "kavlan": false, + "mac": "b0:26:28:59:8d:14", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors4-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.7", + "kavlan": false, + "mac": "4c:d9:8f:67:ab:a9", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors4-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors4", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": 
"disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors4-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-2.json b/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-2.json new file mode 100644 index 00000000000..73ad7acd53c --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.8", + "ip6": "2001:660:4406:100:c::8", + "kavlan": false, + "mac": "b0:26:28:59:cd:22", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors4-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.8", + "kavlan": false, + "mac": "4c:d9:8f:67:97:97", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors4-2-bmc.grenoble.grid5000.fr" + } + ], 
+ "nodeset": "vercors4", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors4-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-3.json b/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-3.json new file mode 100644 index 00000000000..4a508e65356 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors4/nodes/vercors4-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + 
"management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.9", + "ip6": "2001:660:4406:100:c::9", + "kavlan": false, + "mac": "b0:26:28:b3:27:0e", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors4-3.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.9", + "kavlan": false, + "mac": "f4:02:70:af:6c:58", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors4-3-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors4", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": 
true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors4-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors4/vercors4.json b/data/grid5000/sites/grenoble/clusters/vercors4/vercors4.json new file mode 100644 index 00000000000..c186b093beb --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors4/vercors4.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + 
"DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 3, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors4", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors4/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors4/nodes.yaml.erb new file mode 100644 index 00000000000..3d3c72a5e4f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors4/nodes.yaml.erb @@ -0,0 +1,84 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors4" + nodes_number = 3 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +b0:26:28:59:8d:14 +b0:26:28:59:cd:22 +b0:26:28:b3:27:0e + ) + mac_bmc_list = %w( +4c:d9:8f:67:ab:a9 +4c:d9:8f:67:97:97 +f4:02:70:af:6c:58 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + 
version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # 
Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors4/vercors4.yaml b/input/grid5000/sites/grenoble/clusters/vercors4/vercors4.yaml new file mode 100644 index 00000000000..a24b0180e72 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors4/vercors4.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors4-[1-3]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. 
+ id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors4 diff --git a/input/grid5000/sites/grenoble/clusters/vercors4/vercors4_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors4/vercors4_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors4/vercors4_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - 
DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From c5894f42d3a0dcc87153400a591d290216026eca Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 23/27] [grenoble] Add new cluster vercors5 --- .../clusters/vercors5/nodes/vercors5-1.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors5/vercors5.json | 93 +++++++++++++ .../grenoble/clusters/vercors5/nodes.yaml.erb | 80 +++++++++++ .../grenoble/clusters/vercors5/vercors5.yaml | 41 ++++++ .../clusters/vercors5/vercors5_metrics.yaml | 63 +++++++++ 5 files changed, 405 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors5/nodes/vercors5-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors5/vercors5.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors5/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors5/vercors5.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors5/vercors5_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors5/nodes/vercors5-1.json b/data/grid5000/sites/grenoble/clusters/vercors5/nodes/vercors5-1.json new file mode 100644 index 00000000000..a475eae8d55 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors5/nodes/vercors5-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + 
"bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.10", + "ip6": "2001:660:4406:100:c::a", + "kavlan": false, + "mac": "f8:f2:1e:d4:43:a0", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors5-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.10", + "kavlan": false, + "mac": "70:b5:e8:f2:a1:26", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors5-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors5", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 
0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors5-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors5/vercors5.json b/data/grid5000/sites/grenoble/clusters/vercors5/vercors5.json new file mode 100644 index 00000000000..ec6c431a4ac --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors5/vercors5.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + 
"DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors5", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors5/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors5/nodes.yaml.erb new file mode 100644 index 00000000000..13a77f80e1a --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors5/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors5" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +f8:f2:1e:d4:43:a0 + ) + mac_bmc_list = %w( +70:b5:e8:f2:a1:26 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 
# Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + 
bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors5/vercors5.yaml b/input/grid5000/sites/grenoble/clusters/vercors5/vercors5.yaml new file mode 100644 index 00000000000..8277a7b9363 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors5/vercors5.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors5-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. 
+ id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors5 diff --git a/input/grid5000/sites/grenoble/clusters/vercors5/vercors5_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors5/vercors5_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors5/vercors5_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - 
DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 45e59a08c72982dadf468d213ba9eb2d3ddf395b Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 24/27] [grenoble] Add new cluster vercors6 --- .../clusters/vercors6/nodes/vercors6-1.json | 128 ++++++++++++++++++ .../clusters/vercors6/nodes/vercors6-2.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors6/vercors6.json | 93 +++++++++++++ .../grenoble/clusters/vercors6/nodes.yaml.erb | 82 +++++++++++ .../grenoble/clusters/vercors6/vercors6.yaml | 41 ++++++ .../clusters/vercors6/vercors6_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors6/nodes/vercors6-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors6/nodes/vercors6-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors6/vercors6.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors6/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors6/vercors6.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors6/vercors6_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors6/nodes/vercors6-1.json b/data/grid5000/sites/grenoble/clusters/vercors6/nodes/vercors6-1.json new file mode 100644 index 00000000000..2cc9c1b7b2f --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors6/nodes/vercors6-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + 
"manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.11", + "ip6": "2001:660:4406:100:c::b", + "kavlan": false, + "mac": "44:a8:42:0c:17:41", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors6-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.11", + "kavlan": false, + "mac": "44:a8:42:0c:17:43", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors6-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors6", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + 
"model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors6-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors6/nodes/vercors6-2.json b/data/grid5000/sites/grenoble/clusters/vercors6/nodes/vercors6-2.json new file mode 100644 index 00000000000..76bc878e508 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors6/nodes/vercors6-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.12", + "ip6": "2001:660:4406:100:c::c", + "kavlan": false, + "mac": "44:a8:42:0c:0d:7c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors6-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.12", + "kavlan": false, + "mac": "00:11:22:33:44:03", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors6-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors6", + 
"operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors6-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors6/vercors6.json b/data/grid5000/sites/grenoble/clusters/vercors6/vercors6.json new file mode 100644 index 00000000000..1b310702e21 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors6/vercors6.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + 
"node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors6", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors6/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors6/nodes.yaml.erb new file mode 100644 index 00000000000..36c54187c7c --- /dev/null +++ 
b/input/grid5000/sites/grenoble/clusters/vercors6/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors6" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +44:a8:42:0c:17:41 +44:a8:42:0c:0d:7c + ) + mac_bmc_list = %w( +44:a8:42:0c:17:43 +00:11:22:33:44:03 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + 
cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors6/vercors6.yaml b/input/grid5000/sites/grenoble/clusters/vercors6/vercors6.yaml new file mode 100644 index 00000000000..2ddda5d185b --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors6/vercors6.yaml @@ -0,0 +1,41 @@ 
+--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors6-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors6 diff --git a/input/grid5000/sites/grenoble/clusters/vercors6/vercors6_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors6/vercors6_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors6/vercors6_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM 
Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 2e35fc469c5c2c44dd4a86700006712cd7b26d42 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 25/27] [grenoble] Add new cluster vercors7 --- .../clusters/vercors7/nodes/vercors7-1.json | 128 ++++++++++++++++++ .../clusters/vercors7/nodes/vercors7-2.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors7/vercors7.json | 93 +++++++++++++ .../grenoble/clusters/vercors7/nodes.yaml.erb | 82 +++++++++++ .../grenoble/clusters/vercors7/vercors7.yaml | 41 ++++++ .../clusters/vercors7/vercors7_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors7/nodes/vercors7-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors7/nodes/vercors7-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors7/vercors7.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors7/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors7/vercors7.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors7/vercors7_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors7/nodes/vercors7-1.json b/data/grid5000/sites/grenoble/clusters/vercors7/nodes/vercors7-1.json new file mode 100644 index 00000000000..eeb725cef16 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors7/nodes/vercors7-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + 
"ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.13", + "ip6": "2001:660:4406:100:c::d", + "kavlan": false, + "mac": "aa:bb:cc:dd:ee:04", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors7-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.13", + "kavlan": false, + "mac": "00:11:22:33:44:04", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors7-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors7", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + 
"besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors7-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors7/nodes/vercors7-2.json b/data/grid5000/sites/grenoble/clusters/vercors7/nodes/vercors7-2.json new file mode 100644 index 00000000000..5e2af235232 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors7/nodes/vercors7-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.14", + "ip6": "2001:660:4406:100:c::e", + "kavlan": false, + "mac": "b0:26:28:e9:17:d8", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors7-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.14", + "kavlan": false, + "mac": "00:11:22:33:44:05", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors7-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors7", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + 
"pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors7-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors7/vercors7.json b/data/grid5000/sites/grenoble/clusters/vercors7/vercors7.json new file mode 100644 index 00000000000..ba6b68d214a --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors7/vercors7.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + 
"node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors7", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors7/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors7/nodes.yaml.erb new file mode 100644 index 00000000000..1ada15eccb7 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors7/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If 
changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors7" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +aa:bb:cc:dd:ee:04 +b0:26:28:e9:17:d8 + ) + mac_bmc_list = %w( +00:11:22:33:44:04 +00:11:22:33:44:05 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction 
set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors7/vercors7.yaml b/input/grid5000/sites/grenoble/clusters/vercors7/vercors7.yaml new file mode 100644 index 00000000000..9277c3d8472 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors7/vercors7.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors7-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors7 diff --git a/input/grid5000/sites/grenoble/clusters/vercors7/vercors7_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors7/vercors7_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors7/vercors7_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM 
Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From 6cc576f56b8f64aa98b1d1b348800813df652b4a Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:44 +0100 Subject: [PATCH 26/27] [grenoble] Add new cluster vercors8 --- .../clusters/vercors8/nodes/vercors8-1.json | 128 ++++++++++++++++++ .../clusters/vercors8/nodes/vercors8-2.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors8/vercors8.json | 93 +++++++++++++ .../grenoble/clusters/vercors8/nodes.yaml.erb | 82 +++++++++++ .../grenoble/clusters/vercors8/vercors8.yaml | 41 ++++++ .../clusters/vercors8/vercors8_metrics.yaml | 63 +++++++++ 6 files changed, 535 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors8/nodes/vercors8-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors8/nodes/vercors8-2.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors8/vercors8.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors8/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors8/vercors8.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors8/vercors8_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors8/nodes/vercors8-1.json b/data/grid5000/sites/grenoble/clusters/vercors8/nodes/vercors8-1.json new file mode 100644 index 00000000000..952a91d4c92 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors8/nodes/vercors8-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + 
"ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.15", + "ip6": "2001:660:4406:100:c::f", + "kavlan": false, + "mac": "bc:97:e1:a4:59:30", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors8-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.15", + "kavlan": false, + "mac": "70:b5:e8:e2:c3:5c", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors8-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors8", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + 
"besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors8-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors8/nodes/vercors8-2.json b/data/grid5000/sites/grenoble/clusters/vercors8/nodes/vercors8-2.json new file mode 100644 index 00000000000..07096ae5b30 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors8/nodes/vercors8-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.16", + "ip6": "2001:660:4406:100:c::10", + "kavlan": false, + "mac": "bc:97:e1:a4:32:f0", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors8-2.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.16", + "kavlan": false, + "mac": "00:11:22:33:44:06", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors8-2-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors8", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + 
"pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors8-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors8/vercors8.json b/data/grid5000/sites/grenoble/clusters/vercors8/vercors8.json new file mode 100644 index 00000000000..0784ac50848 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors8/vercors8.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + 
"node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors8", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors8/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors8/nodes.yaml.erb new file mode 100644 index 00000000000..fc1dd0398b0 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors8/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If 
changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors8" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +bc:97:e1:a4:59:30 +bc:97:e1:a4:32:f0 + ) + mac_bmc_list = %w( +70:b5:e8:e2:c3:5c +00:11:22:33:44:06 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction 
set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors8/vercors8.yaml b/input/grid5000/sites/grenoble/clusters/vercors8/vercors8.yaml new file mode 100644 index 00000000000..dd8bcb0327a --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors8/vercors8.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors8-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors8 diff --git a/input/grid5000/sites/grenoble/clusters/vercors8/vercors8_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors8/vercors8_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors8/vercors8_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM 
Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab From bafd842b5acba1f0b03138a3471f26c80701af72 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Fri, 14 Mar 2025 15:20:45 +0100 Subject: [PATCH 27/27] [grenoble] Add new cluster vercors9 --- .../clusters/vercors9/nodes/vercors9-1.json | 128 ++++++++++++++++++ .../grenoble/clusters/vercors9/vercors9.json | 93 +++++++++++++ .../grenoble/clusters/vercors9/nodes.yaml.erb | 80 +++++++++++ .../grenoble/clusters/vercors9/vercors9.yaml | 41 ++++++ .../clusters/vercors9/vercors9_metrics.yaml | 63 +++++++++ 5 files changed, 405 insertions(+) create mode 100644 data/grid5000/sites/grenoble/clusters/vercors9/nodes/vercors9-1.json create mode 100644 data/grid5000/sites/grenoble/clusters/vercors9/vercors9.json create mode 100644 input/grid5000/sites/grenoble/clusters/vercors9/nodes.yaml.erb create mode 100644 input/grid5000/sites/grenoble/clusters/vercors9/vercors9.yaml create mode 100644 input/grid5000/sites/grenoble/clusters/vercors9/vercors9_metrics.yaml diff --git a/data/grid5000/sites/grenoble/clusters/vercors9/nodes/vercors9-1.json b/data/grid5000/sites/grenoble/clusters/vercors9/nodes/vercors9-1.json new file mode 100644 index 00000000000..f9f0461eac9 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors9/nodes/vercors9-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": 
"dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.27.17", + "ip6": "2001:660:4406:100:c::11", + "kavlan": false, + "mac": "b0:83:fe:e5:1f:53", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "vercors9-1.grenoble.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.27.17", + "kavlan": false, + "mac": "00:11:22:33:44:07", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "vercors9-1-bmc.grenoble.grid5000.fr" + } + ], + "nodeset": "vercors9", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "vercors9-1" +} \ No newline at 
end of file diff --git a/data/grid5000/sites/grenoble/clusters/vercors9/vercors9.json b/data/grid5000/sites/grenoble/clusters/vercors9/vercors9.json new file mode 100644 index 00000000000..27d5744d095 --- /dev/null +++ b/data/grid5000/sites/grenoble/clusters/vercors9/vercors9.json @@ -0,0 +1,93 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "DCGM_FI_DEV_SM_CLOCK", + "DCGM_FI_DEV_MEM_CLOCK", + "DCGM_FI_DEV_GPU_TEMP", + "DCGM_FI_DEV_POWER_USAGE", + "DCGM_FI_DEV_MEM_COPY_UTIL" + ], + "port": 9400, + "protocol": "prometheus" + } + }, + { + 
"description": "All metrics from Prometheus Nvidia DCGM Exporter", + "name": "prom_nvgpu_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9400, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "vercors9", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/input/grid5000/sites/grenoble/clusters/vercors9/nodes.yaml.erb b/input/grid5000/sites/grenoble/clusters/vercors9/nodes.yaml.erb new file mode 100644 index 00000000000..bba48c6027c --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors9/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=mass-create-2025-03-14.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "vercors9" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +b0:83:fe:e5:1f:53 + ) + mac_bmc_list = %w( +00:11:22:33:44:07 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced 
by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: 
mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> diff --git a/input/grid5000/sites/grenoble/clusters/vercors9/vercors9.yaml b/input/grid5000/sites/grenoble/clusters/vercors9/vercors9.yaml new file mode 100644 index 00000000000..f1ff5fa22f3 --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors9/vercors9.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + vercors9-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: vercors9 diff --git a/input/grid5000/sites/grenoble/clusters/vercors9/vercors9_metrics.yaml b/input/grid5000/sites/grenoble/clusters/vercors9/vercors9_metrics.yaml new file mode 100644 index 00000000000..03c90530a6f --- /dev/null +++ b/input/grid5000/sites/grenoble/clusters/vercors9/vercors9_metrics.yaml @@ -0,0 +1,63 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + - name: prom_nvgpu_default_metrics + description: Default subset of metrics from Prometheus Nvidia DCGM Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 + id: + - DCGM_FI_DEV_SM_CLOCK + - DCGM_FI_DEV_MEM_CLOCK + - DCGM_FI_DEV_GPU_TEMP + - DCGM_FI_DEV_POWER_USAGE + - DCGM_FI_DEV_MEM_COPY_UTIL + + - name: prom_nvgpu_all_metrics + description: All metrics from Prometheus Nvidia DCGM 
Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9400 -- GitLab