From 987ebce82b1cf559500e355a95c61c7e00f0f3cb Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 01/34] [sophia][esterel3] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel3/esterel3.json | 66 +++++++++ .../clusters/esterel3/nodes/esterel3-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel3/esterel3.yaml | 41 ++++++ .../clusters/esterel3/esterel3_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel3/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel3/esterel3.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel3/nodes/esterel3-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel3/esterel3.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel3/esterel3_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel3/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel3/esterel3.json b/data/grid5000/sites/sophia/clusters/esterel3/esterel3.json new file mode 100644 index 00000000000..0a76dd75fa2 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel3/esterel3.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + 
"node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel3", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel3/nodes/esterel3-1.json b/data/grid5000/sites/sophia/clusters/esterel3/nodes/esterel3-1.json new file mode 100644 index 00000000000..a1e4ec07a41 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel3/nodes/esterel3-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + 
"ip": "172.16.131.3", + "ip6": "2001:660:4406:800:4::3", + "kavlan": false, + "mac": "14:18:77:72:09:4c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel3-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.3", + "kavlan": false, + "mac": "14:18:77:72:09:4e", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel3-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel3-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel3/esterel3.yaml b/input/grid5000/sites/sophia/clusters/esterel3/esterel3.yaml new file mode 100644 index 
00000000000..bb691795c00 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel3/esterel3.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel3-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel3 diff --git a/input/grid5000/sites/sophia/clusters/esterel3/esterel3_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel3/esterel3_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel3/esterel3_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel3/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel3/nodes.yaml.erb new file mode 100644 index 00000000000..b3a7b90938d --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel3/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit the 
source data and regenerate using 'rake mass:create' + + cluster_name = "esterel3" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +14:18:77:72:09:4c + ) + mac_bmc_list = %w( +14:18:77:72:09:4e + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From b75a2c2162e52682af7a241b9c2e11798b287d0f Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 02/34] [sophia][esterel8] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel8/esterel8.json | 66 +++++++++ .../clusters/esterel8/nodes/esterel8-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel8/esterel8.yaml | 41 ++++++ .../clusters/esterel8/esterel8_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel8/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel8/esterel8.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel8/nodes/esterel8-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel8/esterel8.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel8/esterel8_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel8/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel8/esterel8.json b/data/grid5000/sites/sophia/clusters/esterel8/esterel8.json new file mode 100644 index 00000000000..b382e8e6c77 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel8/esterel8.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, 
+ "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel8", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel8/nodes/esterel8-1.json b/data/grid5000/sites/sophia/clusters/esterel8/nodes/esterel8-1.json new file mode 100644 index 00000000000..840047c6bf8 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel8/nodes/esterel8-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.10", + "ip6": "2001:660:4406:800:4::a", + "kavlan": false, + "mac": "e0:db:55:fd:39:e7", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel8-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.10", + "kavlan": false, + "mac": "d0:94:66:1f:64:cd", + "management": true, + "mountable": 
false, + "mounted": false, + "network_address": "esterel8-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel8", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel8-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel8/esterel8.yaml b/input/grid5000/sites/sophia/clusters/esterel8/esterel8.yaml new file mode 100644 index 00000000000..f44a5fe58b5 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel8/esterel8.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel8-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel8 diff --git a/input/grid5000/sites/sophia/clusters/esterel8/esterel8_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel8/esterel8_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel8/esterel8_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel8/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel8/nodes.yaml.erb new file mode 100644 index 00000000000..37a8d5017c3 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel8/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit the 
source data and regenerate using 'rake mass:create' + + cluster_name = "esterel8" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +e0:db:55:fd:39:e7 + ) + mac_bmc_list = %w( +d0:94:66:1f:64:cd + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From d0008bc237c2c14c45a41ce6b285ea0547641eee Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 03/34] [sophia][esterel9] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel9/esterel9.json | 66 +++++++++ .../clusters/esterel9/nodes/esterel9-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel9/esterel9.yaml | 41 ++++++ .../clusters/esterel9/esterel9_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel9/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel9/esterel9.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel9/nodes/esterel9-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel9/esterel9.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel9/esterel9_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel9/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel9/esterel9.json b/data/grid5000/sites/sophia/clusters/esterel9/esterel9.json new file mode 100644 index 00000000000..ac7e0a89587 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel9/esterel9.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, 
+ "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel9", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel9/nodes/esterel9-1.json b/data/grid5000/sites/sophia/clusters/esterel9/nodes/esterel9-1.json new file mode 100644 index 00000000000..f86d5ab4f0c --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel9/nodes/esterel9-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.11", + "ip6": "2001:660:4406:800:4::b", + "kavlan": false, + "mac": "e0:db:55:fd:38:5e", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel9-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.11", + "kavlan": false, + "mac": "d0:94:66:1f:75:08", + "management": true, + "mountable": 
false, + "mounted": false, + "network_address": "esterel9-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel9", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel9-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel9/esterel9.yaml b/input/grid5000/sites/sophia/clusters/esterel9/esterel9.yaml new file mode 100644 index 00000000000..3de196aa676 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel9/esterel9.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel9-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel9 diff --git a/input/grid5000/sites/sophia/clusters/esterel9/esterel9_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel9/esterel9_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel9/esterel9_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel9/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel9/nodes.yaml.erb new file mode 100644 index 00000000000..ee96abcd551 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel9/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv' + # If changes are needed, it might be better to edit the
source data and regenerate using 'rake mass:create' + + cluster_name = "esterel9" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +e0:db:55:fd:38:5e + ) + mac_bmc_list = %w( +d0:94:66:1f:75:08 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 0d68d20adafc8b1ad9f3b461cab28a135ce54f5a Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 04/34] [sophia][esterel11] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel11/esterel11.json | 66 +++++++++ .../clusters/esterel11/nodes/esterel11-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel11/esterel11.yaml | 41 ++++++ .../clusters/esterel11/esterel11_metrics.yaml | 40 ++++++
.../sophia/clusters/esterel11/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel11/esterel11.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel11/nodes/esterel11-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel11/esterel11.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel11/esterel11_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel11/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel11/esterel11.json b/data/grid5000/sites/sophia/clusters/esterel11/esterel11.json new file mode 100644 index 00000000000..8b15b6ff151 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel11/esterel11.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel11", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel11/nodes/esterel11-1.json b/data/grid5000/sites/sophia/clusters/esterel11/nodes/esterel11-1.json new file mode 100644 index 00000000000..f54f8ea30c6 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel11/nodes/esterel11-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.15", + "ip6": "2001:660:4406:800:4::f", + "kavlan": false, + "mac": "e0:db:55:fd:39:69", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel11-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.15", + "kavlan": false, + "mac": "d0:94:66:13:a9:4f", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel11-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel11", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel11-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel11/esterel11.yaml b/input/grid5000/sites/sophia/clusters/esterel11/esterel11.yaml new file mode 100644 index 00000000000..d0c18af39c9 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel11/esterel11.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel11-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel11 diff --git a/input/grid5000/sites/sophia/clusters/esterel11/esterel11_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel11/esterel11_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel11/esterel11_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel11/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel11/nodes.yaml.erb new file mode 100644 index 00000000000..9eed1b15731 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel11/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv' + # If changes are needed, it might be better to edit
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel11" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +e0:db:55:fd:39:69 + ) + mac_bmc_list = %w( +d0:94:66:13:a9:4f + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 84f0387b5111dba65e4444a6db3a14745cf36abd Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 05/34] [sophia][esterel13] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel13/esterel13.json | 66 +++++++++ .../clusters/esterel13/nodes/esterel13-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel13/esterel13.yaml | 41 ++++++ .../clusters/esterel13/esterel13_metrics.yaml | 40 ++++++
.../sophia/clusters/esterel13/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel13/esterel13.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel13/nodes/esterel13-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel13/esterel13.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel13/esterel13_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel13/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel13/esterel13.json b/data/grid5000/sites/sophia/clusters/esterel13/esterel13.json new file mode 100644 index 00000000000..26623b5e6c7 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel13/esterel13.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel13", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel13/nodes/esterel13-1.json b/data/grid5000/sites/sophia/clusters/esterel13/nodes/esterel13-1.json new file mode 100644 index 00000000000..f707217fda9 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel13/nodes/esterel13-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.18", + "ip6": "2001:660:4406:800:4::12", + "kavlan": false, + "mac": "e0:db:55:fd:38:7f", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel13-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.18", + "kavlan": false, + "mac": "50:9a:4c:ab:4e:f4", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel13-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel13", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel13-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel13/esterel13.yaml b/input/grid5000/sites/sophia/clusters/esterel13/esterel13.yaml new file mode 100644 index 00000000000..4481f81bb8d --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel13/esterel13.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel13-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel13 diff --git a/input/grid5000/sites/sophia/clusters/esterel13/esterel13_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel13/esterel13_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel13/esterel13_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel13/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel13/nodes.yaml.erb new file mode 100644 index 00000000000..c89f123f265 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel13/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv' + # If changes are needed, it might be better to edit
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel13" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +e0:db:55:fd:38:7f + ) + mac_bmc_list = %w( +50:9a:4c:ab:4e:f4 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 6799f41838f85c9bd6cf03e0590685fec5b22a4d Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 06/34] [sophia][esterel14] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel14/esterel14.json | 66 +++++++++ .../clusters/esterel14/nodes/esterel14-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel14/esterel14.yaml | 41 ++++++ .../clusters/esterel14/esterel14_metrics.yaml | 40 ++++++
.../sophia/clusters/esterel14/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel14/esterel14.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel14/nodes/esterel14-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel14/esterel14.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel14/esterel14_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel14/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel14/esterel14.json b/data/grid5000/sites/sophia/clusters/esterel14/esterel14.json new file mode 100644 index 00000000000..f252d965a01 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel14/esterel14.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel14", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel14/nodes/esterel14-1.json b/data/grid5000/sites/sophia/clusters/esterel14/nodes/esterel14-1.json new file mode 100644 index 00000000000..5d00c4e5197 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel14/nodes/esterel14-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.19", + "ip6": "2001:660:4406:800:4::13", + "kavlan": false, + "mac": "e0:db:55:fd:2f:d0", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel14-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.19", + "kavlan": false, + "mac": "58:8a:5a:ec:8f:b4", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel14-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel14", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel14-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel14/esterel14.yaml b/input/grid5000/sites/sophia/clusters/esterel14/esterel14.yaml new file mode 100644 index 00000000000..00af76797fc --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel14/esterel14.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel14-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel14 diff --git a/input/grid5000/sites/sophia/clusters/esterel14/esterel14_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel14/esterel14_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel14/esterel14_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel14/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel14/nodes.yaml.erb new file mode 100644 index 00000000000..3ee167a2e5b --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel14/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel14" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +e0:db:55:fd:2f:d0 + ) + mac_bmc_list = %w( +58:8a:5a:ec:8f:b4 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 697c71e572ec6b7b027ca1f83464312892a3b500 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 07/34] [sophia][esterel15] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel15/esterel15.json | 66 +++++++++ .../clusters/esterel15/nodes/esterel15-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel15/esterel15.yaml | 41 ++++++ .../clusters/esterel15/esterel15_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel15/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel15/esterel15.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel15/nodes/esterel15-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel15/esterel15.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel15/esterel15_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel15/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel15/esterel15.json b/data/grid5000/sites/sophia/clusters/esterel15/esterel15.json new file mode 100644 index 00000000000..3a5f947eae0 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel15/esterel15.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel15", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel15/nodes/esterel15-1.json b/data/grid5000/sites/sophia/clusters/esterel15/nodes/esterel15-1.json new file mode 100644 index 00000000000..3aeb5b1b409 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel15/nodes/esterel15-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.20", + "ip6": "2001:660:4406:800:4::14", + "kavlan": false, + "mac": "e0:db:55:fd:39:81", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel15-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.20", + "kavlan": false, + "mac": "54:48:10:f8:5a:f0", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel15-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel15", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel15-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel15/esterel15.yaml b/input/grid5000/sites/sophia/clusters/esterel15/esterel15.yaml new file mode 100644 index 00000000000..dae8992ec50 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel15/esterel15.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel15-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel15 diff --git a/input/grid5000/sites/sophia/clusters/esterel15/esterel15_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel15/esterel15_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel15/esterel15_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel15/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel15/nodes.yaml.erb new file mode 100644 index 00000000000..2d548a359d6 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel15/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel15" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +e0:db:55:fd:39:81 + ) + mac_bmc_list = %w( +54:48:10:f8:5a:f0 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From ab3e20d0486ceb855d5226c838a854253c14007a Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 08/34] [sophia][esterel16] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel16/esterel16.json | 66 +++++++++ .../clusters/esterel16/nodes/esterel16-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel16/esterel16.yaml | 41 ++++++ .../clusters/esterel16/esterel16_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel16/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel16/esterel16.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel16/nodes/esterel16-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel16/esterel16.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel16/esterel16_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel16/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel16/esterel16.json b/data/grid5000/sites/sophia/clusters/esterel16/esterel16.json new file mode 100644 index 00000000000..3192d7a2d25 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel16/esterel16.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel16", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel16/nodes/esterel16-1.json b/data/grid5000/sites/sophia/clusters/esterel16/nodes/esterel16-1.json new file mode 100644 index 00000000000..458de734ca7 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel16/nodes/esterel16-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.21", + "ip6": "2001:660:4406:800:4::15", + "kavlan": false, + "mac": "34:17:eb:e7:25:01", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel16-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.21", + "kavlan": false, + "mac": "4c:d9:8f:0c:35:30", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel16-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel16", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel16-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel16/esterel16.yaml b/input/grid5000/sites/sophia/clusters/esterel16/esterel16.yaml new file mode 100644 index 00000000000..bef29aafff6 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel16/esterel16.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel16-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel16 diff --git a/input/grid5000/sites/sophia/clusters/esterel16/esterel16_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel16/esterel16_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel16/esterel16_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel16/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel16/nodes.yaml.erb new file mode 100644 index 00000000000..9aff4f28ba4 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel16/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel16" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +34:17:eb:e7:25:01 + ) + mac_bmc_list = %w( +4c:d9:8f:0c:35:30 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 95b287e04efa312a8fef1f9d53b64a4757851a6c Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 09/34] [sophia][esterel17] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel17/esterel17.json | 66 +++++++++ .../clusters/esterel17/nodes/esterel17-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel17/esterel17.yaml | 41 ++++++ .../clusters/esterel17/esterel17_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel17/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel17/esterel17.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel17/nodes/esterel17-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel17/esterel17.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel17/esterel17_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel17/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel17/esterel17.json b/data/grid5000/sites/sophia/clusters/esterel17/esterel17.json new file mode 100644 index 00000000000..143ac65f780 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel17/esterel17.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel17", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel17/nodes/esterel17-1.json b/data/grid5000/sites/sophia/clusters/esterel17/nodes/esterel17-1.json new file mode 100644 index 00000000000..2cad592d0ba --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel17/nodes/esterel17-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.22", + "ip6": "2001:660:4406:800:4::16", + "kavlan": false, + "mac": "e0:db:55:fd:39:ab", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel17-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.22", + "kavlan": false, + "mac": "50:9a:4c:aa:fe:f2", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel17-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel17", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel17-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel17/esterel17.yaml b/input/grid5000/sites/sophia/clusters/esterel17/esterel17.yaml new file mode 100644 index 00000000000..c85931fc702 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel17/esterel17.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel17-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel17 diff --git a/input/grid5000/sites/sophia/clusters/esterel17/esterel17_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel17/esterel17_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel17/esterel17_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel17/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel17/nodes.yaml.erb new file mode 100644 index 00000000000..e6e52648100 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel17/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel17" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +e0:db:55:fd:39:ab + ) + mac_bmc_list = %w( +50:9a:4c:aa:fe:f2 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 350248c34f0f28d6e4dd2fdfa9b61c12a1688cbe Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 10/34] [sophia][esterel18] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel18/esterel18.json | 66 +++++++++ .../clusters/esterel18/nodes/esterel18-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel18/esterel18.yaml | 41 ++++++ .../clusters/esterel18/esterel18_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel18/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel18/esterel18.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel18/nodes/esterel18-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel18/esterel18.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel18/esterel18_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel18/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel18/esterel18.json b/data/grid5000/sites/sophia/clusters/esterel18/esterel18.json new file mode 100644 index 00000000000..2e794807f67 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel18/esterel18.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel18", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel18/nodes/esterel18-1.json b/data/grid5000/sites/sophia/clusters/esterel18/nodes/esterel18-1.json new file mode 100644 index 00000000000..be3426c3185 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel18/nodes/esterel18-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.23", + "ip6": "2001:660:4406:800:4::17", + "kavlan": false, + "mac": "00:26:6c:f8:58:a2", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel18-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.23", + "kavlan": false, + "mac": "4c:d9:8f:15:27:46", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel18-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel18", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel18-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel18/esterel18.yaml b/input/grid5000/sites/sophia/clusters/esterel18/esterel18.yaml new file mode 100644 index 00000000000..6ed68202dc1 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel18/esterel18.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel18-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel18 diff --git a/input/grid5000/sites/sophia/clusters/esterel18/esterel18_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel18/esterel18_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel18/esterel18_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel18/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel18/nodes.yaml.erb new file mode 100644 index 00000000000..b0d891b5c35 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel18/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel18" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +00:26:6c:f8:58:a2 + ) + mac_bmc_list = %w( +4c:d9:8f:15:27:46 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 64c9a086b4284fbb633850886eb1b5dc21110965 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 11/34] [sophia][esterel19] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel19/esterel19.json | 66 +++++++++ .../clusters/esterel19/nodes/esterel19-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel19/esterel19.yaml | 41 ++++++ .../clusters/esterel19/esterel19_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel19/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel19/esterel19.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel19/nodes/esterel19-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel19/esterel19.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel19/esterel19_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel19/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel19/esterel19.json b/data/grid5000/sites/sophia/clusters/esterel19/esterel19.json new file mode 100644 index 00000000000..33e3d5a2b25 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel19/esterel19.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel19", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel19/nodes/esterel19-1.json b/data/grid5000/sites/sophia/clusters/esterel19/nodes/esterel19-1.json new file mode 100644 index 00000000000..4bff620ab90 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel19/nodes/esterel19-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.24", + "ip6": "2001:660:4406:800:4::18", + "kavlan": false, + "mac": "34:17:eb:e8:81:e6", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel19-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.24", + "kavlan": false, + "mac": "4c:d9:8f:2c:d6:24", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel19-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel19", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel19-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel19/esterel19.yaml b/input/grid5000/sites/sophia/clusters/esterel19/esterel19.yaml new file mode 100644 index 00000000000..028bcfde0cf --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel19/esterel19.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel19-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel19 diff --git a/input/grid5000/sites/sophia/clusters/esterel19/esterel19_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel19/esterel19_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel19/esterel19_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel19/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel19/nodes.yaml.erb new file mode 100644 index 00000000000..b99c24cb237 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel19/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel19" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +34:17:eb:e8:81:e6 + ) + mac_bmc_list = %w( +4c:d9:8f:2c:d6:24 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From acaef73ffb627649a91e2f3f5818754748f41cf7 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 12/34] [sophia][esterel20] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel20/esterel20.json | 66 +++++++++ .../clusters/esterel20/nodes/esterel20-1.json | 128 ++++++++++++++++++ .../clusters/esterel20/nodes/esterel20-2.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel20/esterel20.yaml | 41 ++++++ 
.../clusters/esterel20/esterel20_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel20/nodes.yaml.erb | 82 +++++++++++ 6 files changed, 485 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel20/esterel20.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel20/nodes/esterel20-1.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel20/nodes/esterel20-2.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel20/esterel20.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel20/esterel20_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel20/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel20/esterel20.json b/data/grid5000/sites/sophia/clusters/esterel20/esterel20.json new file mode 100644 index 00000000000..c355db808ee --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel20/esterel20.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 
9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel20", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel20/nodes/esterel20-1.json b/data/grid5000/sites/sophia/clusters/esterel20/nodes/esterel20-1.json new file mode 100644 index 00000000000..cad7f192335 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel20/nodes/esterel20-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.25", + "ip6": "2001:660:4406:800:4::19", + "kavlan": false, + "mac": "34:17:eb:e8:83:7e", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel20-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + 
"device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.25", + "kavlan": false, + "mac": "4c:d9:8f:2c:da:44", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel20-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel20", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel20-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel20/nodes/esterel20-2.json b/data/grid5000/sites/sophia/clusters/esterel20/nodes/esterel20-2.json new file mode 100644 index 00000000000..bac537dbaa9 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel20/nodes/esterel20-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": 
"01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.26", + "ip6": "2001:660:4406:800:4::1a", + "kavlan": false, + "mac": "34:17:eb:e8:83:75", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel20-2.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.26", + "kavlan": false, + "mac": "4c:d9:8f:2c:d3:90", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel20-2-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel20", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": 
"debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel20-2" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel20/esterel20.yaml b/input/grid5000/sites/sophia/clusters/esterel20/esterel20.yaml new file mode 100644 index 00000000000..d85712c5264 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel20/esterel20.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel20-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel20 diff --git a/input/grid5000/sites/sophia/clusters/esterel20/esterel20_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel20/esterel20_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel20/esterel20_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel20/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel20/nodes.yaml.erb new file mode 100644 index 00000000000..e5549692f72 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel20/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel20" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +34:17:eb:e8:83:7e +34:17:eb:e8:83:75 + ) + mac_bmc_list = %w( +4c:d9:8f:2c:da:44 +4c:d9:8f:2c:d3:90 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: 
"0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 0aa920acb7be4104f1dc4c8b04b48475a7a74ccd Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 13/34] [sophia][esterel21] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel21/esterel21.json | 66 +++++++++ .../clusters/esterel21/nodes/esterel21-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel21/esterel21.yaml | 41 ++++++ .../clusters/esterel21/esterel21_metrics.yaml | 40 
++++++ .../sophia/clusters/esterel21/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel21/esterel21.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel21/nodes/esterel21-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel21/esterel21.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel21/esterel21_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel21/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel21/esterel21.json b/data/grid5000/sites/sophia/clusters/esterel21/esterel21.json new file mode 100644 index 00000000000..c1e0fb52e04 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel21/esterel21.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel21", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel21/nodes/esterel21-1.json b/data/grid5000/sites/sophia/clusters/esterel21/nodes/esterel21-1.json new file mode 100644 index 00000000000..8e9b6384bb4 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel21/nodes/esterel21-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.27", + "ip6": "2001:660:4406:800:4::1b", + "kavlan": false, + "mac": "34:17:eb:e8:83:72", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel21-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.27", + "kavlan": false, + "mac": "f4:02:70:9a:c2:c4", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel21-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel21", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel21-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel21/esterel21.yaml b/input/grid5000/sites/sophia/clusters/esterel21/esterel21.yaml new file mode 100644 index 00000000000..b755ff27c6c --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel21/esterel21.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel21-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel21 diff --git a/input/grid5000/sites/sophia/clusters/esterel21/esterel21_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel21/esterel21_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel21/esterel21_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel21/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel21/nodes.yaml.erb new file mode 100644 index 00000000000..ec9f0117671 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel21/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel21" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +34:17:eb:e8:83:72 + ) + mac_bmc_list = %w( +f4:02:70:9a:c2:c4 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From b31265f4d6775dfa5e07dc91ed542c3dc331c978 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 14/34] [sophia][esterel22] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel22/esterel22.json | 66 +++++++++ .../clusters/esterel22/nodes/esterel22-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel22/esterel22.yaml | 41 ++++++ .../clusters/esterel22/esterel22_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel22/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel22/esterel22.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel22/nodes/esterel22-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel22/esterel22.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel22/esterel22_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel22/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel22/esterel22.json b/data/grid5000/sites/sophia/clusters/esterel22/esterel22.json new file mode 100644 index 00000000000..925071e50e4 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel22/esterel22.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel22", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel22/nodes/esterel22-1.json b/data/grid5000/sites/sophia/clusters/esterel22/nodes/esterel22-1.json new file mode 100644 index 00000000000..75f810e1ee1 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel22/nodes/esterel22-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.28", + "ip6": "2001:660:4406:800:4::1c", + "kavlan": false, + "mac": "34:17:eb:e8:83:84", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel22-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.28", + "kavlan": false, + "mac": "f4:02:70:9d:f4:e8", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel22-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel22", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel22-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel22/esterel22.yaml b/input/grid5000/sites/sophia/clusters/esterel22/esterel22.yaml new file mode 100644 index 00000000000..fd0ec344fb0 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel22/esterel22.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel22-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel22 diff --git a/input/grid5000/sites/sophia/clusters/esterel22/esterel22_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel22/esterel22_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel22/esterel22_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel22/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel22/nodes.yaml.erb new file mode 100644 index 00000000000..f70df1a0ba2 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel22/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel22" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +34:17:eb:e8:83:84 + ) + mac_bmc_list = %w( +f4:02:70:9d:f4:e8 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 86919a61935bd8b6a4e93e1d435a1320d11b8866 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 15/34] [sophia][esterel23] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel23/esterel23.json | 66 +++++++++ .../clusters/esterel23/nodes/esterel23-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel23/esterel23.yaml | 41 ++++++ .../clusters/esterel23/esterel23_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel23/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel23/esterel23.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel23/nodes/esterel23-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel23/esterel23.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel23/esterel23_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel23/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel23/esterel23.json b/data/grid5000/sites/sophia/clusters/esterel23/esterel23.json new file mode 100644 index 00000000000..a7b3a0709c5 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel23/esterel23.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel23", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel23/nodes/esterel23-1.json b/data/grid5000/sites/sophia/clusters/esterel23/nodes/esterel23-1.json new file mode 100644 index 00000000000..377c1e8a3c8 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel23/nodes/esterel23-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.29", + "ip6": "2001:660:4406:800:4::1d", + "kavlan": false, + "mac": "34:17:eb:e8:83:00", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel23-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.29", + "kavlan": false, + "mac": "f4:02:70:9d:78:36", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel23-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel23", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel23-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel23/esterel23.yaml b/input/grid5000/sites/sophia/clusters/esterel23/esterel23.yaml new file mode 100644 index 00000000000..5556b27e579 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel23/esterel23.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel23-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel23 diff --git a/input/grid5000/sites/sophia/clusters/esterel23/esterel23_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel23/esterel23_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel23/esterel23_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel23/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel23/nodes.yaml.erb new file mode 100644 index 00000000000..ad8d949268c --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel23/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel23" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +34:17:eb:e8:83:00 + ) + mac_bmc_list = %w( +f4:02:70:9d:78:36 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From aad48ebdeecabca850f5cad9ef707ceb2e3f5cab Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 16/34] [sophia][esterel24] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel24/esterel24.json | 66 +++++++++ .../clusters/esterel24/nodes/esterel24-1.json | 128 ++++++++++++++++++ .../clusters/esterel24/nodes/esterel24-2.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel24/esterel24.yaml | 41 ++++++ 
.../clusters/esterel24/esterel24_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel24/nodes.yaml.erb | 82 +++++++++++ 6 files changed, 485 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel24/esterel24.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel24/nodes/esterel24-1.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel24/nodes/esterel24-2.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel24/esterel24.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel24/esterel24_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel24/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel24/esterel24.json b/data/grid5000/sites/sophia/clusters/esterel24/esterel24.json new file mode 100644 index 00000000000..0bf2ac9216c --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel24/esterel24.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 
9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel24", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel24/nodes/esterel24-1.json b/data/grid5000/sites/sophia/clusters/esterel24/nodes/esterel24-1.json new file mode 100644 index 00000000000..98409fe662d --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel24/nodes/esterel24-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.30", + "ip6": "2001:660:4406:800:4::1e", + "kavlan": false, + "mac": "34:17:eb:e8:82:bb", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel24-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + 
"device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.30", + "kavlan": false, + "mac": "f4:02:70:9d:75:a2", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel24-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel24", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel24-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel24/nodes/esterel24-2.json b/data/grid5000/sites/sophia/clusters/esterel24/nodes/esterel24-2.json new file mode 100644 index 00000000000..6a2ef342807 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel24/nodes/esterel24-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": 
"01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.31", + "ip6": "2001:660:4406:800:4::1f", + "kavlan": false, + "mac": "50:9a:4c:6c:38:af", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel24-2.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.31", + "kavlan": false, + "mac": "70:b5:e8:c9:ff:e2", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel24-2-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel24", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": 
"debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel24-2" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel24/esterel24.yaml b/input/grid5000/sites/sophia/clusters/esterel24/esterel24.yaml new file mode 100644 index 00000000000..09454b75df5 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel24/esterel24.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel24-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel24 diff --git a/input/grid5000/sites/sophia/clusters/esterel24/esterel24_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel24/esterel24_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel24/esterel24_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel24/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel24/nodes.yaml.erb new file mode 100644 index 00000000000..01290256a74 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel24/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel24" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +34:17:eb:e8:82:bb +50:9a:4c:6c:38:af + ) + mac_bmc_list = %w( +f4:02:70:9d:75:a2 +70:b5:e8:c9:ff:e2 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: 
"0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 67628827f2e171f505beda3313bde405d3f99274 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 17/34] [sophia][esterel25] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel25/esterel25.json | 66 +++++++++ .../clusters/esterel25/nodes/esterel25-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel25/esterel25.yaml | 41 ++++++ .../clusters/esterel25/esterel25_metrics.yaml | 40 
++++++ .../sophia/clusters/esterel25/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel25/esterel25.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel25/nodes/esterel25-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel25/esterel25.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel25/esterel25_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel25/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel25/esterel25.json b/data/grid5000/sites/sophia/clusters/esterel25/esterel25.json new file mode 100644 index 00000000000..41249c5664f --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel25/esterel25.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel25", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel25/nodes/esterel25-1.json b/data/grid5000/sites/sophia/clusters/esterel25/nodes/esterel25-1.json new file mode 100644 index 00000000000..d91f768fb25 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel25/nodes/esterel25-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.32", + "ip6": "2001:660:4406:800:4::20", + "kavlan": false, + "mac": "50:9a:4c:6c:39:82", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel25-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.32", + "kavlan": false, + "mac": "f4:02:70:ed:3f:24", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel25-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel25", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel25-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel25/esterel25.yaml b/input/grid5000/sites/sophia/clusters/esterel25/esterel25.yaml new file mode 100644 index 00000000000..bb8567cf848 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel25/esterel25.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel25-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel25 diff --git a/input/grid5000/sites/sophia/clusters/esterel25/esterel25_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel25/esterel25_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel25/esterel25_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel25/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel25/nodes.yaml.erb new file mode 100644 index 00000000000..47f8d527e2c --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel25/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel25" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6c:39:82 + ) + mac_bmc_list = %w( +f4:02:70:ed:3f:24 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 6e83392f93955a8bc5b0a9e09df54206d101e6d9 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 18/34] [sophia][esterel26] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel26/esterel26.json | 66 +++++++++ .../clusters/esterel26/nodes/esterel26-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel26/esterel26.yaml | 41 ++++++ .../clusters/esterel26/esterel26_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel26/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel26/esterel26.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel26/esterel26_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json new file mode 100644 index 00000000000..61c5bd1e192 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel26/esterel26.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel26", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json new file mode 100644 index 00000000000..71a2c919c50 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel26/nodes/esterel26-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.33", + "ip6": "2001:660:4406:800:4::21", + "kavlan": false, + "mac": "50:9a:4c:6c:38:d9", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel26-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.33", + "kavlan": false, + "mac": "70:b5:e8:e1:41:24", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel26-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel26", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel26-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml new file mode 100644 index 00000000000..a3f40bf62b8 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel26/esterel26.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel26-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel26 diff --git a/input/grid5000/sites/sophia/clusters/esterel26/esterel26_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel26/esterel26_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel26/esterel26_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb new file mode 100644 index 00000000000..cd244fa8358 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel26/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel26" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6c:38:d9 + ) + mac_bmc_list = %w( +70:b5:e8:e1:41:24 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 64bbdfe6a96d7368071b095aa2fe53c890f2e194 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 19/34] [sophia][esterel28] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel28/esterel28.json | 66 +++++++++ .../clusters/esterel28/nodes/esterel28-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel28/esterel28.yaml | 41 ++++++ .../clusters/esterel28/esterel28_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel28/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel28/esterel28.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel28/nodes/esterel28-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel28/esterel28.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel28/esterel28_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel28/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel28/esterel28.json b/data/grid5000/sites/sophia/clusters/esterel28/esterel28.json new file mode 100644 index 00000000000..b6b7a41dce2 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel28/esterel28.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel28", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel28/nodes/esterel28-1.json b/data/grid5000/sites/sophia/clusters/esterel28/nodes/esterel28-1.json new file mode 100644 index 00000000000..44f77947d05 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel28/nodes/esterel28-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.35", + "ip6": "2001:660:4406:800:4::23", + "kavlan": false, + "mac": "50:9a:4c:6c:38:9e", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel28-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.35", + "kavlan": false, + "mac": "0c:c4:7a:e7:10:d3", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel28-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel28", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel28-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel28/esterel28.yaml b/input/grid5000/sites/sophia/clusters/esterel28/esterel28.yaml new file mode 100644 index 00000000000..b65ab60a9ff --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel28/esterel28.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel28-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel28 diff --git a/input/grid5000/sites/sophia/clusters/esterel28/esterel28_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel28/esterel28_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel28/esterel28_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel28/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel28/nodes.yaml.erb new file mode 100644 index 00000000000..18fcf9118bc --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel28/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel28" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6c:38:9e + ) + mac_bmc_list = %w( +0c:c4:7a:e7:10:d3 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 2950b2fe1eff4e9ac2a29d5d4b233334ff2579f7 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 20/34] [sophia][esterel29] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel29/esterel29.json | 66 +++++++++ .../clusters/esterel29/nodes/esterel29-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel29/esterel29.yaml | 41 ++++++ .../clusters/esterel29/esterel29_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel29/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel29/esterel29.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel29/nodes/esterel29-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel29/esterel29.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel29/esterel29_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel29/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel29/esterel29.json b/data/grid5000/sites/sophia/clusters/esterel29/esterel29.json new file mode 100644 index 00000000000..beb440bde81 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel29/esterel29.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel29", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel29/nodes/esterel29-1.json b/data/grid5000/sites/sophia/clusters/esterel29/nodes/esterel29-1.json new file mode 100644 index 00000000000..3dcfbbff8fa --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel29/nodes/esterel29-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.36", + "ip6": "2001:660:4406:800:4::24", + "kavlan": false, + "mac": "50:9a:4c:6c:39:31", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel29-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.36", + "kavlan": false, + "mac": "4c:d9:8f:1a:3c:b8", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel29-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel29", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel29-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel29/esterel29.yaml b/input/grid5000/sites/sophia/clusters/esterel29/esterel29.yaml new file mode 100644 index 00000000000..8f06216eaf0 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel29/esterel29.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel29-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel29 diff --git a/input/grid5000/sites/sophia/clusters/esterel29/esterel29_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel29/esterel29_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel29/esterel29_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel29/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel29/nodes.yaml.erb new file mode 100644 index 00000000000..55ae3ac0f27 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel29/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel29" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6c:39:31 + ) + mac_bmc_list = %w( +4c:d9:8f:1a:3c:b8 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From f76d372e1a07cefe56534de4e6449e7568772b9e Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 21/34] [sophia][esterel30] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel30/esterel30.json | 66 +++++++++ .../clusters/esterel30/nodes/esterel30-1.json | 128 ++++++++++++++++++ .../clusters/esterel30/nodes/esterel30-2.json | 128 ++++++++++++++++++ .../clusters/esterel30/nodes/esterel30-3.json | 128 ++++++++++++++++++ 
.../clusters/esterel30/nodes/esterel30-4.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel30/esterel30.yaml | 41 ++++++ .../clusters/esterel30/esterel30_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel30/nodes.yaml.erb | 86 ++++++++++++ 8 files changed, 745 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel30/esterel30.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-1.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-2.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-3.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-4.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel30/esterel30.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel30/esterel30_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel30/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel30/esterel30.json b/data/grid5000/sites/sophia/clusters/esterel30/esterel30.json new file mode 100644 index 00000000000..df172fefa25 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel30/esterel30.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + 
"node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 4, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel30", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-1.json b/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-1.json new file mode 100644 index 00000000000..558c1612082 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.37", 
+ "ip6": "2001:660:4406:800:4::25", + "kavlan": false, + "mac": "50:9a:4c:6b:5d:b0", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel30-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.37", + "kavlan": false, + "mac": "f4:02:70:b3:66:70", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel30-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel30", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel30-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-2.json b/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-2.json new file mode 100644 index 
00000000000..f216b5dfa42 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.38", + "ip6": "2001:660:4406:800:4::26", + "kavlan": false, + "mac": "50:9a:4c:85:c8:15", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel30-2.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.38", + "kavlan": false, + "mac": "f4:02:70:b3:66:28", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel30-2-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel30", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": 
"Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel30-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-3.json b/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-3.json new file mode 100644 index 00000000000..2964ddb478b --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.39", + "ip6": "2001:660:4406:800:4::27", + "kavlan": false, + "mac": "50:9a:4c:70:fa:7a", + "management": false, + "mountable": true, + "mounted": true, + 
"name": "enp1s0f0np0", + "network_address": "esterel30-3.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.39", + "kavlan": false, + "mac": "f4:02:70:b3:5d:b2", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel30-3-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel30", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel30-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-4.json b/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-4.json new file mode 100644 index 00000000000..efa7ab872ac --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel30/nodes/esterel30-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + 
"cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.40", + "ip6": "2001:660:4406:800:4::28", + "kavlan": false, + "mac": "50:9a:4c:70:fa:3c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel30-4.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.40", + "kavlan": false, + "mac": "f4:02:70:ba:d8:4e", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel30-4-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel30", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + 
"redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel30-4" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel30/esterel30.yaml b/input/grid5000/sites/sophia/clusters/esterel30/esterel30.yaml new file mode 100644 index 00000000000..867ead53e79 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel30/esterel30.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel30-[1-4]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. 
+ id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel30 diff --git a/input/grid5000/sites/sophia/clusters/esterel30/esterel30_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel30/esterel30_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel30/esterel30_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel30/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel30/nodes.yaml.erb new file mode 100644 index 00000000000..768e87f5582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel30/nodes.yaml.erb @@ -0,0 +1,86 @@ +<% + # File generated by 'rake 
mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel30" + nodes_number = 4 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6b:5d:b0 +50:9a:4c:85:c8:15 +50:9a:4c:70:fa:7a +50:9a:4c:70:fa:3c + ) + mac_bmc_list = %w( +f4:02:70:b3:66:70 +f4:02:70:b3:66:28 +f4:02:70:b3:5d:b2 +f4:02:70:ba:d8:4e + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, 
will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 201c5e4da0b0f9c1b1f4982520634cc9bea8282e Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 22/34] [sophia][esterel31] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel31/esterel31.json | 66 
+++++++++ .../clusters/esterel31/nodes/esterel31-1.json | 128 ++++++++++++++++++ .../clusters/esterel31/nodes/esterel31-2.json | 128 ++++++++++++++++++ .../clusters/esterel31/nodes/esterel31-3.json | 128 ++++++++++++++++++ .../clusters/esterel31/nodes/esterel31-4.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel31/esterel31.yaml | 41 ++++++ .../clusters/esterel31/esterel31_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel31/nodes.yaml.erb | 86 ++++++++++++ 8 files changed, 745 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel31/esterel31.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-1.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-2.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-3.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-4.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel31/esterel31.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel31/esterel31_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel31/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel31/esterel31.json b/data/grid5000/sites/sophia/clusters/esterel31/esterel31.json new file mode 100644 index 00000000000..64396142b8e --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel31/esterel31.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + 
"node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 4, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel31", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-1.json b/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-1.json new file mode 100644 index 00000000000..91dcac235cf --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { 
+ "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.41", + "ip6": "2001:660:4406:800:4::29", + "kavlan": false, + "mac": "50:9a:4c:70:fa:7c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel31-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.41", + "kavlan": false, + "mac": "f4:02:70:ed:22:5a", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel31-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel31", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel31-1" +} \ 
No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-2.json b/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-2.json new file mode 100644 index 00000000000..756c0c97c28 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.42", + "ip6": "2001:660:4406:800:4::2a", + "kavlan": false, + "mac": "50:9a:4c:70:aa:e1", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel31-2.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.42", + "kavlan": false, + "mac": "f4:02:70:ed:1c:2a", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel31-2-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel31", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + 
}, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel31-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-3.json b/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-3.json new file mode 100644 index 00000000000..73e14ff0b4b --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, 
+ "interface": "Ethernet", + "ip": "172.16.131.43", + "ip6": "2001:660:4406:800:4::2b", + "kavlan": false, + "mac": "50:9a:4c:6c:38:ad", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel31-3.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.43", + "kavlan": false, + "mac": "f4:02:70:ed:2a:84", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel31-3-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel31", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel31-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-4.json 
b/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-4.json new file mode 100644 index 00000000000..ee27302aab5 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel31/nodes/esterel31-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.44", + "ip6": "2001:660:4406:800:4::2c", + "kavlan": false, + "mac": "50:9a:4c:6c:38:6b", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel31-4.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.44", + "kavlan": false, + "mac": "f4:02:70:ed:2e:a4", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel31-4-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel31", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 
8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel31-4" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel31/esterel31.yaml b/input/grid5000/sites/sophia/clusters/esterel31/esterel31.yaml new file mode 100644 index 00000000000..56cb2035107 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel31/esterel31.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel31-[1-4]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. 
+ network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. + software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel31 diff --git a/input/grid5000/sites/sophia/clusters/esterel31/esterel31_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel31/esterel31_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel31/esterel31_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel31/nodes.yaml.erb 
b/input/grid5000/sites/sophia/clusters/esterel31/nodes.yaml.erb new file mode 100644 index 00000000000..c88973ded1c --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel31/nodes.yaml.erb @@ -0,0 +1,86 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel31" + nodes_number = 4 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:70:fa:7c +50:9a:4c:70:aa:e1 +50:9a:4c:6c:38:ad +50:9a:4c:6c:38:6b + ) + mac_bmc_list = %w( +f4:02:70:ed:22:5a +f4:02:70:ed:1c:2a +f4:02:70:ed:2a:84 +f4:02:70:ed:2e:a4 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake 
vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 49e173d150b1e9868ab5700b2c47a1a4c0eaadac Mon Sep 17 
00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 23/34] [sophia][esterel32] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel32/esterel32.json | 66 +++++++++ .../clusters/esterel32/nodes/esterel32-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel32/esterel32.yaml | 41 ++++++ .../clusters/esterel32/esterel32_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel32/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel32/esterel32.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel32/esterel32.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel32/esterel32_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel32/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel32/esterel32.json b/data/grid5000/sites/sophia/clusters/esterel32/esterel32.json new file mode 100644 index 00000000000..4e258a540fc --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel32/esterel32.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + 
"node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel32", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.json b/data/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.json new file mode 100644 index 00000000000..ebd8436bae3 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.45", 
+ "ip6": "2001:660:4406:800:4::2d", + "kavlan": false, + "mac": "50:9a:4c:6c:39:85", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel32-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.45", + "kavlan": false, + "mac": "70:b5:e8:d9:89:bc", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel32-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel32", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel32-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel32/esterel32.yaml b/input/grid5000/sites/sophia/clusters/esterel32/esterel32.yaml new file mode 100644 index 00000000000..bf42b2bebdc --- 
/dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel32/esterel32.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel32-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel32 diff --git a/input/grid5000/sites/sophia/clusters/esterel32/esterel32_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel32/esterel32_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel32/esterel32_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel32/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel32/nodes.yaml.erb new file mode 100644 index 00000000000..d8765aac1a7 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel32/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel32" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6c:39:85 + ) + mac_bmc_list = %w( +70:b5:e8:d9:89:bc + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 26285fa8a7c97f981d8d9169c190101e7f3d2603 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 24/34] [sophia][esterel33] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel33/esterel33.json | 66 +++++++++ .../clusters/esterel33/nodes/esterel33-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel33/esterel33.yaml | 41 ++++++ .../clusters/esterel33/esterel33_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel33/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel33/esterel33.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel33/esterel33.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel33/esterel33_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel33/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json b/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json new file mode 100644 index 00000000000..706fb8f4c11 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel33", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.json b/data/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.json new file mode 100644 index 00000000000..6fe44039965 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel33/nodes/esterel33-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.46", + "ip6": "2001:660:4406:800:4::2e", + "kavlan": false, + "mac": "50:9a:4c:6c:39:89", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel33-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.46", + "kavlan": false, + "mac": "b0:7b:25:d0:f8:f2", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel33-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel33", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel33-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel33/esterel33.yaml b/input/grid5000/sites/sophia/clusters/esterel33/esterel33.yaml new file mode 100644 index 00000000000..e6648f6371e --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel33/esterel33.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel33-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel33 diff --git a/input/grid5000/sites/sophia/clusters/esterel33/esterel33_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel33/esterel33_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel33/esterel33_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel33/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel33/nodes.yaml.erb new file mode 100644 index 00000000000..2f0e80cfe50 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel33/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel33" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6c:39:89 + ) + mac_bmc_list = %w( +b0:7b:25:d0:f8:f2 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 1cff2fb53d170340c81ed2aa9e2ff91199766be3 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 25/34] [sophia][esterel34] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel34/esterel34.json | 66 +++++++++ .../clusters/esterel34/nodes/esterel34-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel34/esterel34.yaml | 41 ++++++ .../clusters/esterel34/esterel34_metrics.yaml | 40 ++++++ 
.../sophia/clusters/esterel34/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel34/esterel34.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel34/nodes/esterel34-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel34/esterel34.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel34/esterel34_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel34/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel34/esterel34.json b/data/grid5000/sites/sophia/clusters/esterel34/esterel34.json new file mode 100644 index 00000000000..27115032c78 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel34/esterel34.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel34", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel34/nodes/esterel34-1.json b/data/grid5000/sites/sophia/clusters/esterel34/nodes/esterel34-1.json new file mode 100644 index 00000000000..a26693092c2 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel34/nodes/esterel34-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.47", + "ip6": "2001:660:4406:800:4::2f", + "kavlan": false, + "mac": "24:6e:96:81:28:dc", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel34-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.47", + "kavlan": false, + "mac": "b0:7b:25:e1:14:10", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel34-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel34", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel34-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel34/esterel34.yaml b/input/grid5000/sites/sophia/clusters/esterel34/esterel34.yaml new file mode 100644 index 00000000000..af33f7b566a --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel34/esterel34.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel34-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel34 diff --git a/input/grid5000/sites/sophia/clusters/esterel34/esterel34_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel34/esterel34_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel34/esterel34_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel34/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel34/nodes.yaml.erb new file mode 100644 index 00000000000..3f75f7f420d --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel34/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel34" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +24:6e:96:81:28:dc + ) + mac_bmc_list = %w( +b0:7b:25:e1:14:10 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 3b03e58f7f65a36ee648dab0dc00deac21f39a97 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 26/34] [sophia][esterel35] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel35/esterel35.json | 66 +++++++++ .../clusters/esterel35/nodes/esterel35-1.json | 128 ++++++++++++++++++ .../clusters/esterel35/nodes/esterel35-2.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel35/esterel35.yaml | 41 ++++++ 
.../clusters/esterel35/esterel35_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel35/nodes.yaml.erb | 82 +++++++++++ 6 files changed, 485 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel35/esterel35.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel35/nodes/esterel35-1.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel35/nodes/esterel35-2.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel35/esterel35.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel35/esterel35_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel35/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel35/esterel35.json b/data/grid5000/sites/sophia/clusters/esterel35/esterel35.json new file mode 100644 index 00000000000..476e225a7ce --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel35/esterel35.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 
9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel35", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel35/nodes/esterel35-1.json b/data/grid5000/sites/sophia/clusters/esterel35/nodes/esterel35-1.json new file mode 100644 index 00000000000..7fd04225adb --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel35/nodes/esterel35-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.48", + "ip6": "2001:660:4406:800:4::30", + "kavlan": false, + "mac": "6c:2b:59:9c:4a:32", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel35-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + 
"device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.48", + "kavlan": false, + "mac": "b0:7b:25:df:e0:d4", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel35-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel35", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel35-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel35/nodes/esterel35-2.json b/data/grid5000/sites/sophia/clusters/esterel35/nodes/esterel35-2.json new file mode 100644 index 00000000000..d57356ce9e9 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel35/nodes/esterel35-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": 
"01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.49", + "ip6": "2001:660:4406:800:4::31", + "kavlan": false, + "mac": "6c:2b:59:9c:3f:3a", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel35-2.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.49", + "kavlan": false, + "mac": "b0:7b:25:df:df:60", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel35-2-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel35", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": 
"debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel35-2" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel35/esterel35.yaml b/input/grid5000/sites/sophia/clusters/esterel35/esterel35.yaml new file mode 100644 index 00000000000..fc95026dd38 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel35/esterel35.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel35-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel35 diff --git a/input/grid5000/sites/sophia/clusters/esterel35/esterel35_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel35/esterel35_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel35/esterel35_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel35/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel35/nodes.yaml.erb new file mode 100644 index 00000000000..21feab3ce2f --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel35/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel35" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +6c:2b:59:9c:4a:32 +6c:2b:59:9c:3f:3a + ) + mac_bmc_list = %w( +b0:7b:25:df:e0:d4 +b0:7b:25:df:df:60 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: 
"0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From fdbd121568b750b65bbdb195b33dfc762c8e2768 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 27/34] [sophia][esterel36] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel36/esterel36.json | 66 +++++++++ .../clusters/esterel36/nodes/esterel36-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel36/esterel36.yaml | 41 ++++++ .../clusters/esterel36/esterel36_metrics.yaml | 40 
++++++ .../sophia/clusters/esterel36/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel36/esterel36.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel36/nodes/esterel36-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel36/esterel36.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel36/esterel36_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel36/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel36/esterel36.json b/data/grid5000/sites/sophia/clusters/esterel36/esterel36.json new file mode 100644 index 00000000000..45c76415bbb --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel36/esterel36.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel36", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel36/nodes/esterel36-1.json b/data/grid5000/sites/sophia/clusters/esterel36/nodes/esterel36-1.json new file mode 100644 index 00000000000..d987664b1bc --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel36/nodes/esterel36-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.50", + "ip6": "2001:660:4406:800:4::32", + "kavlan": false, + "mac": "6c:2b:59:9c:4a:6d", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel36-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.50", + "kavlan": false, + "mac": "5c:ba:2c:3e:a2:7c", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel36-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel36", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel36-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel36/esterel36.yaml b/input/grid5000/sites/sophia/clusters/esterel36/esterel36.yaml new file mode 100644 index 00000000000..0e301c8a0dc --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel36/esterel36.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel36-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel36 diff --git a/input/grid5000/sites/sophia/clusters/esterel36/esterel36_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel36/esterel36_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel36/esterel36_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel36/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel36/nodes.yaml.erb new file mode 100644 index 00000000000..304575001bf --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel36/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv' + # If changes are needed, it might be better to edit
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel36" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +6c:2b:59:9c:4a:6d + ) + mac_bmc_list = %w( +5c:ba:2c:3e:a2:7c + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 50a51d3a2a8aa185a454191ed9d8e4f4e522ee2a Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:04 +0100 Subject: [PATCH 28/34] [sophia][esterel37] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel37/esterel37.json | 66 +++++++++ .../clusters/esterel37/nodes/esterel37-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel37/esterel37.yaml | 41 ++++++ .../clusters/esterel37/esterel37_metrics.yaml | 40 ++++++
.../sophia/clusters/esterel37/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel37/esterel37.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel37/nodes/esterel37-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel37/esterel37.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel37/esterel37_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel37/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel37/esterel37.json b/data/grid5000/sites/sophia/clusters/esterel37/esterel37.json new file mode 100644 index 00000000000..bb62e5e3158 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel37/esterel37.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel37", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel37/nodes/esterel37-1.json b/data/grid5000/sites/sophia/clusters/esterel37/nodes/esterel37-1.json new file mode 100644 index 00000000000..1cfb22b1e06 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel37/nodes/esterel37-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.51", + "ip6": "2001:660:4406:800:4::33", + "kavlan": false, + "mac": "6c:2b:59:9a:05:c4", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel37-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.51", + "kavlan": false, + "mac": "5c:ed:8c:32:15:b8", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel37-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel37", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel37-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel37/esterel37.yaml b/input/grid5000/sites/sophia/clusters/esterel37/esterel37.yaml new file mode 100644 index 00000000000..4842fb79970 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel37/esterel37.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel37-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel37 diff --git a/input/grid5000/sites/sophia/clusters/esterel37/esterel37_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel37/esterel37_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel37/esterel37_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel37/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel37/nodes.yaml.erb new file mode 100644 index 00000000000..59fb3f05cf6 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel37/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv' + # If changes are needed, it might be better to edit
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel37" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +6c:2b:59:9a:05:c4 + ) + mac_bmc_list = %w( +5c:ed:8c:32:15:b8 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 7675126793623470fa45be11e1ebcbfea36ad815 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:05 +0100 Subject: [PATCH 29/34] [sophia][esterel38] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel38/esterel38.json | 66 +++++++++ .../clusters/esterel38/nodes/esterel38-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel38/esterel38.yaml | 41 ++++++ .../clusters/esterel38/esterel38_metrics.yaml | 40 ++++++
.../sophia/clusters/esterel38/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel38/esterel38.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel38/nodes/esterel38-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel38/esterel38.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel38/esterel38_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel38/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel38/esterel38.json b/data/grid5000/sites/sophia/clusters/esterel38/esterel38.json new file mode 100644 index 00000000000..0819c819598 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel38/esterel38.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel38", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel38/nodes/esterel38-1.json b/data/grid5000/sites/sophia/clusters/esterel38/nodes/esterel38-1.json new file mode 100644 index 00000000000..3aaaee40239 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel38/nodes/esterel38-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.52", + "ip6": "2001:660:4406:800:4::34", + "kavlan": false, + "mac": "f4:02:70:b9:2b:b6", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel38-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.52", + "kavlan": false, + "mac": "5c:ed:8c:3b:ee:14", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel38-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel38", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel38-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel38/esterel38.yaml b/input/grid5000/sites/sophia/clusters/esterel38/esterel38.yaml new file mode 100644 index 00000000000..c21dacf93fc --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel38/esterel38.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel38-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel38 diff --git a/input/grid5000/sites/sophia/clusters/esterel38/esterel38_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel38/esterel38_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel38/esterel38_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel38/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel38/nodes.yaml.erb new file mode 100644 index 00000000000..60d7e86a7a9 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel38/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv' + # If changes are needed, it might be better to edit
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel38" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +F4:02:70:B9:2B:B6 + ) + mac_bmc_list = %w( +5c:ed:8c:3b:ee:14 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From c50f784cb9f706b7175d30751dc0b5a52786b0b0 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:05 +0100 Subject: [PATCH 30/34] [sophia][esterel39] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel39/esterel39.json | 66 +++++++++ .../clusters/esterel39/nodes/esterel39-1.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel39/esterel39.yaml | 41 ++++++ .../clusters/esterel39/esterel39_metrics.yaml | 40 ++++++
.../sophia/clusters/esterel39/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 355 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel39/esterel39.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel39/nodes/esterel39-1.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel39/esterel39.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel39/esterel39_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel39/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel39/esterel39.json b/data/grid5000/sites/sophia/clusters/esterel39/esterel39.json new file mode 100644 index 00000000000..85c7b37d809 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel39/esterel39.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + 
"optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel39", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel39/nodes/esterel39-1.json b/data/grid5000/sites/sophia/clusters/esterel39/nodes/esterel39-1.json new file mode 100644 index 00000000000..24abd73638e --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel39/nodes/esterel39-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.53", + "ip6": "2001:660:4406:800:4::35", + "kavlan": false, + "mac": "f4:02:70:b9:60:34", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel39-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.53", + "kavlan": false, + "mac": "5c:ed:8c:38:09:c8", + 
"management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel39-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel39", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel39-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel39/esterel39.yaml b/input/grid5000/sites/sophia/clusters/esterel39/esterel39.yaml new file mode 100644 index 00000000000..50c8b190355 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel39/esterel39.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel39-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel39 diff --git a/input/grid5000/sites/sophia/clusters/esterel39/esterel39_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel39/esterel39_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel39/esterel39_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel39/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel39/nodes.yaml.erb new file mode 100644 index 00000000000..9d4ee52f103 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel39/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel39" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +F4:02:70:B9:60:34 + ) + mac_bmc_list = %w( +5c:ed:8c:38:09:c8 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be 
replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 76d0031c6f027697c1f27e5998b750d3fb63333e Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:05 +0100 Subject: [PATCH 31/34] [sophia][esterel40] add basic configuration generated by rake mass:create --- .../sophia/clusters/esterel40/esterel40.json | 66 +++++++++ .../clusters/esterel40/nodes/esterel40-1.json | 128 ++++++++++++++++++ .../clusters/esterel40/nodes/esterel40-2.json | 128 ++++++++++++++++++ .../sophia/clusters/esterel40/esterel40.yaml | 41 ++++++ 
.../clusters/esterel40/esterel40_metrics.yaml | 40 ++++++ .../sophia/clusters/esterel40/nodes.yaml.erb | 82 +++++++++++ 6 files changed, 485 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/esterel40/esterel40.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel40/nodes/esterel40-1.json create mode 100644 data/grid5000/sites/sophia/clusters/esterel40/nodes/esterel40-2.json create mode 100644 input/grid5000/sites/sophia/clusters/esterel40/esterel40.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel40/esterel40_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/esterel40/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/esterel40/esterel40.json b/data/grid5000/sites/sophia/clusters/esterel40/esterel40.json new file mode 100644 index 00000000000..4da6f619ae4 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel40/esterel40.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 
9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 2, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "esterel40", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel40/nodes/esterel40-1.json b/data/grid5000/sites/sophia/clusters/esterel40/nodes/esterel40-1.json new file mode 100644 index 00000000000..4f36155b68e --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel40/nodes/esterel40-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.54", + "ip6": "2001:660:4406:800:4::36", + "kavlan": false, + "mac": "50:7c:6f:4f:43:06", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel40-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + 
"device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.54", + "kavlan": false, + "mac": "5c:ed:8c:b3:08:68", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel40-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel40", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel40-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/esterel40/nodes/esterel40-2.json b/data/grid5000/sites/sophia/clusters/esterel40/nodes/esterel40-2.json new file mode 100644 index 00000000000..469a44c375f --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/esterel40/nodes/esterel40-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": 
"01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.131.55", + "ip6": "2001:660:4406:800:4::37", + "kavlan": false, + "mac": "50:7c:6f:64:6e:6a", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "esterel40-2.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.131.55", + "kavlan": false, + "mac": "5c:ed:8c:bf:cd:2c", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "esterel40-2-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "esterel40", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": 
"debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "esterel40-2" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/esterel40/esterel40.yaml b/input/grid5000/sites/sophia/clusters/esterel40/esterel40.yaml new file mode 100644 index 00000000000..2afef7d3d06 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel40/esterel40.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + esterel40-[1-2]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: esterel40 diff --git a/input/grid5000/sites/sophia/clusters/esterel40/esterel40_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel40/esterel40_metrics.yaml new file mode 100644 index 00000000000..729e9beb582 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel40/esterel40_metrics.yaml @@ -0,0 +1,40 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/esterel40/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel40/nodes.yaml.erb new file mode 100644 index 00000000000..9ccb6e2229c --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel40/nodes.yaml.erb @@ -0,0 +1,82 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it might be better to edit 
the source data and regenerate using 'rake mass:create' + + cluster_name = "esterel40" + nodes_number = 2 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:7c:6f:4f:43:06 +50:7c:6f:64:6e:6a + ) + mac_bmc_list = %w( +5c:ed:8c:b3:08:68 +5c:ed:8c:bf:cd:2c + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: 
"0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 10c562770f140f1914f21907090b980c18f1fc48 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:43 +0100 Subject: [PATCH 32/34] [sophia][mercantour3] add basic configuration generated by rake mass:create --- .../clusters/mercantour3/mercantour3.json | 66 +++++++++ .../mercantour3/nodes/mercantour3-1.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-10.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-11.json | 
128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-12.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-13.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-14.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-15.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-16.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-2.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-3.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-4.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-5.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-6.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-7.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-8.json | 128 ++++++++++++++++++ .../mercantour3/nodes/mercantour3-9.json | 128 ++++++++++++++++++ .../clusters/mercantour3/mercantour3.yaml | 41 ++++++ .../mercantour3/mercantour3_metrics.yaml | 42 ++++++ .../clusters/mercantour3/nodes.yaml.erb | 110 +++++++++++++++ 20 files changed, 2307 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/mercantour3.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-1.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-10.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-11.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-12.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-13.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-14.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-15.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-16.json create mode 100644 
data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-2.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-3.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-4.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-5.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-6.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-7.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-8.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-9.json create mode 100644 input/grid5000/sites/sophia/clusters/mercantour3/mercantour3.yaml create mode 100644 input/grid5000/sites/sophia/clusters/mercantour3/mercantour3_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/mercantour3/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/mercantour3.json b/data/grid5000/sites/sophia/clusters/mercantour3/mercantour3.json new file mode 100644 index 00000000000..d36336904fe --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/mercantour3.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + 
"node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 16, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "mercantour3", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-1.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-1.json new file mode 100644 index 00000000000..f51bb12de91 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + 
"interface": "Ethernet", + "ip": "172.16.130.25", + "ip6": "2001:660:4406:800:3::19", + "kavlan": false, + "mac": "50:9a:4c:6c:38:af", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.25", + "kavlan": false, + "mac": "50:9a:4c:6c:32:83", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-1" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-10.json 
b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-10.json new file mode 100644 index 00000000000..3981d818b03 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-10.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.34", + "ip6": "2001:660:4406:800:3::22", + "kavlan": false, + "mac": "50:9a:4c:70:fa:3c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-10.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.34", + "kavlan": false, + "mac": "50:9a:4c:70:f7:cc", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-10-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 
8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-10" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-11.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-11.json new file mode 100644 index 00000000000..a66d0a76283 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-11.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.35", + "ip6": 
"2001:660:4406:800:3::23", + "kavlan": false, + "mac": "50:9a:4c:70:fa:7c", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-11.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.35", + "kavlan": false, + "mac": "50:9a:4c:70:f6:4c", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-11-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-11" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-12.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-12.json new file mode 100644 index 
00000000000..392bf6feda7 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-12.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.36", + "ip6": "2001:660:4406:800:3::24", + "kavlan": false, + "mac": "50:9a:4c:70:aa:e1", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-12.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.36", + "kavlan": false, + "mac": "50:9a:4c:70:a4:06", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-12-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + 
"microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-12" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-13.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-13.json new file mode 100644 index 00000000000..9385d6b8121 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-13.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.37", + "ip6": "2001:660:4406:800:3::25", + "kavlan": false, + "mac": "50:9a:4c:6c:38:ad", + "management": false, + 
"mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-13.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.37", + "kavlan": false, + "mac": "50:9a:4c:6c:32:77", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-13-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-13" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-14.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-14.json new file mode 100644 index 00000000000..9ea1b72df20 --- /dev/null +++ 
b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-14.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.38", + "ip6": "2001:660:4406:800:3::26", + "kavlan": false, + "mac": "50:9a:4c:6c:38:6b", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-14.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.38", + "kavlan": false, + "mac": "50:9a:4c:6c:30:eb", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-14-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": 
"0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-14" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-15.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-15.json new file mode 100644 index 00000000000..a8c3001fa80 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-15.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.39", + "ip6": "2001:660:4406:800:3::27", + "kavlan": false, + "mac": "50:9a:4c:6c:39:85", + "management": false, + "mountable": true, + "mounted": true, + "name": 
"enp1s0f0np0", + "network_address": "mercantour3-15.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.39", + "kavlan": false, + "mac": "50:9a:4c:6c:37:87", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-15-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-15" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-16.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-16.json new file mode 100644 index 00000000000..9c50c999556 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-16.json @@ -0,0 +1,128 @@ +{ + 
"architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.40", + "ip6": "2001:660:4406:800:3::28", + "kavlan": false, + "mac": "50:9a:4c:6c:39:89", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-16.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.40", + "kavlan": false, + "mac": "50:9a:4c:6c:37:9f", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-16-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + 
"version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-16" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-2.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-2.json new file mode 100644 index 00000000000..70e5ffae078 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-2.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.26", + "ip6": "2001:660:4406:800:3::1a", + "kavlan": false, + "mac": "50:9a:4c:6c:39:82", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-2.sophia.grid5000.fr", + "rate": 10000000000, + 
"switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.26", + "kavlan": false, + "mac": "50:9a:4c:6c:37:75", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-2-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-2" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-3.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-3.json new file mode 100644 index 00000000000..fd5a0602db5 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-3.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + 
"platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.27", + "ip6": "2001:660:4406:800:3::1b", + "kavlan": false, + "mac": "50:9a:4c:6c:38:d9", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-3.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.27", + "kavlan": false, + "mac": "50:9a:4c:6c:33:7f", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-3-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + 
"postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-3" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-4.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-4.json new file mode 100644 index 00000000000..23c17c2d586 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-4.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.28", + "ip6": "2001:660:4406:800:3::1c", + "kavlan": false, + "mac": "50:9a:4c:6c:39:53", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-4.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + 
"ip": "172.17.130.28", + "kavlan": false, + "mac": "50:9a:4c:6c:36:5b", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-4-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-4" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-5.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-5.json new file mode 100644 index 00000000000..3785bded94b --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-5.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + 
}, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.29", + "ip6": "2001:660:4406:800:3::1d", + "kavlan": false, + "mac": "50:9a:4c:6c:38:9e", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-5.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.29", + "kavlan": false, + "mac": "50:9a:4c:6c:32:1d", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-5-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": 
"", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-5" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-6.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-6.json new file mode 100644 index 00000000000..861ef2ff0f1 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-6.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.30", + "ip6": "2001:660:4406:800:3::1e", + "kavlan": false, + "mac": "50:9a:4c:6c:39:31", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-6.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.30", + "kavlan": false, + "mac": "50:9a:4c:6c:35:8f", + "management": true, + "mountable": false, + "mounted": 
false, + "network_address": "mercantour3-6-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-6" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-7.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-7.json new file mode 100644 index 00000000000..207d62523cc --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-7.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + 
"warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.31", + "ip6": "2001:660:4406:800:3::1f", + "kavlan": false, + "mac": "50:9a:4c:6b:5d:b0", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-7.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.31", + "kavlan": false, + "mac": "50:9a:4c:6b:54:b9", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-7-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + 
"storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-7" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-8.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-8.json new file mode 100644 index 00000000000..f0d38290edc --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-8.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.32", + "ip6": "2001:660:4406:800:3::20", + "kavlan": false, + "mac": "50:9a:4c:85:c8:15", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-8.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.32", + "kavlan": false, + "mac": "50:9a:4c:85:c5:28", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-8-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + 
"operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-8" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-9.json b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-9.json new file mode 100644 index 00000000000..2532d9692d8 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour3/nodes/mercantour3-9.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + 
"management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.33", + "ip6": "2001:660:4406:800:3::21", + "kavlan": false, + "mac": "50:9a:4c:70:fa:7a", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour3-9.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.33", + "kavlan": false, + "mac": "50:9a:4c:70:f6:58", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour3-9-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour3", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + 
"deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour3-9" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/mercantour3/mercantour3.yaml b/input/grid5000/sites/sophia/clusters/mercantour3/mercantour3.yaml new file mode 100644 index 00000000000..7810f59b058 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour3/mercantour3.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + mercantour3-[1-16]: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: mercantour3 diff --git a/input/grid5000/sites/sophia/clusters/mercantour3/mercantour3_metrics.yaml b/input/grid5000/sites/sophia/clusters/mercantour3/mercantour3_metrics.yaml new file mode 100644 index 00000000000..3c11d0073c1 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour3/mercantour3_metrics.yaml @@ -0,0 +1,42 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/mercantour3/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/mercantour3/nodes.yaml.erb new file mode 100644 index 00000000000..b2b9919f458 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour3/nodes.yaml.erb @@ -0,0 +1,110 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, 
it might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "mercantour3" + nodes_number = 16 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +50:9a:4c:6c:38:af +50:9a:4c:6c:39:82 +50:9a:4c:6c:38:d9 +50:9a:4c:6c:39:53 +50:9a:4c:6c:38:9e +50:9a:4c:6c:39:31 +50:9a:4c:6b:5d:b0 +50:9a:4c:85:c8:15 +50:9a:4c:70:fa:7a +50:9a:4c:70:fa:3c +50:9a:4c:70:fa:7c +50:9a:4c:70:aa:e1 +50:9a:4c:6c:38:ad +50:9a:4c:6c:38:6b +50:9a:4c:6c:39:85 +50:9a:4c:6c:39:89 + ) + mac_bmc_list = %w( +50:9a:4c:6c:32:83 +50:9a:4c:6c:37:75 +50:9a:4c:6c:33:7f +50:9a:4c:6c:36:5b +50:9a:4c:6c:32:1d +50:9a:4c:6c:35:8f +50:9a:4c:6b:54:b9 +50:9a:4c:85:c5:28 +50:9a:4c:70:f6:58 +50:9a:4c:70:f7:cc +50:9a:4c:70:f6:4c +50:9a:4c:70:a4:06 +50:9a:4c:6c:32:77 +50:9a:4c:6c:30:eb +50:9a:4c:6c:37:87 +50:9a:4c:6c:37:9f + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be 
replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake driver, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks 
+ size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From 333991649b4e64eca7b6218dddf0dc48b34bf0e2 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:43 +0100 Subject: [PATCH 33/34] [sophia][mercantour4] add basic configuration generated by rake mass:create --- .../clusters/mercantour4/mercantour4.json | 66 +++++++++ .../mercantour4/nodes/mercantour4-1.json | 128 ++++++++++++++++++ .../clusters/mercantour4/mercantour4.yaml | 41 ++++++ .../mercantour4/mercantour4_metrics.yaml | 42 ++++++ .../clusters/mercantour4/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 357 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/mercantour4/mercantour4.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour4/nodes/mercantour4-1.json create mode 100644 input/grid5000/sites/sophia/clusters/mercantour4/mercantour4.yaml create mode 100644 input/grid5000/sites/sophia/clusters/mercantour4/mercantour4_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/mercantour4/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/mercantour4/mercantour4.json b/data/grid5000/sites/sophia/clusters/mercantour4/mercantour4.json new file mode 100644 index 00000000000..33cf0881527 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour4/mercantour4.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + 
"node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "mercantour4", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour4/nodes/mercantour4-1.json b/data/grid5000/sites/sophia/clusters/mercantour4/nodes/mercantour4-1.json new file mode 100644 index 00000000000..52a28521349 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour4/nodes/mercantour4-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + 
"technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.41", + "ip6": "2001:660:4406:800:3::29", + "kavlan": false, + "mac": "24:6e:96:81:28:dc", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour4-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.41", + "kavlan": false, + "mac": "50:9a:4c:65:5d:38", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour4-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour4", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour4-1" +} \ No newline at end of file diff --git 
a/input/grid5000/sites/sophia/clusters/mercantour4/mercantour4.yaml b/input/grid5000/sites/sophia/clusters/mercantour4/mercantour4.yaml new file mode 100644 index 00000000000..fed5bd18871 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour4/mercantour4.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. +created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + mercantour4-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: mercantour4 diff --git a/input/grid5000/sites/sophia/clusters/mercantour4/mercantour4_metrics.yaml b/input/grid5000/sites/sophia/clusters/mercantour4/mercantour4_metrics.yaml new file mode 100644 index 00000000000..3c11d0073c1 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour4/mercantour4_metrics.yaml @@ -0,0 +1,42 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/mercantour4/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/mercantour4/nodes.yaml.erb new file mode 100644 index 00000000000..7161c6c9d70 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour4/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv + # If changes are needed, it 
might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "mercantour4" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +24:6e:96:81:28:dc + ) + mac_bmc_list = %w( +50:9a:4c:65:5d:38 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" 
# Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab From bf55dbb72f7304d3bc0e0684d2ed83ead01a81b8 Mon Sep 17 00:00:00 2001 From: Nicolas Perrin <nicolas.perrin@inria.fr> Date: Wed, 19 Mar 2025 14:52:43 +0100 Subject: [PATCH 34/34] [sophia][mercantour7] add basic configuration generated by rake mass:create --- .../clusters/mercantour7/mercantour7.json | 66 +++++++++ .../mercantour7/nodes/mercantour7-1.json | 128 ++++++++++++++++++ .../clusters/mercantour7/mercantour7.yaml | 41 ++++++ .../mercantour7/mercantour7_metrics.yaml | 42 ++++++
.../clusters/mercantour7/nodes.yaml.erb | 80 +++++++++++ 5 files changed, 357 insertions(+) create mode 100644 data/grid5000/sites/sophia/clusters/mercantour7/mercantour7.json create mode 100644 data/grid5000/sites/sophia/clusters/mercantour7/nodes/mercantour7-1.json create mode 100644 input/grid5000/sites/sophia/clusters/mercantour7/mercantour7.yaml create mode 100644 input/grid5000/sites/sophia/clusters/mercantour7/mercantour7_metrics.yaml create mode 100644 input/grid5000/sites/sophia/clusters/mercantour7/nodes.yaml.erb diff --git a/data/grid5000/sites/sophia/clusters/mercantour7/mercantour7.json b/data/grid5000/sites/sophia/clusters/mercantour7/mercantour7.json new file mode 100644 index 00000000000..cfe42311d42 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour7/mercantour7.json @@ -0,0 +1,66 @@ +{ + "boot_type": "uefi", + "created_at": "Thu, 01 Jan 1970 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "1970-01-01", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": 
"prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "Cluster Model", + "nodes_count": 1, + "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", + "priority": 197001, + "queues": [ + "admin", + "testing" + ], + "redfish": true, + "type": "cluster", + "uid": "mercantour7", + "warranty_end": "1970-01-01" +} \ No newline at end of file diff --git a/data/grid5000/sites/sophia/clusters/mercantour7/nodes/mercantour7-1.json b/data/grid5000/sites/sophia/clusters/mercantour7/nodes/mercantour7-1.json new file mode 100644 index 00000000000..b8c92491bf6 --- /dev/null +++ b/data/grid5000/sites/sophia/clusters/mercantour7/nodes/mercantour7-1.json @@ -0,0 +1,128 @@ +{ + "architecture": { + "cpu_core_numbering": "contiguous", + "nb_cores": 72, + "nb_procs": 1, + "nb_threads": 72, + "platform_type": "x86_64" + }, + "bios": { + "release_date": "01/01/2000", + "vendor": "Unknown", + "version": 1 + }, + "bmc_version": "v1", + "chassis": { + "manufactured_at": "1970-01-01", + "manufacturer": "Unknown", + "name": "Unknown", + "warranty_end": "1970-01-01" + }, + "exotic": false, + "main_memory": { + "ram_size": 8 + }, + "management_tools": { + "bmc_vendor_tool": "ipmitool", + "ipmitool": { + "retries": 5 + } + }, + "memory_devices": [ + { + "device": "dimm_proc 1 dimm 1", + "size": 8, + "technology": "dram" + } + ], + "network_adapters": [ + { + "device": "eth0", + "driver": "mlx_core", + "enabled": true, + "interface": "Ethernet", + "ip": "172.16.130.47", + "ip6": "2001:660:4406:800:3::2f", + "kavlan": false, + "mac": "f4:02:70:b9:60:34", + "management": false, + "mountable": true, + "mounted": true, + "name": "enp1s0f0np0", + "network_address": "mercantour7-1.sophia.grid5000.fr", + "rate": 10000000000, + "switch": null, + "switch_port": null + }, + { + "device": "bmc", + "enabled": true, + "interface": "Ethernet", + "ip": "172.17.130.47", + "kavlan": 
false, + "mac": "70:b5:e8:de:e8:76", + "management": true, + "mountable": false, + "mounted": false, + "network_address": "mercantour7-1-bmc.sophia.grid5000.fr" + } + ], + "nodeset": "mercantour7", + "operating_system": { + "cstate_driver": "unknown", + "cstate_governor": "unknown", + "ht_enabled": true, + "pstate_driver": "unknwon", + "pstate_governor": "unknown", + "turboboost_enabled": true + }, + "performance": { + "core_flops": 128, + "node_flops": 9216 + }, + "processor": { + "cache_l1": null, + "cache_l1d": 8, + "cache_l1i": 8, + "cache_l2": 8, + "cache_l3": 8, + "clock_speed": 8, + "ht_capable": true, + "instruction_set": "x86-64", + "microarchitecture": "Haswell", + "microcode": "0xd000001", + "model": "Unknown", + "other_description": "description", + "vendor": "vendor", + "version": "vendor" + }, + "redfish": true, + "software": { + "forced-deployment-timestamp": 202007300948, + "postinstall-version": "1.2025021810", + "standard-environment": "debian11-x64-std" + }, + "storage_devices": [ + { + "by_id": "", + "by_path": "/dev/disk/by-path/dummy", + "id": "disk0", + "interface": "SAS", + "model": "unknown", + "size": 8, + "storage": "SSD", + "vendor": "Unknown" + } + ], + "supported_job_types": { + "besteffort": true, + "deploy": true, + "max_walltime": 0, + "queues": [ + "admin", + "testing" + ] + }, + "type": "node", + "uid": "mercantour7-1" +} \ No newline at end of file diff --git a/input/grid5000/sites/sophia/clusters/mercantour7/mercantour7.yaml b/input/grid5000/sites/sophia/clusters/mercantour7/mercantour7.yaml new file mode 100644 index 00000000000..e9c58e03bff --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour7/mercantour7.yaml @@ -0,0 +1,41 @@ +--- +model: Cluster Model # TODO: change this value. 
+created_at: 1970-01-01 # TODO: change this value +kavlan: false +boot_type: uefi # TODO: specify if 'uefi' (ideally) or 'bios' (legacy, if no other choice) +exotic: false # TODO: specify if 'true' or 'false' +queues: + - admin + - testing +nodes: + mercantour7-1: + chassis: + manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. + supported_job_types: + deploy: true + besteffort: true + max_walltime: 0 + processor: + microarchitecture: Haswell # TODO: replace with microarch name. + clock_speed: 8 # TODO: Replace with clock speed. + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + storage_devices: + disk0: # This field will have to be renamed later. + id: disk0 + interface: SAS + by_path: "/dev/disk/by-path/dummy" # this path will have to change later. 
+ software: + standard-environment: debian11-x64-std # TODO: check that architecture is OK + management_tools: + bmc_vendor_tool: ipmitool # TODO: replace with bmc_vendor_tool (ipmitool, racadm) + nodeset: mercantour7 diff --git a/input/grid5000/sites/sophia/clusters/mercantour7/mercantour7_metrics.yaml b/input/grid5000/sites/sophia/clusters/mercantour7/mercantour7_metrics.yaml new file mode 100644 index 00000000000..3c11d0073c1 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour7/mercantour7_metrics.yaml @@ -0,0 +1,42 @@ +--- +metrics: + + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/sophia/clusters/mercantour7/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/mercantour7/nodes.yaml.erb new file mode 100644 index 00000000000..4ad5c6ca64e --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/mercantour7/nodes.yaml.erb @@ -0,0 +1,80 @@ +<% + # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-tmp.csv' + # If changes are needed, it 
might be better to edit the source data and regenerate using 'rake mass:create' + + cluster_name = "mercantour7" + nodes_number = 1 # Size of the cluster (number of nodes) + # MAC addresses declaration + mac_eth0_list = %w( +F4:02:70:B9:60:34 + ) + mac_bmc_list = %w( +70:b5:e8:de:e8:76 + ) +%> +--- +nodes: +<% (1..nodes_number).each { |i| %> + <%= cluster_name %>-<%= i %>: + architecture: + nb_procs: 1 # Fake data, will be replaced by g5k-checks + nb_cores: 72 # Fake data, will be replaced by g5k-checks + nb_threads: 72 # Fake data, will be replaced by g5k-checks + platform_type: x86_64 # Fake data, will be replaced by g5k-checks + cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks + bios: + release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks + vendor: Unknown # Fake vendor, will be replaced by g5k-checks + version: 1 # Fake version, will be replaced by g5k-checks + bmc_version: v1 # Fake version, will be replaced by g5k-checks + chassis: + manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks + name: Unknown # Fake name, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake size, will be replaced by g5k-checks + memory_devices: + dimm: + size: 8 # Fake size, will be replaced by g5k-checks + technology: dram # Common memory technology, will be replaced by g5k-checks + processor: + model: Unknown # Fake model name, will be replaced by g5k-checks + other_description: description # Fake description, will be replaced by g5k-checks + vendor: vendor # Fake vendor, will be replaced by g5k-checks + version: vendor # Fake version, will be replaced by g5k-checks + cache_l1d: 8 # Fake cache, will be replaced by g5k-checks + cache_l1i: 8 # Fake cache, will be replaced by g5k-checks + cache_l2: 8 # Fake cache, will be replaced by g5k-checks + cache_l3: 8 # Fake cache, will be replaced by g5k-checks + instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks + microcode: "0xd000001" 
# Fake microcode, will be replaced by g5k-checks + ht_capable: true # Default ht capable value, will be replaced by g5k-checks + main_memory: + ram_size: 8 # Fake ram size, will be replaced by g5k-checks + memory_devices: + dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks + size: 8 # Fake dimm size, will be replaced by g5k-checks + technology: dram # Default dimm technology, will be replaced by g5k-checks + operating_system: + cstate_driver: unknown # Fake driver, will be replaced by g5k-checks + cstate_governor: unknown # Fake governor, will be replaced by g5k-checks + ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks + pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks + pstate_governor: unknown # Fake governor, will be replaced by g5k-checks + turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks + network_adapters: + bmc: + management: true + mac: <%= mac_bmc_list[i - 1] %> + eth0: + interface: Ethernet + management: false + driver: mlx_core # Fake data, will be replaced by g5k-checks + name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks + rate: 10000000000 # Fake data, will be replaced by g5k-checks + mac: <%= mac_eth0_list[i - 1] %> + storage_devices: + disk0: + storage: SSD # Fake data, will be replaced by g5k-checks + model: unknown # Fake data, will be replaced by g5k-checks + size: 8 # Fake data, will be replaced by g5k-checks +<% } %> -- GitLab