From 3f5deb1060432d0279512b4a29c00362626d6bc8 Mon Sep 17 00:00:00 2001 From: PARISOT Clement <clement.parisot@inria.fr> Date: Mon, 14 Oct 2024 17:38:57 +0200 Subject: [PATCH] [rennes] abacus - separate abacus-22 in 2 clusters Signed-off-by: PARISOT Clement <clement.parisot@inria.fr> --- data/grid5000/accesses/refrepo.json | 33 +++++++--- .../clusters/abacus22/nodes/abacus22-1.json | 2 +- .../clusters/abacus22/nodes/abacus22-2.json | 2 +- .../rennes/clusters/abacus23/abacus23.json | 64 +++++++++++++++++++ .../nodes/abacus23-1.json} | 11 ++-- .../swdc-a035-01-02-rba.json | 2 +- input/grid5000/access/rennes.yaml | 4 +- input/grid5000/ipv4.yaml | 1 + .../rennes/clusters/abacus22/abacus22.yaml | 15 +---- .../rennes/clusters/abacus23/abacus23.yaml | 47 ++++++++++++++ .../clusters/abacus23/abacus23_metrics.yaml | 41 ++++++++++++ .../nodes/abacus23-1.yaml} | 2 +- .../rennes/networks/swdc-a035-01-02-rba.yaml | 2 +- lib/refrepo/valid/data/homogeneity.yaml.erb | 7 -- 14 files changed, 192 insertions(+), 41 deletions(-) create mode 100644 data/grid5000/sites/rennes/clusters/abacus23/abacus23.json rename data/grid5000/sites/rennes/clusters/{abacus22/nodes/abacus22-3.json => abacus23/nodes/abacus23-1.json} (96%) create mode 100644 input/grid5000/sites/rennes/clusters/abacus23/abacus23.yaml create mode 100644 input/grid5000/sites/rennes/clusters/abacus23/abacus23_metrics.yaml rename input/grid5000/sites/rennes/clusters/{abacus22/nodes/abacus22-3.yaml => abacus23/nodes/abacus23-1.yaml} (99%) diff --git a/data/grid5000/accesses/refrepo.json b/data/grid5000/accesses/refrepo.json index 1b64ca2575e..af0897d7132 100644 --- a/data/grid5000/accesses/refrepo.json +++ b/data/grid5000/accesses/refrepo.json @@ -99008,7 +99008,7 @@ "network_address": "abacus22-1-bmc.rennes.grid5000.fr" } ], - "nodeset": "abacus22-A", + "nodeset": "abacus22", "performance": { "core_flops": 42400000000, "node_flops": 2035200000000 @@ -99252,7 +99252,7 @@ "network_address": "abacus22-2-bmc.rennes.grid5000.fr" } ], - "nodeset": "abacus22-A", + "nodeset": "abacus22", "performance": { "core_flops": 42400000000, "node_flops": 2035200000000 @@ -99298,8 +99298,20 @@ } ], "uid": "abacus22-2" - }, - "abacus22-3": { + } + }, + "queues": [ + "admin", + "production" + ], + "uid": "abacus22" + }, + "abacus23": { + "created_at": "Wed, 31 Jul 2024 00:00:00 GMT", + "manufactured_at": "2022-01-19", + "model": "ProLiant DL385 Gen10 Plus v2", + "nodes": { + "abacus23-1": { "architecture": { "cpu_core_numbering": "contiguous", "nb_cores": 48, @@ -99424,7 +99436,7 @@ "mountable": true, "mounted": true, "name": "ens10f0", - "network_address": "abacus22-3.rennes.grid5000.fr", + "network_address": "abacus23-1.rennes.grid5000.fr", "rate": 10000000000, "sriov": true, "sriov_totalvfs": 128, @@ -99459,10 +99471,10 @@ "management": true, "mountable": false, "mounted": false, - "network_address": "abacus22-3-bmc.rennes.grid5000.fr" + "network_address": "abacus23-1-bmc.rennes.grid5000.fr" } ], - "nodeset": "abacus22-B", + "nodeset": "abacus23", "performance": { "core_flops": 42400000000, "node_flops": 2035200000000 @@ -99507,14 +99519,15 @@ "vendor": "HPE" } ], - "uid": "abacus22-3" + "uid": "abacus23-1" } }, "queues": [ "admin", - "production" + "production", + "challenge" ], - "uid": "abacus22" + "uid": "abacus23" }, "abacus25": { "created_at": "Mon, 05 Jun 2023 00:00:00 GMT", diff --git a/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-1.json b/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-1.json index ce303d87c64..14cb60573e0 100644 --- a/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-1.json +++ b/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-1.json @@ -218,7 +218,7 @@ "network_address": "abacus22-1-bmc.rennes.grid5000.fr" } ], - "nodeset": "abacus22-A", + "nodeset": "abacus22", "operating_system": { "cstate_driver": "acpi_idle", "cstate_governor": "menu", diff --git a/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-2.json b/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-2.json index 8ee7ec25afc..c2ba2930f74 100644 --- a/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-2.json +++ b/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-2.json @@ -218,7 +218,7 @@ "network_address": "abacus22-2-bmc.rennes.grid5000.fr" } ], - "nodeset": "abacus22-A", + "nodeset": "abacus22", "operating_system": { "cstate_driver": "acpi_idle", "cstate_governor": "menu", diff --git a/data/grid5000/sites/rennes/clusters/abacus23/abacus23.json b/data/grid5000/sites/rennes/clusters/abacus23/abacus23.json new file mode 100644 index 00000000000..c41da847269 --- /dev/null +++ b/data/grid5000/sites/rennes/clusters/abacus23/abacus23.json @@ -0,0 +1,64 @@ +{ + "boot_type": "uefi", + "created_at": "Wed, 31 Jul 2024 00:00:00 GMT", + "exotic": false, + "kavlan": false, + "manufactured_at": "2022-01-19", + "metrics": [ + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } + ], + "model": "ProLiant DL385 Gen10 Plus v2", + "queues": [ + "admin", + "production", + "challenge" + ], + "redfish": true, + "type": "cluster", + "uid": "abacus23", + "warranty_end": "2027-01-25" +} \ No newline at end of file diff --git a/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-3.json b/data/grid5000/sites/rennes/clusters/abacus23/nodes/abacus23-1.json similarity index 96% rename from data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-3.json rename to data/grid5000/sites/rennes/clusters/abacus23/nodes/abacus23-1.json index bd81fca4601..9134274821b 100644 --- a/data/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-3.json +++ b/data/grid5000/sites/rennes/clusters/abacus23/nodes/abacus23-1.json @@ -146,7 +146,7 @@ "mountable": true, "mounted": true, "name": "ens10f0", - "network_address": "abacus22-3.rennes.grid5000.fr", + "network_address": "abacus23-1.rennes.grid5000.fr", "rate": 10000000000, "sriov": true, "sriov_totalvfs": 128, @@ -181,10 +181,10 @@ "management": true, "mountable": false, "mounted": false, - "network_address": "abacus22-3-bmc.rennes.grid5000.fr" + "network_address": "abacus23-1-bmc.rennes.grid5000.fr" } ], - "nodeset": "abacus22-B", + "nodeset": "abacus23", "operating_system": { "cstate_driver": "acpi_idle", "cstate_governor": "menu", @@ -249,10 +249,11 @@ "max_walltime": 604800, "queues": [ "admin", - "production" + "production", + "challenge" ], "virtual": "amd-v" }, "type": "node", - "uid": "abacus22-3" + "uid": "abacus23-1" } \ No newline at end of file diff --git a/data/grid5000/sites/rennes/network_equipments/swdc-a035-01-02-rba.json b/data/grid5000/sites/rennes/network_equipments/swdc-a035-01-02-rba.json index fa2080c1327..bf7153ea660 100644 --- a/data/grid5000/sites/rennes/network_equipments/swdc-a035-01-02-rba.json +++ b/data/grid5000/sites/rennes/network_equipments/swdc-a035-01-02-rba.json @@ -9231,7 +9231,7 @@ "kind": "node", "port": "eth0", "snmp_name": "Eth1/4601", - "uid": "abacus22-3" + "uid": "abacus23-1" }, { "kind": "node", diff --git a/input/grid5000/access/rennes.yaml b/input/grid5000/access/rennes.yaml index 9b804a1b9bc..c072a706651 100644 --- a/input/grid5000/access/rennes.yaml +++ b/input/grid5000/access/rennes.yaml @@ -52,7 +52,7 @@ abacus5: &sirocco-exclusive p1: [sirocco, '@admin'] besteffort: ['@inria_group', '@other_groups_with_access'] abacus20: *sirocco-exclusive -abacus22-A: *sirocco-exclusive +abacus22: *sirocco-exclusive ## INTUIDOC abacus11: @@ -72,7 +72,7 @@ abacus17: besteffort: ['@inria_group', '@other_groups_with_access'] ## CIDRE -abacus22-B: +abacus23: p1: [cidre, '@admin'] besteffort: ['@inria_group', '@other_groups_with_access'] diff --git a/input/grid5000/ipv4.yaml b/input/grid5000/ipv4.yaml index c6cd1a570fc..d2021c4b14a 100644 --- a/input/grid5000/ipv4.yaml +++ b/input/grid5000/ipv4.yaml @@ -84,6 +84,7 @@ ipv4: rennes abacus20 eth0 0 0 13 19 rennes abacus21 eth0 0 0 13 20 rennes abacus22 eth0 0 0 13 21 + rennes abacus23 eth0 0 0 13 23 rennes abacus25 eth0 0 0 13 24 rennes roazhon1 eth1 0 0 14 0 rennes roazhon2 eth0 0 0 14 1 diff --git a/input/grid5000/sites/rennes/clusters/abacus22/abacus22.yaml b/input/grid5000/sites/rennes/clusters/abacus22/abacus22.yaml index e0992d53361..3f2a94355d5 100644 --- a/input/grid5000/sites/rennes/clusters/abacus22/abacus22.yaml +++ b/input/grid5000/sites/rennes/clusters/abacus22/abacus22.yaml @@ -7,7 +7,7 @@ queues: - admin - production nodes: - abacus22-[1-3]: + abacus22-[1-2]: supported_job_types: deploy: true besteffort: true @@ -37,11 +37,10 @@ nodes: pci-0000:64:00.0-scsi-0:1:0:0: id: disk0 interface: SATA - abacus22-[1-2]: chassis: manufactured_at: 2022-01-14 warranty_end: 2027-01-20 - nodeset: abacus22-A + nodeset: abacus22 abacus22-1: storage_devices: pci-0000:64:00.0-sas-0x50000f0b019454a2-lun-0: @@ -52,12 +51,4 @@ nodes: pci-0000:64:00.0-sas-0x50000f0b01945482-lun-0: id: disk1 interface: SAS - abacus22-3: - chassis: - manufactured_at: 2022-01-19 - warranty_end: 2027-01-25 - storage_devices: - pci-0000:64:00.0-sas-0x50000f0b01938442-lun-0: - id: disk1 - interface: SAS - nodeset: abacus22-B + diff --git a/input/grid5000/sites/rennes/clusters/abacus23/abacus23.yaml b/input/grid5000/sites/rennes/clusters/abacus23/abacus23.yaml new file mode 100644 index 00000000000..68faeb73f7e --- /dev/null +++ b/input/grid5000/sites/rennes/clusters/abacus23/abacus23.yaml @@ -0,0 +1,47 @@ +model: ProLiant DL385 Gen10 Plus v2 +created_at: 2024-07-31 +kavlan: false +boot_type: uefi +exotic: false +queues: + - admin + - production + - challenge +nodes: + abacus23-1: + supported_job_types: + deploy: true + besteffort: true + max_walltime: 604800 + processor: + microarchitecture: Zen 3 + clock_speed: 2650000000 + software: + standard-environment: debian11-x64-std + management_tools: + bmc_vendor_tool: ipmitool + network_adapters: + bmc: + interface: Ethernet + enabled: true + mountable: false + mounted: false + eth0: + enabled: true + mountable: true + mounted: true + eth1: + enabled: false + mountable: false + mounted: false + storage_devices: + pci-0000:64:00.0-scsi-0:1:0:0: + id: disk0 + interface: SATA + pci-0000:64:00.0-sas-0x50000f0b01938442-lun-0: + id: disk1 + interface: SAS + chassis: + manufactured_at: 2022-01-19 + warranty_end: 2027-01-25 + nodeset: abacus23 diff --git a/input/grid5000/sites/rennes/clusters/abacus23/abacus23_metrics.yaml b/input/grid5000/sites/rennes/clusters/abacus23/abacus23_metrics.yaml new file mode 100644 index 00000000000..1b7527d9fcc --- /dev/null +++ b/input/grid5000/sites/rennes/clusters/abacus23/abacus23_metrics.yaml @@ -0,0 +1,41 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 diff --git a/input/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-3.yaml b/input/grid5000/sites/rennes/clusters/abacus23/nodes/abacus23-1.yaml similarity index 99% rename from input/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-3.yaml rename to input/grid5000/sites/rennes/clusters/abacus23/nodes/abacus23-1.yaml index df3b5e50696..1d4067bd761 100644 --- a/input/grid5000/sites/rennes/clusters/abacus22/nodes/abacus22-3.yaml +++ b/input/grid5000/sites/rennes/clusters/abacus23/nodes/abacus23-1.yaml @@ -1,6 +1,6 @@ # Generated by g5k-checks (g5k-checks -m api) --- -abacus22-3: +abacus23-1: architecture: cpu_core_numbering: contiguous nb_cores: 48 diff --git a/input/grid5000/sites/rennes/networks/swdc-a035-01-02-rba.yaml b/input/grid5000/sites/rennes/networks/swdc-a035-01-02-rba.yaml index 55f37a7cd7a..bbfa36f2657 100644 --- a/input/grid5000/sites/rennes/networks/swdc-a035-01-02-rba.yaml +++ b/input/grid5000/sites/rennes/networks/swdc-a035-01-02-rba.yaml @@ -22,7 +22,7 @@ swdc-a035-01-02-rba: 45_02: # physically on interface 1/45 of swdc-a035-02-rba only uid: abacus25-3 46_01: # physically on interface 1/46 of swdc-a035-01-rba only - uid: abacus22-3 + uid: abacus23-1 46_02: # physically on interface 1/46 of swdc-a035-02-rba only uid: abacus25-4 47_01: # physically on interface 1/47 of swdc-a035-01-rba only diff --git a/lib/refrepo/valid/data/homogeneity.yaml.erb b/lib/refrepo/valid/data/homogeneity.yaml.erb index 8f09716ff19..2a1f06d58dc 100644 --- a/lib/refrepo/valid/data/homogeneity.yaml.erb +++ b/lib/refrepo/valid/data/homogeneity.yaml.erb @@ -382,13 +382,6 @@ rennes: - ~network_adapters.eth1.firmware_version - -storage_devices.pci-0000:64:00.0-sas-0x50000f0b019454a2-lun-0 - +storage_devices.pci-0000:64:00.0-sas-0x50000f0b01945482-lun-0 - abacus22-3: - - ~network_adapters.eth0.firmware_version - - ~network_adapters.eth1.firmware_version - - -gpu_devices.nvidia1 - - -gpu_devices.nvidia2 - - -storage_devices.pci-0000:64:00.0-sas-0x50000f0b01945482-lun-0 - - +storage_devices.pci-0000:64:00.0-sas-0x50000f0b01938442-lun-0 abacus25-3: - ~storage_devices.pci-0000:64:00.0-scsi-0:2:1:0.model - ~storage_devices.pci-0000:64:00.0-scsi-0:2:1:0.firmware_version -- GitLab