From c6fdc814c48a4a387dfbe8371d10d3d69745ada1 Mon Sep 17 00:00:00 2001 From: Lucas Nussbaum <lucas.nussbaum@inria.fr> Date: Mon, 20 Jan 2025 13:12:41 +0100 Subject: [PATCH 1/2] [abacus27] configure kavlan --- .../rennes/clusters/abacus27/abacus27.json | 2 +- .../clusters/abacus27/nodes/abacus27-1.json | 50 +++++++++++++++++-- .../rennes/network_equipments/sw-1-c003.json | 6 +++ .../rennes/clusters/abacus27/abacus27.yaml | 2 +- .../sites/rennes/networks/sw-1-c003.yaml | 2 + input/grid5000/vlans.yaml | 3 ++ 6 files changed, 60 insertions(+), 5 deletions(-) diff --git a/data/grid5000/sites/rennes/clusters/abacus27/abacus27.json b/data/grid5000/sites/rennes/clusters/abacus27/abacus27.json index 9dcd0ed0204..70c6b3f735b 100644 --- a/data/grid5000/sites/rennes/clusters/abacus27/abacus27.json +++ b/data/grid5000/sites/rennes/clusters/abacus27/abacus27.json @@ -2,7 +2,7 @@ "boot_type": "uefi", "created_at": "Wed, 15 Jan 2025 00:00:00 GMT", "exotic": false, - "kavlan": false, + "kavlan": true, "manufactured_at": "2024-11-26", "metrics": [ diff --git a/data/grid5000/sites/rennes/clusters/abacus27/nodes/abacus27-1.json b/data/grid5000/sites/rennes/clusters/abacus27/nodes/abacus27-1.json index 677331fd54e..6804528740c 100644 --- a/data/grid5000/sites/rennes/clusters/abacus27/nodes/abacus27-1.json +++ b/data/grid5000/sites/rennes/clusters/abacus27/nodes/abacus27-1.json @@ -73,6 +73,50 @@ "vendor": "Nvidia" } }, + "kavlan": { + "eth0": { + "kavlan-1": "192.168.202.9", + "kavlan-11": "10.7.244.41", + "kavlan-12": "10.11.244.41", + "kavlan-13": "10.15.244.41", + "kavlan-14": "10.19.244.41", + "kavlan-16": "10.27.244.41", + "kavlan-17": "10.31.244.41", + "kavlan-18": "10.35.244.41", + "kavlan-2": "192.168.218.9", + "kavlan-20": "10.43.244.41", + "kavlan-21": "10.47.244.41", + "kavlan-3": "192.168.234.9", + "kavlan-4": "10.24.10.9", + "kavlan-5": "10.24.74.9", + "kavlan-6": "10.24.138.9", + "kavlan-7": "10.24.202.9", + "kavlan-8": "10.25.10.9", + "kavlan-9": "10.25.74.9" + } + }, + "kavlan6": { + "eth0": { + "kavlan-1": "2001:660:4406:780:e::1e", + "kavlan-11": "2001:660:4406:1a0:70e::1e", + "kavlan-12": "2001:660:4406:2a0:70e::1e", + "kavlan-13": "2001:660:4406:4a0:70e::1e", + "kavlan-14": "2001:660:4406:5a0:70e::1e", + "kavlan-16": "2001:660:4406:7a0:70e::1e", + "kavlan-17": "2001:660:4406:9a0:70e::1e", + "kavlan-18": "2001:660:4406:8a0:70e::1e", + "kavlan-2": "2001:660:4406:781:e::1e", + "kavlan-20": "2001:660:4406:3a0:70e::1e", + "kavlan-21": "2001:660:4406:6a0:70e::1e", + "kavlan-3": "2001:660:4406:782:e::1e", + "kavlan-4": "2001:660:4406:790:e::1e", + "kavlan-5": "2001:660:4406:791:e::1e", + "kavlan-6": "2001:660:4406:792:e::1e", + "kavlan-7": "2001:660:4406:793:e::1e", + "kavlan-8": "2001:660:4406:794:e::1e", + "kavlan-9": "2001:660:4406:795:e::1e" + } + }, "main_memory": { "ram_size": 549755813888 }, @@ -173,7 +217,7 @@ "interface": "Ethernet", "ip": "172.16.109.30", "ip6": "2001:660:4406:700:e::1e", - "kavlan": false, + "kavlan": true, "mac": "d4:04:e6:b4:b1:e0", "management": false, "model": "BCM57414 NetXtreme-E 10Gb/25Gb RDMA Ethernet Controller", @@ -184,8 +228,8 @@ "rate": 10000000000, "sriov": false, "sriov_totalvfs": 0, - "switch": null, - "switch_port": null, + "switch": "sw-1-c003", + "switch_port": "ethernet1/1/28", "vendor": "Broadcom Inc. and subsidiaries" }, { diff --git a/data/grid5000/sites/rennes/network_equipments/sw-1-c003.json b/data/grid5000/sites/rennes/network_equipments/sw-1-c003.json index 6721c131cd5..2adb1d3abf2 100644 --- a/data/grid5000/sites/rennes/network_equipments/sw-1-c003.json +++ b/data/grid5000/sites/rennes/network_equipments/sw-1-c003.json @@ -159,6 +159,12 @@ "port": "1/1/43", "snmp_name": "ethernet1/1/27", "uid": "gw" + }, + { + "kind": "node", + "port": "eth0", + "snmp_name": "ethernet1/1/28", + "uid": "abacus27-1" } ], "rate": 10000000000, diff --git a/input/grid5000/sites/rennes/clusters/abacus27/abacus27.yaml b/input/grid5000/sites/rennes/clusters/abacus27/abacus27.yaml index f880490fe48..8da018e7dd8 100644 --- a/input/grid5000/sites/rennes/clusters/abacus27/abacus27.yaml +++ b/input/grid5000/sites/rennes/clusters/abacus27/abacus27.yaml @@ -1,7 +1,7 @@ --- model: ProLiant DL385 Gen11 created_at: 2025-01-15 -kavlan: false +kavlan: true boot_type: uefi exotic: false queues: diff --git a/input/grid5000/sites/rennes/networks/sw-1-c003.yaml b/input/grid5000/sites/rennes/networks/sw-1-c003.yaml index 693902854ef..629594f79ed 100644 --- a/input/grid5000/sites/rennes/networks/sw-1-c003.yaml +++ b/input/grid5000/sites/rennes/networks/sw-1-c003.yaml @@ -64,3 +64,5 @@ sw-1-c003: uid: gw kind: router port: 1/1/43 + 28: + uid: abacus27-1 diff --git a/input/grid5000/vlans.yaml b/input/grid5000/vlans.yaml index 72f4ac7de96..a5026ed0c19 100644 --- a/input/grid5000/vlans.yaml +++ b/input/grid5000/vlans.yaml @@ -114,6 +114,7 @@ vlans: local rennes abacus12 eth0 0 0 10 2 local rennes abacus14 eth0 0 0 10 4 local rennes abacus16 eth0 0 0 10 6 + local rennes abacus27 eth0 0 0 10 8 local rennes roazhon3 eth0 0 0 11 0 local rennes roazhon7 eth0 0 0 11 2 local rennes roazhon8 eth0 0 0 11 6 @@ -174,6 +175,7 @@ vlans: global rennes abacus12 eth0 0 0 52 10 global rennes abacus14 eth0 0 0 52 20 global rennes abacus16 eth0 0 0 52 30 + global rennes abacus27 eth0 0 0 52 40 global rennes roazhon3 eth0 0 0 53 0 global rennes roazhon7 eth0 0 0 53 10 global rennes roazhon8 eth0 0 0 53 20 @@ -223,6 +225,7 @@ vlans: routed rennes abacus12 eth0 0 24 10 2 routed rennes abacus14 eth0 0 24 10 4 routed rennes abacus16 eth0 0 24 10 6 + routed rennes abacus27 eth0 0 24 10 8 routed rennes roazhon3 eth0 0 24 11 0 routed rennes roazhon7 eth0 0 24 11 5 routed rennes roazhon8 eth0 0 24 11 9 -- GitLab From 8f96d1ef951ae3445a8d6378e9f11a419c1128ce Mon Sep 17 00:00:00 2001 From: Lucas Nussbaum <lucas.nussbaum@inria.fr> Date: Mon, 20 Jan 2025 13:13:21 +0100 Subject: [PATCH 2/2] [abacus27] add metrics --- .../rennes/clusters/abacus27/abacus27.json | 46 ++++++++++++++++++- .../clusters/abacus27/abacus27_metrics.yaml | 41 +++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 input/grid5000/sites/rennes/clusters/abacus27/abacus27_metrics.yaml diff --git a/data/grid5000/sites/rennes/clusters/abacus27/abacus27.json b/data/grid5000/sites/rennes/clusters/abacus27/abacus27.json index 70c6b3f735b..74e4760f480 100644 --- a/data/grid5000/sites/rennes/clusters/abacus27/abacus27.json +++ b/data/grid5000/sites/rennes/clusters/abacus27/abacus27.json @@ -5,7 +5,51 @@ "kavlan": true, "manufactured_at": "2024-11-26", "metrics": [ - + { + "description": "Default subset of metrics from Prometheus Node Exporter", + "name": "prom_default_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "id": [ + "node_boot_time_seconds", + "node_cpu_scaling_frequency_hertz", + "node_cpu_seconds_total", + "node_filesystem_free_bytes", + "node_filesystem_size_bytes", + "node_load1", + "node_load15", + "node_load5", + "node_memory_Buffers_bytes", + "node_memory_Cached_bytes", + "node_memory_MemAvailable_bytes", + "node_memory_MemFree_bytes", + "node_memory_MemTotal_bytes", + "node_memory_Shmem_bytes", + "node_memory_SwapFree_bytes", + "node_memory_SwapTotal_bytes", + "node_network_receive_bytes_total", + "node_network_receive_packets_total", + "node_network_transmit_bytes_total", + "node_network_transmit_packets_total", + "node_procs_blocked", + "node_procs_running", + "kwollect_custom" + ], + "port": 9100, + "protocol": "prometheus" + } + }, + { + "description": "All metrics from Prometheus Node Exporter", + "name": "prom_all_metrics", + "optional_period": 15000, + "period": 0, + "source": { + "port": 9100, + "protocol": "prometheus" + } + } ], "model": "ProLiant DL385 Gen11", "priority": 202511, diff --git a/input/grid5000/sites/rennes/clusters/abacus27/abacus27_metrics.yaml b/input/grid5000/sites/rennes/clusters/abacus27/abacus27_metrics.yaml new file mode 100644 index 00000000000..1b7527d9fcc --- /dev/null +++ b/input/grid5000/sites/rennes/clusters/abacus27/abacus27_metrics.yaml @@ -0,0 +1,41 @@ +--- +metrics: + - name: prom_default_metrics + description: Default subset of metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 + id: + - node_boot_time_seconds + - node_cpu_scaling_frequency_hertz + - node_cpu_seconds_total + - node_filesystem_free_bytes + - node_filesystem_size_bytes + - node_load1 + - node_load15 + - node_load5 + - node_memory_Buffers_bytes + - node_memory_Cached_bytes + - node_memory_MemAvailable_bytes + - node_memory_MemFree_bytes + - node_memory_MemTotal_bytes + - node_memory_Shmem_bytes + - node_memory_SwapFree_bytes + - node_memory_SwapTotal_bytes + - node_network_receive_bytes_total + - node_network_receive_packets_total + - node_network_transmit_bytes_total + - node_network_transmit_packets_total + - node_procs_blocked + - node_procs_running + - kwollect_custom + + - name: prom_all_metrics + description: All metrics from Prometheus Node Exporter + period: 0 + optional_period: 15000 + source: + protocol: prometheus + port: 9100 -- GitLab