From 17a212d4e491c3c0af428ac2f7630f6d69e2efae Mon Sep 17 00:00:00 2001
From: Nicolas Perrin <nicolas.perrin@inria.fr>
Date: Fri, 21 Feb 2025 14:28:25 +0100
Subject: [PATCH 1/8] [sophia][esterel1] add basic metrics

---
 .../sophia/clusters/esterel1/esterel1.json    | 73 ++++++++++++++++++-
 .../clusters/esterel1/esterel1_metrics.yaml   | 64 ++++++++++++++++
 2 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 input/grid5000/sites/sophia/clusters/esterel1/esterel1_metrics.yaml

diff --git a/data/grid5000/sites/sophia/clusters/esterel1/esterel1.json b/data/grid5000/sites/sophia/clusters/esterel1/esterel1.json
index d66d9081b69..8c7cbc427c8 100644
--- a/data/grid5000/sites/sophia/clusters/esterel1/esterel1.json
+++ b/data/grid5000/sites/sophia/clusters/esterel1/esterel1.json
@@ -5,7 +5,78 @@
   "kavlan": false,
   "manufactured_at": "1970-01-01",
   "metrics": [
-
+    {
+      "description": "Default subset of metrics from Prometheus Node Exporter",
+      "name": "prom_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "node_boot_time_seconds",
+          "node_cpu_scaling_frequency_hertz",
+          "node_cpu_seconds_total",
+          "node_filesystem_free_bytes",
+          "node_filesystem_size_bytes",
+          "node_load1",
+          "node_load15",
+          "node_load5",
+          "node_memory_Buffers_bytes",
+          "node_memory_Cached_bytes",
+          "node_memory_MemAvailable_bytes",
+          "node_memory_MemFree_bytes",
+          "node_memory_MemTotal_bytes",
+          "node_memory_Shmem_bytes",
+          "node_memory_SwapFree_bytes",
+          "node_memory_SwapTotal_bytes",
+          "node_network_receive_bytes_total",
+          "node_network_receive_packets_total",
+          "node_network_transmit_bytes_total",
+          "node_network_transmit_packets_total",
+          "node_procs_blocked",
+          "node_procs_running",
+          "kwollect_custom"
+        ],
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Node Exporter",
+      "name": "prom_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "DCGM_FI_DEV_SM_CLOCK",
+          "DCGM_FI_DEV_MEM_CLOCK",
+          "DCGM_FI_DEV_GPU_TEMP",
+          "DCGM_FI_DEV_POWER_USAGE",
+          "DCGM_FI_DEV_MEM_COPY_UTIL"
+        ],
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    }
   ],
   "model": "Cluster Model",
   "priority": 197001,
diff --git a/input/grid5000/sites/sophia/clusters/esterel1/esterel1_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel1/esterel1_metrics.yaml
new file mode 100644
index 00000000000..a59d3e58fa6
--- /dev/null
+++ b/input/grid5000/sites/sophia/clusters/esterel1/esterel1_metrics.yaml
@@ -0,0 +1,64 @@
+---
+metrics:
+
+  - name: prom_default_metrics
+    description: Default subset of metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+      id:
+        - node_boot_time_seconds
+        - node_cpu_scaling_frequency_hertz
+        - node_cpu_seconds_total
+        - node_filesystem_free_bytes
+        - node_filesystem_size_bytes
+        - node_load1
+        - node_load15
+        - node_load5
+        - node_memory_Buffers_bytes
+        - node_memory_Cached_bytes
+        - node_memory_MemAvailable_bytes
+        - node_memory_MemFree_bytes
+        - node_memory_MemTotal_bytes
+        - node_memory_Shmem_bytes
+        - node_memory_SwapFree_bytes
+        - node_memory_SwapTotal_bytes
+        - node_network_receive_bytes_total
+        - node_network_receive_packets_total
+        - node_network_transmit_bytes_total
+        - node_network_transmit_packets_total
+        - node_procs_blocked
+        - node_procs_running
+        - kwollect_custom
+
+  - name: prom_all_metrics
+    description: All metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+
+  - name: prom_nvgpu_default_metrics
+    description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
+      id:
+        - DCGM_FI_DEV_SM_CLOCK
+        - DCGM_FI_DEV_MEM_CLOCK
+        - DCGM_FI_DEV_GPU_TEMP
+        - DCGM_FI_DEV_POWER_USAGE
+        - DCGM_FI_DEV_MEM_COPY_UTIL
+
+  - name: prom_nvgpu_all_metrics
+    description: All metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
-- 
GitLab


From 3a79adc9e86d06f7b7556f106d754634a973ec1d Mon Sep 17 00:00:00 2001
From: Nicolas Perrin <nicolas.perrin@inria.fr>
Date: Fri, 21 Feb 2025 14:29:33 +0100
Subject: [PATCH 2/8] [sophia][esterel2] add basic metrics

---
 .../sophia/clusters/esterel2/esterel2.json    | 73 ++++++++++++++++++-
 .../clusters/esterel2/esterel2_metrics.yaml   | 64 ++++++++++++++++
 2 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 input/grid5000/sites/sophia/clusters/esterel2/esterel2_metrics.yaml

diff --git a/data/grid5000/sites/sophia/clusters/esterel2/esterel2.json b/data/grid5000/sites/sophia/clusters/esterel2/esterel2.json
index 063327d1d8e..a3654eac965 100644
--- a/data/grid5000/sites/sophia/clusters/esterel2/esterel2.json
+++ b/data/grid5000/sites/sophia/clusters/esterel2/esterel2.json
@@ -5,7 +5,78 @@
   "kavlan": false,
   "manufactured_at": "1970-01-01",
   "metrics": [
-
+    {
+      "description": "Default subset of metrics from Prometheus Node Exporter",
+      "name": "prom_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "node_boot_time_seconds",
+          "node_cpu_scaling_frequency_hertz",
+          "node_cpu_seconds_total",
+          "node_filesystem_free_bytes",
+          "node_filesystem_size_bytes",
+          "node_load1",
+          "node_load15",
+          "node_load5",
+          "node_memory_Buffers_bytes",
+          "node_memory_Cached_bytes",
+          "node_memory_MemAvailable_bytes",
+          "node_memory_MemFree_bytes",
+          "node_memory_MemTotal_bytes",
+          "node_memory_Shmem_bytes",
+          "node_memory_SwapFree_bytes",
+          "node_memory_SwapTotal_bytes",
+          "node_network_receive_bytes_total",
+          "node_network_receive_packets_total",
+          "node_network_transmit_bytes_total",
+          "node_network_transmit_packets_total",
+          "node_procs_blocked",
+          "node_procs_running",
+          "kwollect_custom"
+        ],
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Node Exporter",
+      "name": "prom_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "DCGM_FI_DEV_SM_CLOCK",
+          "DCGM_FI_DEV_MEM_CLOCK",
+          "DCGM_FI_DEV_GPU_TEMP",
+          "DCGM_FI_DEV_POWER_USAGE",
+          "DCGM_FI_DEV_MEM_COPY_UTIL"
+        ],
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    }
   ],
   "model": "Cluster Model",
   "priority": 197001,
diff --git a/input/grid5000/sites/sophia/clusters/esterel2/esterel2_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel2/esterel2_metrics.yaml
new file mode 100644
index 00000000000..a59d3e58fa6
--- /dev/null
+++ b/input/grid5000/sites/sophia/clusters/esterel2/esterel2_metrics.yaml
@@ -0,0 +1,64 @@
+---
+metrics:
+
+  - name: prom_default_metrics
+    description: Default subset of metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+      id:
+        - node_boot_time_seconds
+        - node_cpu_scaling_frequency_hertz
+        - node_cpu_seconds_total
+        - node_filesystem_free_bytes
+        - node_filesystem_size_bytes
+        - node_load1
+        - node_load15
+        - node_load5
+        - node_memory_Buffers_bytes
+        - node_memory_Cached_bytes
+        - node_memory_MemAvailable_bytes
+        - node_memory_MemFree_bytes
+        - node_memory_MemTotal_bytes
+        - node_memory_Shmem_bytes
+        - node_memory_SwapFree_bytes
+        - node_memory_SwapTotal_bytes
+        - node_network_receive_bytes_total
+        - node_network_receive_packets_total
+        - node_network_transmit_bytes_total
+        - node_network_transmit_packets_total
+        - node_procs_blocked
+        - node_procs_running
+        - kwollect_custom
+
+  - name: prom_all_metrics
+    description: All metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+
+  - name: prom_nvgpu_default_metrics
+    description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
+      id:
+        - DCGM_FI_DEV_SM_CLOCK
+        - DCGM_FI_DEV_MEM_CLOCK
+        - DCGM_FI_DEV_GPU_TEMP
+        - DCGM_FI_DEV_POWER_USAGE
+        - DCGM_FI_DEV_MEM_COPY_UTIL
+
+  - name: prom_nvgpu_all_metrics
+    description: All metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
-- 
GitLab


From 3edaea414d038c4d791a8d9e3038ffbb0acff536 Mon Sep 17 00:00:00 2001
From: Nicolas Perrin <nicolas.perrin@inria.fr>
Date: Fri, 21 Feb 2025 14:31:26 +0100
Subject: [PATCH 3/8] [sophia][esterel5] add basic metrics

---
 .../sophia/clusters/esterel5/esterel5.json    | 73 ++++++++++++++++++-
 .../clusters/esterel5/esterel5_metrics.yaml   | 64 ++++++++++++++++
 2 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 input/grid5000/sites/sophia/clusters/esterel5/esterel5_metrics.yaml

diff --git a/data/grid5000/sites/sophia/clusters/esterel5/esterel5.json b/data/grid5000/sites/sophia/clusters/esterel5/esterel5.json
index 29c5be3bc19..83a941dbe4b 100644
--- a/data/grid5000/sites/sophia/clusters/esterel5/esterel5.json
+++ b/data/grid5000/sites/sophia/clusters/esterel5/esterel5.json
@@ -5,7 +5,78 @@
   "kavlan": false,
   "manufactured_at": "1970-01-01",
   "metrics": [
-
+    {
+      "description": "Default subset of metrics from Prometheus Node Exporter",
+      "name": "prom_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "node_boot_time_seconds",
+          "node_cpu_scaling_frequency_hertz",
+          "node_cpu_seconds_total",
+          "node_filesystem_free_bytes",
+          "node_filesystem_size_bytes",
+          "node_load1",
+          "node_load15",
+          "node_load5",
+          "node_memory_Buffers_bytes",
+          "node_memory_Cached_bytes",
+          "node_memory_MemAvailable_bytes",
+          "node_memory_MemFree_bytes",
+          "node_memory_MemTotal_bytes",
+          "node_memory_Shmem_bytes",
+          "node_memory_SwapFree_bytes",
+          "node_memory_SwapTotal_bytes",
+          "node_network_receive_bytes_total",
+          "node_network_receive_packets_total",
+          "node_network_transmit_bytes_total",
+          "node_network_transmit_packets_total",
+          "node_procs_blocked",
+          "node_procs_running",
+          "kwollect_custom"
+        ],
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Node Exporter",
+      "name": "prom_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "DCGM_FI_DEV_SM_CLOCK",
+          "DCGM_FI_DEV_MEM_CLOCK",
+          "DCGM_FI_DEV_GPU_TEMP",
+          "DCGM_FI_DEV_POWER_USAGE",
+          "DCGM_FI_DEV_MEM_COPY_UTIL"
+        ],
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    }
   ],
   "model": "Cluster Model",
   "priority": 197001,
diff --git a/input/grid5000/sites/sophia/clusters/esterel5/esterel5_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel5/esterel5_metrics.yaml
new file mode 100644
index 00000000000..a59d3e58fa6
--- /dev/null
+++ b/input/grid5000/sites/sophia/clusters/esterel5/esterel5_metrics.yaml
@@ -0,0 +1,64 @@
+---
+metrics:
+
+  - name: prom_default_metrics
+    description: Default subset of metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+      id:
+        - node_boot_time_seconds
+        - node_cpu_scaling_frequency_hertz
+        - node_cpu_seconds_total
+        - node_filesystem_free_bytes
+        - node_filesystem_size_bytes
+        - node_load1
+        - node_load15
+        - node_load5
+        - node_memory_Buffers_bytes
+        - node_memory_Cached_bytes
+        - node_memory_MemAvailable_bytes
+        - node_memory_MemFree_bytes
+        - node_memory_MemTotal_bytes
+        - node_memory_Shmem_bytes
+        - node_memory_SwapFree_bytes
+        - node_memory_SwapTotal_bytes
+        - node_network_receive_bytes_total
+        - node_network_receive_packets_total
+        - node_network_transmit_bytes_total
+        - node_network_transmit_packets_total
+        - node_procs_blocked
+        - node_procs_running
+        - kwollect_custom
+
+  - name: prom_all_metrics
+    description: All metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+
+  - name: prom_nvgpu_default_metrics
+    description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
+      id:
+        - DCGM_FI_DEV_SM_CLOCK
+        - DCGM_FI_DEV_MEM_CLOCK
+        - DCGM_FI_DEV_GPU_TEMP
+        - DCGM_FI_DEV_POWER_USAGE
+        - DCGM_FI_DEV_MEM_COPY_UTIL
+
+  - name: prom_nvgpu_all_metrics
+    description: All metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
-- 
GitLab


From 639953edc67c1f3f515f3e0e527f1ef965b12fb3 Mon Sep 17 00:00:00 2001
From: Nicolas Perrin <nicolas.perrin@inria.fr>
Date: Fri, 21 Feb 2025 14:32:10 +0100
Subject: [PATCH 4/8] [sophia][esterel7] add basic metrics

---
 .../sophia/clusters/esterel7/esterel7.json    | 73 ++++++++++++++++++-
 .../clusters/esterel7/esterel7_metrics.yaml   | 64 ++++++++++++++++
 2 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 input/grid5000/sites/sophia/clusters/esterel7/esterel7_metrics.yaml

diff --git a/data/grid5000/sites/sophia/clusters/esterel7/esterel7.json b/data/grid5000/sites/sophia/clusters/esterel7/esterel7.json
index 2095fe9391a..e513bfb4bc8 100644
--- a/data/grid5000/sites/sophia/clusters/esterel7/esterel7.json
+++ b/data/grid5000/sites/sophia/clusters/esterel7/esterel7.json
@@ -5,7 +5,78 @@
   "kavlan": false,
   "manufactured_at": "1970-01-01",
   "metrics": [
-
+    {
+      "description": "Default subset of metrics from Prometheus Node Exporter",
+      "name": "prom_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "node_boot_time_seconds",
+          "node_cpu_scaling_frequency_hertz",
+          "node_cpu_seconds_total",
+          "node_filesystem_free_bytes",
+          "node_filesystem_size_bytes",
+          "node_load1",
+          "node_load15",
+          "node_load5",
+          "node_memory_Buffers_bytes",
+          "node_memory_Cached_bytes",
+          "node_memory_MemAvailable_bytes",
+          "node_memory_MemFree_bytes",
+          "node_memory_MemTotal_bytes",
+          "node_memory_Shmem_bytes",
+          "node_memory_SwapFree_bytes",
+          "node_memory_SwapTotal_bytes",
+          "node_network_receive_bytes_total",
+          "node_network_receive_packets_total",
+          "node_network_transmit_bytes_total",
+          "node_network_transmit_packets_total",
+          "node_procs_blocked",
+          "node_procs_running",
+          "kwollect_custom"
+        ],
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Node Exporter",
+      "name": "prom_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "DCGM_FI_DEV_SM_CLOCK",
+          "DCGM_FI_DEV_MEM_CLOCK",
+          "DCGM_FI_DEV_GPU_TEMP",
+          "DCGM_FI_DEV_POWER_USAGE",
+          "DCGM_FI_DEV_MEM_COPY_UTIL"
+        ],
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    }
   ],
   "model": "Cluster Model",
   "priority": 197001,
diff --git a/input/grid5000/sites/sophia/clusters/esterel7/esterel7_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel7/esterel7_metrics.yaml
new file mode 100644
index 00000000000..a59d3e58fa6
--- /dev/null
+++ b/input/grid5000/sites/sophia/clusters/esterel7/esterel7_metrics.yaml
@@ -0,0 +1,64 @@
+---
+metrics:
+
+  - name: prom_default_metrics
+    description: Default subset of metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+      id:
+        - node_boot_time_seconds
+        - node_cpu_scaling_frequency_hertz
+        - node_cpu_seconds_total
+        - node_filesystem_free_bytes
+        - node_filesystem_size_bytes
+        - node_load1
+        - node_load15
+        - node_load5
+        - node_memory_Buffers_bytes
+        - node_memory_Cached_bytes
+        - node_memory_MemAvailable_bytes
+        - node_memory_MemFree_bytes
+        - node_memory_MemTotal_bytes
+        - node_memory_Shmem_bytes
+        - node_memory_SwapFree_bytes
+        - node_memory_SwapTotal_bytes
+        - node_network_receive_bytes_total
+        - node_network_receive_packets_total
+        - node_network_transmit_bytes_total
+        - node_network_transmit_packets_total
+        - node_procs_blocked
+        - node_procs_running
+        - kwollect_custom
+
+  - name: prom_all_metrics
+    description: All metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+
+  - name: prom_nvgpu_default_metrics
+    description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
+      id:
+        - DCGM_FI_DEV_SM_CLOCK
+        - DCGM_FI_DEV_MEM_CLOCK
+        - DCGM_FI_DEV_GPU_TEMP
+        - DCGM_FI_DEV_POWER_USAGE
+        - DCGM_FI_DEV_MEM_COPY_UTIL
+
+  - name: prom_nvgpu_all_metrics
+    description: All metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
-- 
GitLab


From 0b31e7657769653a4c0266a8c4c7a6b224f4d83e Mon Sep 17 00:00:00 2001
From: Nicolas Perrin <nicolas.perrin@inria.fr>
Date: Fri, 21 Feb 2025 14:33:00 +0100
Subject: [PATCH 5/8] [sophia][esterel6] add basic metrics

---
 .../sophia/clusters/esterel6/esterel6.json    | 73 ++++++++++++++++++-
 .../clusters/esterel6/esterel6_metrics.yaml   | 64 ++++++++++++++++
 2 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 input/grid5000/sites/sophia/clusters/esterel6/esterel6_metrics.yaml

diff --git a/data/grid5000/sites/sophia/clusters/esterel6/esterel6.json b/data/grid5000/sites/sophia/clusters/esterel6/esterel6.json
index d403a3efe7a..f027cfacf26 100644
--- a/data/grid5000/sites/sophia/clusters/esterel6/esterel6.json
+++ b/data/grid5000/sites/sophia/clusters/esterel6/esterel6.json
@@ -5,7 +5,78 @@
   "kavlan": false,
   "manufactured_at": "1970-01-01",
   "metrics": [
-
+    {
+      "description": "Default subset of metrics from Prometheus Node Exporter",
+      "name": "prom_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "node_boot_time_seconds",
+          "node_cpu_scaling_frequency_hertz",
+          "node_cpu_seconds_total",
+          "node_filesystem_free_bytes",
+          "node_filesystem_size_bytes",
+          "node_load1",
+          "node_load15",
+          "node_load5",
+          "node_memory_Buffers_bytes",
+          "node_memory_Cached_bytes",
+          "node_memory_MemAvailable_bytes",
+          "node_memory_MemFree_bytes",
+          "node_memory_MemTotal_bytes",
+          "node_memory_Shmem_bytes",
+          "node_memory_SwapFree_bytes",
+          "node_memory_SwapTotal_bytes",
+          "node_network_receive_bytes_total",
+          "node_network_receive_packets_total",
+          "node_network_transmit_bytes_total",
+          "node_network_transmit_packets_total",
+          "node_procs_blocked",
+          "node_procs_running",
+          "kwollect_custom"
+        ],
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Node Exporter",
+      "name": "prom_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "DCGM_FI_DEV_SM_CLOCK",
+          "DCGM_FI_DEV_MEM_CLOCK",
+          "DCGM_FI_DEV_GPU_TEMP",
+          "DCGM_FI_DEV_POWER_USAGE",
+          "DCGM_FI_DEV_MEM_COPY_UTIL"
+        ],
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    }
   ],
   "model": "Cluster Model",
   "priority": 197001,
diff --git a/input/grid5000/sites/sophia/clusters/esterel6/esterel6_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel6/esterel6_metrics.yaml
new file mode 100644
index 00000000000..a59d3e58fa6
--- /dev/null
+++ b/input/grid5000/sites/sophia/clusters/esterel6/esterel6_metrics.yaml
@@ -0,0 +1,64 @@
+---
+metrics:
+
+  - name: prom_default_metrics
+    description: Default subset of metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+      id:
+        - node_boot_time_seconds
+        - node_cpu_scaling_frequency_hertz
+        - node_cpu_seconds_total
+        - node_filesystem_free_bytes
+        - node_filesystem_size_bytes
+        - node_load1
+        - node_load15
+        - node_load5
+        - node_memory_Buffers_bytes
+        - node_memory_Cached_bytes
+        - node_memory_MemAvailable_bytes
+        - node_memory_MemFree_bytes
+        - node_memory_MemTotal_bytes
+        - node_memory_Shmem_bytes
+        - node_memory_SwapFree_bytes
+        - node_memory_SwapTotal_bytes
+        - node_network_receive_bytes_total
+        - node_network_receive_packets_total
+        - node_network_transmit_bytes_total
+        - node_network_transmit_packets_total
+        - node_procs_blocked
+        - node_procs_running
+        - kwollect_custom
+
+  - name: prom_all_metrics
+    description: All metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+
+  - name: prom_nvgpu_default_metrics
+    description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
+      id:
+        - DCGM_FI_DEV_SM_CLOCK
+        - DCGM_FI_DEV_MEM_CLOCK
+        - DCGM_FI_DEV_GPU_TEMP
+        - DCGM_FI_DEV_POWER_USAGE
+        - DCGM_FI_DEV_MEM_COPY_UTIL
+
+  - name: prom_nvgpu_all_metrics
+    description: All metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
-- 
GitLab


From efb1cd9e98428e96f46d2975723c0baa91ef2de7 Mon Sep 17 00:00:00 2001
From: Nicolas Perrin <nicolas.perrin@inria.fr>
Date: Fri, 21 Feb 2025 14:34:41 +0100
Subject: [PATCH 6/8] [sophia][mercantour1] add basic metrics

---
 .../clusters/mercantour1/mercantour1.json     | 46 ++++++++++++++++++-
 .../mercantour1/mercantour1_metrics.yaml      | 42 +++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 input/grid5000/sites/sophia/clusters/mercantour1/mercantour1_metrics.yaml

diff --git a/data/grid5000/sites/sophia/clusters/mercantour1/mercantour1.json b/data/grid5000/sites/sophia/clusters/mercantour1/mercantour1.json
index 2b356f9f284..4ecbdaea324 100644
--- a/data/grid5000/sites/sophia/clusters/mercantour1/mercantour1.json
+++ b/data/grid5000/sites/sophia/clusters/mercantour1/mercantour1.json
@@ -5,7 +5,51 @@
   "kavlan": false,
   "manufactured_at": "1970-01-01",
   "metrics": [
-
+    {
+      "description": "Default subset of metrics from Prometheus Node Exporter",
+      "name": "prom_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "node_boot_time_seconds",
+          "node_cpu_scaling_frequency_hertz",
+          "node_cpu_seconds_total",
+          "node_filesystem_free_bytes",
+          "node_filesystem_size_bytes",
+          "node_load1",
+          "node_load15",
+          "node_load5",
+          "node_memory_Buffers_bytes",
+          "node_memory_Cached_bytes",
+          "node_memory_MemAvailable_bytes",
+          "node_memory_MemFree_bytes",
+          "node_memory_MemTotal_bytes",
+          "node_memory_Shmem_bytes",
+          "node_memory_SwapFree_bytes",
+          "node_memory_SwapTotal_bytes",
+          "node_network_receive_bytes_total",
+          "node_network_receive_packets_total",
+          "node_network_transmit_bytes_total",
+          "node_network_transmit_packets_total",
+          "node_procs_blocked",
+          "node_procs_running",
+          "kwollect_custom"
+        ],
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Node Exporter",
+      "name": "prom_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    }
   ],
   "model": "Cluster Model",
   "priority": 197001,
diff --git a/input/grid5000/sites/sophia/clusters/mercantour1/mercantour1_metrics.yaml b/input/grid5000/sites/sophia/clusters/mercantour1/mercantour1_metrics.yaml
new file mode 100644
index 00000000000..3c11d0073c1
--- /dev/null
+++ b/input/grid5000/sites/sophia/clusters/mercantour1/mercantour1_metrics.yaml
@@ -0,0 +1,42 @@
+---
+metrics:
+
+  - name: prom_default_metrics
+    description: Default subset of metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+      id:
+        - node_boot_time_seconds
+        - node_cpu_scaling_frequency_hertz
+        - node_cpu_seconds_total
+        - node_filesystem_free_bytes
+        - node_filesystem_size_bytes
+        - node_load1
+        - node_load15
+        - node_load5
+        - node_memory_Buffers_bytes
+        - node_memory_Cached_bytes
+        - node_memory_MemAvailable_bytes
+        - node_memory_MemFree_bytes
+        - node_memory_MemTotal_bytes
+        - node_memory_Shmem_bytes
+        - node_memory_SwapFree_bytes
+        - node_memory_SwapTotal_bytes
+        - node_network_receive_bytes_total
+        - node_network_receive_packets_total
+        - node_network_transmit_bytes_total
+        - node_network_transmit_packets_total
+        - node_procs_blocked
+        - node_procs_running
+        - kwollect_custom
+
+  - name: prom_all_metrics
+    description: All metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
-- 
GitLab


From fe750db82a531d394533c79e9d29e385d720df98 Mon Sep 17 00:00:00 2001
From: Nicolas Perrin <nicolas.perrin@inria.fr>
Date: Fri, 21 Feb 2025 14:35:28 +0100
Subject: [PATCH 7/8] [sophia][mercantour5] add basic metrics

---
 .../clusters/mercantour5/mercantour5.json     | 46 ++++++++++++++++++-
 .../mercantour5/mercantour5_metrics.yaml      | 42 +++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 input/grid5000/sites/sophia/clusters/mercantour5/mercantour5_metrics.yaml

diff --git a/data/grid5000/sites/sophia/clusters/mercantour5/mercantour5.json b/data/grid5000/sites/sophia/clusters/mercantour5/mercantour5.json
index cadd840b129..52a070647c3 100644
--- a/data/grid5000/sites/sophia/clusters/mercantour5/mercantour5.json
+++ b/data/grid5000/sites/sophia/clusters/mercantour5/mercantour5.json
@@ -5,7 +5,51 @@
   "kavlan": false,
   "manufactured_at": "1970-01-01",
   "metrics": [
-
+    {
+      "description": "Default subset of metrics from Prometheus Node Exporter",
+      "name": "prom_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "node_boot_time_seconds",
+          "node_cpu_scaling_frequency_hertz",
+          "node_cpu_seconds_total",
+          "node_filesystem_free_bytes",
+          "node_filesystem_size_bytes",
+          "node_load1",
+          "node_load15",
+          "node_load5",
+          "node_memory_Buffers_bytes",
+          "node_memory_Cached_bytes",
+          "node_memory_MemAvailable_bytes",
+          "node_memory_MemFree_bytes",
+          "node_memory_MemTotal_bytes",
+          "node_memory_Shmem_bytes",
+          "node_memory_SwapFree_bytes",
+          "node_memory_SwapTotal_bytes",
+          "node_network_receive_bytes_total",
+          "node_network_receive_packets_total",
+          "node_network_transmit_bytes_total",
+          "node_network_transmit_packets_total",
+          "node_procs_blocked",
+          "node_procs_running",
+          "kwollect_custom"
+        ],
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Node Exporter",
+      "name": "prom_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    }
   ],
   "model": "Cluster Model",
   "priority": 197001,
diff --git a/input/grid5000/sites/sophia/clusters/mercantour5/mercantour5_metrics.yaml b/input/grid5000/sites/sophia/clusters/mercantour5/mercantour5_metrics.yaml
new file mode 100644
index 00000000000..3c11d0073c1
--- /dev/null
+++ b/input/grid5000/sites/sophia/clusters/mercantour5/mercantour5_metrics.yaml
@@ -0,0 +1,42 @@
+---
+metrics:
+
+  - name: prom_default_metrics
+    description: Default subset of metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+      id:
+        - node_boot_time_seconds
+        - node_cpu_scaling_frequency_hertz
+        - node_cpu_seconds_total
+        - node_filesystem_free_bytes
+        - node_filesystem_size_bytes
+        - node_load1
+        - node_load15
+        - node_load5
+        - node_memory_Buffers_bytes
+        - node_memory_Cached_bytes
+        - node_memory_MemAvailable_bytes
+        - node_memory_MemFree_bytes
+        - node_memory_MemTotal_bytes
+        - node_memory_Shmem_bytes
+        - node_memory_SwapFree_bytes
+        - node_memory_SwapTotal_bytes
+        - node_network_receive_bytes_total
+        - node_network_receive_packets_total
+        - node_network_transmit_bytes_total
+        - node_network_transmit_packets_total
+        - node_procs_blocked
+        - node_procs_running
+        - kwollect_custom
+
+  - name: prom_all_metrics
+    description: All metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
-- 
GitLab


From 63450c2fad50e911644682bab9ba64a6448aecf6 Mon Sep 17 00:00:00 2001
From: Nicolas Perrin <nicolas.perrin@inria.fr>
Date: Fri, 21 Feb 2025 14:36:05 +0100
Subject: [PATCH 8/8] [sophia][mercantour6] add basic metrics

---
 .../clusters/mercantour6/mercantour6.json     | 46 ++++++++++++++++++-
 .../mercantour6/mercantour6_metrics.yaml      | 42 +++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 input/grid5000/sites/sophia/clusters/mercantour6/mercantour6_metrics.yaml

diff --git a/data/grid5000/sites/sophia/clusters/mercantour6/mercantour6.json b/data/grid5000/sites/sophia/clusters/mercantour6/mercantour6.json
index 371f85d01c2..911cdab2da1 100644
--- a/data/grid5000/sites/sophia/clusters/mercantour6/mercantour6.json
+++ b/data/grid5000/sites/sophia/clusters/mercantour6/mercantour6.json
@@ -5,7 +5,51 @@
   "kavlan": false,
   "manufactured_at": "1970-01-01",
   "metrics": [
-
+    {
+      "description": "Default subset of metrics from Prometheus Node Exporter",
+      "name": "prom_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "node_boot_time_seconds",
+          "node_cpu_scaling_frequency_hertz",
+          "node_cpu_seconds_total",
+          "node_filesystem_free_bytes",
+          "node_filesystem_size_bytes",
+          "node_load1",
+          "node_load15",
+          "node_load5",
+          "node_memory_Buffers_bytes",
+          "node_memory_Cached_bytes",
+          "node_memory_MemAvailable_bytes",
+          "node_memory_MemFree_bytes",
+          "node_memory_MemTotal_bytes",
+          "node_memory_Shmem_bytes",
+          "node_memory_SwapFree_bytes",
+          "node_memory_SwapTotal_bytes",
+          "node_network_receive_bytes_total",
+          "node_network_receive_packets_total",
+          "node_network_transmit_bytes_total",
+          "node_network_transmit_packets_total",
+          "node_procs_blocked",
+          "node_procs_running",
+          "kwollect_custom"
+        ],
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Node Exporter",
+      "name": "prom_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9100,
+        "protocol": "prometheus"
+      }
+    }
   ],
   "model": "Cluster Model",
   "priority": 197001,
diff --git a/input/grid5000/sites/sophia/clusters/mercantour6/mercantour6_metrics.yaml b/input/grid5000/sites/sophia/clusters/mercantour6/mercantour6_metrics.yaml
new file mode 100644
index 00000000000..3c11d0073c1
--- /dev/null
+++ b/input/grid5000/sites/sophia/clusters/mercantour6/mercantour6_metrics.yaml
@@ -0,0 +1,42 @@
+---
+metrics:
+
+  - name: prom_default_metrics
+    description: Default subset of metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
+      id:
+        - node_boot_time_seconds
+        - node_cpu_scaling_frequency_hertz
+        - node_cpu_seconds_total
+        - node_filesystem_free_bytes
+        - node_filesystem_size_bytes
+        - node_load1
+        - node_load15
+        - node_load5
+        - node_memory_Buffers_bytes
+        - node_memory_Cached_bytes
+        - node_memory_MemAvailable_bytes
+        - node_memory_MemFree_bytes
+        - node_memory_MemTotal_bytes
+        - node_memory_Shmem_bytes
+        - node_memory_SwapFree_bytes
+        - node_memory_SwapTotal_bytes
+        - node_network_receive_bytes_total
+        - node_network_receive_packets_total
+        - node_network_transmit_bytes_total
+        - node_network_transmit_packets_total
+        - node_procs_blocked
+        - node_procs_running
+        - kwollect_custom
+
+  - name: prom_all_metrics
+    description: All metrics from Prometheus Node Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9100
-- 
GitLab