From ce189e2c504bbb97de6af0a9924e5d736a15b789 Mon Sep 17 00:00:00 2001
From: Nathan Paulin <nathan.paulin@inria.fr>
Date: Mon, 5 May 2025 10:09:32 +0200
Subject: [PATCH] [sophia][esterel11] add kwollect Nvidia DCGM GPU metrics

---
 .../sophia/clusters/esterel11/esterel11.json  | 27 +++++++++++++++++++
 .../clusters/esterel11/esterel11_metrics.yaml | 23 ++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/data/grid5000/sites/sophia/clusters/esterel11/esterel11.json b/data/grid5000/sites/sophia/clusters/esterel11/esterel11.json
index 96ad96b5f22..9dac17a9078 100644
--- a/data/grid5000/sites/sophia/clusters/esterel11/esterel11.json
+++ b/data/grid5000/sites/sophia/clusters/esterel11/esterel11.json
@@ -49,6 +49,33 @@
         "port": 9100,
         "protocol": "prometheus"
       }
+    },
+    {
+      "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "DCGM_FI_DEV_SM_CLOCK",
+          "DCGM_FI_DEV_MEM_CLOCK",
+          "DCGM_FI_DEV_GPU_TEMP",
+          "DCGM_FI_DEV_POWER_USAGE",
+          "DCGM_FI_DEV_MEM_COPY_UTIL"
+        ],
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9400,
+        "protocol": "prometheus"
+      }
     }
   ],
   "model": "Dell PowerEdge T630",
diff --git a/input/grid5000/sites/sophia/clusters/esterel11/esterel11_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel11/esterel11_metrics.yaml
index 729e9beb582..a3b0c1ba4e7 100644
--- a/input/grid5000/sites/sophia/clusters/esterel11/esterel11_metrics.yaml
+++ b/input/grid5000/sites/sophia/clusters/esterel11/esterel11_metrics.yaml
@@ -38,3 +38,26 @@ metrics:
     source:
       protocol: prometheus
       port: 9100
+
+  - name: prom_nvgpu_default_metrics
+    description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
+      id:
+        - DCGM_FI_DEV_SM_CLOCK
+        - DCGM_FI_DEV_MEM_CLOCK
+        - DCGM_FI_DEV_GPU_TEMP
+        - DCGM_FI_DEV_POWER_USAGE
+        - DCGM_FI_DEV_MEM_COPY_UTIL
+
+  - name: prom_nvgpu_all_metrics
+    description: All metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
+
-- 
GitLab