From 774e532342ea7e17c2a5195660bd639cecab5a22 Mon Sep 17 00:00:00 2001
From: Nathan Paulin <nathan.paulin@inria.fr>
Date: Mon, 28 Apr 2025 16:00:05 +0200
Subject: [PATCH] [sophia][esterel33] add kwollect metrics

---
 .../sophia/clusters/esterel33/esterel33.json  | 27 +++++++++++++++++++
 .../clusters/esterel33/esterel33_metrics.yaml | 22 +++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json b/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json
index b0e1e706f39..3a102682098 100644
--- a/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json
+++ b/data/grid5000/sites/sophia/clusters/esterel33/esterel33.json
@@ -49,6 +49,33 @@
         "port": 9100,
         "protocol": "prometheus"
       }
+    },
+    {
+      "description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_default_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "id": [
+          "DCGM_FI_DEV_SM_CLOCK",
+          "DCGM_FI_DEV_MEM_CLOCK",
+          "DCGM_FI_DEV_GPU_TEMP",
+          "DCGM_FI_DEV_POWER_USAGE",
+          "DCGM_FI_DEV_MEM_COPY_UTIL"
+        ],
+        "port": 9400,
+        "protocol": "prometheus"
+      }
+    },
+    {
+      "description": "All metrics from Prometheus Nvidia DCGM Exporter",
+      "name": "prom_nvgpu_all_metrics",
+      "optional_period": 15000,
+      "period": 0,
+      "source": {
+        "port": 9400,
+        "protocol": "prometheus"
+      }
     }
   ],
   "model": "Dell PowerEdge R7525",
diff --git a/input/grid5000/sites/sophia/clusters/esterel33/esterel33_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel33/esterel33_metrics.yaml
index 729e9beb582..fc4d71df1e4 100644
--- a/input/grid5000/sites/sophia/clusters/esterel33/esterel33_metrics.yaml
+++ b/input/grid5000/sites/sophia/clusters/esterel33/esterel33_metrics.yaml
@@ -38,3 +38,25 @@ metrics:
     source:
       protocol: prometheus
       port: 9100
+
+  - name: prom_nvgpu_default_metrics
+    description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
+    period: 0  # NOTE(review): presumably 0 = not collected unless requested; confirm
+    optional_period: 15000  # NOTE(review): presumably milliseconds; confirm
+    source:
+      protocol: prometheus
+      port: 9400  # same port as the prom_nvgpu_all_metrics entry
+      id:  # the "default subset" named in the description
+        - DCGM_FI_DEV_SM_CLOCK
+        - DCGM_FI_DEV_MEM_CLOCK
+        - DCGM_FI_DEV_GPU_TEMP
+        - DCGM_FI_DEV_POWER_USAGE
+        - DCGM_FI_DEV_MEM_COPY_UTIL
+
+  - name: prom_nvgpu_all_metrics
+    description: All metrics from Prometheus Nvidia DCGM Exporter
+    period: 0  # NOTE(review): presumably 0 = not collected unless requested; confirm
+    optional_period: 15000  # NOTE(review): presumably milliseconds; confirm
+    source:  # no `id` list here, unlike prom_nvgpu_default_metrics
+      protocol: prometheus
+      port: 9400
-- 
GitLab