From 9bf28cbb32bc0afdd0a305d4192110cc83abdc92 Mon Sep 17 00:00:00 2001
From: Nathan Paulin <nathan.paulin@inria.fr>
Date: Mon, 24 Mar 2025 10:43:30 +0100
Subject: [PATCH] [sophia][esterel4] kwollect add gpu metrics

---
Review note: this patch appends two metric entries that both read from
the prometheus source on port 9400. The first entry restricts collection
to the five listed DCGM ids; the second entry lists no ids.
The final added line is now newline-terminated (the
"\ No newline at end of file" marker was dropped) so the YAML file keeps
its trailing newline. The post-image blob id on the "index" line predates
that change. YAML indentation below was reconstructed; confirm it matches
the target file before applying.

 .../clusters/esterel4/esterel4_metrics.yaml   | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/input/grid5000/sites/sophia/clusters/esterel4/esterel4_metrics.yaml b/input/grid5000/sites/sophia/clusters/esterel4/esterel4_metrics.yaml
index 3c11d0073c1..bbcc255a649 100644
--- a/input/grid5000/sites/sophia/clusters/esterel4/esterel4_metrics.yaml
+++ b/input/grid5000/sites/sophia/clusters/esterel4/esterel4_metrics.yaml
@@ -40,3 +40,25 @@ metrics:
     source:
       protocol: prometheus
       port: 9100
+
+  - name: prom_nvgpu_default_metrics
+    description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
+      id:
+        - DCGM_FI_DEV_SM_CLOCK
+        - DCGM_FI_DEV_MEM_CLOCK
+        - DCGM_FI_DEV_GPU_TEMP
+        - DCGM_FI_DEV_POWER_USAGE
+        - DCGM_FI_DEV_MEM_COPY_UTIL
+
+  - name: prom_nvgpu_all_metrics
+    description: All metrics from Prometheus Nvidia DCGM Exporter
+    period: 0
+    optional_period: 15000
+    source:
+      protocol: prometheus
+      port: 9400
-- 
GitLab