Une MAJ de sécurité est nécessaire sur notre version actuelle. Elle sera effectuée lundi 02/08 entre 12h30 et 13h. L'interruption de service devrait durer quelques minutes (probablement moins de 5 minutes).

Commit 5d9b32e3 authored by Simon Delamare
Browse files

[all] Add prometheus metrics to all clusters

parent d5efc94e
Pipeline #188667 passed with stages
in 8 minutes and 41 seconds
......@@ -218,6 +218,51 @@
"protocol": "snmp"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -315,6 +315,51 @@
"protocol": "snmp"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -266,6 +266,51 @@
"protocol": "snmp"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -307,6 +307,51 @@
"protocol": "snmp"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -203,6 +203,51 @@
"protocol": "snmp"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -203,6 +203,79 @@
"protocol": "snmp"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
"name": "prom_nvgpu_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"prom_DCGM_FI_DEV_SM_CLOCK",
"prom_DCGM_FI_DEV_MEM_CLOCK",
"prom_DCGM_FI_DEV_GPU_TEMP",
"prom_DCGM_FI_DEV_POWER_USAGE",
"prom_DCGM_FI_DEV_GPU_UTIL",
"prom_DCGM_FI_DEV_MEM_COPY_UTIL"
],
"port": 9400,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Nvidia DCGM Exporter",
"name": "prom_nvgpu_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9400,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -231,6 +231,89 @@
"protocol": "snmp"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
"name": "prom_nvgpu_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"prom_DCGM_FI_DEV_SM_CLOCK",
"prom_DCGM_FI_DEV_MEM_CLOCK",
"prom_DCGM_FI_DEV_GPU_TEMP",
"prom_DCGM_FI_DEV_POWER_USAGE",
"prom_DCGM_FI_DEV_GPU_UTIL",
"prom_DCGM_FI_DEV_MEM_COPY_UTIL"
],
"port": 9400,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Nvidia DCGM Exporter",
"name": "prom_nvgpu_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9400,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -105,6 +105,51 @@
"protocol": "prometheus"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -898,6 +898,89 @@
"protocol": "ipmisensor"
}
},
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
"name": "prom_nvgpu_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"prom_DCGM_FI_DEV_SM_CLOCK",
"prom_DCGM_FI_DEV_MEM_CLOCK",
"prom_DCGM_FI_DEV_GPU_TEMP",
"prom_DCGM_FI_DEV_POWER_USAGE",
"prom_DCGM_FI_DEV_GPU_UTIL",
"prom_DCGM_FI_DEV_MEM_COPY_UTIL"
],
"port": 9400,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Nvidia DCGM Exporter",
"name": "prom_nvgpu_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9400,
"protocol": "prometheus"
}
},
{
"description": "Input byte counter for the network device port",
"labels": {
......
......@@ -224,6 +224,51 @@
"protocol": "ipmisensor"