diff --git a/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json b/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json index e19dd9c97cb41a223c63d27a9e319438063c70fd..7218eb72d54f53d2360775d2fe9d827d47ee11c2 100644 --- a/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json +++ b/data/grid5000/sites/lille/clusters/chiconade1/chiconade1.json @@ -5,6 +5,507 @@ "kavlan": true, "manufactured_at": "2025-04-03", "metrics": [ + { + "description": "Temperature reported by BMC, in celsius", + "name": "bmc_ambient_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 2, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of Cpu 1 Pkg reported by BMC, in celsius", + "labels": { + "cpu": "1pkg" + }, + "name": "bmc_cpu_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 3, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of Dimm 1-8 reported by BMC, in celsius", + "labels": { + "dimm": "1-8" + }, + "name": "bmc_dimm_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 4, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of Dimm 9-16 reported by BMC, in celsius", + "labels": { + "dimm": "9-16" + }, + "name": "bmc_dimm_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 6, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 12-VR P1 reported by BMC, in celsius", + "labels": { + "id": "12-vrp1" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 8, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 16-Exp Bay Drive reported by BMC, in celsius", + "labels": { + "id": "16-expbaydrive" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 11, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 21-Chipset reported by BMC, in celsius", + "labels": { + "id": "21-chipset" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 14, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 22-BMC reported by BMC, in celsius", + "labels": { + "id": "22-bmc" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 15, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 23-P/S 1 Inlet reported by BMC, in celsius", + "labels": { + "id": "23-p/s1inlet" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 16, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 24-P/S 1 reported by BMC, in celsius", + "labels": { + "id": "24-p/s1" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 17, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 25-P/S 2 Inlet reported by BMC, in celsius", + "labels": { + "id": "25-p/s2inlet" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 18, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 26-P/S 2 reported by BMC, in celsius", + "labels": { + "id": "26-p/s2" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 19, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 29-OCP 1 Zone reported by BMC, in celsius", + "labels": { + "id": "29-ocp1zone" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 20, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 36-Board Inlet reported by BMC, in celsius", + "labels": { + "id": "36-boardinlet" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 23, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 40-Battery Zone reported by BMC, in celsius", + "labels": { + "id": "40-batteryzone" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 24, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 41-P/S 1 Zone reported by BMC, in celsius", + "labels": { + "id": "41-p/s1zone" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 25, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 42-Sys Exhaust reported by BMC, in celsius", + "labels": { + "id": "42-sysexhaust" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 26, + "protocol": "ipmisensor" + } + }, + { + "description": "Usage of Fan 1 reported by BMC, in percent", + "labels": { + "fan": "1" + }, + "name": "bmc_fan_usage_percent", + "optional_period": 5000, + "period": 0, + "source": { + "id": 34, + "protocol": "ipmisensor" + } + }, + { + "description": "Usage of Fan 2 reported by BMC, in percent", + "labels": { + "fan": "2" + }, + "name": "bmc_fan_usage_percent", + "optional_period": 5000, + "period": 0, + "source": { + "id": 37, + "protocol": "ipmisensor" + } + }, + { + "description": "Usage of Fan 3 reported by BMC, in percent", + "labels": { + "fan": "3" + }, + "name": "bmc_fan_usage_percent", + "optional_period": 5000, + "period": 0, + "source": { + "id": 40, + "protocol": "ipmisensor" + } + }, + { + "description": "Usage of Fan 4 reported by BMC, in percent", + "labels": { + "fan": "4" + }, + "name": "bmc_fan_usage_percent", + "optional_period": 5000, + "period": 0, + "source": { + "id": 43, + "protocol": "ipmisensor" + } + }, + { + "description": "Usage of Fan 5 reported by BMC, in percent", + "labels": { + "fan": "5" + }, + "name": "bmc_fan_usage_percent", + "optional_period": 5000, + "period": 0, + "source": { + "id": 46, + "protocol": "ipmisensor" + } + }, + { + "description": "Usage of Fan 6 reported by BMC, in percent", + "labels": { + "fan": "6" + }, + "name": "bmc_fan_usage_percent", + "optional_period": 5000, + "period": 0, + "source": { + "id": 49, + "protocol": "ipmisensor" + } + }, + { + "description": "Usage of Fan 7 reported by BMC, in percent", + "labels": { + "fan": "7" + }, + "name": "bmc_fan_usage_percent", + "optional_period": 5000, + "period": 0, + "source": { + "id": 52, + "protocol": "ipmisensor" + } + }, + { + "description": "Power consumption of Psu 1 reported by BMC, in watt", + "labels": { + "psu": "1" + }, + "name": "bmc_psu_power_watt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 55, + "protocol": "ipmisensor" + } + }, + { + "description": "Power consumption of Psu 2 reported by BMC, in watt", + "labels": { + "psu": "2" + }, + "name": "bmc_psu_power_watt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 57, + "protocol": "ipmisensor" + } + }, + { + "description": "Power consumption reported by BMC, in watt", + "name": "bmc_node_power_watt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 58, + "protocol": "ipmisensor" + } + }, + { + "description": "Power consumption of PS 1 Output reported by BMC, in watt", + "labels": { + "id": "ps1output" + }, + "name": "bmc_other_power_watt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 67, + "protocol": "ipmisensor" + } + }, + { + "description": "Voltage of PS_Volt_Out_01 reported by BMC, in volt", + "labels": { + "id": "ps_volt_out_01" + }, + "name": "bmc_other_voltage_volt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 68, + "protocol": "ipmisensor" + } + }, + { + "description": "Voltage of PS_Volt_In_01 reported by BMC, in volt", + "labels": { + "id": "ps_volt_in_01" + }, + "name": "bmc_other_voltage_volt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 69, + "protocol": "ipmisensor" + } + }, + { + "description": "Current of PS_Curr_Out_01 reported by BMC, in amp", + "labels": { + "id": "ps_curr_out_01" + }, + "name": "bmc_other_current_amp", + "optional_period": 5000, + "period": 0, + "source": { + "id": 70, + "protocol": "ipmisensor" + } + }, + { + "description": "Current of PS_Curr_In_01 reported by BMC, in amp", + "labels": { + "id": "ps_curr_in_01" + }, + "name": "bmc_other_current_amp", + "optional_period": 5000, + "period": 0, + "source": { + "id": 71, + "protocol": "ipmisensor" + } + }, + { + "description": "Power consumption of PS 2 Output reported by BMC, in watt", + "labels": { + "id": "ps2output" + }, + "name": "bmc_other_power_watt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 72, + "protocol": "ipmisensor" + } + }, + { + "description": "Voltage of PS_Volt_Out_02 reported by BMC, in volt", + "labels": { + "id": "ps_volt_out_02" + }, + "name": "bmc_other_voltage_volt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 73, + "protocol": "ipmisensor" + } + }, + { + "description": "Voltage of PS_Volt_In_02 reported by BMC, in volt", + "labels": { + "id": "ps_volt_in_02" + }, + "name": "bmc_other_voltage_volt", + "optional_period": 5000, + "period": 0, + "source": { + "id": 74, + "protocol": "ipmisensor" + } + }, + { + "description": "Current of PS_Curr_Out_02 reported by BMC, in amp", + "labels": { + "id": "ps_curr_out_02" + }, + "name": "bmc_other_current_amp", + "optional_period": 5000, + "period": 0, + "source": { + "id": 75, + "protocol": "ipmisensor" + } + }, + { + "description": "Current of PS_Curr_In_02 reported by BMC, in amp", + "labels": { + "id": "ps_curr_in_02" + }, + "name": "bmc_other_current_amp", + "optional_period": 5000, + "period": 0, + "source": { + "id": 76, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 50.1-XLR8R 4-GPU reported by BMC, in celsius", + "labels": { + "id": "50.1-xlr8r4-gpu" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 77, + "protocol": "ipmisensor" + } + }, + { + "description": "Temperature of 28.1-OCP 1-Netwo reported by BMC, in celsius", + "labels": { + "id": "28.1-ocp1-netwo" + }, + "name": "bmc_other_temp_celsius", + "optional_period": 5000, + "period": 0, + "source": { + "id": 78, + "protocol": "ipmisensor" + } + }, { "description": "Default subset of metrics from Prometheus Node Exporter", "name": "prom_default_metrics", diff --git a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1_metrics.yaml b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1_metrics.yaml index 0406ead1267a378f2a78a5620f5cc1f6257832b8..2546906a26f51a35cbbfc25238b2a548a9d30b78 100644 --- a/input/grid5000/sites/lille/clusters/chiconade1/chiconade1_metrics.yaml +++ b/input/grid5000/sites/lille/clusters/chiconade1/chiconade1_metrics.yaml @@ -1,6 +1,355 @@ --- metrics: + - name: bmc_ambient_temp_celsius + description: Temperature reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 2 #01-Inlet Ambient + + - name: bmc_cpu_temp_celsius + labels: {"cpu": "1pkg"} + description: Temperature of Cpu 1 Pkg reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 3 #02-CPU 1 PkgTmp + + - name: bmc_dimm_temp_celsius + labels: {"dimm": "1-8"} + description: Temperature of Dimm 1-8 reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 4 #04-P1 DIMM 1-8 + + - name: bmc_dimm_temp_celsius + labels: {"dimm": "9-16"} + description: Temperature of Dimm 9-16 reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 6 #06-P1 DIMM 9-16 + + - name: bmc_other_temp_celsius + labels: {"id": "12-vrp1"} + description: Temperature of 12-VR P1 reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 8 #12-VR P1 + + - name: bmc_other_temp_celsius + labels: {"id": "16-expbaydrive"} + description: Temperature of 16-Exp Bay Drive reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 11 #16-Exp Bay Drive + + - name: bmc_other_temp_celsius + labels: {"id": "21-chipset"} + description: Temperature of 21-Chipset reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 14 #21-Chipset + + - name: bmc_other_temp_celsius + labels: {"id": "22-bmc"} + description: Temperature of 22-BMC reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 15 #22-BMC + + - name: bmc_other_temp_celsius + labels: {"id": "23-p/s1inlet"} + description: Temperature of 23-P/S 1 Inlet reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 16 #23-P/S 1 Inlet + + - name: bmc_other_temp_celsius + labels: {"id": "24-p/s1"} + description: Temperature of 24-P/S 1 reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 17 #24-P/S 1 + + - name: bmc_other_temp_celsius + labels: {"id": "25-p/s2inlet"} + description: Temperature of 25-P/S 2 Inlet reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 18 #25-P/S 2 Inlet + + - name: bmc_other_temp_celsius + labels: {"id": "26-p/s2"} + description: Temperature of 26-P/S 2 reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 19 #26-P/S 2 + + - name: bmc_other_temp_celsius + labels: {"id": "29-ocp1zone"} + description: Temperature of 29-OCP 1 Zone reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 20 #29-OCP 1 Zone + + - name: bmc_other_temp_celsius + labels: {"id": "36-boardinlet"} + description: Temperature of 36-Board Inlet reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 23 #36-Board Inlet + + - name: bmc_other_temp_celsius + labels: {"id": "40-batteryzone"} + description: Temperature of 40-Battery Zone reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 24 #40-Battery Zone + + - name: bmc_other_temp_celsius + labels: {"id": "41-p/s1zone"} + description: Temperature of 41-P/S 1 Zone reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 25 #41-P/S 1 Zone + + - name: bmc_other_temp_celsius + labels: {"id": "42-sysexhaust"} + description: Temperature of 42-Sys Exhaust reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 26 #42-Sys Exhaust + + - name: bmc_fan_usage_percent + labels: {"fan": "1"} + description: Usage of Fan 1 reported by BMC, in percent + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 34 #Fan 1 DutyCycle + + - name: bmc_fan_usage_percent + labels: {"fan": "2"} + description: Usage of Fan 2 reported by BMC, in percent + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 37 #Fan 2 DutyCycle + + - name: bmc_fan_usage_percent + labels: {"fan": "3"} + description: Usage of Fan 3 reported by BMC, in percent + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 40 #Fan 3 DutyCycle + + - name: bmc_fan_usage_percent + labels: {"fan": "4"} + description: Usage of Fan 4 reported by BMC, in percent + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 43 #Fan 4 DutyCycle + + - name: bmc_fan_usage_percent + labels: {"fan": "5"} + description: Usage of Fan 5 reported by BMC, in percent + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 46 #Fan 5 DutyCycle + + - name: bmc_fan_usage_percent + labels: {"fan": "6"} + description: Usage of Fan 6 reported by BMC, in percent + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 49 #Fan 6 DutyCycle + + - name: bmc_fan_usage_percent + labels: {"fan": "7"} + description: Usage of Fan 7 reported by BMC, in percent + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 52 #Fan 7 DutyCycle + + - name: bmc_psu_power_watt + labels: {"psu": "1"} + description: Power consumption of Psu 1 reported by BMC, in watt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 55 #PS 1 Input + + - name: bmc_psu_power_watt + labels: {"psu": "2"} + description: Power consumption of Psu 2 reported by BMC, in watt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 57 #PS 2 Input + + - name: bmc_node_power_watt + description: Power consumption reported by BMC, in watt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 58 #Power Meter + + - name: bmc_other_power_watt + labels: {"id": "ps1output"} + description: Power consumption of PS 1 Output reported by BMC, in watt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 67 #PS 1 Output + + - name: bmc_other_voltage_volt + labels: {"id": "ps_volt_out_01"} + description: Voltage of PS_Volt_Out_01 reported by BMC, in volt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 68 #PS_Volt_Out_01 + + - name: bmc_other_voltage_volt + labels: {"id": "ps_volt_in_01"} + description: Voltage of PS_Volt_In_01 reported by BMC, in volt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 69 #PS_Volt_In_01 + + - name: bmc_other_current_amp + labels: {"id": "ps_curr_out_01"} + description: Current of PS_Curr_Out_01 reported by BMC, in amp + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 70 #PS_Curr_Out_01 + + - name: bmc_other_current_amp + labels: {"id": "ps_curr_in_01"} + description: Current of PS_Curr_In_01 reported by BMC, in amp + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 71 #PS_Curr_In_01 + + - name: bmc_other_power_watt + labels: {"id": "ps2output"} + description: Power consumption of PS 2 Output reported by BMC, in watt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 72 #PS 2 Output + + - name: bmc_other_voltage_volt + labels: {"id": "ps_volt_out_02"} + description: Voltage of PS_Volt_Out_02 reported by BMC, in volt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 73 #PS_Volt_Out_02 + + - name: bmc_other_voltage_volt + labels: {"id": "ps_volt_in_02"} + description: Voltage of PS_Volt_In_02 reported by BMC, in volt + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 74 #PS_Volt_In_02 + + - name: bmc_other_current_amp + labels: {"id": "ps_curr_out_02"} + description: Current of PS_Curr_Out_02 reported by BMC, in amp + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 75 #PS_Curr_Out_02 + + - name: bmc_other_current_amp + labels: {"id": "ps_curr_in_02"} + description: Current of PS_Curr_In_02 reported by BMC, in amp + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 76 #PS_Curr_In_02 + + - name: bmc_other_temp_celsius + labels: {"id": "50.1-xlr8r4-gpu"} + description: Temperature of 50.1-XLR8R 4-GPU reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 77 #50.1-XLR8R 4-GPU + + - name: bmc_other_temp_celsius + labels: {"id": "28.1-ocp1-netwo"} + description: Temperature of 28.1-OCP 1-Netwo reported by BMC, in celsius + period: 0 + optional_period: 5000 + source: + protocol: ipmisensor + id: 78 #28.1-OCP 1-Netwo + - name: prom_default_metrics description: Default subset of metrics from Prometheus Node Exporter period: 0 @@ -39,7 +388,9 @@ metrics: optional_period: 15000 source: protocol: prometheus - port: 9100 + port: 9100 + + - name: prom_nvgpu_default_metrics description: Default subset of metrics from Prometheus Nvidia DCGM Exporter period: 0 @@ -61,3 +412,4 @@ metrics: source: protocol: prometheus port: 9400 +