Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 05661ed9 authored by JACQUOT Pierre
Browse files

Merge branch 'montcalm' into 'master'

[toulouse][montcalm] Montcalm integration with OAR

See merge request !469
parents c2547602 792ef0b5
No related branches found
No related tags found
1 merge request: !469 [toulouse][montcalm] Montcalm integration with OAR
Pipeline #796151 passed
Showing changes with 162 additions and 13 deletions
......@@ -4,11 +4,84 @@
"exotic": false,
"kavlan": false,
"metrics": [
{
"description": "Default subset of metrics from Prometheus Node Exporter",
"name": "prom_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"node_boot_time_seconds",
"node_cpu_scaling_frequency_hertz",
"node_cpu_seconds_total",
"node_filesystem_free_bytes",
"node_filesystem_size_bytes",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Shmem_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_network_receive_bytes_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_packets_total",
"node_procs_blocked",
"node_procs_running",
"kwollect_custom"
],
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Node Exporter",
"name": "prom_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9100,
"protocol": "prometheus"
}
},
{
"description": "Default subset of metrics from Prometheus Nvidia DCGM Exporter",
"name": "prom_nvgpu_default_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"id": [
"DCGM_FI_DEV_SM_CLOCK",
"DCGM_FI_DEV_MEM_CLOCK",
"DCGM_FI_DEV_GPU_TEMP",
"DCGM_FI_DEV_POWER_USAGE",
"DCGM_FI_DEV_GPU_UTIL",
"DCGM_FI_DEV_MEM_COPY_UTIL"
],
"port": 9400,
"protocol": "prometheus"
}
},
{
"description": "All metrics from Prometheus Nvidia DCGM Exporter",
"name": "prom_nvgpu_all_metrics",
"optional_period": 15000,
"period": 0,
"source": {
"port": 9400,
"protocol": "prometheus"
}
}
],
"model": "HPE Proliant DL360 Gen10+",
"queues": [
"admin"
"admin",
"testing"
],
"type": "cluster",
"uid": "montcalm"
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -174,7 +174,8 @@
"deploy": true,
"max_walltime": 0,
"queues": [
"admin"
"admin",
"testing"
],
"virtual": "ivt"
},
......
......@@ -2,7 +2,8 @@
"alias": [
"nfs",
"home-g5k",
"modules"
"modules",
"public"
],
"kind": "physical",
"network_adapters": {
......
......@@ -5,6 +5,7 @@ boot_type: uefi
exotic: false
queues:
- admin
- testing
nodes:
montcalm-[1-9]:
supported_job_types:
......
---
metrics:
- name: prom_default_metrics
description: Default subset of metrics from Prometheus Node Exporter
period: 0
optional_period: 15000
source:
protocol: prometheus
port: 9100
id:
- node_boot_time_seconds
- node_cpu_scaling_frequency_hertz
- node_cpu_seconds_total
- node_filesystem_free_bytes
- node_filesystem_size_bytes
- node_load1
- node_load15
- node_load5
- node_memory_Buffers_bytes
- node_memory_Cached_bytes
- node_memory_MemAvailable_bytes
- node_memory_MemFree_bytes
- node_memory_MemTotal_bytes
- node_memory_Shmem_bytes
- node_memory_SwapFree_bytes
- node_memory_SwapTotal_bytes
- node_network_receive_bytes_total
- node_network_receive_packets_total
- node_network_transmit_bytes_total
- node_network_transmit_packets_total
- node_procs_blocked
- node_procs_running
- kwollect_custom
- name: prom_all_metrics
description: All metrics from Prometheus Node Exporter
period: 0
optional_period: 15000
source:
protocol: prometheus
port: 9100
- name: prom_nvgpu_default_metrics
description: Default subset of metrics from Prometheus Nvidia DCGM Exporter
period: 0
optional_period: 15000
source:
protocol: prometheus
port: 9400
id:
- DCGM_FI_DEV_SM_CLOCK
- DCGM_FI_DEV_MEM_CLOCK
- DCGM_FI_DEV_GPU_TEMP
- DCGM_FI_DEV_POWER_USAGE
- DCGM_FI_DEV_GPU_UTIL
- DCGM_FI_DEV_MEM_COPY_UTIL
- name: prom_nvgpu_all_metrics
description: All metrics from Prometheus Nvidia DCGM Exporter
period: 0
optional_period: 15000
source:
protocol: prometheus
port: 9400
......@@ -29,7 +29,7 @@ pve-2:
srv-data:
kind: physical
serial: CZ22230FL8
alias: [nfs, home-g5k, modules]
alias: [nfs, home-g5k, modules, public]
network_adapters:
bmc:
ip: 172.17.126.3
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment