Commit 006597a3 authored by Combava Orange's avatar Combava Orange
Browse files

rename grafana override to monitoring, use stuckyhm/prometheus-docker-sd to...

rename grafana override to monitoring, use stuckyhm/prometheus-docker-sd to auto-discover containers
parent 6faa527d
Pipeline #273702 passed with stages
in 21 minutes and 16 seconds
......@@ -14,8 +14,6 @@ spring:
database-platform: org.hibernate.dialect.PostgreSQL95Dialect
generate-ddl: false
open-in-view: false
main:
web-application-type: none
output:
ansi:
enabled: ALWAYS
......@@ -24,8 +24,6 @@ services:
depends_on:
- postgres
- kafka
ports:
- "7070:8080"
restart: always
clea-batch:
......@@ -53,7 +51,7 @@ services:
- ./clea-dbmigration/sql:/flyway/sql
depends_on:
- postgres
restart: always
restart: "no"
kafka:
image: "wurstmeister/kafka:latest"
environment:
......
# Alertmanager configuration: route every alert to the "slack" receiver.
route:
  receiver: "slack"

receivers:
  - name: "slack"
    # Slack delivery is disabled until a webhook is configured; replace the
    # placeholders below and uncomment to enable notifications.
    # slack_configs:
    #   - send_resolved: true
    #     username: '<username>'
    #     channel: '#<channel-name>'
    #     api_url: '<incoming-webhook-url>'
......@@ -6,6 +6,7 @@ OPTIONS="-p $SCRIPTNAME -f $SCRIPTPATH/../docker-compose.yml $OVERRIDES"
# IP of the host (bridge)
export EXTERNAL_IP=$(docker network inspect bridge --format='{{(index .IPAM.Config 0).Gateway}}')
export ENDPOINT_OUTSCALE=http://minio:9000
while [[ $# -gt 0 ]]
do
......
# Compose override that adds a Grafana container (dashboards + provisioning
# mounted from the repo).
version: "3.8"

services:
  grafana:
    image: grafana/grafana:7.5.0
    ports:
      - "3000:3000"
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/dashboards:/var/lib/grafana/dashboards
    # NOTE(review): this commented-out healthcheck probes a minio endpoint
    # (port 9000, /minio/health/live), not Grafana — it looks copy-pasted
    # from another service. Confirm the intended endpoint before re-enabling.
    # healthcheck:
    #   test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
    #   interval: 30s
    #   timeout: 20s
    #   retries: 3
# Monitoring compose override: Prometheus + Alertmanager + Grafana, with
# scrape targets auto-discovered from container labels by
# prometheus-docker-sd (written to docker-targets.json on a shared volume).
version: "3.8"

services:
  # prometheus-scrape.* labels are read by prometheus-docker-sd to generate
  # file_sd targets for the "docker" job in prometheus.yml.
  clea-ws-rest:
    labels:
      - prometheus-scrape.enabled=true
      - prometheus-scrape.port=8080
      - prometheus-scrape.metrics_path=/actuator/prometheus

  clea-venue-consumer:
    labels:
      - prometheus-scrape.enabled=true
      - prometheus-scrape.port=8080
      - prometheus-scrape.metrics_path=/actuator/prometheus

  grafana:
    image: grafana/grafana:7.5.0
    ports:
      - "3000:3000"
    volumes:
      - ./docker-compose/grafana/provisioning:/etc/grafana/provisioning
      - ./docker-compose/grafana/dashboards:/var/lib/grafana/dashboards

  prometheus:
    image: prom/prometheus:v2.28.0
    volumes:
      - ./docker-compose/prometheus/:/etc/prometheus/
      # NOTE(review): Prometheus only reads the file_sd output below;
      # prometheus-docker-sd already owns the docker.sock mount, so this one
      # looks unnecessary — confirm before removing.
      - /var/run/docker.sock:/var/run/docker.sock
      - prometheus-docker-sd:/prometheus-docker-sd:ro
    ports:
      # Port mappings are quoted to sidestep YAML implicit-typing traps and
      # to match the "3000:3000" style used elsewhere in this file.
      - "9090:9090"

  alertmanager:
    image: prom/alertmanager:v0.22.2
    ports:
      - "9093:9093"
    volumes:
      - ./docker-compose/alertmanager/:/etc/alertmanager/
    command:
      - "--config.file=/etc/alertmanager/config.yml"
      - "--storage.path=/alertmanager"

  # ============================================================================
  # prometheus-docker-sd
  # ----------------------------------------------------------------------------
  # https://github.com/stuckyhm/prometheus-docker-sd
  prometheus-docker-sd:
    image: "stucky/prometheus-docker-sd:latest"
    restart: unless-stopped
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      # rw: this service writes docker-targets.json; prometheus mounts it ro.
      - prometheus-docker-sd:/prometheus-docker-sd:rw

# ==============================================================================
# Volumes
# ------------------------------------------------------------------------------
volumes:
  prometheus-docker-sd:
......@@ -20,3 +20,8 @@ datasources:
sslmode: disable
secureJsonData:
password: password
- name: prometheus
type: prometheus
url: http://prometheus:9090
jsonData:
sslmode: disable
# Prometheus alerting rules, loaded via rule_files in prometheus.yml and
# fired through Alertmanager.
groups:
  - name: tousanticovid
    rules:
      # Alert for any instance that is unreachable for >2 minutes.
      - alert: service_down
        expr: up == 0
        for: 2m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."

      # Alert when the 1-minute load average stays above 0.5 for 2 minutes.
      # NOTE(review): node_load1 comes from node-exporter, which is not among
      # the visible scrape jobs ("docker", "kong") — confirm it is deployed,
      # otherwise this rule can never fire.
      - alert: high_load
        expr: node_load1 > 0.5
        for: 2m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} under high load"
          description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
# my global config
global:
  scrape_interval: 15s # By default, scrape targets every 15 seconds.
  evaluation_interval: 15s
  # scrape_timeout is set to the global default (10s).

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: "tousanticovid"

# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
  - "alert.rules"

# alert
alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets:
            - "alertmanager:9093"

# A scrape configuration containing exactly one endpoint to scrape:
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  # Targets are generated by prometheus-docker-sd from container labels and
  # picked up from the shared volume via file-based service discovery.
  - job_name: "docker"
    file_sd_configs:
      - files:
          - /prometheus-docker-sd/docker-targets.json
  # Kong admin API (exposes Prometheus metrics on port 8001).
  - job_name: "kong"
    static_configs:
      - targets: ["kong:8001"]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment