diff --git a/.gitignore b/.gitignore index 0621410a592efcee9a9bc7e4a84058aab67bd735..e484e18c253b9bd16ef338e92f95bf5eb0bda35f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ vidjil/ __pycache__ .env -volumes/ +volumes*/ volumes_dev/ certs/*.pem diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f65577ed6516d063363c074ea2ae760051ceab5 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,29 @@ +# Metrics + +stages: + - publish_release + + +deploy_docker: + stage: publish_release + image: docker:latest + # dind runs with TLS disabled; declare `services` only once (a duplicate key silently overrides the first) + services: + - name: docker:dind + alias: docker + command: ["--tls=false"] + script: + - git submodule init + - git submodule update --remote + - export CUR_DATE=`date +%Y-%m-%d` + - export SHA=`echo $CI_COMMIT_SHA | awk '{ print substr($0, 0, 8) }'` + - export TAG="vidjil/metrics:$CUR_DATE-$SHA" + - echo "$CI_DOCKER_PASSWORD" | docker login -u "$CI_DOCKER_USER" --password-stdin $CI_DOCKER + - docker build --no-cache -t $TAG -f metrics/Dockerfile ./metrics/ + - docker push $TAG + - docker tag $TAG "vidjil/metrics:latest" + - docker push "vidjil/metrics:latest" + - docker rmi $TAG "vidjil/metrics:latest" + # when: manual + tags: + - cidocker diff --git a/docker-compose-node.yml b/docker-compose-node.yml new file mode 100644 index 0000000000000000000000000000000000000000..23b0c7096617f2cfbf6820c63227ef18aa693b94 --- /dev/null +++ b/docker-compose-node.yml @@ -0,0 +1,12 @@ +version: '3.3' + +services: + + ## This service allows getting cpu/memory/... information.
+ # It should be launched on the target server, not in this project + node_exporter: + image: prom/node-exporter:v0.18.1 + container_name: monitoring_node_exporter + hostname: monitoring_node_exporter + ports: + - "9100:9100" diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 78c113ea950ebec993b7ded78b0586bb23c567fd..5d648a37a8bcf3cc1faf8d11e279a517bf45a860 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -1,6 +1,6 @@ version: '3.3' services: - metrics_vidjil: + vidjil: volumes: - ./metrics/entrypoints:/entrypoints - ./metrics/vidjil/tools/api_vidjil.py:/app/vidjil/tools/api_vidjil.py diff --git a/docker-compose.yml b/docker-compose.yml index 0c1c3d023ade04049f1a25e5618ac0276ed5e913..5bccfcca669b313bb2c9eee6ef43b12cc7a3fc46 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,11 @@ services: prometheus: image: prom/prometheus:latest container_name: monitoring_prometheus - user: "1000:1000" + user: root + # Ports are commented out so the service is only available from the metrics network and not exposed + # If needed for dev, you can expose this port, but the data will then be fully exposed + # ports: + # - "9090:9090" deploy: replicas: 1 env_file: @@ -18,7 +22,10 @@ services: - '--config.file=/etc/prometheus/prometheus.yml' - '--storage.tsdb.path=/prometheus' - '--storage.tsdb.retention.time=365d' - network_mode: "host" + # network_mode: "host" + networks: + - metrics + hostname: monitoring_prometheus # alertmanager: # image: prom/alertmanager:v0.18.0 @@ -35,7 +42,9 @@ services: grafana: image: grafana/grafana:latest container_name: monitoring_grafana - user: "1000:1000" + user: root + ports: + - "3000:3000" environment: - GF_PATHS_CONFIG=/etc/grafana/grafana.ini env_file: @@ -50,7 +59,10 @@ services: - ./grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml - ./grafana/grafana.ini:/etc/grafana/grafana.ini - ./grafana/dashboards:/var/lib/grafana/dashboards - network_mode: "host" + # network_mode: "host" + networks: + - metrics +
hostname: monitoring_grafana # blackbox: # image: prom/blackbox-exporter:v0.14.0 @@ -61,16 +73,14 @@ services: # - ./blackbox:/config # network_mode: "host" - # node_exporter: - # image: prom/node-exporter:v0.18.1 - # container_name: monitoring_node_exporter - # deploy: - # replicas: 1 - # network_mode: "host" - metrics_vidjil: - image: vidjil/metrics-app:latest - container_name: monitoring_vidjil_instance + vidjil: + image: vidjil/metrics:latest + container_name: monitoring_vidjil + # Ports are commented out so the service is only available from the metrics network and not exposed + # If needed for dev, you can expose this port, but the data will then be fully exposed + # ports: + # - "5000:5000" env_file: # cp .env-sample as .env file and modify it with user/password of metrics declared user on server # Don't use an admin account. See XXX page to create metrics user if not already done @@ -85,9 +95,16 @@ services: - ./certs:/app/certs - ./metrics/conf.py:/app/conf.py command: bash entrypoints/flask-entrypoint.sh - network_mode: "host" + # network_mode: "host" + networks: + - metrics + hostname: monitoring_vidjil build: ./metrics/ + # volumes: # prometheus-data: - # grafana-data: \ No newline at end of file + # grafana-data: + +networks: + metrics: \ No newline at end of file diff --git a/grafana/datasource.yml b/grafana/datasource.yml index a0ea46ad23f6d38507ad264d12edd22b83f76723..b67f29a28bee2340d31943da35ffdc99abb23043 100644 --- a/grafana/datasource.yml +++ b/grafana/datasource.yml @@ -4,7 +4,7 @@ datasources: - name: Prometheus type: prometheus access: proxy - url: http://localhost:9090 + url: http://monitoring_prometheus:9090 # - name: Prometheus-perso2 # type: prometheus-perso2 # access: proxy diff --git a/metrics/Makefile b/metrics/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..cd6143b6f0ef4508d64515c0faa5e08119831483 --- /dev/null +++ b/metrics/Makefile @@ -0,0 +1,14 @@ +init_submodule: + git submodule init + git submodule update --remote + +build_metrics: +
docker compose build vidjil + +init: + python3 -m pip install -U pip + python3 -m pip install --no-cache -r requirements.txt + +unit: + #python3 -m pytest --cov=./ --cov-report html:cov.html -v -s . + python3 -m unittest test_main.py \ No newline at end of file diff --git a/metrics/conf.py b/metrics/conf.sample.py similarity index 74% rename from metrics/conf.py rename to metrics/conf.sample.py index ab099b7e666bd666359f09b0afc49bcb51b72536..522d66983ba5024f9a88115be6f7c89f3d5f8a87 100644 --- a/metrics/conf.py +++ b/metrics/conf.sample.py @@ -10,9 +10,6 @@ vidjil_servers = [ "certificat": "./certs/cert_localhost.pem", "user":"metrics@vidjil.org", "password": "foobartest" - # }, { - # "url": "https://app.vidjil.org/vidjil/", - # "certificat": "./certs/cert_app.vidjil.org.pem" } ] diff --git a/metrics/main.py b/metrics/main.py index 600bf4460dbe48dc8cb9d855d943d9ec1e74f534..e9c9c9b0fb448fdadd5929b44da40edfdb0b053e 100644 --- a/metrics/main.py +++ b/metrics/main.py @@ -18,8 +18,6 @@ def get_env_list(): @app.route('/metrics_test') def metrics_test(): content = "" - # user = os.getenv('METRICS_USER_EMAIL') - # password = os.getenv('METRICS_USER_PASSWORD') dev_mode = os.getenv('DEV_MODE') print( "====== /metrics_test") @@ -44,7 +42,7 @@ def unavailableServer(url) -> str: return f"\ # HELP vidjil_api_request_unavailable_server {'A server is unreachable'}\n\ # TYPE vidjil_api_request_unavailable_server {'gauge'}\n\ -vidjil_api_request_unavailable_server {{server={cleanUrlServer(url)}}}" +vidjil_api_request_unavailable_server{{server=\"{cleanUrlServer(url)}\"}} 1\n" @app.route('/metrics') @@ -65,7 +63,7 @@ def metrics(): vidjil = Vidjil(url_server, ssl=certificat) vidjil.login(user, password) - metrics = vidjil.metrics() + metrics = vidjil.metrics(metrics_list="all") # print(f"{metrics=}") formated_response += formated_metrics(metrics,cleanUrlServer(url_server)) except Exception as e: @@ -112,6 +110,9 @@ def formated_metrics(metrics, url_server): 'set_generics_by_group':
{"type": "gauge", "description": "set_generics_by_group.", "function": convert_set_generics_by_group}, 'set_runs_by_group': {"type": "gauge", "description": "set_runs_by_group.", "function": convert_set_runs_by_group}, 'set_patients_by_group': {"type": "gauge", "description": "set_patients_by_group.", "function": convert_set_patients_by_group}, + + ### Not used + # 'request_times': {"type": "gauge", "description": "request_times.", "function": convert_request_times}, } string = f"# Information : {metrics['message']}\n" @@ -205,7 +206,6 @@ def convert_login_count(metrics_key, formater, data, url_server): # HELP vidjil_api_request_login_count {formater['description']}\n\ # TYPE vidjil_api_request_login_count {formater['type']}\n" for elt in data: - # print(elt) if elt['auth_user']['email'] == "metrics@vidjil.org": continue string += f"\ @@ -225,7 +225,6 @@ def convert_status_analysis(metrics_key, formater, data, url_server): # HELP vidjil_api_request_status_analysis {formater['description']}\n\ # TYPE vidjil_api_request_status_analysis {formater['type']}\n" for elt in data: - print(elt) string += f"\ vidjil_api_request_status_analysis\ {{status=\"{elt['scheduler_task']['status']}\", task_name=\"{elt['scheduler_task']['task_name']}\", server=\"{url_server}\"}} \ @@ -313,9 +312,11 @@ def convert_sequence_size_by_user(metrics_key, formater, data, url_server): # HELP vidjil_api_request_sequence_size_by_user {formater['description']}\n\ # TYPE vidjil_api_request_sequence_size_by_user {formater['type']}\n" for elt in data: + # Some old rows have a None size (no provider filled); export -1 as a sentinel so the sample value stays numeric + size = elt['size_file_sum'] if elt['size_file_sum'] is not None else -1 string += f"\ vidjil_api_request_sequence_size_by_user\ -{{user_id=\"{elt['user_id']}\", server=\"{url_server}\"}} {elt['size_file_sum']}\n" +{{user_id=\"{elt['user_id']}\", server=\"{url_server}\"}} {size}\n" return string @@ -365,15 +366,16 @@ vidjil_api_request_set_generics_count\ return string +#
def convert_request_times(metrics_key, formater, data, url_server): +# return f"=*=*= convert_request_times ==> {data}" +# string = f"\ +# # HELP vidjil_api_request_request_times {formater['description']}\n\ +# # TYPE vidjil_api_request_request_times {formater['type']}\n" +# return string + def convert_config_analysis_by_users_patients(metrics_key, formater, data, url_server): - # "status_analysis": [{ - # 'config_id': 2, - # 'config_name': 'multi+inc+xxx', - # 'count': 3, - # 'user_id': 1}, ...] - # print( "=====\n===== convert_config_analysis_by_users_patients") - # print(f"{data[:2]=}") + # return f"=*=*= convert_config_analysis_by_users_patients ==> {data[:2]}" string = f"\ # HELP vidjil_api_request_config_analysis_by_users_patients {formater['description']}\n\ # TYPE vidjil_api_request_config_analysis_by_users_patients {formater['type']}\n" diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index f61ddff390aefda56ef867b17b57481d26493df4..f5f4605151093d7e80d52dc01df0ca7bafc836f2 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -1,5 +1,5 @@ global: - scrape_interval: 15s # Par défaut prometheus va scrapper les cibles toutes les 15 secondes + scrape_interval: 90s # By default, Prometheus scrapes the targets every 90 seconds # Attach these labels to any time series or alerts when communicating with # external systems (federation, remote storage, Alertmanager). @@ -21,16 +21,16 @@ global: # rule_files: # - "/alertmanager/alert.rules/alerts.rules.yml" -# la scrape configuration de prometheus, les hosts qu'il va contacter pour chercher les métrique + scrape_configs: - - job_name: 'measure server' # <-- j'ai mis ce titre de job mais le truc c'est de mettre un nom de job explicite + - job_name: 'measure server' # Ask for node exporter (cpu, memory, ...)
on each target server at port 9100 static_configs: - - targets: ['127.0.0.1:9100', '192.168.1.77:9100'] - scrape_interval: 5s + - targets: [] + scrape_interval: 30s - job_name: 'vidjil' static_configs: - - targets: ['127.0.0.1:5000'] + - targets: ['monitoring_vidjil:5000'] scrape_interval: 90s diff --git a/readme.md b/readme.md index 73d3c814ec302909322ca551a9768e1f113fbdcb..aad156affdd49cd2e0f3f757fa3654249d4d434f 100644 --- a/readme.md +++ b/readme.md @@ -7,37 +7,80 @@ You can find information on third-party-tools at: - [Grafana](https://github.com/grafana/grafana) <!-- - [Alertmanager](https://github.com/prometheus/alertmanager) --> <!-- - [Blackbox exporter](https://github.com/prometheus/blackbox_exporter) --> -<!-- - [Node exporter](https://github.com/prometheus/node_exporter) --> +- [Node exporter](https://github.com/prometheus/node_exporter) -### Set parameters +### Set project + +#### Vidjil submodules initialisation + +This project depends on the vidjil component. +A first step is needed to fetch the vidjil repository inside this project. + +You need to launch this command to set it up. + +``` +make init_submodule +``` + +If everything works fine, the Vidjil submodule will be downloaded under `metrics/vidjil`. + #### Grafana -J'ai changé le login/pass par défaut, voici les credentials de grafana : +Default credentials of the grafana server are `admin/admin`. +At first login, you will be asked to set a new password. + +<!-- TODO: fill in information to get the correct dashboard at startup --> -- login : `unicorn` -- password : `UnicornsExists!` +#### Prometheus + +Configuration should be set in the `prometheus/prometheus.yml` file. +For vidjil metrics, everything should work out of the box. +However, you need to declare target servers for node exporter in the `measure server` job. #### Vidjil servers -TODO -## lancer la stack +Configuration can be set in a conf file `metrics/conf.py`. A sample is present under `metrics/conf.sample.py`.
+Please copy and rename this file as `conf.py` to be sure not to push your credentials to the repository. +You can then modify the configuration to set the url, metrics user and password. + +You can also use an SSL certificate for the API as described in the [vidjil documentation](https://www.vidjil.org/doc/api/). +On the target server, you need to set up a metrics user if not already done. +See this [page](https://www.vidjil.org/doc/admin/#server-monitoring) for more information on how to do that. + +#### Node exporter + +Node exporter is a dedicated service to monitor the hardware of a server (CPU and memory usage, disk space, ...). +It should be launched on each target and declared in prometheus as specified above. + +You can use the dedicated `docker-compose-node.yml` file for that. + + +## Launch services + To launch this stack, you will only need to launch a docker-compose file and set some variable on server to follow : -```console -docker-compose up -d + ``` +docker-compose up -d +``` + +Note that for the moment, no image is available for vidjil metrics. +At first launch, you should run `make build_metrics` to build the metrics docker image that will be used. +Note that if you made some changes and want to rebuild this image, you can add `--build` to `docker compose up -d` to build the metrics server image at launch. + +By default, only the grafana server will be served on port 3000. +As metrics and prometheus don't provide user and credential management, +we use an internal network allowing communication between services but not with the external world. +However, you can open the network by uncommenting the `ports` declaration of these services. Host network will be used.
You can find services on **localhost** at these port - Prometheus : `9090` - Grafana : `3000` +- Metrics vidjil: `5000` +- Node exporter : `9100` <!-- - Alertmanager : `9093` --> -<!-- - Node exporter : `9100` --> - - -## ouidou -Chez ouidou, nous aimons jouer avec les nouvelles technologies et nous sommes toujours curieux de découvrir des projets sympa et de nouvelles personnes. On recrute, donc n'hésite pas à nous contacter [ici](mailto:contact@ouidou.fr) et de visiter notre site [ici](https://ouidou.fr). <!-- Based on a projet of ouidou found on github --> <!-- Ce dépôt est en lien avec mon article medium : https://medium.com/ouidou/un-monitoring-complet-en-quelques-minutes-avec-prometheus-33e849e6392e --> \ No newline at end of file