diff --git a/deploy/G5k/K3S/config/configmap.yaml b/deploy/G5k/K3S/config/configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f6ac241b06d4348014da6892474fdc9f2e81889 --- /dev/null +++ b/deploy/G5k/K3S/config/configmap.yaml @@ -0,0 +1,50 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-config +data: + otel-collector-config.yaml: | + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + + exporters: + prometheus: + endpoint: "0.0.0.0:8889" + const_labels: + label1: value1 + + debug: + + zipkin: + endpoint: "http://zipkin-all-in-one:9411/api/v2/spans" + format: proto + + otlp: + endpoint: jaeger:4317 + tls: + insecure: true + + processors: + batch: + + extensions: + health_check: + pprof: + endpoint: :1888 + zpages: + endpoint: :55679 + + service: + extensions: [pprof, zpages, health_check] + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [debug, zipkin, otlp] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [debug, prometheus] diff --git a/deploy/G5k/K3S/config/costum-values.yaml b/deploy/G5k/K3S/config/costum-values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3c0443b7337388656eba428cd5e3d70db96c367 --- /dev/null +++ b/deploy/G5k/K3S/config/costum-values.yaml @@ -0,0 +1,279 @@ +serverFiles: + prometheus.yml: + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-apiservers + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: default;kubernetes;https + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_service_name + - __meta_kubernetes_endpoint_port_name + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + + - bearer_token_file: 
/var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-nodes + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - replacement: kubernetes.default.svc:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$1/proxy/metrics + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-nodes-cadvisor + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - replacement: kubernetes.default.svc:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + + - honor_labels: true + job_name: kubernetes-service-endpoints + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape + - action: drop + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow + - action: replace + regex: (https?) 
+ source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: (.+?)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+) + replacement: __param_$1 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: service + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: node + + - honor_labels: true + job_name: kubernetes-service-endpoints-slow + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow + - action: replace + regex: (https?) 
+ source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: (.+?)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+) + replacement: __param_$1 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: service + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: node + scrape_interval: 5m + scrape_timeout: 30s + + - honor_labels: true + job_name: prometheus-pushgateway + kubernetes_sd_configs: + - role: service + relabel_configs: + - action: keep + regex: pushgateway + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + + - honor_labels: true + job_name: kubernetes-services + kubernetes_sd_configs: + - role: service + metrics_path: /probe + params: + module: + - http_2xx + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + - source_labels: + - __address__ + target_label: __param_target + - replacement: blackbox + target_label: __address__ + - source_labels: + - __param_target + target_label: instance + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - source_labels: + - __meta_kubernetes_service_name + target_label: service + + - honor_labels: true + job_name: kubernetes-pods + kubernetes_sd_configs: + - role: pod + relabel_configs: + - 
action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape + - action: drop + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow + - action: replace + regex: (https?) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}) + replacement: '[$2]:$1' + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_port + - __meta_kubernetes_pod_ip + target_label: __address__ + - action: replace + regex: (\d+);((([0-9]+?)(\.|$)){4}) + replacement: $2:$1 + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_port + - __meta_kubernetes_pod_ip + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) + replacement: __param_$1 + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: pod + - action: drop + regex: Pending|Succeeded|Failed|Completed + source_labels: + - __meta_kubernetes_pod_phase + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: node + + - honor_labels: true + job_name: blackbox + metrics_path: /probe + params: + module: + - http_2xx + static_configs: + - targets: + - https://prometheus.io + - https://grafana.com + relabel_configs: + - source_labels: + - __address__ + target_label: __param_target + - target_label: instance + source_labels: + - __param_target + - replacement: blackbox + target_label: __address__ + + # The new job to scrape the OpenTelemetry Collector + - job_name: 'otel-collector' + static_configs: + - 
targets: ['otel-collector:8889'] # Update this with the correct endpoint if needed diff --git a/deploy/G5k/K3S/config/deployement.yaml b/deploy/G5k/K3S/config/deployement.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f61f5fae48b1cbc512c548411c73723886866e8 --- /dev/null +++ b/deploy/G5k/K3S/config/deployement.yaml @@ -0,0 +1,346 @@ +apiVersion: v1 +kind: Service +metadata: + name: object-recognizer + namespace: default +spec: + ports: + - protocol: TCP + port: 9999 + targetPort: 9999 + selector: + app: object-recognizer + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: object-recognizer + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: object-recognizer + template: + metadata: + labels: + app: object-recognizer + spec: + nodeSelector: + pos: cloud # Node selector for cloud nodes + containers: + - name: object-recognizer + image: medkaddour/object_recognizer:latest + imagePullPolicy: Always + ports: + - containerPort: 9999 + imagePullSecrets: + - name: dockerhub-secret # Reference to the Docker registry secret +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: motion-detector-1 +spec: + replicas: 1 + selector: + matchLabels: + app: motion-detector-1 + template: + metadata: + labels: + app: motion-detector-1 + spec: + nodeSelector: + pos: edge # Node selector for edge nodes + containers: + - name: motion-detector + image: medkaddour/motion_detector + imagePullPolicy: Always + ports: + - containerPort: 9998 + command: ["/bin/sh", "-c"] + args: ["echo 1 > /app/index.txt && python motion_detection.py --host object-recognizer --port 9999"] + imagePullSecrets: + - name: dockerhub-secret # Reference to the Docker registry secret +--- +apiVersion: v1 +kind: Service +metadata: + name: motion-detector-1 +spec: + selector: + app: motion-detector-1 + ports: + - protocol: TCP + port: 9998 + targetPort: 9998 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: 
motion-detector-2 +spec: + replicas: 1 + selector: + matchLabels: + app: motion-detector-2 + template: + metadata: + labels: + app: motion-detector-2 + spec: + nodeSelector: + pos: edge # Node selector for edge nodes + containers: + - name: motion-detector + image: medkaddour/motion_detector + imagePullPolicy: Always + ports: + - containerPort: 9998 + command: ["/bin/sh", "-c"] + args: ["echo 2 > /app/index.txt && python motion_detection.py --host object-recognizer --port 9999"] + imagePullSecrets: + - name: dockerhub-secret # Reference to the Docker registry secret +--- +apiVersion: v1 +kind: Service +metadata: + name: motion-detector-2 +spec: + selector: + app: motion-detector-2 + ports: + - protocol: TCP + port: 9998 + targetPort: 9998 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: motion-detector-3 +spec: + replicas: 1 + selector: + matchLabels: + app: motion-detector-3 + template: + metadata: + labels: + app: motion-detector-3 + spec: + nodeSelector: + pos: edge # Node selector for edge nodes + containers: + - name: motion-detector + image: medkaddour/motion_detector + imagePullPolicy: Always + ports: + - containerPort: 9998 + command: ["/bin/sh", "-c"] + args: ["echo 3 > /app/index.txt && python motion_detection.py --host object-recognizer --port 9999"] + imagePullSecrets: + - name: dockerhub-secret # Reference to the Docker registry secret +--- +apiVersion: v1 +kind: Service +metadata: + name: motion-detector-3 +spec: + selector: + app: motion-detector-3 + ports: + - protocol: TCP + port: 9998 + targetPort: 9998 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: camera-1 +spec: + replicas: 1 + selector: + matchLabels: + app: camera-1 + template: + metadata: + labels: + app: camera-1 + spec: + nodeSelector: + pos: camera # Node selector for camera nodes + containers: + - name: camera + image: medkaddour/camera + imagePullPolicy: Always + command: ["/bin/sh", "-c"] + args: ["echo 1 > /app/index.txt && python camera.py --frequency 6 
--mdhost motion-detector-1 --mdport 9998"] + imagePullSecrets: + - name: dockerhub-secret # Reference to the Docker registry secret +--- +apiVersion: v1 +kind: Service +metadata: + name: camera-1 +spec: + selector: + app: camera-1 + ports: + - protocol: TCP + port: 9998 + targetPort: 9998 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: camera-2 +spec: + replicas: 1 + selector: + matchLabels: + app: camera-2 + template: + metadata: + labels: + app: camera-2 + spec: + nodeSelector: + pos: camera # Node selector for camera nodes + containers: + - name: camera + image: medkaddour/camera + imagePullPolicy: Always + command: ["/bin/sh", "-c"] + args: ["echo 2 > /app/index.txt && python camera.py --frequency 6 --mdhost motion-detector-2 --mdport 9998"] + imagePullSecrets: + - name: dockerhub-secret # Reference to the Docker registry secret +--- +apiVersion: v1 +kind: Service +metadata: + name: camera-2 +spec: + selector: + app: camera-2 + ports: + - protocol: TCP + port: 9998 + targetPort: 9998 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: camera-3 +spec: + replicas: 1 + selector: + matchLabels: + app: camera-3 + template: + metadata: + labels: + app: camera-3 + spec: + nodeSelector: + pos: camera # Node selector for camera nodes + containers: + - name: camera + image: medkaddour/camera + imagePullPolicy: Always + command: ["/bin/sh", "-c"] + args: ["echo 3 > /app/index.txt && python camera.py --frequency 6 --mdhost motion-detector-3 --mdport 9998"] + imagePullSecrets: + - name: dockerhub-secret # Reference to the Docker registry secret +--- +apiVersion: v1 +kind: Service +metadata: + name: camera-3 +spec: + selector: + app: camera-3 + ports: + - protocol: TCP + port: 9998 + targetPort: 9998 +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector +spec: + replicas: 1 + selector: + matchLabels: + app: otel-collector + template: + metadata: + labels: + app: otel-collector + spec: + nodeSelector: + pos: cloud # Node 
selector for cloud nodes + containers: + - name: otel-collector + image: otel/opentelemetry-collector-contrib + imagePullPolicy: Always + args: ["--config=/etc/otel-collector-config.yaml"] + ports: + - name: pprof + containerPort: 1888 + - name: prometheus + containerPort: 8888 + - name: prometheus-ex + containerPort: 8889 + - name: health-check + containerPort: 13133 + - name: otlp-grpc + containerPort: 4317 + - name: zpages + containerPort: 55679 + volumeMounts: + - name: config-volume + mountPath: /etc/otel-collector-config.yaml + subPath: otel-collector-config.yaml + volumes: + - name: config-volume + configMap: + name: otel-collector-config + imagePullSecrets: + - name: dockerhub-secret # Reference to the Docker registry secret +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector +spec: + selector: + app: otel-collector + ports: + - name: pprof + protocol: TCP + port: 1888 + targetPort: 1888 + - name: prometheus + protocol: TCP + port: 8888 + targetPort: 8888 + - name: prometheus-ex + protocol: TCP + port: 8889 + targetPort: 8889 + - name: health-check + protocol: TCP + port: 13133 + targetPort: 13133 + - name: otlp-grpc + protocol: TCP + port: 4317 + targetPort: 4317 + - name: zpages + protocol: TCP + port: 55679 + targetPort: 55679 diff --git a/deploy/G5k/K3S/deploy on K3S.py b/deploy/G5k/K3S/deploy on K3S.py new file mode 100644 index 0000000000000000000000000000000000000000..8302b03480c5cce2874c14612c5aaf5095ddf125 --- /dev/null +++ b/deploy/G5k/K3S/deploy on K3S.py @@ -0,0 +1,176 @@ +import os +import subprocess +from datetime import datetime +import enoslib as en + +# --------------------------- +# Step 1: Initialize Logging +# --------------------------- +_ = en.init_logging() + +# Generate a timestamp for the job name +timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') +job_name = f"usecase_{timestamp}" + +# --------------------------- +# Step 2: Cluster Resource Configuration +# --------------------------- +cluster = "parasilo" 
+conf = ( + en.VMonG5kConf + .from_settings(job_name=job_name, walltime="04:00:00") + .add_machine(roles=["master"], cluster=cluster, number=1, flavour="large") + .add_machine(roles=["agent", "object_recognizer"], cluster=cluster, number=1, flavour_desc={"core": 8, "mem": 8192}) + .add_machine(roles=["agent", "motion_detector"], cluster=cluster, number=3, flavour_desc={"core": 4, "mem": 4096}) + .add_machine(roles=["agent", "camera"], cluster=cluster, number=3, flavour_desc={"core": 1, "mem": 1024}) + .finalize() +) + +provider = en.VMonG5k(conf) +roles, networks = provider.init() +en.wait_for(roles) + +# --------------------------- +# Step 3: Kubernetes Setup +# --------------------------- +k3s = en.K3s(master=roles["master"], agent=roles["agent"]) +k3s.deploy() + +print("Create a tunnel from your local machine to the head node:") +print(f"ssh -NL 8001:{roles['master'][0].address}:8001 skaddour@access.grid5000.fr") + + +# --------------------------- +# Step 4: Label Nodes +# --------------------------- +def label_nodes(node_names, labels): + label_str = ",".join(f"{key}={value}" for key, value in labels.items()) + + for node in node_names: + command = f"kubectl label nodes {node} {label_str} --overwrite" + try: + result = en.run_command(command, roles=roles['master']) + print(f"Successfully labeled {node} with {label_str}") + except subprocess.CalledProcessError as e: + print(f"Failed to label {node}. 
Error: {e.stderr.decode('utf-8')}") + + +label_nodes([str(node.alias) for node in roles["object_recognizer"]], {"pos": "cloud"}) +label_nodes([str(node.alias) for node in roles["motion_detector"]], {"pos": "edge"}) +label_nodes([str(node.alias) for node in roles["camera"]], {"pos": "camera"}) + +# --------------------------- +# Step 5: Docker Registry Secret with Anonymized Credentials +# --------------------------- +docker_username = "username_placeholder" +docker_password = "password_placeholder" +docker_email = "email_placeholder" + +# Create the Docker registry secret using the anonymized credentials +en.run_command(f"kubectl create secret docker-registry dockerhub-secret \ + --docker-username={docker_username} \ + --docker-password={docker_password} \ + --docker-email={docker_email}", roles=roles['master']) + + +# --------------------------- +# Step 6: Send Files to Cluster +# --------------------------- +def send_file(file_path, file_name): + scp_command = f"scp {file_path} root@{roles['master'][0].address}:{file_name}" + os.system(f"ssh-keygen -f {os.path.expanduser('~/.ssh/known_hosts')} -R {roles['master'][0].address}") + os.system(scp_command) + return scp_command + + +local_directory = "deploy/G5k/K3S/config" +for root, dirs, files in os.walk(local_directory): + for file_name in files: + print(os.path.join(root, file_name)) + send_file(os.path.join(root, file_name), file_name) + +en.run_command("kubectl apply -f deployement.yaml", roles=roles['master']) +en.run_command("kubectl apply -f configmap.yaml", roles=roles['master']) + +# --------------------------- +# Step 7: Helm Installation and Prometheus Setup +# --------------------------- +with en.actions(roles=roles["master"]) as a: + a.raw("curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3") + a.raw("chmod 700 get_helm.sh") + a.raw("./get_helm.sh") + + # Install Prometheus + a.raw( + "helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo add prometheus-community 
https://prometheus-community.github.io/helm-charts") + a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo update") + a.raw( + "helm --kubeconfig /etc/rancher/k3s/k3s.yaml install prometheus prometheus-community/prometheus -n default --set grafana.enabled=True -f costum-values.yaml") + a.raw( + "helm --kubeconfig /etc/rancher/k3s/k3s.yaml upgrade --kubeconfig /etc/rancher/k3s/k3s.yaml prometheus prometheus-community/prometheus --namespace default --set prometheus-node-exporter.service.port=9500") + +# --------------------------- +# Step 8: Chaos Mesh Installation +# --------------------------- +with en.actions(roles=roles["master"]) as a: + a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo add chaos-mesh https://charts.chaos-mesh.org") + a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo update") + a.raw( + "helm --kubeconfig /etc/rancher/k3s/k3s.yaml install chaos-mesh chaos-mesh/chaos-mesh --set chaosDaemon.runtime=containerd --set chaosDaemon.containerdSocket=/run/containerd/containerd.sock -n default") + a.raw("kubectl apply -f rbac.yaml") + a.raw("kubectl create token account-default-admin-eechh") + a.raw( + 'kubectl patch svc chaos-dashboard -n default -p \'{"spec": {"ports": [{"name": "http", "protocol": "TCP", "port": 2333, "targetPort": 2333, "nodePort": 30312}]}}\'') + +# --------------------------- +# Step 9: Patch Prometheus Service +# --------------------------- +with en.actions(roles=roles["master"]) as a: + a.raw( + 'kubectl patch svc prometheus-server -n default -p \'{"spec": {"type": "NodePort", "ports": [{"name": "http", "port": 80, "targetPort": 9090, "nodePort": 30090}]}}\'') + + +# --------------------------- +# Step 10: Fetch Webpage Hosts +# --------------------------- +def get_host(service): + results = en.run_command(f"kubectl get pods -n kube-system -l app={service}", roles=roles['master']) + import re + pattern = r"host='([^']*)'" + match = re.search(pattern, str(results)) + + if match: + host = match.group(1) + 
print("Extracted host:", host) + else: + print("Host not found") + return None + + results = en.run_command(f"kubectl describe node {host}", roles=roles['master']) + pattern = r"InternalIP:\s*([\d.]+)" + match = re.search(pattern, str(results)) + + if match: + return match.group(1) + else: + print("Host not found") + return None + + +url = f"http://{get_host('prometheus')}:30090" +print(f"prometheus web page host: {url}") + +url = f"http://{get_host('chaos-dashboard')}:30312" +print(f"Chaos-mesh web page host: {url}") + +# --------------------------- +# Step 11: Token Creation and Cleanup +# --------------------------- +results = en.run_command("kubectl create token account-default-admin-eechh", roles=roles["master"]) +for res in results: + print(res.payload['stdout']) + +# --------------------------- +# Step 12: Cleanup +# --------------------------- +provider.destroy() diff --git a/deploy/G5k/K3S/kuberneteestest (1).ipynb b/deploy/G5k/K3S/kuberneteestest (1).ipynb new file mode 100644 index 0000000000000000000000000000000000000000..39f60a8409c3e4ec4e1bb54f9f097f871be6f4e0 --- /dev/null +++ b/deploy/G5k/K3S/kuberneteestest (1).ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "code", + "source": [ + "import enoslib as en\n", + "from datetime import datetime\n", + "# Enable rich logging\n", + "_ = en.init_logging()\n", + "\n", + " # Generate a timestamp for the job name\n", + "timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')\n", + "job_name = f\"usecase_{timestamp}\"\n", + "# claim the resources\n", + "cluster=\"parasilo\"\n", + "conf = (\n", + " en.VMonG5kConf\n", + " .from_settings(job_name=job_name, walltime=\"04:00:00\")\n", + " .add_machine(\n", + " roles=[\"master\"],\n", + " cluster=cluster,\n", + " number=1,\n", + " flavour=\"large\",\n", + " \n", + " )\n", + " .add_machine(\n", + " roles=[\"agent\",\"object_recognizer\"],\n", + " cluster=cluster,\n", + " number=1,\n", + " flavour_desc={\"core\": 8, \"mem\": 8192}\n", + " 
)\n", + " .add_machine(\n", + " roles=[\"agent\",\"motion_detector\"],\n", + " cluster=cluster,\n", + " number=3,\n", + " flavour_desc={\"core\": 4, \"mem\": 4096})\n", + " .add_machine(\n", + " roles=[\"agent\",\"camera\"],\n", + " cluster=cluster,\n", + " number=3,\n", + " flavour_desc={\"core\": 1, \"mem\": 1024}\n", + " )\n", + " .finalize()\n", + ")\n", + "\n", + "\n", + "provider = en.VMonG5k(conf)\n", + "\n", + "roles, networks = provider.init()\n", + "\n", + "roles \n", + "en.wait_for(roles)\n" + ], + "id": "23a4b2e5f46eca8d", + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "169fbc50-a57c-46c2-993e-292011defca3", + "metadata": {}, + "source": [ + "k3s = en.K3s(master=roles[\"master\"], agent=roles[\"agent\"])\n", + "k3s.deploy()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "128b9653-2c2c-4647-a8ea-6cc7de0ee9e8", + "metadata": {}, + "source": [ + "print(\"Create a tunnel from your local machine to the head node:\")\n", + "print(f\"ssh -NL 8001:{roles['master'][0].address}:8001 skaddour@access.grid5000.fr\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "95df0232-3017-4ac6-87f8-2d5b642a41bf", + "metadata": {}, + "source": [ + "def label_nodes(node_names, labels):\n", + " \"\"\"\n", + " Labels the specified Kubernetes nodes with the provided labels.\n", + "\n", + " :param node_names: List of node names to label.\n", + " :param labels: Dictionary of label key-value pairs.\n", + " \"\"\"\n", + " # Construct the label string from the dictionary\n", + " label_str = \",\".join(f\"{key}={value}\" for key, value in labels.items())\n", + " \n", + " for node in node_names:\n", + " # Construct the kubectl command\n", + " command = \"kubectl label nodes \"+ node +\" \"+ label_str+ \" --overwrite\"\n", + " \n", + " try:\n", + " # Execute the kubectl command\n", + " result = en.run_command(command, roles=roles['master'])\n", + " print(f\"Successfully 
labeled {node} with {label_str}\")\n", + " except subprocess.CalledProcessError as e:\n", + " print(f\"Failed to label {node}. Error: {e.stderr.decode('utf-8')}\")\n", + "\n", + "label_nodes([str(node.alias) for node in roles[\"object_recognizer\"]],{\"pos\":\"cloud\"})\n", + "label_nodes([str(node.alias) for node in roles[\"motion_detector\"]],{\"pos\":\"edge\"})\n", + "label_nodes([str(node.alias) for node in roles[\"camera\"]],{\"pos\":\"camera\"})" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "d3fd6791-a3dc-4735-a8f3-111ca24edd6e", + "metadata": {}, + "source": [ + "en.run_command(\"kubectl create secret docker-registry dockerhub-secret \\\n", + " --docker-username=<DOCKERHUB_USERNAME> \\\n", + " --docker-password=<DOCKERHUB_ACCESS_TOKEN>\\\n", + " --docker-email=<DOCKERHUB_EMAIL>\", roles=roles['master'])\n" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "257fa070-f648-4f63-ac18-3d3089872d12", + "metadata": {}, + "source": [ + "import os\n", + "import subprocess\n", + "def send_file(file_path,file_name):\n", + " scp_command = f\"scp {file_path} root@{roles['master'][0].address}:{file_name}\"\n", + " os.system( f\"ssh-keygen -f /home/skaddour/.ssh/known_hosts -R {roles['master'][0].address}\")\n", + " os.system(scp_command)\n", + " \n", + " return scp_command\n", + "local_directory=\"/home/skaddour/edge-adapt-main/deployement/\"\n", + "for root, dirs, files in os.walk(local_directory):\n", + " for file_name in files:\n", + " print(local_directory +file_name)\n", + " send_file(local_directory +file_name,file_name)\n", + " \n", + "en.run_command(\"kubectl apply -f deployement.yaml\", roles=roles['master'])\n", + "en.run_command(\"kubectl apply -f configmap.yaml\", roles=roles['master'])" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "b448903b-6209-4620-a495-8ec3af61edca", + "metadata": {}, + "source": [ + "with 
en.actions(roles=roles[\"master\"]) as a:\n", + " # Helm\n", + " a.raw(\"curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3\")\n", + " a.raw(\"chmod 700 get_helm.sh\")\n", + " a.raw(\"./get_helm.sh\")\n", + "\n", + " # Prometheus\n", + " a.raw(\"helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo add prometheus-community https://prometheus-community.github.io/helm-charts\")\n", + " a.raw(\"helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo update\")\n", + " #a.raw(\"helm --kubeconfig /etc/rancher/k3s/k3s.yaml uninstall prometheus -n default\")\n", + " a.raw(\"helm --kubeconfig /etc/rancher/k3s/k3s.yaml install prometheus prometheus-community/prometheus -n default --set grafana.enabled=True -f costum-values.yaml\")\n", + " #a.raw(\"helm upgrade --kubeconfig /etc/rancher/k3s/k3s.yaml prometheus prometheus-community/prometheus --namespace default -f costum-values.yaml\")\n", + " a.raw(\"helm upgrade --kubeconfig /etc/rancher/k3s/k3s.yaml prometheus prometheus-community/prometheus --namespace default --set prometheus-node-exporter.service.port=9500\")\n", + "\n" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "80cc2eba-3a04-4a5a-9712-f4c61283173b", + "metadata": {}, + "source": [ + "en.run_command(\"helm upgrade --kubeconfig /etc/rancher/k3s/k3s.yaml prometheus prometheus-community/prometheus --namespace default -f costum-values.yaml\", roles=roles['master'])" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "b8b5c6eb-f554-49a3-a601-458ba79fb215", + "metadata": {}, + "source": [ + "def get_host(service):\n", + " results = en.run_command(\"kubectl get pods -n kube-system -l app=\"+service, roles=roles['master'])\n", + " #print(results)\n", + " import re\n", + " # Define a regex pattern to match the host value\n", + " pattern = r\"host='([^']*)'\"\n", + " \n", + " # Search for the pattern in the text\n", + " match = re.search(pattern, str(results))\n", + " 
\n", + " if match:\n", + " host = match.group(1)\n", + " print(\"Extracted host:\", host)\n", + " else:\n", + " print(\"Host not found\")\n", + " results = en.run_command(\"kubectl describe node \"+host, roles=roles['master'])\n", + " #print(results)\n", + " pattern = r\"InternalIP:\\s*([\\d.]+)\"\n", + " \n", + " # Search for the pattern in the text\n", + " match = re.search(pattern, str(results))\n", + " if match:\n", + " host = match.group(1)\n", + " return host\n", + " \n", + " \n", + " else:\n", + " print(\"Host not found\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "e78c3f76-b77b-40d3-9958-c926e2af81cf", + "metadata": {}, + "source": [ + "with en.actions(roles=roles[\"master\"]) as a:\n", + " # Install Chaos Mesh\n", + " #a.raw(\"kubectl apply -f https://charts.chaos-mesh.org/chaos-mesh-v2.1.0.tgz\")\n", + " \n", + " # Install Helm chart for Chaos Mesh\n", + " a.raw(\"helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo add chaos-mesh https://charts.chaos-mesh.org\")\n", + " a.raw(\"helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo update\")\n", + " a.raw(\"helm --kubeconfig /etc/rancher/k3s/k3s.yaml install chaos-mesh chaos-mesh/chaos-mesh --set chaosDaemon.runtime=containerd --set chaosDaemon.containerdSocket=/run/containerd/containerd.sock -n default\")\n", + " a.raw(\"kubectl apply -f rbac.yaml\")\n", + " a.raw(\"kubectl create token account-default-admin-eechh\")\n", + " a.raw('kubectl patch svc chaos-dashboard -n default -p \\'{\"spec\": {\"ports\": [{\"name\": \"http\", \"protocol\": \"TCP\", \"port\": 2333, \"targetPort\": 2333, \"nodePort\": 30312}]}}\\'')\n", + "\n" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "36a27fcb-80dd-4daa-a8d4-3981fa4a88e7", + "metadata": {}, + "source": [ + "with en.actions(roles=roles[\"master\"]) as a:\n", + " # Helm\n", + " a.raw('kubectl patch svc prometheus-server -n default -p \\'{\"spec\": {\"type\": \"NodePort\", \"ports\": 
[{\"name\": \"http\", \"port\": 80, \"targetPort\": 9090, \"nodePort\": 30090}]}}\\'')\n" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "a46efa7f-4c7e-4f24-8f96-55fcee00686c", + "metadata": {}, + "source": [ + "# Assuming get_host is a function that returns the host string\n", + "urlfb = f\"http://{get_host('feedbackmechanism')}:30095\"\n", + "print(f\"feedbackmechanisme web page host: {urlfb}\")\n", + "\n", + "url = f\"http://{get_host('prometheus')}:30090\"\n", + "print(f\"prometheus web page host: {url}\")\n", + "\n", + "url = f\"http://{get_host('chaos-dashboard')}:30312\"\n", + "print(f\"Chaos-mesh web page host: {url}\")\n" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "b5130637-d891-4506-bc0e-47f0c4ea24d7", + "metadata": {}, + "source": [ + "results=en.run_command(\"kubectl create token account-default-admin-eechh\",roles=roles[\"master\"])\n", + "for res in results:\n", + " print(res.payload['stdout'])" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "b7198604-e8c6-47b0-8f85-e7afcc505d04", + "metadata": {}, + "source": [ + "provider.destroy() " + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "670e34e9-ece0-40cc-a141-9d11bcc2969b", + "metadata": {}, + "source": [], + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "enosenv", + "language": "python", + "name": "enosenv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/readme.md b/readme.md index c36d5375578e8c4e87eb1fa7786ff2de83d42338..1044bb44b72c6b95d25be0d4b50cd6d509f60456 100644 --- a/readme.md +++ b/readme.md @@ -6,22 +6,36 
@@ This surveillance system is designed to process video feeds using a distributed ## Table of Contents -1. [Overview](#overview) -2. [System Components](#system-components) - - [Camera](#camera) - - [Motion Detection](#motion-detection) - - [Object Recognizer](#object-recognizer) - - [Other Components](#other-components) - - [OpenTelemetry Collector](#opentelemetry-collector) - - [cAdvisor](#cadvisor) - - [Node Exporter](#node-exporter) - - [Prometheus](#prometheus) -3. [Docker Compose Tutorial](#docker-compose-tutorial) - - [Step 1: Build Docker Images for Services](#step-1-build-docker-images-for-services) - - [Step 2: Create or Update `docker-compose.yml`](#step-2-create-or-update-docker-composeyml) - - [Step 3: Deploy with Docker Compose](#step-3-deploy-with-docker-compose) +1. [Overview](#overview) +2. [System Components](#system-components) + - [Camera](#camera) + - [Motion Detection](#motion-detection) + - [Object Recognizer](#object-recognizer) + - [Other Components](#other-components) + - [OpenTelemetry Collector](#opentelemetry-collector) + - [cAdvisor](#cadvisor) + - [Node Exporter](#node-exporter) + - [Prometheus](#prometheus) +3. [Docker Compose Tutorial](#docker-compose-tutorial) + - [Step 1: Build Docker Images for Services](#step-1-build-docker-images-for-services) + - [Step 2: Create or Update `docker-compose.yml`](#step-2-create-or-update-docker-composeyml) + - [Step 3: Deploy with Docker Compose](#step-3-deploy-with-docker-compose) - [Step 4: Verify the Deployment](#step-4-verify-the-deployment) - +4. 
[Deploying on K3S using Enoslib on Grid'5000](#deploying-on-k3s-using-enoslib-on-grid5000) + - [Introduction](#introduction) + - [Step 0: Prerequisites](#step-0-prerequisites) + - [Step 1: Initialize Logging](#step-1-initialize-logging) + - [Step 2: Cluster Resource Configuration](#step-2-cluster-resource-configuration) + - [Step 3: Kubernetes Setup](#step-3-kubernetes-setup) + - [Step 4: Label Nodes](#step-4-label-nodes) + - [Step 5: Docker Registry Secret](#step-5-docker-registry-secret) + - [Step 6: Send Files to Cluster](#step-6-send-files-to-cluster) + - [Step 7: Helm Installation and Prometheus Setup](#step-7-helm-installation-and-prometheus-setup) + - [Step 8: Chaos Mesh Installation](#step-8-chaos-mesh-installation) + - [Step 9: Patch Prometheus Service](#step-9-patch-prometheus-service) + - [Step 10: Fetch Webpage Hosts](#step-10-fetch-webpage-hosts) + - [Step 11: Token Creation and Cleanup](#step-11-token-creation-and-cleanup) + - [Conclusion](#conclusion) ## System Components @@ -318,3 +332,211 @@ services: - Access Prometheus UI: [http://localhost:9090](http://localhost:9090) - Access cAdvisor UI: [http://localhost:8080](http://localhost:8080) - Access object recognizer interface : [http://localhost:5000](http://localhost:5000) + +## **Deploying on K3S using Enoslib on Grid'5000** + +This tutorial will walk you through the steps required to deploy a Kubernetes cluster on K3s using Enoslib on the Grid'5000 (G5k) infrastructure. The focus is on deploying the surveillance system, which includes components such as the Camera, Motion Detection, and Object Recognizer. These steps cover cluster configuration, deployment of Kubernetes resources, Helm setup, Prometheus and Chaos Mesh installation, and additional tasks for monitoring and resilience testing of the system. +### Step 0: Prerequisites +- **Grid'5000** account and access to the infrastructure. +- **enoslib** installed on your local machine. 
+- **kubectl**, **helm**, and **ssh** tools installed on your local machine. +- **K3s** installed on your machines. + + +### Step 1: Initialize Logging +First, initialize the logging for the deployment process. + +```python +import enoslib as en +from datetime import datetime + +# Initialize logging +_ = en.init_logging() + +# Generate a timestamp for the job name +timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') +job_name = f"usecase_{timestamp}" +``` + +### Step 2: Cluster Resource Configuration +Define the configuration for the cluster resources on Grid'5000. This will specify the number of machines for each role (master, agent, etc.). + +```python +cluster = "parasilo" +conf = ( + en.VMonG5kConf + .from_settings(job_name=job_name, walltime="04:00:00") + .add_machine(roles=["master"], cluster=cluster, number=1, flavour="large") + .add_machine(roles=["agent", "object_recognizer"], cluster=cluster, number=1, flavour_desc={"core": 8, "mem": 8192}) + .add_machine(roles=["agent", "motion_detector"], cluster=cluster, number=3, flavour_desc={"core": 4, "mem": 4096}) + .add_machine(roles=["agent", "camera"], cluster=cluster, number=3, flavour_desc={"core": 1, "mem": 1024}) + .finalize() +) + +# Initialize the provider +provider = en.VMonG5k(conf) +roles, networks = provider.init() +en.wait_for(roles) +``` + +### Step 3: Kubernetes Setup +Deploy the K3s cluster and ensure that it’s set up correctly. + +```python +# Initialize K3s with master and agent nodes +k3s = en.K3s(master=roles["master"], agent=roles["agent"]) +k3s.deploy() + +# Create a tunnel to the head node +print("Create a tunnel from your local machine to the head node:") +print(f"ssh -NL 8001:{roles['master'][0].address}:8001 {G5K_username}@access.grid5000.fr") +``` + +### Step 4: Label Nodes +Label your nodes with specific attributes based on their roles (cloud, edge, camera). 
+
+```python
+def label_nodes(node_names, labels):
+    label_str = ",".join(f"{key}={value}" for key, value in labels.items())
+
+    for node in node_names:
+        command = f"kubectl label nodes {node} {label_str} --overwrite"
+        try:
+            result = en.run_command(command, roles=roles['master'])
+            print(f"Successfully labeled {node} with {label_str}")
+        except Exception as e:
+            print(f"Failed to label {node}. Error: {e}")
+
+# Label nodes
+label_nodes([str(node.alias) for node in roles["object_recognizer"]], {"pos": "cloud"})
+label_nodes([str(node.alias) for node in roles["motion_detector"]], {"pos": "edge"})
+label_nodes([str(node.alias) for node in roles["camera"]], {"pos": "camera"})
+```
+
+### Step 5: Docker Registry Secret
+Create a Docker registry secret with anonymized credentials to authenticate with Docker Hub.
+
+```python
+docker_username = "username_placeholder"
+docker_password = "password_placeholder"
+docker_email = "email_placeholder"
+
+# Create the Docker registry secret
+en.run_command(f"kubectl create secret docker-registry dockerhub-secret \
+    --docker-username={docker_username} \
+    --docker-password={docker_password} \
+    --docker-email={docker_email}", roles=roles['master'])
+```
+
+### Step 6: Send Files to Cluster
+Transfer necessary configuration files to the cluster using `scp`. 
+
+```python
+import os
+
+def send_file(file_path, file_name):
+    scp_command = f"scp {file_path} root@{roles['master'][0].address}:{file_name}"
+    os.system(f"ssh-keygen -f /home/{G5K_username}/.ssh/known_hosts -R {roles['master'][0].address}")
+    os.system(scp_command)
+    return scp_command
+
+# Send configuration files to the cluster
+local_directory = "deploy/G5k/K3S/config"
+for root, dirs, files in os.walk(local_directory):
+    for file_name in files:
+        send_file(os.path.join(root, file_name), file_name)
+
+# Apply Kubernetes manifests
+en.run_command("kubectl apply -f deployement.yaml", roles=roles['master'])
+en.run_command("kubectl apply -f configmap.yaml", roles=roles['master'])
+```
+
+### Step 7: Helm Installation and Prometheus Setup
+Install Helm, add Prometheus charts, and deploy Prometheus for monitoring.
+
+```python
+with en.actions(roles=roles["master"]) as a:
+    a.raw("curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3")
+    a.raw("chmod 700 get_helm.sh")
+    a.raw("./get_helm.sh")
+
+    # Install Prometheus
+    a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo add prometheus-community https://prometheus-community.github.io/helm-charts")
+    a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo update")
+    a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml install prometheus prometheus-community/prometheus -n default --set grafana.enabled=True -f costum-values.yaml")
+    a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml upgrade prometheus prometheus-community/prometheus --namespace default --set prometheus-node-exporter.service.port=9500")
+```
+
+### Step 8: Chaos Mesh Installation
+Install Chaos Mesh using Helm for fault injection and resilience testing. 
+ +```python +with en.actions(roles=roles["master"]) as a: + a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo add chaos-mesh https://charts.chaos-mesh.org") + a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml repo update") + a.raw("helm --kubeconfig /etc/rancher/k3s/k3s.yaml install chaos-mesh chaos-mesh/chaos-mesh --set chaosDaemon.runtime=containerd --set chaosDaemon.containerdSocket=/run/containerd/containerd.sock -n default") + a.raw("kubectl apply -f rbac.yaml") + a.raw("kubectl create token account-default-admin-eechh") + a.raw('kubectl patch svc chaos-dashboard -n default -p \'{"spec": {"ports": [{"name": "http", "protocol": "TCP", "port": 2333, "targetPort": 2333, "nodePort": 30312}]}}\'') + +``` + +### Step 9: Patch Prometheus Service +Expose Prometheus service on a NodePort for web access. + +```python +with en.actions(roles=roles["master"]) as a: + a.raw('kubectl patch svc prometheus-server -n default -p \'{"spec": {"type": "NodePort", "ports": [{"name": "http", "port": 80, "targetPort": 9090, "nodePort": 30090}]}}\'') + +``` + +### Step 10: Fetch Webpage Hosts +Retrieve the host IP of Prometheus and Chaos Dashboard services. 
+
+```python
+def get_host(service):
+    results = en.run_command(f"kubectl get pods -n default -l app={service}", roles=roles['master'])
+    import re
+    pattern = r"host='([^']*)'"
+    match = re.search(pattern, str(results))
+
+    if match:
+        host = match.group(1)
+        print("Extracted host:", host)
+    else:
+        print("Host not found")
+        return None
+
+    results = en.run_command(f"kubectl describe node {host}", roles=roles['master'])
+    pattern = r"InternalIP:\s*([\d.]+)"
+    match = re.search(pattern, str(results))
+
+    if match:
+        return match.group(1)
+    else:
+        print("Host not found")
+        return None
+
+# Fetch the Prometheus and Chaos Dashboard IPs
+url = f"http://{get_host('prometheus')}:30090"
+print(f"Prometheus web page host: {url}")
+
+url = f"http://{get_host('chaos-dashboard')}:30312"
+print(f"Chaos-mesh web page host: {url}")
+```
+
+### Step 11: Token Creation and Cleanup
+Create the necessary tokens and perform cleanup after the job is complete.
+
+```python
+results = en.run_command("kubectl create token account-default-admin-eechh", roles=roles["master"])
+for res in results:
+    print(res.payload['stdout'])
+
+# Cleanup the resources
+provider.destroy()
+```
+
+## Conclusion
+
+By following the steps above, you have successfully deployed a Kubernetes cluster using K3s on Grid'5000, set up monitoring with Prometheus, fault injection with Chaos Mesh, and labeled nodes for efficient management.
\ No newline at end of file