Commit 4ae0f058 authored by SIMONIN Matthieu's avatar SIMONIN Matthieu
Browse files

Introduce a g5k launcher

tasks:

- deploy: take ownership of a g5k node and deploy tansiv and some vm
  according to the passed parameters (e.g platform, deployment...)
- validate: (naive for now) runs fping on all networks on all vms. Print
  the output (visual check).
- destroy: release the g5k resources
parent 5687d6e8
Pipeline #194748 passed with stages
in 26 minutes and 26 seconds
......@@ -83,4 +83,36 @@ PING t11 (192.168.120.11) 56(84) bytes of data.
64 bytes from t11 (192.168.120.11): icmp_seq=2 ttl=64 time=400 ms
```
# Automatiquement sur g5k
```
cd grid5000
pip install -r requirements
python g5k.py deploy ../packer/packer-debian-10.3.0-x86_64-qemu/debian-10.3.0-x86_64.qcow2 inputs/nova_cluster.xml inputs/deployment_10_on_nova.xml --cluster grvingt --queue production
python g5k.py validate
[...]
################## <mantap18> #################
mantap10 : 0.15 0.40 0.51 0.44 0.44 0.29 0.15 0.15 0.15 0.39
mantap11 : 0.17 0.15 0.18 0.26 0.23 0.30 0.19 0.15 0.17 0.37
mantap12 : 0.43 0.17 0.65 0.61 0.23 0.53 0.16 0.36 0.42 0.27
mantap13 : 0.22 0.21 0.83 0.25 0.74 0.61 0.62 0.54 0.88 0.25
mantap14 : 0.16 0.19 0.16 0.25 0.15 0.19 0.16 0.16 0.17 0.17
mantap15 : 0.16 0.29 0.28 0.26 0.15 0.32 0.15 0.15 0.20 0.15
mantap16 : 0.25 0.30 0.48 0.23 0.23 0.15 0.17 0.18 0.16 0.22
mantap17 : 0.19 0.16 0.15 0.53 0.71 0.19 0.59 0.15 0.20 0.18
mantap18 : 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02
mantap19 : 0.16 0.16 0.26 0.17 0.16 0.25 0.39 0.16 0.17 0.16
tantap10 : 400.54 400.19 400.16 400.20 400.14 400.55 400.26 400.18 400.31 400.18
tantap11 : 400.14 400.20 400.11 400.17 400.20 400.15 400.20 400.19 400.19 400.22
tantap12 : 400.49 400.18 400.17 400.23 400.25 400.19 400.28 400.21 400.22 400.20
tantap13 : 400.10 400.17 400.23 400.15 400.17 400.21 400.27 400.13 400.18 400.18
tantap14 : 400.29 400.13 400.23 400.24 400.31 400.29 400.29 400.22 400.35 400.19
tantap15 : 400.18 400.15 400.26 400.16 400.09 400.16 400.16 400.13 400.16 400.27
tantap16 : 400.19 400.21 400.21 400.18 400.18 400.17 400.21 400.21 400.40 400.18
tantap17 : 400.25 400.12 400.15 400.15 400.22 400.19 400.25 400.15 400.17 400.19
tantap18 : 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02
tantap19 : 400.19 400.29 400.17 400.19 400.19 400.13 400.22 400.20 400.28 400.17
[...]
```
\ No newline at end of file
......@@ -115,14 +115,19 @@ class TansivVM(object):
return dict(
addresses=[f"{interface.ip}"],
gateway4=str(next(interface.network.hosts())),
# routes=[
# dict(to=str(interface), via=str(next(interface.network.hosts())))
# ],
dhcp4=False,
dhcp6=False,
)
ens3 = ethernet_config(self.tantap)
ens4 = ethernet_config(self.management)
ens4 = ethernet_config(
self.management,
)
network_config = dict(version=2, ethernets=dict(ens3=ens3, ens4=ens4))
LOGGER.debug(network_config)
return network_config
def ci_user_data(self) -> Dict:
......@@ -138,11 +143,11 @@ class TansivVM(object):
t_entries = [
f'echo "{ip} {alias}" >> /etc/hosts'
for ip, alias in _mapping(self.tantap, "t")
for ip, alias in _mapping(self.tantap, "tantap")
]
m_entries = [
f'echo "{ip} {alias}" >> /etc/hosts'
for ip, alias in _mapping(self.management, "m")
for ip, alias in _mapping(self.management, "mantap")
]
bootcmd.extend(t_entries)
bootcmd.extend(m_entries)
......@@ -189,7 +194,11 @@ class TansivVM(object):
"""Create the bridges, the tap if needed."""
def br_tap(br: str, ip: IPv4Interface, tap: str):
"""Create a bridge and a tap attached."""
"""Create a bridge and a tap attached.
This assumes that the current process is running with the right
level of privilege.
"""
check_call(
f"""
ip link show dev {br} || ip link add name {br} type bridge
......@@ -288,7 +297,6 @@ done
help="The hostname of the virtual machine",
)
parser.add_argument("--qemu-args", type=str, help="arguments to pass to qemu")
logging.basicConfig(level=logging.DEBUG)
......
......@@ -6,21 +6,70 @@
<argument value="192.168.120.10"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.10/24"/> <!-- how vsg knows me-->
<argument value="172.16.0.10/24"/> <!-- management -->
<argument value="10.0.0.10/24"/> <!-- management -->
</actor>
<actor host="nova-2.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.11"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.11/24"/> <!-- how vsg knows me-->
<argument value="172.16.0.11/24"/> <!-- management -->
<argument value="10.0.0.11/24"/> <!-- management -->
</actor>
<actor host="nova-3.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.12"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.12/24"/> <!-- how vsg knows me-->
<argument value="172.16.0.12/24"/> <!-- management -->
<argument value="10.0.0.12/24"/> <!-- management -->
</actor>
<actor host="nova-4.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.13"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.13/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.13/24"/> <!-- management -->
</actor>
<actor host="nova-5.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.14"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.14/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.14/24"/> <!-- management -->
</actor>
<actor host="nova-6.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.15"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.15/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.15/24"/> <!-- management -->
</actor>
<actor host="nova-7.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.16"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.16/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.16/24"/> <!-- management -->
</actor>
<actor host="nova-8.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.17"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.17/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.17/24"/> <!-- management -->
</actor>
<actor host="nova-9.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.18"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.18/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.18/24"/> <!-- management -->
</actor>
<actor host="nova-10.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.19"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.19/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.19/24"/> <!-- management -->
</actor>
</platform>
\ No newline at end of file
import argparse
import logging
from ipaddress import IPv4Interface
from pathlib import Path
import traceback
from enoslib import *
from enoslib.api import gather_facts
from enoslib.types import Host, Roles
def build_tansiv_roles(deployment: Path, tansiv_node: Host) -> Roles:
    """Derive the enoslib roles of the tansiv VMs from a simgrid deployment file.

    The last ``<argument>`` of every ``<actor>`` element is read as an IPv4
    interface (it is the one commented as "management" in the deployment
    files of this repository). One ``Host`` is created per interface, in
    ascending interface order.

    Args:
        deployment: Path to the deployment file
        tansiv_node: the Host representing the node where tansiv is launched;
            its address is used as the SSH gateway to reach the VMs

    Returns:
        The roles representing the virtual machines launched by tansiv
        according to the deployment file (a single ``all`` group).
    """
    # kept as a function-local import
    import xml.etree.ElementTree as ET

    document = ET.parse(deployment).getroot()

    management_ifaces = [
        IPv4Interface(argument.attrib["value"])
        for argument in document.findall("./actor/argument[last()]")
    ]
    management_ifaces.sort()

    vms = []
    for iface in management_ifaces:
        # the aliases are suffixed with the last byte of the address
        last_byte = iface.ip.packed[-1]
        vms.append(
            Host(
                str(iface.ip),
                alias=f"mantap{last_byte}",
                user="root",
                extra={
                    "tansiv_alias": f"tantap{last_byte}",
                    "gateway": tansiv_node.address,
                    "gateway_user": "root",
                },
            )
        )

    tansiv_roles = {"all": vms}
    print(tansiv_roles)
    return tansiv_roles
@enostask(new=True)
def deploy(args, env=None):
    """Deploy tansiv and the associated VMs.

    Steps, in order: check the local SSH public key, get one node through
    the G5k provider, install docker on it, upload the base image, the
    platform/deployment files and the boot.py wrapper, start the tansiv
    container, relax the bridge netfilter sysctls and finally wait for the
    VMs described in the deployment file to answer on SSH.

    Idempotent.

    Args:
        args: parsed command line; ``image``, ``cluster``, ``platform``,
            ``deployment`` and ``queue`` are read.
        env: environment injected by ``enostask``; ``roles``, ``provider``
            and ``tansiv_roles`` are stored in it.

    Raises:
        Exception: if ``~/.ssh/id_rsa.pub`` does not exist or is not a file.
    """
    image = args.image
    cluster = args.cluster
    platform = args.platform
    deployment = args.deployment
    queue = args.queue

    # Check the ssh key first: there is no point in reserving a node if the
    # deployment is going to abort afterwards.
    pub_key = Path.home() / ".ssh" / "id_rsa.pub"
    if not pub_key.exists() or not pub_key.is_file():
        raise Exception(f"No public key found in {pub_key}")

    # NOTE(review): the network site is hard-coded to "nancy" whereas the
    # cluster is user-provided -- confirm this works for clusters located on
    # other sites.
    prod = G5kNetworkConf(id="id", roles=["prod"], site="nancy", type="prod")
    conf = (
        G5kConf.from_settings(
            job_name="tansiv",
            job_type="allow_classic_ssh",
            walltime="01:00:00",
            queue=queue,
        )
        .add_machine(cluster=cluster, roles=["tansiv"], nodes=1, primary_network=prod)
        .add_network_conf(prod)
    ).finalize()

    provider = G5k(conf)
    roles, _ = provider.init()

    # Record the reservation as soon as it exists.
    # NOTE(review): presumably this lets `destroy` find the provider even if
    # a later step fails -- this assumes enostask persists env on failure,
    # TODO confirm.
    env["roles"] = roles
    env["provider"] = provider

    # install docker
    docker = Docker(agent=roles["tansiv"], bind_var_docker="/tmp/docker")
    docker.deploy()

    with play_on(roles=roles) as p:
        # copy the pub_key
        p.copy(src=str(pub_key), dest="/tmp/id_rsa.pub")
        # everything tansiv needs is gathered in /tmp/tansiv (mounted as /srv
        # in the container below)
        p.file(path="/tmp/tansiv", state="directory")
        p.synchronize(
            src=image, dest="/tmp/tansiv/image.qcow2", display_name="copying base image"
        )
        p.synchronize(
            src=platform,
            dest="/tmp/tansiv/platform.xml",
            display_name="copying platform file",
        )
        p.synchronize(
            src=deployment,
            dest="/tmp/tansiv/deployment.xml",
            display_name="copying deployment file",
        )
        # we also need the boot.py wrapper
        p.synchronize(
            src="../examples/qemus/boot.py",
            dest="/tmp/tansiv/boot.py",
            display_name="copying boot.py wrapper",
        )
        # finally start the container
        p.docker_container(
            state="started",
            network_mode="host",
            name="tansiv",
            image="registry.gitlab.inria.fr/quinson/2018-vsg/tansiv:latest",
            command="platform.xml deployment.xml",
            volumes=["/tmp/id_rsa.pub:/root/.ssh/id_rsa.pub", "/tmp/tansiv:/srv"],
            env={
                "AUTOCONFIG_NET": "true",
                "IMAGE": "image.qcow2",
            },
            capabilities=["NET_ADMIN"],
            devices=["/dev/net/tun"],
        )
        # By default, packets that need to be forwarded by a bridge are sent
        # to iptables, which will most likely drop them.
        # We can disable this behaviour by bypassing iptables:
        # https://wiki.libvirt.org/page/Net.bridge.bridge-nf-call_and_sysctl.conf
        # We currently have two bridges:
        # - the tantap bridge: only used for traffic not supported by the vsg
        #   implementation (e.g. arp requests, dhcp)
        # - the mantap bridge: used for management tasks; traffic follows a
        #   regular flow through the bridge, so it might be dropped by
        #   iptables (e.g. a ping from one management address to another)
        p.sysctl(
            name="net.bridge.bridge-nf-call-iptables",
            value="0",
            state="present",
            sysctl_set="yes",
        )
        p.sysctl(
            name="net.bridge.bridge-nf-call-arptables",
            value="0",
            state="present",
            sysctl_set="yes",
        )

    # The inventory must describe the VMs that were actually started, i.e.
    # those of the deployment file given on the command line (the very file
    # uploaded above), not those of a fixed example file.
    tansiv_roles = build_tansiv_roles(Path(deployment), roles["tansiv"][0])

    # waiting for the tansiv vms to show up
    wait_ssh(roles=tansiv_roles)

    env["tansiv_roles"] = tansiv_roles
@enostask()
def validate(args, env=None):
    """Validate the deployment (visual check only).

    Idempotent.

    Runs fping from every VM towards every VM, on both networks, and prints
    the raw fping report of each VM so that the latency matrix can be
    inspected by eye.

    Args:
        args: parsed command line (unused).
        env: environment injected by ``enostask``; ``tansiv_roles`` is read.
    """
    tansiv_roles = env["tansiv_roles"]
    vms = tansiv_roles["all"]

    # dummy validation
    # -- runs fping to get the point to point latency for every pair of nodes
    # -- the host alias (mantapXX) is the name of the machine on the
    #    management interface
    # -- the `tansiv_alias` extra (tantapXX) is the name of the machine on the
    #    tansiv interface
    management_names = [vm.alias for vm in vms]
    tansiv_names = [vm.extra["tansiv_alias"] for vm in vms]
    hostnames = management_names + tansiv_names
    print(hostnames)

    targets = " ".join(hostnames)
    result = run_command(f"fping -q -C 10 -s -e {targets}", roles=tansiv_roles)

    # displaying the output (the result data structure is a bit painful to
    # parse... ask the enoslib maintainers)
    for hostname, outcome in result["ok"].items():
        print(f"################## <{hostname}> #################")
        # fping stats are displayed on stderr
        print(outcome["stderr"])
        print(f"################## </{hostname}> #################")

    for hostname, _ in result["failed"].items():
        print(f"host that fails = {hostname}")
@enostask()
def destroy(args, env=None):
    """Call ``destroy()`` on the provider stored in the environment.

    Args:
        args: parsed command line (unused).
        env: environment injected by ``enostask``; ``provider`` is read.
    """
    env["provider"].destroy()
if __name__ == "__main__":
    # Command line entry point: one sub-command per task (deploy, validate,
    # destroy). Each sub-parser registers its task as the `func` default.
    import sys

    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser(description="Tansiv experimentation engine")
    subparsers = parser.add_subparsers(help="deploy")

    # ------------------------------------------------------------------- DEPLOY
    parser_deploy = subparsers.add_parser(
        "deploy", help="Deploy tansiv and the associated VMs"
    )
    parser_deploy.add_argument(
        "image",
        help="Base image to use (qcow2)",
    )
    parser_deploy.add_argument(
        "platform",
        help="The simgrid platform file",
    )
    parser_deploy.add_argument(
        "deployment",
        help="The simgrid deployment file",
    )
    parser_deploy.add_argument(
        "--cluster", help="Cluster where to get the node", default="parapluie"
    )
    parser_deploy.add_argument("--queue", help="Queue to use", default="default")
    parser_deploy.set_defaults(func=deploy)
    # --------------------------------------------------------------------------

    # ----------------------------------------------------------------- VALIDATE
    parser_validate = subparsers.add_parser("validate", help="Validate the deployment")
    parser_validate.set_defaults(func=validate)
    # --------------------------------------------------------------------------

    # ------------------------------------------------------------------ DESTROY
    parser_destroy = subparsers.add_parser("destroy", help="Destroy the deployment")
    parser_destroy.set_defaults(func=destroy)
    # --------------------------------------------------------------------------

    args = parser.parse_args()

    # Without a sub-command no `func` default is set: this is a usage error,
    # so show the help instead of failing on the missing attribute.
    task = getattr(args, "func", None)
    if task is None:
        parser.print_help()
        sys.exit(2)

    try:
        task(args)
    except Exception:
        # A task failure is not a usage error: report the traceback and make
        # the failure visible to the caller through the exit status.
        traceback.print_exc()
        sys.exit(1)
\ No newline at end of file
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "https://simgrid.org/simgrid.dtd">
<platform version="4.1">
<actor host="nova-1.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.10"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.10/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.10/24"/> <!-- management -->
</actor>
<actor host="nova-2.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.11"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.11/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.11/24"/> <!-- management -->
</actor>
<actor host="nova-3.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.12"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.12/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.12/24"/> <!-- management -->
</actor>
<actor host="nova-4.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.13"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.13/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.13/24"/> <!-- management -->
</actor>
<actor host="nova-5.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.14"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.14/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.14/24"/> <!-- management -->
</actor>
<actor host="nova-6.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.15"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.15/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.15/24"/> <!-- management -->
</actor>
<actor host="nova-7.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.16"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.16/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.16/24"/> <!-- management -->
</actor>
<actor host="nova-8.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.17"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.17/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.17/24"/> <!-- management -->
</actor>
<actor host="nova-9.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.18"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.18/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.18/24"/> <!-- management -->
</actor>
<actor host="nova-10.lyon.grid5000.fr" function="vsg_vm">
<argument value="192.168.120.19"/> <!-- how simgrid knows me-->
<argument value="./boot.py"/>
<argument value="192.168.120.19/24"/> <!-- how vsg knows me-->
<argument value="10.0.0.19/24"/> <!-- management -->
</actor>
</platform>
\ No newline at end of file
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
<config>
<prop id ="network/latency-factor" value = "1" />
</config>
<zone id="AS0" routing="Full">
<!--
https://simgrid.org/doc/latest/Configuring_SimGrid.html#choosing-the-platform-models
We are transferring only small packets, so we don't want to pay the slow start every time
-->
<cluster id="nova" prefix="nova-" suffix=".lyon.grid5000.fr" radical="0-200" speed="16120000000.0f,0.0f,0.0f" core="16" bw="10Gbps" lat="1.0E-1s" router_id="router1">
<prop id="watt_per_state" value="75.83:81.97:174.04, 123.86:123.86:123.86, 66:66:66" />
<prop id="watt_off" value="8.81" />
</cluster>
</zone>
</platform>
enoslib~=5.4
\ No newline at end of file
......@@ -8,7 +8,7 @@ apt -y install cloud-init iperf wget vim
# some apps to validate the bouzin
# since we have ssh ready on the management interface, we could also install
# some apps on the fly
apt -y install taktuk
apt -y install taktuk fping
mkdir -p /home/tansiv/.ssh
ssh-keygen -t rsa -f /home/tansiv/.ssh/id_rsa -P ''
......
......@@ -2,7 +2,7 @@
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
<zone id="AS0" routing="Full">
<cluster id="nova" prefix="nova-" suffix=".lyon.grid5000.fr" radical="0-200" speed="16120000000.0f,0.0f,0.0f" core="16" bw="10Gbps" lat="1.0E-1s" router_id="router1">
<cluster id="nova" prefix="nova-" suffix=".lyon.grid5000.fr" radical="0-200" speed="16120000000.0f,0.0f,0.0f" core="16" bw="10Gbps" lat="1.0E-2s" router_id="router1">
<prop id="watt_per_state" value="75.83:81.97:174.04, 123.86:123.86:123.86, 66:66:66" />
<prop id="watt_off" value="8.81" />
</cluster>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment