Commit e308893e authored by BAIRE Anthony
Browse files

implement static memory/cpu constraints

fix #103
parent 55c2957d
......@@ -21,6 +21,9 @@ ENV PORT="4567" \
ENV="" \
REGISTRY="" \
MAX_JOBS="4" \
CPUS="" \
MEM_SOFT_LIMIT="" \
MEM_HARD_LIMIT="" \
DATASTORE_PATH="/data/{ENV}/rails/rw/datastore" \
SANDBOX_PATH="/data/{ENV}/ssh/cache/sandbox" \
TOOLBOX_PATH="/data/{ENV}/toolbox/cache" \
......
......@@ -1042,6 +1042,20 @@ class JobManager(Manager):
# TODO use another uid
ctrl.check_host_path("isdir", job_path)
hc = ctrl.sandbox.create_host_config(
binds = {job_path: {"bind": "/work"}},
cap_drop = ["all"],
# FIXME: CAP_DAC_OVERRIDE needed because all nfs files have uid,gid=1000,1000
cap_add = ["dac_override"],
cpu_period = ctrl.cpu_period,
cpu_quota = ctrl.cpu_quota,
# mem_reservation = ctrl.mem_soft_limit,
mem_limit = ctrl.mem_hard_limit,
)
if ctrl.mem_soft_limit:
# TODO: upgrade docker-py (and use create_host_config)
hc["MemoryReservation"] = ctrl.mem_soft_limit
log.debug("host_config %r", hc)
info.ctr_id = info.client.create_container(image, name=info.ctr_name,
working_dir = "/work",
# NOTE: the command line is a little complex, but this is
......@@ -1055,7 +1069,7 @@ class JobManager(Manager):
# is run anyway, with the exit code of cat)
# - we have no unusual dependencies (only sh, cat and
# mkfifo)
command = ["/bin/sh", "-c", """
command = ["/bin/sh", "-c", """
fifo=/tmp/.allgo.fifo.{job_id}
if mkfifo "$fifo"
then
......@@ -1067,16 +1081,15 @@ class JobManager(Manager):
else
"$@" 2>&1 | cat >allgo.log
fi
failcnt="`cat /sys/fs/cgroup/memory/memory.failcnt`"
if [ "$failcnt" -ne 0 ] ; then
echo "WARNING: out of memory (memory.failcnt=$failcnt)" >>allgo.log
fi
""".format(job_id=job.id),
"job%d" % job.id, webapp.entrypoint] + shlex.split(job.param),
labels = {"allgo.tmp_img": tmp_img or ""},
host_config = ctrl.sandbox.create_host_config(
binds = {job_path: {"bind": "/work"}},
cap_drop = ["all"],
# FIXME: CAP_DAC_OVERRIDE needed because all nfs files have uid,gid=1000,1000
cap_add = ["dac_override"],
))["Id"]
host_config = hc)["Id"]
info.client.start(info.ctr_id)
except:
#TODO introduce a state JobState.ERROR
......@@ -1419,7 +1432,8 @@ class ImageManager:
class DockerController:
def __init__(self, sandbox_host, swarm_host, mysql_host,
port, registry, env, datastore_path, sandbox_path,
toolbox_path, max_jobs, sandbox_network):
toolbox_path, max_jobs, sandbox_network, cpus,
mem_soft_limit, mem_hard_limit):
self.sandbox = docker.Client(sandbox_host)
self.sandbox_watcher = DockerWatcher(self.sandbox)
......@@ -1444,6 +1458,11 @@ class DockerController:
self.sandbox_manager = SandboxManager(self)
self.job_manager = JobManager(self, max_jobs)
self.cpu_quota = None if cpus is None else int(cpus * 100000)
self.cpu_period = None if cpus is None else 100000
self.mem_soft_limit = None if mem_soft_limit is None else docker.utils.parse_bytes(mem_soft_limit)
self.mem_hard_limit = None if mem_hard_limit is None else docker.utils.parse_bytes(mem_hard_limit)
self.registry = registry
self.env = env
self.datastore_path = datastore_path
......
......@@ -111,6 +111,19 @@ def main():
raise ValueError("must provide a positive value")
log.info("max concurrent jobs %d", max_jobs)
with get_envvar("CPUS") as val:
cpus = float(val) if val else None
log.info("cpus %s", cpus)
with get_envvar("MEM_SOFT_LIMIT") as val:
mem_soft_limit = val or None
log.info("memory soft limit %s", mem_soft_limit)
with get_envvar("MEM_HARD_LIMIT") as val:
mem_hard_limit = val or None
log.info("memory hard limit %s", mem_hard_limit)
with get_envvar("SANDBOX_NETWORK") as val:
re.match(r"\A[\w]+[\w. _-]*[\w]+\Z", val)
sandbox_network = val
......@@ -136,7 +149,8 @@ def main():
return controller.DockerController(docker_host, swarm_host, mysql_host,
port, registry, env, datastore_path, sandbox_path, toolbox_path,
max_jobs, sandbox_network).run()
max_jobs, sandbox_network, cpus, mem_soft_limit, mem_hard_limit,
).run()
finally:
lock.release()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment