Commit 4b3c478b authored by BAIRE Anthony's avatar BAIRE Anthony
Browse files

make db state changes atomic on job start & job destroy

The controller and django can both change the job state, especially when
it is in the WAITING state (django may delete the job and the controller
may start the job).

To prevent any inconsistency, we must ensure that these transitions are
made atomically.
parent 0c129948
......@@ -1031,6 +1031,10 @@ class JobManager(Manager):
raise NotImplementedError()
# create the job and:
# - return True if started
# - return False if cancelled by the user
# - raise exception on error
def _create_job(self, info):
ctrl = self.ctrl
ses = ctrl.session
......@@ -1040,14 +1044,19 @@ class JobManager(Manager):
try:
with ses.begin():
# atomically switch the job state from WAITING to RUNNING
# (because the UI may destroy the job before it is started)
if ses.execute("UPDATE dj_jobs SET state=%d WHERE id=%d AND state=%d"
% (JobState.RUNNING, info.job_id, JobState.WAITING)
).rowcount == 0:
log.info("job %d not started (destroyed by user)", info.job_id)
return False
job = ses.query(Job).filter_by(id=info.job_id).one()
webapp = job.webapp
log.info("start job %d (%s:%s)",
info.job_id, webapp.docker_name, info.version)
job.state = int(JobState.RUNNING) # pragma: nobranch (TODO: remove (coverage bug))
repo = ctrl.gen_image_name(webapp)
image = "%s:%s" % (repo, info.version)
......@@ -1152,6 +1161,7 @@ class JobManager(Manager):
with ses.begin():
# save the container_id into the db
job.container_id = info.ctr_id
return True
except:
#TODO introduce a state JobState.ERROR
......@@ -1471,7 +1481,9 @@ class JobManager(Manager):
# request a slot from the shared swarm
with info.client.request_slot(info.ctr_name, info.cpu or 0, info.mem or 0):
info.node_id = yield from info.client.wait_slot(info.ctr_name)
yield from self.run_in_executor(self._create_job, info, lock=False)
if not (yield from self.run_in_executor(self._create_job, info, lock=False)):
# cancelled by user
return
yield from self._notif_job_state(info, "RUNNING")
yield from self._finish_job(info, reset)
......
......@@ -25,6 +25,7 @@ from django.contrib.auth.mixins import LoginRequiredMixin
from django.contrib.auth.models import User
from django.contrib.messages.views import SuccessMessageMixin
from django.core.urlresolvers import reverse
from django.db import transaction
from django.http import HttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404, redirect
from django.urls import reverse, reverse_lazy
......@@ -658,16 +659,32 @@ class JobDelete(LoginRequiredMixin, DeleteView):
success_url = reverse_lazy('main:job_list')
template_name = 'job_delete.html'
def delete(self, request, *args, **kwargs):
job = self.get_object()
if job.destroyable:
messages.success(self.request, self.success_message)
return super().delete(request, *args, **kwargs)
else:
@classmethod
def as_view(cls, **kw):
# manage db transactions manually
return transaction.non_atomic_requests(super().as_view(**kw))
def delete(self, request, *args, pk, **kwargs):
# NOTE: if job is in WAITING state, then any state update must be done
# atomically so as not to interfere with the controller
if not (Job.objects.filter(id=pk, state=Job.DONE
).update(state=Job.ARCHIVED)
or Job.objects.filter(id=pk, state__in=(Job.NEW, Job.WAITING)
).update(state=Job.DELETED)
or Job.objects.filter(id=pk, state__in=(Job.DELETED, Job.ARCHIVED)).exists()
):
messages.error(self.request, "cannot delete a running job")
return redirect('main:job_detail', job.id)
return redirect('main:job_detail', pk)
transaction.commit()
self.object = job = self.get_object()
if job.state == Job.DELETED:
job.delete()
messages.success(self.request, self.success_message)
return redirect(self.get_success_url())
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment