Commit 5aea4811 authored by BERJON Matthieu's avatar BERJON Matthieu
Browse files

Merge branch 'fix-job-downloads' into 'django'

Fix job downloads (mostly the generated .zip file)

Closes #268, #270, #269, and #272

See merge request !124
parents 47370d71 50e9d469
Pipeline #40178 failed with stage
in 1 minute and 20 seconds
......@@ -8,7 +8,7 @@ from django.http import HttpResponse, JsonResponse, FileResponse
from django.shortcuts import redirect
from django.views.decorators.csrf import csrf_exempt
from main.models import Job, AllgoUser, Webapp, JobQueue
from main.helpers import upload_data, get_base_url
from main.helpers import upload_data, get_base_url, lookup_job_file
log = logging.getLogger('allgo')
......@@ -50,9 +50,9 @@ def job_response(job, request):
4: 'archived',
5: 'deleted',
6: 'aborting'}
odir = os.path.join(DATASTORE, str(
files = {f: get_link(, odir, f, request) for f in os.listdir(odir)
if os.path.isfile(os.path.join(odir,f)) and not f.startswith(".")}
job_dir = job.data_dir
files = {f: get_link(, job_dir, f, request) for f in os.listdir(job_dir)
if lookup_job_file(, f)}
return {str( {'status': status[job.state],
'files': files
......@@ -105,7 +105,7 @@ def jobs(request):
log.debug('No usable versions')
return HttpResponse("This app is not yet published", status=404)
upload_data(request.FILES.values(), job)
# run the Job validators
......@@ -60,24 +60,24 @@ def get_ssh_data(key):
return fp_encoded, comment
def upload_data(uploaded_files, job_id):
def upload_data(uploaded_files, job):
Upload any data according to a specific job id
uploaded_files: iterable that yields
django.core.files.uploadedfile.UploadedFile objects
job_id (int): job ID.
job (Job): job
>>> upload_data(self.request.FILES.getlist('files'),
>>> upload_data(self.request.FILES.getlist('files'), job)
job_dir = os.path.join(settings.DATASTORE, str(job_id))
job_dir = job.data_dir
for file_data in uploaded_files:
......@@ -100,6 +100,24 @@ def upload_data(uploaded_files, job_id):
for chunk in file_data.chunks():
def lookup_job_file(job_id, filename):
"""Look up a job data file and return its real path
This function also performs additional security checks to prevent escaping
from the job data directory:
- prevent accessing subdirectories or other job directories
- exclude non-regular files
- exclude symbolic links
returns None if lookup fails
path = os.path.join(settings.DATASTORE, str(job_id), filename)
if ( "/" not in filename
and os.path.isfile(path)
and not os.path.islink(path)
return path
def get_redis_connection():
"Get a redis connection from the global pool"
from __future__ import unicode_literals
import os
from django.conf import settings
from django.contrib.auth.models import User, AnonymousUser
from django.core.validators import MinLengthValidator, MinValueValidator, \
......@@ -563,6 +565,13 @@ class Job(TimeStampModel):
return self.state not in (Job.RUNNING, Job.ABORTING)
def data_dir(self):
"""Return the system path to the job data directory"""
return os.path.join(settings.DATASTORE, str(
# def __str__(self):
# return self.webapp
......@@ -61,8 +61,8 @@ urlpatterns = [
url(r'^jobs/(?P<pk>\d+)/$', views.JobDetail.as_view(), name='job_detail'),
url(r'^jobs/delete/(?P<pk>\d+)/$', views.JobDelete.as_view(), name='job_delete'),
url(r'^jobs/abort/(?P<pk>\d+)/$', views.JobAbort.as_view(), name='job_abort'),
url(r'^jobs/(?P<pk>\d+)/archive$', views.JobFileDownloadAll.as_view(), name='job_download_all'),
url(r'^jobs/(?P<pk>\d+)/download/(?P<filename>(.*))$', views.JobFileDownload.as_view(), name='job_download_file'),
url(r'^jobs/(?P<pk>\d+)/download/all/$', views.JobFileDownloadAll.as_view(), name='job_download_all'),
url(r'^profile/$', views.UserUpdate.as_view(), name='user_detail'),
url(r'^profile/token/update$', views.UserToken.as_view(), name='user_token'),
......@@ -15,6 +15,7 @@ import json
import logging
import os
import shutil
import tempfile
import zipfile
import natsort
......@@ -29,7 +30,7 @@ from django.core.exceptions import ObjectDoesNotExist
from django.core.urlresolvers import reverse
from django.db import transaction
from django.db.models import Count
from django.http import HttpResponse, JsonResponse, HttpResponseRedirect
from django.http import HttpResponse, JsonResponse, HttpResponseRedirect, FileResponse
from django.shortcuts import render, get_object_or_404, redirect
from django.urls import reverse, reverse_lazy
from django.utils.crypto import get_random_string
......@@ -45,6 +46,7 @@ from django.views.generic import (
from django.views.generic.detail import SingleObjectMixin
from taggit.models import Tag
from .forms import (
......@@ -59,7 +61,7 @@ from .forms import (
# Local imports
import config
from .helpers import get_base_url, get_ssh_data, upload_data, notify_controller
from .helpers import get_base_url, get_ssh_data, upload_data, notify_controller, lookup_job_file
from .mixins import GroupRequiredMixin
from .models import (
......@@ -658,12 +660,11 @@ class JobDetail(LoginRequiredMixin, DetailView):
def get_context_data(self, **kwargs):
"""Recover the logs and files related to this job"""
job = Job.objects.get(
dirname = os.path.join(settings.DATASTORE, str(
if job.state == Job.DONE:
# job is done
# -> read the `allgo.log` file
log_file = os.path.join(dirname, 'allgo.log')
log_file = os.path.join(job.data_dir, 'allgo.log')
with open(log_file, 'r', errors="replace") as log_data:
logs =
......@@ -681,11 +682,12 @@ class JobDetail(LoginRequiredMixin, DetailView):
# Get the files and some metadata such as the webapp version
webapp = Webapp.objects.get(docker_name=self.object.webapp.docker_name)
if os.path.exists(dirname):
# put all files in a list
kwargs['files'] = [os.path.basename(x) for x in glob.glob(os.path.join(dirname, '*'))]
kwargs['files'] = []
# List all job files
# NOTE: calling lookup_job_file is a security feature
kwargs['files'] = [x for x in os.listdir(job.data_dir)
if lookup_job_file(, x)]
return super().get_context_data(**kwargs)
def render_to_response(self, context, **kwargs):
......@@ -742,7 +744,7 @@ class JobCreate(SuccessMessageMixin, CreateView):
# Upload files if there are any
upload_data(self.request.FILES.getlist('files'), obj)
# start the job
obj.state = Job.WAITING
......@@ -878,7 +880,7 @@ class JobDelete(LoginRequiredMixin, DeleteView):
# delete the data dir if present
# FIXME: if this fail then we have dangling files staying in the way
job_dir = os.path.join(settings.DATASTORE, str(pk))
job_dir = job.data_dir
if os.path.exists(job_dir):
......@@ -902,43 +904,42 @@ class JobFileDownload(LoginRequiredMixin, View):
return redirect("/datastore/%s/%s" % (job_id, filename))
class JobFileDownloadAll(LoginRequiredMixin, View):
class JobFileDownloadAll(LoginRequiredMixin, SingleObjectMixin, View):
"""Archive and download all files of a given job
model = Job
def get(self, request, *args, **kwargs):
"""get all the file for a given job
The method gets the job ID, recover each file related to this job,
archive into a ZIP file and return it.
job_id = self.kwargs['pk']
dirname = os.path.join(settings.DATASTORE, str(job_id))
s = io.BytesIO()
response = HttpResponse(s.getvalue(), content_type='application/x-zip-compressed')
if os.path.exists(dirname):
files = os.listdir(dirname)
zip_subdir = str(job_id)
zip_filename = '' % zip_subdir
The ZIP file is stored as an anonymous file in /tmp/ then streamed with
FileResponse. This is better that keepingthe whole file in memory
because it may be large (and linux has a quite efficient page cache).
job = self.get_object()
tmp_file = tempfile.TemporaryFile()
zip_file = zipfile.ZipFile(s, 'w')
zip_subdir = str(
zip_filename = '' % zip_subdir
for fpath in files:
filename = os.path.join(dirname, fpath)
fdir, fname = os.path.split(filename)
zip_path = os.path.join(zip_subdir, fname)
zip_file = zipfile.ZipFile(tmp_file, 'w')
zip_file.write(filename, zip_path)
for filename in os.listdir(job.data_dir):
# NOTE: calling lookup_job_file is a security feature
real_path = lookup_job_file(, filename)
if real_path:
zip_path = os.path.join(zip_subdir, filename)
zip_file.write(real_path, zip_path)
response["Content-Disposition"] = "attachment; filename={0}".format(zip_filename)
return response
response.status = 404
return response
response = FileResponse(tmp_file, content_type='application/x-zip-compressed')
response["Content-Disposition"] = "attachment; filename={0}".format(zip_filename)
return response
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment