Commit 14d29afb authored by Ryan Herbert
Browse files

docker: split vidjil into two images (client and server)

Create separate images for the client and the server. This makes delivery
of the images easier, since the client and the server are versioned
separately.
parent d52a41c6
......@@ -8,7 +8,7 @@ services:
- ./mysql/:/docker-entrypoint-initdb.d/
- /opt/vidjil/mysql:/var/lib/mysql
uwsgi:
image: vidjil:test
image: vidjil/server:test
command: bash /entrypoints/uwsgi-entrypoint.sh
volumes_from:
- fuse
......@@ -19,7 +19,7 @@ services:
- mysql:mysql
- fuse:fuse
fuse:
image: vidjil:test
image: vidjil/server:test
command: bash /entrypoints/fuse-entrypoint.sh
volumes:
- /opt/vidjil/databases:/usr/share/vidjil/server/web2py/applications/vidjil/databases
......@@ -27,12 +27,11 @@ services:
- /opt/vidjil/result/tmp:/mnt/result/tmp
- /opt/vidjil/upload/uploads:/mnt/upload/uploads
- /opt/vidjil/log:/var/vidjil
- /opt/vidjil/log/nginx:/var/log/nginx
- /opt/vidjil/log/uwsgi:/var/log/uwsgi
- /opt/vidjil/backup:/mnt/backup
#- ./vidjil/conf:/etc/vidjil
#- ./vidjil-server/conf:/etc/vidjil
nginx:
image: vidjil:test
image: vidjil/client:test
depends_on:
- uwsgi
links:
......@@ -41,11 +40,11 @@ services:
- "80:80"
- "443:443"
command: bash /entrypoints/nginx-entrypoint.sh
volumes_from:
- uwsgi
volumes:
- /opt/vidjil/log/nginx:/var/log/nginx
workers:
image: vidjil:test
image: vidjil/server:test
command: /usr/local/bin/gosu www-data python /usr/share/vidjil/server/web2py/web2py.py -K vidjil,vidjil,vidjil
volumes_from:
- uwsgi
......
from vidjil:test
from vidjil/server:test
run apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get update && apt-get install -y cron python python-pip iputils-ping
......
# Client image: NGINX with a checkout of the Vidjil sources served from
# /usr/share/vidjil.
FROM nginx:1.15.0

LABEL version="1.1"
LABEL description="An NGINX based docker image which comes \
with a full installation of the Vidjil client."

# NOTE(review): GOSU_VERSION is not referenced by any later instruction in this
# file; presumably it is consumed by install.sh -- confirm or drop.
ENV GOSU_VERSION 1.7

# Build-time tools; the apt lists are removed in the same layer to keep it small.
RUN set -x \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        wget make unzip git sudo curl fcgiwrap \
    && rm -rf /var/lib/apt/lists/*

# Branch and repository to clone; both can be overridden with --build-arg.
ARG git_branch=dev
ARG remote_repo=https://gitlab.inria.fr/vidjil/vidjil.git

# NOTE(review): TLS certificate verification is disabled globally for git here;
# the clone is therefore not authenticated.
RUN cd /usr/share/ && git config --global http.sslVerify false && git clone -b $git_branch $remote_repo

# NGINX configuration snippets.
COPY ./conf/nginx_gzip_static.conf /etc/nginx/conf.d/web2py/gzip_static.conf
COPY ./conf/nginx_gzip.conf /etc/nginx/conf.d/web2py/gzip.conf
COPY ./conf/uwsgi.conf /etc/nginx/conf.d/web2py/uwsgi.conf

# Install script and the configuration templates staged under /opt/vidjil_conf.
ADD ./scripts/install.sh /opt/install_scripts/install.sh
COPY ./conf/conf.js /opt/vidjil_conf/conf.js
COPY ./conf/conf_http.js /opt/vidjil_conf/conf_http.js
COPY ./conf/nginx_web2py /opt/vidjil_conf/web2py
COPY ./conf/nginx_web2py_http /opt/vidjil_conf/web2py_http
COPY ./conf/Gemfile /usr/share/vidjil/Gemfile

# Run the default make targets for the icons and the germline data.
RUN cd /usr/share/vidjil/browser/css/icons && make
RUN cd /usr/share/vidjil/germline && make

# Build flavour, exported to the environment (presumably read by install.sh).
ARG build_env='PRODUCTION'
ENV BUILD_ENV $build_env

RUN mkdir /etc/vidjil
# Drop the stock NGINX site shipped with the base image.
RUN rm /etc/nginx/conf.d/default.conf
# `sync` runs between chmod and the script execution.
RUN chmod +x /opt/install_scripts/install.sh; sync && /opt/install_scripts/install.sh
# The browser reads its configuration through a link into /etc/vidjil.
RUN ln -s /etc/vidjil/conf.js /usr/share/vidjil/browser/js/conf.js

COPY ./scripts/nginx-entrypoint.sh /entrypoints/nginx-entrypoint.sh

RUN chown -R www-data:www-data /usr/share/vidjil
# Extra `vidjil` user with a home directory, a bash shell and sudo group membership.
RUN useradd -ms /bin/bash vidjil && usermod -aG sudo vidjil
# Germline data: fetches the reference sequences and generates the browser's
# germline.js from the default germline description file.

DEFAULT_G=homo-sapiens.g
DIRS=homo-sapiens/ mus-musculus/ rattus-norvegicus/
GERMLINE_JS=../browser/js/germline.js

# Default goal: only fetch the saved germline data.
all: get-saved-data

# Saved data plus the generated JavaScript file.
germline: get-saved-data $(GERMLINE_JS)

# Only (re)generate the JavaScript file.
js: $(GERMLINE_JS)

$(GERMLINE_JS): $(DEFAULT_G)
	python buildBrowserGermline.py $< $@

# Start from a clean tree, then run the get-germline and get-CD.py scripts.
get-all-data: clean
	sh get-germline
	python get-CD.py

# Run the get-saved-germline script; germline_id must exist.
get-saved-data: germline_id
	sh get-saved-germline

clean:
	rm -rf $(DIRS) $(GERMLINE_JS)

# Fetch the saved germlines into a scratch directory and diff them against the
# working directory. diff exits non-zero when differences are found, so this
# target then fails.
diff-from-saved:
	rm -rf saved-germline
	mkdir saved-germline
	cd saved-germline && sh ../get-saved-germline
	echo
	diff -r -u -x "*[.][^f][^a]" -x "germline*" -x "get*" -x "Makefile" -x "saved-*" saved-germline/ .

# Archive the germline data and the generated JavaScript, named after germline_id.
distrib: get-all-data js
	cd .. && tar cvzf germline-`cat germline/germline_id`.tar.gz germline/germline_id germline/*/*.fa germline/IMGT_RELEASE browser/js/germline.js

# None of these targets produce a file of the same name.
.PHONY: all germline js get-all-data get-saved-data clean diff-from-saved distrib
import json
import sys
def get_required_files(germlines_data):
    '''
    Parse the germlines data file and list every file it requires.

    germlines_data is the path of a JSON file holding a 'path' entry and a
    'systems' mapping; each system has a list of 'recombinations', and each
    recombination may list files under the keys '5', '4' and '3'.

    Returns the list of required files, each prefixed with 'path' + '/',
    in order of first appearance and without duplicates.
    '''
    # Close the file deterministically instead of relying on garbage collection.
    with open(germlines_data, 'r') as handle:
        g_json = json.load(handle)

    path = g_json['path']
    germlines_json = g_json['systems']

    files = []
    seen = set()  # constant-time membership test; `files` keeps the order
    for germline in germlines_json:
        for recombination in germlines_json[germline]['recombinations']:
            for gene in ['5', '4', '3']:
                if gene not in recombination:
                    continue
                for f in recombination[gene]:
                    full_name = path + '/' + f
                    if full_name not in seen:
                        seen.add(full_name)
                        files.append(full_name)
    return files
if len(sys.argv) != 3:
print("Usage: %s <JSON/DATA germline file> <JSON output file>" % sys.argv[0])
sys.exit()
data_file = sys.argv[1]
output_name = sys.argv[2]
table = {}
identifiant = ""
sequence = ""
germline_files = get_required_files(data_file)
for current_file in germline_files:
try:
fasta = open(current_file, "r")
except IOError as e:
raise type(e),\
type(e)(str(e) + '\nDid you forget to run ``make\'\' in the germline directory?\n'\
+'Otherwise, please tell us about the problem at contact@vidjil.org'),\
sys.exc_info()[2]
system = current_file.split('/')[-1].split('.')[0]
table[system] = {}
for ligne in fasta :
ligne = ligne.rstrip('\n\r')
if ligne:
if ligne[0]=='>' :
identifiant=ligne[1:]
if '|' in identifiant:
identifiant = identifiant.split('|')[1]
if '_' in identifiant:
identifiant = identifiant.split('_')[0]
sequence = ""
else :
sequence+=ligne
if sequence:
# If there is still some sequence left, this value will be overwritten in the next pass
table[system][identifiant]=sequence
fasta.close()
with open(output_name, "w") as file :
file.write("germline = ")
json.dump(table, file, indent=2, sort_keys=True)
data = open(data_file, "r")
file.write( "\n\n" )
file.write("germline_data = ")
file.write( data.read() )
import sys
# Complement of each nucleotide code, including the ambiguity codes.
COMPLEMENT_NUCLEOTIDE = {
    'A': 'T',
    'T': 'A',
    'C': 'G',
    'G': 'C',
    # pyrimidine (CT) / purine (AG)
    'Y': 'R',
    'R': 'Y',
    # weak (AT) / strong (GC)
    'W': 'S',
    'S': 'W',
    # keto (TG) / amino (AC)
    'K': 'M',
    'M': 'K',
    'B': 'V',
    'V': 'B',
    'D': 'H',
    'H': 'D',
    'N': 'N',
}


def revcomp(seq):
    '''Returns the reverse complement of a sequence.

    The lookup is case-insensitive and the result is upper case. A character
    without a known complement is reported on stderr and replaced by 'N'.

    >>> revcomp('ACGNTT')
    'AANCGT'
    '''
    complemented = []
    for nucl in reversed(seq):
        complement = COMPLEMENT_NUCLEOTIDE.get(nucl.upper())
        if complement is None:
            sys.stderr.write("! Unknown nucleotide : '%s' " % nucl + seq)
            complement = 'N'
        complemented.append(complement)
    return ''.join(complemented)
def parse(fasta, endline=''):
    '''Iterate over the records of a FASTA source, yielding (header, sequence).

    fasta is any iterable of lines. Lines are stripped; blank lines and lines
    starting with '#' are ignored. The header is returned without its leading
    '>'. endline is appended to every sequence line before concatenation.
    Sequence lines found before the first header are yielded with an empty
    header.
    '''
    header = ''
    chunks = []

    for raw_line in fasta:
        line = raw_line.strip()
        if not line or line[0] == '#':
            continue

        if line[0] != '>':
            chunks.append(line + endline)
            continue

        # New header: flush the record collected so far, if any.
        if header or chunks:
            yield (header, ''.join(chunks))
        header = line[1:]
        chunks = []

    # Flush the last record.
    if header or chunks:
        yield (header, ''.join(chunks))
def extract_field_if_exists(s, separator, field_number):
    '''Return one separator-delimited field of s, or s itself if it is missing.

    s is split on separator; the field at index field_number (0-based) is
    returned when it exists. Otherwise the whole input string is returned
    unchanged.
    '''
    fields = s.split(separator)
    if len(fields) > field_number:
        return fields[field_number]
    # Fall back to the input string (not the builtin `str` type).
    return s
def parse_as_Fasta(fasta):
    '''Yield one Fasta object per (header, sequence) record found by parse().'''
    for record in parse(fasta):
        header, sequence = record
        yield Fasta(header, sequence)
class Fasta():
    '''A single FASTA record: a header line (without '>') and its sequence.'''

    def __init__(self, header, sequence):
        self.header, self.seq = header, sequence

    def _header_field(self, index):
        '''Field `index` of the '|'-separated header, via extract_field_if_exists.'''
        return extract_field_if_exists(self.header, '|', index)

    def revcomp(self):
        '''Replace the stored sequence by its reverse complement, in place.'''
        reversed_complement = revcomp(self.seq)
        self.seq = reversed_complement

    @property
    def name(self):
        '''Second '|'-separated field of the header.'''
        return self._header_field(1)

    @property
    def species(self):
        '''Third '|'-separated field of the header.'''
        return self._header_field(2)

    def __len__(self):
        '''Length of the sequence.'''
        return len(self.seq)

    def __str__(self):
        '''The record in FASTA format, newline-terminated.'''
        return '>%s\n%s\n' % (self.header, self.seq)
'''Generates artificial VJ recombinations'''
from __future__ import print_function
import json
import fasta
import random
import argparse
random.seed(33328778554)
class vdj_repertoire:
    '''
    Labelled groups of sequences, and the recombinations that pick one
    sequence from each group.

    labels[i] is the label of the group sequences[i].
    '''

    # Class-level fallbacks kept for backward compatibility; __init__ always
    # sets per-instance attributes, so instances do not share these lists.
    labels = []
    sequences = []

    def __init__(self, labels = None, repertoire = None):
        '''
        labels is the list of group labels; repertoire is the list of groups,
        each group being a list of sequences. A missing argument yields an
        empty list.
        '''
        self.sequences = repertoire if repertoire is not None else []
        self.labels = labels if labels is not None else []

    @classmethod
    def files(cls, labels, repertoire):
        '''
        Build a repertoire from FASTA files.

        repertoire is a list of lists of file names: all files of one inner
        list are read with fasta.parse_as_Fasta and merged into one group.
        '''
        sequences = []
        for filenames in repertoire:
            current_sequences = []
            for filename in filenames:
                # Close each file as soon as it has been fully parsed.
                with open(filename) as handle:
                    current_sequences += list(fasta.parse_as_Fasta(handle))
            sequences.append(current_sequences)
        return cls(labels, sequences)

    def germlines(self):
        '''Return the list of labels.'''
        return self.labels

    def nb_sequences(self, label):
        '''
        Number of sequences in the group named label.

        >>> rep = vdj_repertoire(['a', 'b'], [['aat', 'taa'], ['gcc']])
        >>> rep.nb_sequences('a')
        2
        >>> rep.nb_sequences('b')
        1
        >>> rep.nb_sequences('c')
        Traceback (most recent call last):
        ...
        ValueError: 'c' is not in list
        '''
        index = self.labels.index(label)
        return len(self.sequences[index])

    def recombinations(self, at_most = None):
        '''
        Returns an iterable of recombinations, each given as a list holding
        one sequence per group.

        Without at_most every combination is produced; with at_most, that
        many recombinations are drawn at random (repetitions are possible).

        >>> [v for v in vdj_repertoire(['a', 'b'], [['aat', 'taa'], ['gcc']]).recombinations()]
        [['aat', 'gcc'], ['taa', 'gcc']]
        >>> len([v for v in vdj_repertoire(['a', 'b'], [['aat', 'taa'], ['gcc']]).recombinations(1)])
        1
        '''
        if at_most is not None:
            return self._at_most_recombinations_(at_most)
        else:
            return self._all_recombinations_()

    def _at_most_recombinations_(self, at_most):
        '''Yield at_most recombinations, each built from one random choice per group.'''
        nb = 0
        while nb < at_most:
            recombination = []
            for current_rep in self.sequences:
                recombination.append(random.choice(current_rep))
            yield recombination
            nb += 1

    def _all_recombinations_(self):
        '''Every possible recombination, as enumerated by list_recombinations.'''
        return list_recombinations(self.sequences)
def list_recombinations(l):
    '''
    Enumerate every way of picking one item from each list of l.

    The first list varies fastest and the last list slowest. An empty l
    yields a single empty recombination.

    >>> [i for i in list_recombinations([[1], [10, 11], [100, 102]])]
    [[1, 10, 100], [1, 11, 100], [1, 10, 102], [1, 11, 102]]
    >>> [i for i in list_recombinations([[1, 2], [3]])]
    [[1, 3], [2, 3]]
    '''
    if not l:
        yield []
        return

    leading, last = l[:-1], l[-1]
    for item in last:
        for prefix in list_recombinations(leading):
            yield prefix + [item]
class vdj_recombination:
    '''
    Joins an ordered list of sequences, applying deletions at the junction
    ends and inserting random nucleotides between consecutive sequences.
    '''

    deletions = None
    insertions = None
    processing = []

    def __init__(self, insertions = None, deletions = None, processing = None):
        '''insertions and deletions are lists of length 1 or whose length are the
        number of locations where insertions and deletions take place. They
        contain a function with no parameter returning a natural integer
        corresponding to the number of expected insertions/deletions.

        processing is a list of the same size which contains a function which
        is applied to a string and which returns a string. It is used to alter
        the input sequence.

        Defaults: no insertion, no deletion, sequences left unchanged.
        '''
        if insertions is not None:
            self.insertions = insertions
        else:
            self.insertions = [(lambda: 0)]

        if deletions is not None:
            self.deletions = deletions
        else:
            self.deletions = [(lambda: 0)]

        if processing is not None:
            self.processing = processing
        else:
            self.processing = [(lambda s: s)]

    def recombine(self, sequences):
        '''
        Recombine the sequences and return the result as a fasta.Fasta.

        sequences is an ordered list of objects exposing `seq` and `name`.
        Dots are removed from each `seq` (the object is updated in place).
        Every sequence but the first is trimmed at its start, and every
        sequence but the last is trimmed at its end and followed by a random
        insertion.

        The header chains the names as `<name> <end deletions>/<inserted
        nucleotides>/<start deletions> <name> ...`. In the sequence, each
        processed segment is followed by a newline, the inserted nucleotides
        (possibly none) and another newline.
        '''
        name = ''
        seq = ''

        # Repeat the configured lists so that a single-element list applies
        # to every junction (one insertion and two deletions per junction).
        nb_junctions = len(sequences) - 1
        insertions = self.insertions * nb_junctions
        deletions = self.deletions * (2 * nb_junctions)
        process = self.processing * len(sequences)

        for i, sequence in enumerate(sequences):
            nb_deletions_start = 0
            nb_deletions_end = 0
            N_insertions = ''

            # str.replace behaves like the former translate(None, '.') on
            # Python 2 byte strings and also works on unicode / Python 3.
            sequence.seq = sequence.seq.replace('.', '')

            if i > 0:
                # Start deletion
                nb_deletions_start = deletions[2*i-1]()
                name += '/%d ' % nb_deletions_start

            name += sequence.name

            if i < len(sequences) - 1:
                # End deletion
                nb_deletions_end = deletions[2*i]()
                N_insertions = random_sequence(['A', 'C', 'G', 'T'],
                                               insertions[i]())
                name += ' %d/%s' % (nb_deletions_end, N_insertions)

            # A slice end of -0 would empty the sequence: use None to keep
            # everything when nothing is deleted at the end.
            nb_deletions_end = -nb_deletions_end if nb_deletions_end > 0 else None
            seq += process[i](sequence.seq[nb_deletions_start:nb_deletions_end])+"\n"+N_insertions+"\n"

        return fasta.Fasta(name, seq)
def random_sequence(characters, length):
    '''Return a string of `length` items, each drawn at random from characters.'''
    drawn = []
    for _ in range(length):
        drawn.append(random.choice(characters))
    return ''.join(drawn)
def mutate_sequence(sequence, probability):
    '''
    Return a mutated copy of the DNA sequence given in parameter.

    Each position is mutated with the given (per nucleotide) probability.
    A, C, G or T (in any case) is replaced by a different upper-case
    nucleotide; any other character is replaced by a random nucleotide.
    This solution is inspired from Blckknght's: http://stackoverflow.com/a/24063748/1192742
    '''
    nucleotides = ['A', 'C', 'G', 'T']
    mutated = []

    for nt in sequence:
        if not random.random() < probability:
            # Position left untouched.
            mutated.append(nt)
            continue

        upper = nt.upper()
        if upper in nucleotides:
            # Shifting by 1 to 3 positions (cyclically) never lands on the
            # original nucleotide.
            shift = random.randint(1, 3)
            position = (nucleotides.index(upper) - shift) % len(nucleotides)
            mutated.append(nucleotides[position])
        else:
            mutated.append(random.choice(nucleotides))

    return ''.join(mutated)
def random_pos_int(mean, stddev):
    '''
    Returns a non-negative random integer.

    A value is drawn from a Gaussian distribution with the given mean and
    standard deviation; negative draws give 0, other draws are truncated
    to an integer.
    '''
    drawn = random.gauss(mean, stddev)
    return 0 if drawn < 0 else int(drawn)
def get_gene_name(allele):
    '''
    From fasta sequence to Ig/TR gene name.

    Returns the part of allele.name located before the first '*'. A name
    without '*' is returned whole (it used to lose its last character).
    '''
    return allele.name.partition('*')[0]
def write_seq_to_file(seq, code, f):
    '''
    Tag the header of seq and write the record to the open file f.

    The header is updated in place: spaces become underscores and
    "__" + code is appended. str(seq) is then written to f.
    '''
    underscored = '_'.join(seq.header.split(' '))
    seq.header = underscored + "__" + code
    f.write(str(seq))
def generate_to_file(repertoire, recombination, code, f, nb_recomb):
    '''
    Write every recombination of the repertoire to the file named f.

    Each recombination returned by repertoire.recombinations() is generated
    nb_recomb times with recombination.recombine() and written with
    write_seq_to_file(), tagged with code. Progress is printed on stdout.
    '''
    print(" ==>", f)
    nb = 0
    # The `with` block flushes and closes the output file, including on error.
    with open(f, 'w') as output:
        for recomb in repertoire.recombinations():
            for i in range(nb_recomb):
                write_seq_to_file(recombination.recombine(recomb), code, output)
            # NOTE(review): counts recombinations, not written sequences; the
            # original indentation of this line was lost -- confirm.
            nb += 1
    print(" ==> %d recombinations" % nb)
def list_random_tuple(s):
    '''
    argparse type: parse 'mean,stddev' couples separated by colons.

    Returns one parameterless function per couple; each call draws a value
    with random_pos_int(mean, stddev). Raises argparse.ArgumentTypeError
    when s does not have the expected format.
    '''
    try:
        result_list = []
        for item in s.split(':'):
            one, two = [float(value) for value in item.split(',')]
            # Default arguments freeze the current couple; a plain closure
            # would make every function use the last couple of the list.
            result_list.append((lambda mean=one, stddev=two: random_pos_int(mean, stddev)))
        return result_list
    except Exception as e:
        raise argparse.ArgumentTypeError('A list separated by colons, of couples separated by commas must be provided (ex: 1,2:2,1) '+str(e))
def list_int(s):
    '''
    argparse type: parse integers separated by colons.

    Returns one parameterless function per integer, each returning its own
    value. Raises argparse.ArgumentTypeError when an item is not an integer.
    '''
    try:
        # Convert eagerly so that invalid input is reported at parsing time.
        values = [int(item) for item in s.split(':')]
    except Exception as e:
        raise argparse.ArgumentTypeError('A list of integers separated by colons must be provided (ex: 1:2) '+str(e))
    # Default arguments freeze each value; a plain closure would make every
    # function return the last value of the list.
    return [(lambda value=value: value) for value in values]
if __name__ == '__main__':
DESCRIPTION='Script generating fake V(D)J recombinations'
parser = argparse.ArgumentParser(description=DESCRIPTION)
parser.add_argument('-g', '--germlines', type=file, default='germlines.data', help='path to the germlines.data file')
parser.add_argument('--deletions', '-d', type=list_int, default = [(lambda: 5)], help='List -- separated by colons -- of the number of deletions at junctions (or single value, if the number is the same everywhere).')
parser.add_argument('--insertions', '-i', type=list_int, default = [(lambda: 3)], help='List -- separated by colons -- of the number of insertions at junctions (or single value, if the number is the same everywhere')
parser.add_argument('--random-deletions', '-D', type=list_random_tuple, help='List of random deletions at junctions under the format mean,standard_deviation (or single value, if the number is the same everywhere')
parser.add_argument('--random-insertions', '-I', type=list_random_tuple, help='List of the number of insertions at junctions under the format mean,standard_deviation (or single value, if the number is the same everywhere')
parser.add_argument('-n', '--nb-recombinations', type=int, default=5, help='Number of times each recombination (with insertions/deletions) is generated')
parser.add_argument('-e', '--error', type=float, default = 0., help='Probability of error at the nucleotide level')
args = parser.parse_args()
germlines_json = args.germlines.read().replace('germline_data = ', '')
germlines = json.loads(germlines_json)
for code in germlines:
g = germlines[code]
print("--- %s - %-4s - %s" % (g['shortcut'], code, g['description']))
# Read germlines