Commit 62133868 authored by Marc Duez's avatar Marc Duez
Browse files
parents 515f3190 4c5dc73e
......@@ -78,10 +78,10 @@ cleanall: clean
# Lists of files that make up a release.
# RELEASE_SOURCE, RELEASE_MAKE and RELEASE_FILES used to be assigned twice in a
# row; make only keeps the last `=` assignment of a variable, so the shadowed
# first definitions were dead and have been dropped.
# The recursive `=` flavour is kept on purpose: VIDJIL_ALGO_SRC is defined
# outside this section, so expansion time is left unchanged.

# C++ sources and headers of the algorithm (top level, core/, tests/, cgi/, lib/).
RELEASE_SOURCE = $(wildcard $(VIDJIL_ALGO_SRC)/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/tests/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/tests/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/cgi/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.h)
# Makefiles shipped with the release.
RELEASE_MAKE = ./Makefile $(VIDJIL_ALGO_SRC)/Makefile $(VIDJIL_ALGO_SRC)/core/Makefile $(VIDJIL_ALGO_SRC)/tests/Makefile $(VIDJIL_ALGO_SRC)/lib/Makefile germline/Makefile data/Makefile
# Test data and expected results.
# NOTE(review): $(VIDJIL_ALGO_SRC)/tests/ is listed twice below — confirm whether two distinct paths were intended.
RELEASE_TESTS = data/get-sequences $(wildcard data/*.fa) $(wildcard data/*.fq) $(VIDJIL_ALGO_SRC)/tests/ $(wildcard $(VIDJIL_ALGO_SRC)/tests/*.should_get) $(wildcard $(VIDJIL_ALGO_SRC)/tests/bugs/*.fa) $(wildcard $(VIDJIL_ALGO_SRC)/tests/bugs/*.should_get) $(VIDJIL_ALGO_SRC)/tests/
# Everything above, plus germline helpers, documentation and sample data.
RELEASE_FILES = $(RELEASE_SOURCE) $(RELEASE_TESTS) $(RELEASE_MAKE) germline/get-germline germline/ doc/ doc/LICENSE data/segmentation.fasta $(wildcard data/*.fa.gz) $(wildcard data/*.label)
CURRENT_DIR = vidjil
This changelog concerns the algorithmic part (C++) of Vidjil.
2014-11-xx The Vidjil Team
2014-11-28 The Vidjil Team
* New input method: gzip-compressed fasta files are now accepted (core/fasta.cpp, gzstream/zlib)
* Better multi-germline analysis (-g) and documentation. This analysis can now safely be used.
* Streamlined input. Option -d is removed, and a germline is required with (-V/(-D)/-J, or -G, or -g)
* Streamlined input. Option -d is removed, and a germline is required (-V/(-D)/-J, or -G, or -g)
* Removed unused code parts as well as some files
* New and updated unit and functional tests - now more than 80% code coverage
* New public continuous integration - travis, coveralls
......@@ -128,10 +128,9 @@ Germline databases (one -V/(-D)/-J, or -G, or -g option must be given for all co
-g <path> multiple germlines (in the path <path>, takes TRA, TRB, TRG, TRD, IGH and IGL and sets window prediction parameters)
There are three different ways to select the germline databases:
- Options such as =-G germline/IGH= or =-G germline/TRG= select one germline system.
- The =-V/(-D)/-J= options make it possible to select individual V, (D) and J repertoires (fasta files).
This allows in particular to select incomplete rearrangement using V or J repertoires.
In particular, this makes it possible to select incomplete rearrangements using custom V or J repertoires with added sequences.
- The =-g germline/= option launches the analysis on the six germlines (TRG and IGH are tested first, then the other ones).
For now, the seed and window parameters are hard-coded for each germline. In a future release, the mechanism will be more flexible
and will parse the =germline/ file.
......@@ -158,6 +158,9 @@ class Window:
return obj
def latex(self, point=0):
    '''
    Return this window formatted as one LaTeX table row.

    The row holds the first value of self.d["reads"], then the window name
    (self.d["name"] when present, otherwise self.d["id"]), and ends with the
    id as a trailing LaTeX comment. The `point` argument is not used.
    '''
    label = self.d["name"] if 'name' in self.d else self.d["id"]
    fields = (self.d["reads"][0], label, self.d["id"])
    return r" & & %7d & %-50s \\ %% %s" % fields
### print essential info about Window
def __str__(self):
    '''
    Return a short summary "<window : reads top id>".

    A "top" value equal to sys.maxint is displayed as '*'.
    '''
    reads = self.d["reads"]
    rank = self.d["top"]
    if rank == sys.maxint:
        rank = '*'
    return "<window : %s %s %s>" % (reads, rank, self.d["id"])
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import fuse
import sys
import argparse
# Command-line interface: read-count threshold, maximal number of clones
# to print, verbosity flag, and one or more .vidjil input files.
parser = argparse.ArgumentParser()
parser.add_argument('--min', '-m', type=int, default=1, help='minimal number of reads in the clone (%(default)d)')
parser.add_argument('--top', '-t', type=int, default=5, help='maximal number of clones to display (%(default)d)')
parser.add_argument('--verbose', '-v', action='store_true', help='verbose output')
parser.add_argument('file', nargs='+', help='''.vidjil files''')
def main():
args = parser.parse_args()
datas = []
for i in args.file:
data = fuse.ListWindows()
data.load(i, False, verbose = args.verbose)
print ' ', i
out = []
for w in data:
if w.d['reads'][0] >= args.min:
out += [(-w.d['reads'][0], w.latex())]
for bla, ltx in sorted(out[]):
print ltx
print r' \hline'
if __name__ =='__main__':
# coding: utf8
import gluon.contrib.simplejson
import defs
import vidjil_utils
import os
# When the request carries an Origin header, echo it back in
# Access-Control-Allow-Origin.
# NOTE(review): this reflects whatever origin the client sends — confirm that
# no origin allow-list is expected here.
_origin = request.env.http_origin
if _origin:
    response.headers['Access-Control-Allow-Origin'] = _origin
......@@ -129,7 +130,7 @@ def upload():
mes = f.filename + ": upload finished"
seq_file = defs.DIR_SEQUENCES+db.sequence_file[request.vars["id"]].data_file
size = defs.format_size(os.path.getsize(seq_file))
size = vidjil_utils.format_size(os.path.getsize(seq_file))
db.sequence_file[request.vars["id"]] = dict(size_file = size)
res = {"message": mes}
import math
### Upload directory for .fasta/.fastq.
### Old sequences files could be thrown away.
### No regular backup.
......@@ -16,20 +14,3 @@ DIR_RESULTS = '/mnt/result/results/'
DIR_OUT_VIDJIL_ID = '/mnt/result/tmp/out-%06d/'
def format_size(n):
    '''
    Takes an integer n, representing a filesize and returns a string
    where the size is formatted in MB or GB.

    The size is first truncated to whole megabytes (1 MB = 1000*1000 bytes).
    Values strictly above 1000 MB are shown in GB with at most three decimals.

    >>> format_size(1000000)
    '1.0 MB'
    >>> format_size(1024*1024*1024)
    '1.073 GB'
    '''
    # Explicit floor division: same result on Python 2 and Python 3
    # (the previous `/` relied on Python 2 integer division).
    size = float(n // 1000 // 1000)

    if size > 1000:
        return str(round(size / 1000, 3)) + " GB"
    return str(size) + " MB"
import math
def format_size(n, unit='B'):
    '''
    Takes an integer n, representing a filesize and returns a string
    where the size is formatted with the correct SI prefix and
    with a constant number of significant digits.

    n    -- the size to format
    unit -- suffix appended after the SI prefix (default: 'B')

    Sizes too large for the last prefix ('P') are still expressed with it.

    >>> format_size(42)
    '42 B'
    >>> format_size(123456)
    '123 kB'
    >>> format_size(1000*1000)
    '1.00 MB'
    >>> format_size(1024*1024*1024)
    '1.07 GB'
    >>> format_size(42*(2**40))
    '46.2 TB'
    >>> format_size(100000)
    '100 kB'
    >>> format_size(10**18)
    '1000 PB'
    '''
    size = float(n)
    PREFIXES = ['', 'k', 'M', 'G', 'T', 'P']

    # Move to the next prefix only while one is available, so that the
    # value and the prefix can never get out of step.
    prefix = PREFIXES[0]
    for next_prefix in PREFIXES[1:]:
        if size < 1000:
            break
        size /= 1000
        prefix = next_prefix

    # Three significant digits; plain bytes are never shown with decimals.
    if size >= 100 or not prefix:
        fmt = '%.0f'
    elif size >= 10:
        fmt = '%.1f'
    else:
        fmt = '%.2f'

    return fmt % size + ' ' + prefix + unit
sudo -u www-data python ../../../ -S vidjil -M -R applications/vidjil/scripts/
sudo -u www-data python ../../../ -S vidjil -M -R applications/vidjil/scripts/
import defs
import datetime
patient_id = 10
our_id = 0
for res in db( == db.sequence_file.patient_id).select():
our_id += 1
print "ln -s %s/%-20s %5s.fa" % (defs.DIR_SEQUENCES, res.sequence_file.data_file, our_id),
print "\t", "# seq-%04d" %, "%-20s" % res.sequence_file.filename,
print "\t", "# pat-%04d (%s %s)" % (, res.patient.first_name, res.patient.last_name)
sudo -u www-data python ../../../ -S vidjil -M -R applications/vidjil/scripts//
sudo -u www-data python ../../../ -S vidjil -M
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment