Commit 1141fd67 authored by Thonier Florian's avatar Thonier Florian

Merge branch 'dev' into 'feature-t/3944-pouvoir-calculer-des-distributions-2'

# Conflicts:
#   tools/fuse.py
parents 97fe8d94 7fabb78d
Pipeline #90570 failed with stage
in 7 seconds
......@@ -299,7 +299,6 @@ test_server_functional:
- sed -i '/\/etc\/nginx\/ssl\:\/etc\/nginx\/ssl/d' ./docker/docker-compose.yml
- sed -i 's/\:latest/\:test/g' ./docker/docker-compose.yml
- cd docker/vidjil-server/conf/ && mv defs.py defs_https.py && mv defs_http.py defs.py && cd ../../..
- cd docker/vidjil-client/conf/ && mv conf.js conf_https.js && mv conf_http.js conf.js && cd ../../..
- make germline && cp browser/js/germline.js docker/vidjil-client/conf
- cd docker && docker-compose up -d && cd ..
- sed -i "s/^python\ \.\.\/\.\.\/\.\./docker\ exec\ docker_uwsgi_1\ python\ \/usr\/share\/vidjil\/server\/web2py/" server/web2py/applications/vidjil/tests/init_func_test_db.sh
......
......@@ -2,12 +2,13 @@
Parses output of various RepSeq programs.
Takes either:
- a .fa file, a _Summary.txt file as produced by IMGT/V-QUEST
- or a results file produced by MiXCR
- or a results file produced by MiXCR or IgReC
and creates a .vdj file to be checked by should-vdj-to-tap.py
python repseq_vdj.py data-curated/curated_IG.fa data-curated/curated_ig_Summary.txt > data-curated/imgt-IG.vdj
python repseq_vdj.py data-curated/curated_TR.fa data-curated/curated_tr_Summary.txt > data-curated/imgt-TR.vdj
python repseq_vdj.py data-curated/mixcr.results > data-curated/mixcr.vdj
python repseq_vdj.py bla.igrec.results
python repseq_vdj.py data-curated/curated_IG.fa data-curated/igblast/IG/*.aln > data-curated/igblast-IG.vdj
python repseq_vdj.py data-curated/curated_TR.fa data-curated/igblast/TR/*.aln > data-curated/igblast-TR.vdj
'''
......@@ -91,6 +92,9 @@ class Result(VDJ_Formatter):
self.populate()
def __contains__ (self, key):
    '''Support `key in result` by delegating the membership test to `self.d`.'''
    return key in self.d
def __getitem__(self, key):
    '''Support `result[key]` by delegating the lookup to `self.d`.'''
    return self.d[key]
......@@ -98,6 +102,49 @@ class Result(VDJ_Formatter):
return str(self.d)
### IgReC
# Labels handed to IgReC_Result (see IgReC_Result.parse); the doctest of that
# class feeds ten tab-separated values in exactly this order.
IGREC_LABELS = [
    'Read id',
    'locus',
    'V id',
    'V start',
    'V end',
    'V score',
    'J id',
    'J start',
    'J end',
    'J score',
]
class IgReC_Result(Result):
    r'''
    Result built from one tab-separated line of an IgReC results file.

    >>> lig = '\t'.join(['blabli4577', 'TRB', 'TRBV13*02', '1', '164', '0.58156', 'TRBJ1-5*01', '319', '367', '0.94'])
    >>> r = IgReC_Result(lig)
    >>> r['Read id']
    'blabli4577'
    >>> r.vdj[V]
    ['TRBV13*02']
    >>> r.vdj[J]
    ['TRBJ1-5*01']
    '''

    def parse(self, l):
        '''Set the IgReC labels; return `l` if it holds a tab once stripped, else None.'''
        self.labels = IGREC_LABELS
        has_fields = '\t' in l.strip()
        return l if has_fields else None

    def populate(self):
        '''Store the V and J gene ids, each as a one-element list, in `self.vdj`.'''
        for segment, label in ((V, 'V id'), (J, 'J id')):
            self.vdj[segment] = [self[label]]
def header_igrec_results(ff_igrec):
    '''
    Iterate over the lines of the IgReC results file `ff_igrec`.

    For each line, yield a pair made of:
    - the 'Read id' field with every '_' replaced by a space,
    - the value returned by `to_vdj()` on the corresponding IgReC_Result.

    The file is closed once the generator is exhausted or closed.
    '''
    # A `for` loop over the file ends the generator cleanly at end of file.
    # The previous `while True: f.next()` relied on a StopIteration escaping
    # the generator body (a RuntimeError under PEP 479) and on the
    # Python 2-only `next()` method of file objects; it also never closed
    # the file.
    with open(ff_igrec) as f:
        for l in f:
            result = IgReC_Result(l)
            yield result['Read id'].replace('_', ' '), result.to_vdj()
### MiXCR
......@@ -111,16 +158,20 @@ class MiXCR_Result(Result):
return None
def populate(self):
self.vdj[V] = [self['Best V hit']]
if self['Best D hit']:
self.vdj[D] = [self['Best D hit']]
self.vdj[J] = [self['Best J hit']]
self.vdj[V] = [self['bestVHit']]
if self['bestDHit']:
self.vdj[D] = [self['bestDHit']]
self.vdj[J] = [self['bestJHit']]
self.vdj[N1] = self['N. Seq. VDJunction']
self.vdj[N2] = self['N. Seq. DJJunction']
self.vdj[N] = self['N. Seq. VJJunction']
if 'nSeqVDJunction' in self:
self.vdj[N1] = self['nSeqVDJunction']
if 'nSeqDJJunction' in self:
self.vdj[N2] = self['nSeqDJJunction']
if 'nSeqVJJunction' in self:
self.vdj[N] = self['nSeqVJJunction']
self.vdj[JUNCTION] = self['AA. Seq. CDR3']
if 'aaSeqCDR3' in self:
self.vdj[JUNCTION] = self['aaSeqCDR3']
def header_mixcr_results(ff_mixcr):
......@@ -128,12 +179,12 @@ def header_mixcr_results(ff_mixcr):
f = open(ff_mixcr).__iter__()
mixcr_first_line = f.next()
globals()['mixcr_labels'] = mixcr_first_line.split('\t')
globals()['mixcr_labels'] = mixcr_first_line.rstrip().split('\t')
while True:
l = f.next()
l = f.next().rstrip()
result = MiXCR_Result(l)
yield result['Description R1'], result.to_vdj()
yield result['descrsR1'], result.to_vdj()
......@@ -354,6 +405,8 @@ if __name__ == '__main__':
if 'mixcr' in sys.argv[1]:
vdj.parse_from_gen(header_mixcr_results(sys.argv[1]))
elif 'igrec' in sys.argv[1]:
vdj.parse_from_gen(header_igrec_results(sys.argv[1]))
elif 'igblast' in sys.argv[2]:
vdj.parse_from_gen(header_igblast_results(sys.argv[1], sys.argv[2:]))
else:
......
>IGHV1-18 (IGHJ1, IGHJ2)
atggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagagagctgaatacttccagcactggggccagggcaccctggtcaccgtctcctcag
......@@ -137,7 +137,7 @@ def should_pattern_to_regex(p):
gene = gene.replace('/', '/?')
if args.ignore_D and ('IGHD' in gene or 'TRBD' in gene or 'TRDD' in gene):
gene = '[^[:space]]*'
gene = '[^[:space:]]*'
allele = '[[:digit:]]*'
if args.ignore_allele:
......@@ -168,7 +168,7 @@ def should_pattern_to_regex(p):
if len(r) > 1 and r[1][0] == '|':
# We have an alternative
regex_pattern = '('+' '.join(r)+').*'
regex_pattern = '.*('+''.join(r)+').*'
else:
regex_pattern = '.*'.join(r)
......@@ -248,6 +248,46 @@ def should_result_to_tap(should_pattern, result, tap_id):
True
>>> srtt_ok(should, other_allele)
True
>>> should = 'TRAV1-1 TRAJ1'
>>> other = 'TRAV1-1*01 1/ACG/3 TRAJ1*01'
>>> (args.ignore_N, args.ignore_del) = (True, True)
>>> srtt_ok(should, other)
True
>>> should = 'TRAV1-1 (TRAJ1, TRAJ2)'
>>> other = 'TRAV1-1*01 1/ACG/3 TRAJ1*01'
>>> srtt_ok(should, other)
True
>>> should = '(IGKV1D-37, IGKV1-37) IGKJ5'
>>> curated = 'IGKV1D-37*01 2/ATA/0 IGKJ5*01'
>>> srtt_ok(should, curated)
True
>>> should = 'IGKV1D-37 IGKJ5'
>>> curated = 'IGKV1D-37*01 2/ATA/0 IGKJ5*01'
>>> srtt_ok(should, curated)
True
# Negative tests matter too
>>> should = '(IGKV1D-37, IGKV1-37) IGKJ5'
>>> curated = 'IGKV1D-32*01 2/ATA/0 IGKJ5*01'
>>> srtt_ok(should, curated)
False
>>> should = 'IGHV7-4-1*02 IGHD6-25*01 (IGHJ6*02 ,IGHJ6*04)'
>>> obtained = 'IGHV7-4-1*02 1//4 IGHJ6*01'
>>> args.ignore_D = True
>>> srtt_ok(should, obtained)
True
>>> args.ignore_allele = False
>>> srtt_ok(should, obtained)
False
>>> (args.ignore_allele, args.ignore_D) = (True, False)
>>> srtt_ok(should, obtained)
False
'''
m_locus = r_locus.search(should_pattern)
......
......@@ -85,6 +85,9 @@ function loadAfterConf() {
}else{
main();
}
if (typeof config.addons !== "undefined") {
require(config.addons);
}
})
})
},
......
/*
* Vidjil browser, main configuration file
* Vidjil client, main configuration file
* This file must be named 'js/conf.js' to be taken into account
* */
var config = {
/****************
* Static alerts
*/
// "alert": "Rescue server",
/****************
* External services
*/
/* Used for the 'align' script
* If this is not defined, the 'align' button is not available
*/
"cgi_address" : "https://db.vidjil.org/cgi/", // Public test server
// "cgi_address" : "http://127.0.1.1/cgi-bin/",
/* The following options control how the user may have access to .vidjil files.
/* Proxy config for IMGT querying */
/*
"proxy": "https://db.vidjil.org/vidjil/proxy/imgt",
*/
/* Used for the standalone http://app.vidjil.org/analyze page */
"segmenter_address" : "https://db.vidjil.org/vidjil/segmenter",
/* Do we have access to a CloneDB ? */
"clonedb": false,
/****************
/* Access to .vidjil files
* Any combination of 1), 2) and 3) should work
*/
......@@ -42,29 +65,19 @@ var config = {
// "autoload" : "data/Stanford-S22.vidjil",
// "autoload_analysis" : "data/Stanford-S22.analysis"
// Proxy config for IMGT querying
/*
"proxy": "https://db.vidjil.org/vidjil/proxy/imgt"
*/
/* Used for the standalone segmenter page */
"segmenter_address" : "https://db.vidjil.org/vidjil/segmenter",
/****************
* Load extra scripts
*/
/* "addons" : ["js/lib/important-lib.js", "js/myscript.js"], */
/* Do we have access to a CloneDB ? */
"clonedb": false,
/****************
* Tips of the day
*/
"doc_address" : "doctips/",
"available_tips" : [
'T01', 'T02', 'T03',
'T30', 'T31', 'T32'
],
"available_tips" : [ ]
// [ 'T01', 'T02', 'T03', 'T30', 'T31', 'T32' ]
/****************
* Static alerts
*/
// "alert": "Rescue server",
}
......@@ -699,8 +699,11 @@ changeAlleleNotation: function(alleleNotation) {
* if raw is defined, do not normalize
*/
normalize_reads: function(clone, time, raw) {
if (this.normalization_mode == this.NORM_EXTERNAL && clone.normalized_reads != undefined && raw == undefined) {
return clone.normalized_reads[time] ;
if (this.normalization_mode == this.NORM_EXTERNAL &&
clone.normalized_reads != undefined &&
clone.normalized_reads[time] != null &&
raw == undefined) {
return clone.normalized_reads[time] ;
} else {
return clone.reads[time] ;
}
......
......@@ -119,7 +119,7 @@ var json_clone6 = {
"id" : "id6",
"germline" : "TRG",
"reads" : [10,10,0,30],
"normalized_reads" : [20,20,0,30],
"normalized_reads" : [20,20,0,null],
}
QUnit.test("name, informations, getHtmlInfo", function(assert) {
......
......@@ -423,6 +423,12 @@ QUnit.test("normalization", function(assert) {
m.initClones()
assert.equal(m.have_external_normalization, false, "Model have_external_normalization is correctly resetted")
m.set_normalization(m.NORM_EXTERNAL)
assert.equal(m.normalize_reads(c6, 0, undefined), 20, "normalize_reads; get normalized value if present")
assert.equal(m.normalize_reads(c6, 0, false), 10, "normalize_reads; get raw value if specified" )
assert.equal(m.normalize_reads(c6, 2, undefined), 0, "normalize_reads; get value at 0 as computed by external normalization" )
assert.equal(m.normalize_reads(c6, 3, undefined), 30, "normalize_reads; get raw value if normalization equal null")
})
QUnit.test("findGermlineFromGene", function(assert) {
......
Here are aggregated notes forming the developer documentation of vidjil-algo.
This documentation is a work-in-progress, it is far from being as polished as the user documentation.
Help can also be found in the source code and in the commit messages.
# Algorithm
## Code organisation
The algorithm follows roughly those steps:
1. The germlines are read. Germlines are in the fasta format and are read
by the Fasta class (`core/fasta.h`). Germlines are built using the
Germline (or MultiGermline) class (`core/germline.h`)
2. The input sequence file (.fasta, .fastq, .gz) is read by an OnlineFasta
(`core/fasta.h`). The difference with the Fasta class being that all the
data is not stored in memory but the file is read online, storing only
the current entry.
3. Windows must be extracted from the read, which is done by the
WindowExtractor class (`core/windowExtractor.h`). This class has an
`extract` method which returns a WindowsStorage object
(`core/windows.h`) in which windows are stored.
4. To save space consumption, all the reads linked to a given window are
not stored. Only the longer ones are kept. The BinReadStorage class is
used for that purpose (`core/read_storage.h`).
5. In the WindowsStorage, we now have the information on the clusters and on
the abundance of each cluster. However we lack a sequence representative
of the cluster. For that purpose the class provides a
`getRepresentativeComputer` method that provides a
KmerRepresentativeComputer (`core/representative.h`). This class can
compute a representative sequence using the (long) reads that were
stored for a given window.
6. The representative can then be segmented to determine what V, D and J
genes are at play. This is done by the FineSegmenter (`core/segment.h`).
## The xxx germline
- All germlines are inserted in one index using `build_with_one_index()` and
the segmentation method is set to `SEG_METHOD_MAX12` to tell that the
segmentation must somehow differ.
- So that the FineSegmenter correctly segments the sequence, the `rep_5` and
`rep_3` members (class `Fasta`) of the xxx germline are modified by the
FineSegmenter. The `override_rep5_rep3_from_labels()` method from the
Germline is the one that overwrites those members with the Fasta
corresponding to the affectation found by the KmerSegmenter.
## Tests
### Unit
Unit tests are managed using an internal lightweight poorly-designed
library that outputs a TAP file. They are organised in the directory
[algo/tests](../algo/tests).
All the tests are defined in the [tests.cpp](../algo/tests/tests.cpp) file. But, for the sake of
clarity, this file includes other `cpp` files that incorporate all the
tests. A call to `make` compiles and launches the `tests.cpp` file, which
outputs a TAP file (in case of total success) and creates a `tests.cpp.tap`
file (in every case).
1. Tap test library
The library is defined in the [testing.h](../algo/tests/testing.h) file.
Tests must be declared in the [tests.h](../algo/tests/tests.h) file:
1. Define a new macro (in the enum) corresponding to the test name
2. In `declare_tests()` use `RECORD_TAP_TEST` to associate the macro with a
description (that will be displayed in the TAP output file).
Then testing can be done using the `TAP_TEST` macro. The macro takes three
arguments. The first one is a boolean that is supposed to be true, the
second is the test name (using the macro defined in `tests.h`) and the
third one (which can be an empty string) is something which is displayed
when the test fails.
Here are aggregated notes forming the developer documentation on the Vidjil web client.
This documentation is a work-in-progress, it is far from being as polished as the user documentation.
Help can also be found in the source code and in the commit messages.
# Client
## Installation
Run `make` in `browser/` to get the necessary files.
This will in particular get the germline files as well as the icon files.
Opening the `browser/index.html` file is then enough to get a functioning client,
able to open `.vidjil` files with the `import/export` menu.
To work with actual data, the easiest way is to copy `js/conf.js.sample` to `js/conf.js`.
This will unlock the `patients` menu and allow your local client
to access the public server at <http://app.vidjil.org/>.
## Client API and permanent URLs
The client can be opened on a data file specified from a `data` attribute,
and optionally on an analysis file specified from a `analysis` attribute,
as in the following URLs on our test server:
- <http://app.vidjil.org/browser/?data=test.vidjil>
- <http://app.vidjil.org/browser/?data=test.vidjil&analysis=test.analysis>
- <http://app.vidjil.org/browser/?data=http://app.vidjil.org/browser/test.vidjil>
Both GET and POST requests are accepted.
Note that the `browser/index.html` file and the `.vidjil/.analysis` files should be hosted on the same server.
Otherwise, the server hosting the `.vidjil/.analysis` files must accept cross-domain queries.
The client can also load data from a server (see below, requires logging), as in <http://app.vidjil.org/?set=3241&config=39>
| | |
| ----------- | ------------- |
| `set=xx` | sample set id |
| `config=xx` | config id |
Older formats (patients, run…) are also supported for compatibility but deprecated.
Moreover, the state of the client can be encoded in the URL, as in <http://app.vidjil.org/?set=3241&config=39&plot=v,size,bar&clone=11,31>
| | |
| ---------------- | --------------------- |
| `plot=x,y,m` | plot (x axis, y axis) |
| `clone=xx,xx,xx` | selected clone ids |
For `plot` the axis names are found in `browser/js/axes.js`. `m` is optional, and defines the type of plot (either `grid` or `bar`).
We intend to encode more parameters in the URL.
## Architecture
The Vidjil client is a set of *views* linked to a same *model*.
The model keeps the views in sync on some global properties,
most notably dealing with the selection of clones, with the clone filtering,
as well as with the locus selection.
- The model (`js/model.js`) is the main object of the Vidjil client.
It loads and saves `.vidjil` json data (either directly from data, or from a local file, or from some url).
It provides functions to access and edit information on the clones and on the global parameters.
It keeps all the views in sync.
- Each of the views (`Graph`, `ScatterPlot`, `List`, `Segment`) is rendered inside one or several `<div>` elements,
and kept in sync with the model. All the views are optional, and several views of the same type can be added.
See `js/main.js` for the invocation
- The link with the patient database/server is done with the `Database` object (`js/database.js`)
- Other objects: `Report`, `Shortcut`
Extends functionalities but requires elements from the full `index.html`.
## Integrating the client
### HTML and CSS
- The `index.html` contains the `<div>` for all views and the menus
- The CSS (`css/light.css`) is generated by `less` from `css/vidjil.less`
- The `small_example.html` is a minimal example embedding basic HTML, CSS, as well as some data.
As the menus are not embedded in this file, functionalities should be provided by direct calls to the models and the views.
### Javascript
- The wonderful library `require.js` is used, so there is only one file to include
`<script data-main="js/app.js" src="js/lib/require.js"></script>`
- `js/main.js` creates the different views and binds them to the model.
Another option is to directly define a function named `main()`, as in `small_example.html`.
### JSON .vidjil data
Clone lists can be passed to the model through several ways:
- directly by the user (import/export)
- from a patient database (needs a database)
- through the API (see below)
- or by directly providing data through Javascript (as in `small_example.html`)
The first three solutions need some further elements from the full `index.html`.
## Notifications
### Priority
<span id="browser:priority"></span>
The priority determines how notifications are shown and what action the
user should take. The priorities can be between 0 and 3.
- 0
The notification is not shown
- 1
The notification is shown (usually on green background) and
automatically disappears
- 2
The notification is shown (usually on yellow background) and
automatically disappears
- 3
The notification is shown (usually on red background) and doesn't
disappear until the user clicks on it.
In the `console.log`, the field `priority` takes one of those priorities.
## Plots
### How to add something to be plotted
You want to add a dimension in the scatterplot or as a color? Read the
following.
1. Scatterplot
In [scatterPlot.js](../browser/js/scatterPlot.js), the `available_axis` object defines the dimensions that
can be displayed. It suffices to add an entry so that it will be proposed
in the X and Y axis. This approach should be generalized to
the other components.
The presets are defined in the `preset` object.
2. Color
Adding a color needs slightly more work than adding a dimension in the
scatterplot.
The function `updateColor` in file [clone.js](../browser/js/clone.js) must be modified to add our color method.
The variable `this.color` must contain a color (either in HTML or RGB, or…).
Then a legend must be displayed to understand what the color represents.
For this sake, modify the `build_info_color` method in [info.js](../browser/js/info.js) file. By
default four spans are defined (that can be used) to display the legend:
`span0`, …, `span3`.
Finally modify the [index.html](../browser/index.html) file to add the new color method in the
select box (which is under the `color_menu` ID).
## Classes
### Clone
1. Info box
The info box shows all the fields whose name starts with a \_. Also all the
fields under the `seg` field are displayed as soon as they have a `start` and
`stop`. Some of them can be explicitly not displayed by filling the
`exclude_seg_info` array in `getHtmlInfo`.
## Tests
### Code Quality
Quality of code is checked using [JSHint](http://jshint.com/), by
running `make quality` from the `browser` directory.
Install with `npm install -g jshint`
### Unit
The unit tests in the client are managed by QUnit and launched using
[nightmare](http://www.nightmarejs.org/), by launching `make unit` from the `browser/test` directory.
The tests are organised in the directory
[browser/test/QUnit/testFiles](../browser/test/QUnit/testFiles). The file [data_test.js](../browser/test/QUnit/testFiles/data_test.js) contains a toy
dataset that is used in the tests.
Unit tests can be launched using a real client (instead of nightmare). It
suffices to open the file [test_Qunit.html](../browser/test/QUnit/test_Qunit.html). In this HTML webpage it is
possible to see the coverage. It is important that all possible functions
are covered by unit tests. To display the coverage under Firefox,
the webpage must be served by a web server, for security
reasons. Under Chromium/Chrome this should work fine by just opening the
webpage.
1. Installation
Nightmare is distributed within `node` and can therefore be installed with it.
``` bash
apt-get install nodejs-legacy npm
npm install nightmare -g # make -C browser/test unit will automatically
# link to global nightmare installation
```
Note that using `nightmare` for our unit testing
requires the installation of `xvfb`.
2. Debugging
If there is a problem with nightmare or electron (a nightmare
dependency), you may encounter a lack of output or error messages.
To address this issue, run:
``` bash
cd browser/test/QUnit
DEBUG=nightmare*,electron:* node nightmare.js
```
### Functional
1. Architecture