Commit 1141fd67 authored by Thonier Florian's avatar Thonier Florian

Merge branch 'dev' into 'feature-t/3944-pouvoir-calculer-des-distributions-2'

# Conflicts:
#   tools/fuse.py
parents 97fe8d94 7fabb78d
Pipeline #90570 failed with stage
in 7 seconds
......@@ -299,7 +299,6 @@ test_server_functional:
- sed -i '/\/etc\/nginx\/ssl\:\/etc\/nginx\/ssl/d' ./docker/docker-compose.yml
- sed -i 's/\:latest/\:test/g' ./docker/docker-compose.yml
- cd docker/vidjil-server/conf/ && mv defs.py defs_https.py && mv defs_http.py defs.py && cd ../../..
- cd docker/vidjil-client/conf/ && mv conf.js conf_https.js && mv conf_http.js conf.js && cd ../../..
- make germline && cp browser/js/germline.js docker/vidjil-client/conf
- cd docker && docker-compose up -d && cd ..
- sed -i "s/^python\ \.\.\/\.\.\/\.\./docker\ exec\ docker_uwsgi_1\ python\ \/usr\/share\/vidjil\/server\/web2py/" server/web2py/applications/vidjil/tests/init_func_test_db.sh
......
......@@ -2,12 +2,13 @@
Parses output of various RepSeq programs.
Takes either:
- a .fa file, a _Summary.txt file as produced by IMGT/V-QUEST
- or a results file produced by MiXCR
- or a results file produced by MiXCR or IgReC
and creates a .vdj file to be checked by should-vdj-to-tap.py
python repseq_vdj.py data-curated/curated_IG.fa data-curated/curated_ig_Summary.txt > data-curated/imgt-IG.vdj
python repseq_vdj.py data-curated/curated_TR.fa data-curated/curated_tr_Summary.txt > data-curated/imgt-TR.vdj
python repseq_vdj.py data-curated/mixcr.results > data-curated/mixcr.vdj
python repseq_vdj.py bla.igrec.results
python repseq_vdj.py data-curated/curated_IG.fa data-curated/igblast/IG/*.aln > data-curated/igblast-IG.vdj
python repseq_vdj.py data-curated/curated_TR.fa data-curated/igblast/TR/*.aln > data-curated/igblast-TR.vdj
'''
......@@ -91,6 +92,9 @@ class Result(VDJ_Formatter):
self.populate()
def __contains__ (self, key):
    '''Return True when `key` is present in `self.d`, enabling `key in result`.'''
    return key in self.d
def __getitem__(self, key):
    '''Return the value stored under `key` in `self.d`, enabling `result[key]`.'''
    return self.d[key]
......@@ -98,6 +102,49 @@ class Result(VDJ_Formatter):
return str(self.d)
### IgReC
# Column labels of one tab-separated IgReC result line, in column order.
# IgReC_Result.parse() installs this list as the `labels` of the result.
IGREC_LABELS = [
'Read id', 'locus',
'V id', 'V start', 'V end', 'V score',
'J id', 'J start', 'J end', 'J score',
]
class IgReC_Result(Result):
    r'''
    One line of IgReC results, labelled with IGREC_LABELS.

    >>> lig = '\t'.join(['blabli4577', 'TRB', 'TRBV13*02', '1', '164', '0.58156', 'TRBJ1-5*01', '319', '367', '0.94'])
    >>> r = IgReC_Result(lig)

    >>> r['Read id']
    'blabli4577'
    >>> r.vdj[V]
    ['TRBV13*02']
    >>> r.vdj[J]
    ['TRBJ1-5*01']
    '''

    def parse(self, l):
        # Install the IgReC column labels, then hand the line back only when
        # its stripped form still contains a tab; otherwise return None.
        self.labels = IGREC_LABELS
        has_tab = '\t' in l.strip()
        return l if has_tab else None

    def populate(self):
        # V and J each receive a one-element list holding the gene identifier.
        for gene, column in ((V, 'V id'), (J, 'J id')):
            self.vdj[gene] = [self[column]]
def header_igrec_results(ff_igrec):
    '''
    Yield a (header, vdj) pair for each line of an IgReC results file.

    ff_igrec: path of the IgReC results file.

    The header is the 'Read id' column with underscores replaced by spaces,
    and vdj is whatever `to_vdj()` returns for the parsed line.
    '''
    # Iterating over the file object ends cleanly at end of file. The former
    # `while True` / `f.next()` loop relied on StopIteration escaping the
    # generator (a RuntimeError under PEP 479), and `.next()` does not exist
    # on Python 3 file objects. The `with` block also closes the file.
    with open(ff_igrec) as f:
        for l in f:
            result = IgReC_Result(l)
            yield result['Read id'].replace('_', ' '), result.to_vdj()
### MiXCR
......@@ -111,16 +158,20 @@ class MiXCR_Result(Result):
return None
def populate(self):
self.vdj[V] = [self['Best V hit']]
if self['Best D hit']:
self.vdj[D] = [self['Best D hit']]
self.vdj[J] = [self['Best J hit']]
self.vdj[V] = [self['bestVHit']]
if self['bestDHit']:
self.vdj[D] = [self['bestDHit']]
self.vdj[J] = [self['bestJHit']]
self.vdj[N1] = self['N. Seq. VDJunction']
self.vdj[N2] = self['N. Seq. DJJunction']
self.vdj[N] = self['N. Seq. VJJunction']
if 'nSeqVDJunction' in self:
self.vdj[N1] = self['nSeqVDJunction']
if 'nSeqDJJunction' in self:
self.vdj[N2] = self['nSeqDJJunction']
if 'nSeqVJJunction' in self:
self.vdj[N] = self['nSeqVJJunction']
self.vdj[JUNCTION] = self['AA. Seq. CDR3']
if 'aaSeqCDR3' in self:
self.vdj[JUNCTION] = self['aaSeqCDR3']
def header_mixcr_results(ff_mixcr):
......@@ -128,12 +179,12 @@ def header_mixcr_results(ff_mixcr):
f = open(ff_mixcr).__iter__()
mixcr_first_line = f.next()
globals()['mixcr_labels'] = mixcr_first_line.split('\t')
globals()['mixcr_labels'] = mixcr_first_line.rstrip().split('\t')
while True:
l = f.next()
l = f.next().rstrip()
result = MiXCR_Result(l)
yield result['Description R1'], result.to_vdj()
yield result['descrsR1'], result.to_vdj()
......@@ -354,6 +405,8 @@ if __name__ == '__main__':
if 'mixcr' in sys.argv[1]:
vdj.parse_from_gen(header_mixcr_results(sys.argv[1]))
elif 'igrec' in sys.argv[1]:
vdj.parse_from_gen(header_igrec_results(sys.argv[1]))
elif 'igblast' in sys.argv[2]:
vdj.parse_from_gen(header_igblast_results(sys.argv[1], sys.argv[2:]))
else:
......
>IGHV1-18 (IGHJ1, IGHJ2)
atggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagagagctgaatacttccagcactggggccagggcaccctggtcaccgtctcctcag
......@@ -137,7 +137,7 @@ def should_pattern_to_regex(p):
gene = gene.replace('/', '/?')
if args.ignore_D and ('IGHD' in gene or 'TRBD' in gene or 'TRDD' in gene):
gene = '[^[:space]]*'
gene = '[^[:space:]]*'
allele = '[[:digit:]]*'
if args.ignore_allele:
......@@ -168,7 +168,7 @@ def should_pattern_to_regex(p):
if len(r) > 1 and r[1][0] == '|':
# We have an alternative
regex_pattern = '('+' '.join(r)+').*'
regex_pattern = '.*('+''.join(r)+').*'
else:
regex_pattern = '.*'.join(r)
......@@ -248,6 +248,46 @@ def should_result_to_tap(should_pattern, result, tap_id):
True
>>> srtt_ok(should, other_allele)
True
>>> should = 'TRAV1-1 TRAJ1'
>>> other = 'TRAV1-1*01 1/ACG/3 TRAJ1*01'
>>> (args.ignore_N, args.ignore_del) = (True, True)
>>> srtt_ok(should, other)
True
>>> should = 'TRAV1-1 (TRAJ1, TRAJ2)'
>>> other = 'TRAV1-1*01 1/ACG/3 TRAJ1*01'
>>> srtt_ok(should, other)
True
>>> should = '(IGKV1D-37, IGKV1-37) IGKJ5'
>>> curated = 'IGKV1D-37*01 2/ATA/0 IGKJ5*01'
>>> srtt_ok(should, curated)
True
>>> should = 'IGKV1D-37 IGKJ5'
>>> curated = 'IGKV1D-37*01 2/ATA/0 IGKJ5*01'
>>> srtt_ok(should, curated)
True
# Negative tests matter too
>>> should = '(IGKV1D-37, IGKV1-37) IGKJ5'
>>> curated = 'IGKV1D-32*01 2/ATA/0 IGKJ5*01'
>>> srtt_ok(should, curated)
False
>>> should = 'IGHV7-4-1*02 IGHD6-25*01 (IGHJ6*02 ,IGHJ6*04)'
>>> obtained = 'IGHV7-4-1*02 1//4 IGHJ6*01'
>>> args.ignore_D = True
>>> srtt_ok(should, obtained)
True
>>> args.ignore_allele = False
>>> srtt_ok(should, obtained)
False
>>> (args.ignore_allele, args.ignore_D) = (True, False)
>>> srtt_ok(should, obtained)
False
'''
m_locus = r_locus.search(should_pattern)
......
......@@ -85,6 +85,9 @@ function loadAfterConf() {
}else{
main();
}
if (typeof config.addons !== "undefined") {
require(config.addons);
}
})
})
},
......
/*
* Vidjil browser, main configuration file
* Vidjil client, main configuration file
* This file must be named 'js/conf.js' to be taken into account
* */
var config = {
/****************
* Static alerts
*/
// "alert": "Rescue server",
/****************
* External services
*/
/* Used for the 'align' script
* If this is not defined, the 'align' button is not available
*/
"cgi_address" : "https://db.vidjil.org/cgi/", // Public test server
// "cgi_address" : "http://127.0.1.1/cgi-bin/",
/* The following options control how the user may have access to .vidjil files.
/* Proxy config for IMGT querying */
/*
"proxy": "https://db.vidjil.org/vidjil/proxy/imgt",
*/
/* Used for the standalone http://app.vidjil.org/analyze page */
"segmenter_address" : "https://db.vidjil.org/vidjil/segmenter",
/* Do we have access to a CloneDB ? */
"clonedb": false,
/****************
/* Access to .vidjil files
* Any combination of 1), 2) and 3) should work
*/
......@@ -42,29 +65,19 @@ var config = {
// "autoload" : "data/Stanford-S22.vidjil",
// "autoload_analysis" : "data/Stanford-S22.analysis"
// Proxy config for IMGT querying
/*
"proxy": "https://db.vidjil.org/vidjil/proxy/imgt"
*/
/* Used for the standalone segmenter page */
"segmenter_address" : "https://db.vidjil.org/vidjil/segmenter",
/****************
* Load extra scripts
*/
/* "addons" : ["js/lib/important-lib.js", "js/myscript.js"], */
/* Do we have access to a CloneDB ? */
"clonedb": false,
/****************
* Tips of the day
*/
"doc_address" : "doctips/",
"available_tips" : [
'T01', 'T02', 'T03',
'T30', 'T31', 'T32'
],
"available_tips" : [ ]
// [ 'T01', 'T02', 'T03', 'T30', 'T31', 'T32' ]
/****************
* Static alerts
*/
// "alert": "Rescue server",
}
......@@ -699,8 +699,11 @@ changeAlleleNotation: function(alleleNotation) {
* if raw is defined, do not normalize
*/
normalize_reads: function(clone, time, raw) {
if (this.normalization_mode == this.NORM_EXTERNAL && clone.normalized_reads != undefined && raw == undefined) {
return clone.normalized_reads[time] ;
if (this.normalization_mode == this.NORM_EXTERNAL &&
clone.normalized_reads != undefined &&
clone.normalized_reads[time] != null &&
raw == undefined) {
return clone.normalized_reads[time] ;
} else {
return clone.reads[time] ;
}
......
......@@ -119,7 +119,7 @@ var json_clone6 = {
"id" : "id6",
"germline" : "TRG",
"reads" : [10,10,0,30],
"normalized_reads" : [20,20,0,30],
"normalized_reads" : [20,20,0,null],
}
QUnit.test("name, informations, getHtmlInfo", function(assert) {
......
......@@ -423,6 +423,12 @@ QUnit.test("normalization", function(assert) {
m.initClones()
assert.equal(m.have_external_normalization, false, "Model have_external_normalization is correctly resetted")
m.set_normalization(m.NORM_EXTERNAL)
assert.equal(m.normalize_reads(c6, 0, undefined), 20, "normalize_reads; get normalized value if present")
assert.equal(m.normalize_reads(c6, 0, false), 10, "normalize_reads; get raw value if specified" )
assert.equal(m.normalize_reads(c6, 2, undefined), 0, "normalize_reads; get value at 0 as computed by external normalization" )
assert.equal(m.normalize_reads(c6, 3, undefined), 30, "normalize_reads; get raw value if normalization equal null")
})
QUnit.test("findGermlineFromGene", function(assert) {
......
Here are aggregated notes forming the developer documentation of vidjil-algo.
This documentation is a work-in-progress, it is far from being as polished as the user documentation.
Help can also be found in the source code and in the commit messages.
# Algorithm
## Code organisation
The algorithm follows roughly those steps:
1. The germlines are read. Germlines are in the fasta format and are read
by the Fasta class (`core/fasta.h`). Germlines are built using the
Germline (or MultiGermline) class (`core/germline.h`)
2. The input sequence file (.fasta, .fastq, .gz) is read by an OnlineFasta
(`core/fasta.h`). The difference with the Fasta class being that all the
data is not stored in memory but the file is read online, storing only
the current entry.
3. Windows must be extracted from the read, which is done by the
WindowExtractor class (`core/windowExtractor.h`). This class has an
`extract` method which returns a WindowsStorage object
(`core/windows.h`) in which windows are stored.
4. To save space consumption, all the reads linked to a given window are
not stored. Only the longer ones are kept. The BinReadStorage class is
used for that purpose (`core/read_storage.h`).
5. In the WindowsStorage, we now have the information on the clusters and on
the abundance of each cluster. However we lack a sequence representative
of the cluster. For that purpose the class provides a
`getRepresentativeComputer` method that provides a
KmerRepresentativeComputer (`core/representative.h`). This class can
compute a representative sequence using the (long) reads that were
stored for a given window.
6. The representative can then be segmented to determine what V, D and J
genes are at play. This is done by the FineSegmenter (`core/segment.h`).
## The xxx germline
- All germlines are inserted in one index using `build_with_one_index()` and
the segmentation method is set to `SEG_METHOD_MAX12` to tell that the
segmentation must somehow differ.
- So that the FineSegmenter correctly segments the sequence, the `rep_5` and
`rep_3` members (class `Fasta`) of the xxx germline are modified by the
FineSegmenter. The `override_rep5_rep3_from_labels()` method from the
Germline is the one that overwrites those members with the Fasta
corresponding to the affectation found by the KmerSegmenter.
## Tests
### Unit
Unit tests are managed using an internal lightweight poorly-designed
library that outputs a TAP file. They are organised in the directory
[algo/tests](../algo/tests).
All the tests are defined in the [tests.cpp](../algo/tests/tests.cpp) file. But, for the sake of
clarity, this file includes other `cpp` files that incorporate all the
tests. A call to `make` compiles and launches the `tests.cpp` file, which
outputs a TAP file (in case of total success) and creates a `tests.cpp.tap`
file (in every case).
1. Tap test library
The library is defined in the [testing.h](../algo/tests/testing.h) file.
Tests must be declared in the [tests.h](../algo/tests/tests.h) file:
1. Define a new macro (in the enum) corresponding to the test name
2. In `declare_tests()` use `RECORD_TAP_TEST` to associate the macro with a
description (that will be displayed in the TAP output file).
Then testing can be done using the `TAP_TEST` macro. The macro takes three
arguments. The first one is a boolean that is supposed to be true, the
second is the test name (using the macro defined in `tests.h`) and the
third one (which can be an empty string) is something which is displayed
when the test fails.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -268,7 +268,7 @@ Contact us (<mailto:contact@vidjil.org>) to have more information and help.
# Docker -- Troubleshooting
### Error "Can't connect to MySQL server on 'mysql'"
## Error "Can't connect to MySQL server on 'mysql'"
The mysql container is not fully launched. This can happen especially at the first launch.
You may relaunch the containers.
......@@ -298,6 +298,16 @@ you can look into:
```
If the database does not exist, mysql will display an error after logging in.
## Launching manually the backup
The backup should be handled by the backup container. If so, connect to this
container and run (for a full backup, otherwise add the `-i` option when
running `backup.sh`):
```sh
cd /usr/share/vidjil/server
sh backup.sh vidjil /mnt/backup >> /var/log/cron.log 2>&1
```
# Docker -- Updating a Docker installation
......
......@@ -37,7 +37,9 @@ This is an almost minimal `.vidjil` file, describing clones in one sample.
The `seg` element is optional: clones without `seg` elements will be shown on the grid with '?/?'.
The `_average_read_length` is also optional, but allows to plot GENSCAN-like plots more precisely than getting only the length of the sequence.
All other elements are required. The `reads.germlines` list can have only one element in the case of data on a unique locus.
There is here one clone on the `TRG` locus with a designation `TRGV5*01 5/CC/0 TRGJ1*02`.
There is here one clone on the `TRG` locus with a designation (`name`) `TRGV5*01 5/CC/0 TRGJ1*02`.
Note that this `name` is just used to name the clone.
The actual values used for X- and Y- axis in the V/J grid plot are `seg.5.name` and `seg.3.name` fields.
Note that other elements could be added by some program (such as `tag`, to identify some clones,
or `clusters`, to further cluster some clones, see below).
......@@ -65,6 +67,7 @@ or `clusters`, to further cluster some clones, see below).
"clones": [
{
"id": "clone-001",
"name": "TRGV5*01 5/CC/0 TRGJ1*02",
"sequence": "CTCATACACCCAGGAGGTGGAGCTGGATATTGATACTACGAAATCTAATTGAAAATGATTCTGGGGTCTATTACTGTGCCACCTGGGCCTTATTATAAGAAACTCTTTGGCAGTGGAAC",
"reads" : [ 243241 ],
"_average_read_length": [ 119.3 ],
......@@ -72,9 +75,9 @@ or `clusters`, to further cluster some clones, see below).
"top": 1,
"seg":
{
"5": {"name": "TRGV5*01", "start": 1, "stop": 86, "delRight":5},
"5": {"name": "TRGV5*01", "start": 1, "stop": 87, "delRight":5},
"3": {"name": "TRGJ1*02", "start": 89, "stop": 118, "delLeft":0},
"cdr3": { "start": 77, "stop": 104, "seq": "gccacctgggccttattataagaaactc" }
"cdr3": { "start": 78, "stop": 105, "seq": "gccacctgggccttattataagaaactc" }
}
}
......@@ -85,7 +88,7 @@ or `clusters`, to further cluster some clones, see below).
## `.vidjil` file – several related samples
This is a `.vidjil` file obtained by merging with `fuse.py` two `.vidjil` files corresponding to two samples.
Clones that have a same `id` are gathered (see 'What is a clone?', above).
Clones that are from different files but that have the same `id` are gathered (see 'What is a clone?', above).
It is the responsibility of the program generating the initial `.vidjil` files to choose these `id` to
do a correct gathering.
......@@ -119,7 +122,7 @@ do a correct gathering.
"top": 1,
"seg":
{
"5": {"name": "TRGV5*01", "start": 1, "stop": 86, "delRight": 5},
"5": {"name": "TRGV5*01", "start": 1, "stop": 87, "delRight": 5},
"3": {"name": "TRGJ1*02", "start": 89, "stop": 118, "delLeft": 0}
}
},
......@@ -308,7 +311,8 @@ In the `.analysis` file, this section is intended to describe some specific clon
// Recombination with several D may use "4a", "4b"...
"3": {"name": "IGHJ3*02", "start": 136, "stop": 171, "delLeft": 5}, // J (or 3') segment
// any feature to be highlighted in the sequence, with optional fields related to this feature:
// Any feature to be highlighted in the sequence.
// All those fields are optional (though some minor feature may not properly work in the client)
// - "start"/"stop" : positions on the clone sequence (starting at 1)
// - "delLeft/delRight" : a numerical value, the number of nucleotides deleted during the rearrangement. delRight is compatible with V/5 and D/4 segments, delLeft is compatible with D/4 and J/3 segments.
// - "seq" : a sequence
......@@ -317,6 +321,8 @@ In the `.analysis` file, this section is intended to describe some specific clon
//
// JUNCTION//CDR3 should be stored that way (in fields called "junction" or "cdr3"),
// its productivity must be stored in a boolean field called "productive".
// "seq" field should not be filled for cdr3 or junction (it is extracted from the sequence itself).
// However, an "aa" field may be used to give the amino-acid translation of the cdr3 or junction.
"somefeature": { "start": 56, "stop": 61, "seq": "ACTGTA", "val": 145.7, "info": "analyzed with xyz" },
// Numerical or textual features concerning all the sequence or its analysis (such as 'evalue')
......@@ -329,6 +335,11 @@ In the `.analysis` file, this section is intended to describe some specific clon
"reads": [], // number of reads in this clones [.vidjil only, required]
// (with samples.number elements)
"_average_read_length": [],
// Average read length of the reads clustered in this clone.
// This value allows to draw a genescan-like plot.
// (with samples.number elements)
"top": 0, // (not documented now) [required] threshold to display/hide the clone
"stats": [] // (not documented now) [.vidjil only] (with sample.number elements)
......
......@@ -13,14 +13,7 @@ arg git_branch=dev
arg remote_repo=https://gitlab.inria.fr/vidjil/vidjil.git
run cd /usr/share/ && git config --global http.sslVerify false && git clone -b $git_branch $remote_repo
copy ./conf/nginx_gzip_static.conf /etc/nginx/conf.d/web2py/gzip_static.conf
copy ./conf/nginx_gzip.conf /etc/nginx/conf.d/web2py/gzip.conf
copy ./conf/uwsgi.conf /etc/nginx/conf.d/web2py/uwsgi.conf
add ./scripts/install.sh /opt/install_scripts/install.sh
copy ./conf/conf.js /opt/vidjil_conf/conf.js
copy ./conf/conf_http.js /opt/vidjil_conf/conf_http.js
copy ./conf/nginx_web2py /opt/vidjil_conf/web2py
copy ./conf/nginx_web2py_http /opt/vidjil_conf/web2py_http
copy ./conf/Gemfile /usr/share/vidjil/Gemfile
copy ./conf/align.cgi /usr/share/vidjil/browser/cgi/align.cgi
copy ./conf/similarity.cgi /usr/share/vidjil/browser/cgi/similarity.cgi
......@@ -31,9 +24,12 @@ arg build_env='PRODUCTION'
env BUILD_ENV $build_env
run mkdir /etc/vidjil
run mkdir /etc/nginx/conf.d/web2py/
run rm /etc/nginx/conf.d/default.conf
run chmod +x /opt/install_scripts/install.sh; sync && /opt/install_scripts/install.sh
run ln -s /etc/vidjil/conf.js /usr/share/vidjil/browser/js/conf.js
run ln -s /etc/vidjil/nginx_gzip_static.conf /etc/nginx/conf.d/web2py/gzip_static.conf
run ln -s /etc/vidjil/nginx_gzip.conf /etc/nginx/conf.d/web2py/gzip.conf
run ln -s /etc/vidjil/uwsgi.conf /etc/nginx/conf.d/web2py/uwsgi.conf
run ln -s /etc/vidjil/germline.js /usr/share/vidjil/browser/js/germline.js
copy ./scripts/nginx-entrypoint.sh /entrypoints/nginx-entrypoint.sh
......
#!/bin/bash
echo "${BUILD_ENV}"
if [ "${BUILD_ENV}" = "TEST" ]; then
cp -avr /opt/vidjil_conf/conf_http.js /etc/vidjil/conf.js
cp -avr /opt/vidjil_conf/web2py_http /etc/nginx/conf.d/web2py.conf
ln -s /etc/vidjil/conf_http.js /usr/share/vidjil/browser/js/conf.js
ln -s /etc/vidjil/nginx_web2py_http /etc/nginx/conf.d/web2py.conf
else
cp -avr /opt/vidjil_conf/conf.js /etc/vidjil/conf.js
cp -avr /opt/vidjil_conf/web2py /etc/nginx/conf.d/web2py.conf
ln -s /etc/vidjil/conf.js /usr/share/vidjil/browser/js/conf.js
ln -s /etc/vidjil/nginx_web2py /etc/nginx/conf.d/web2py.conf
fi;
......@@ -16,6 +16,7 @@ doctests:
python -m doctest -v ../utils.py
python -m doctest -v ../vidjil-to-fasta.py
python -m doctest -v ../../algo/tests/should-vdj-to-tap.py
python -m doctest -v ../../algo/tests/repseq_vdj.py
python ../org-babel-tangle.py --test
@echo "*** All python tests passed"
......
!LAUNCH: python ../../fuse.py $FUSE_OPTIONS ../../../algo/tests/data/no_clones.vidjil ../../../algo/tests/data/results-two-clones-1-2.vidjil -o normalized_zero.vidjil; cat normalized_zero.vidjil
$ Case zero; No field normalized_reads if not contained in the given files/clones
0: normalized_reads
!LAUNCH: python ../../fuse.py $FUSE_OPTIONS ../../../algo/tests/data/results-two-clones-1-3.vidjil ../../../algo/tests/data/results-two-clones-1-3.vidjil -o normalized_both.vidjil; cat normalized_both.vidjil
$ Case both; Should find 1 field normalized_reads (clone id-1)
1: normalized_reads
$ Case both; Correct fusion of normalized_reads field if both clones are informative
lr: normalized_reads.*500,.*500
!LAUNCH: python ../../fuse.py $FUSE_OPTIONS ../../../algo/tests/data/no_clones.vidjil ../../../algo/tests/data/results-two-clones-1-3.vidjil -o normalized_one.vidjil; cat normalized_one.vidjil
$ Case only one; Should find 1 field normalized_reads
1: normalized_reads
$ Case only one; First value is null if one of the files has not been analysed by the normalization script
1: null
rl1: normalized_reads.*null,.*500
!LAUNCH: python ../../fuse.py $FUSE_OPTIONS ../../../algo/tests/data/results-two-clones-1-3.vidjil ../../../algo/tests/data/no_clones.vidjil -o normalized_one_revert.vidjil; cat normalized_one_revert.vidjil
$ Case inverse; Should find 1 field normalized_reads
1: normalized_reads
$ Case inverse; Idem, but reverse files
1: null
lr: "normalized_reads".*500,.*null
!LAUNCH: python ../../fuse.py $FUSE_OPTIONS normalized_one.vidjil ../../../algo/tests/data/results-two-clones-1-3.vidjil -o normalized_double.vidjil; cat normalized_double.vidjil
$ Case double timepoint; Should find 1 field normalized_reads
1: normalized_reads
$ Case double timepoint; Correct fusion if one of the files is an already fused vidjil
1: null
lr: "normalized_reads".*null,.*500,.*500
\ No newline at end of file
......@@ -52,8 +52,13 @@ def concatenate_with_padding(d,
continue
d[key] = d1[key]
if key not in d2 :
d[key] += t2
### For field normalized_reads, we prefer set it at None if not available for a timepoint
# Create a specific loop for it
if key not in d2:
if key != "normalized_reads":
d[key] += t2
elif key == "normalized_reads":
d[key] += [None]*len(d2["reads"])
for key in d2:
if key in ignore_keys:
......@@ -61,8 +66,11 @@ def concatenate_with_padding(d,
if type(d2[key]) is not list:
continue
if key not in d :
d[key] = t1 + d2[key]
if key not in d:
if key != "normalized_reads":
d[key] = t1 + d2[key]
elif key == "normalized_reads":
d[key] = [None]*len(d1["reads"]) + d2[key]
else :
d[key] = d[key] + d2[key]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment