Commit ed299a22 authored by Mikaël Salson

Merge branch 'feature-a/3206-rename-and-merge-should' into 'dev'

Feature a/3206 rename and merge should

Closes #3762

See merge request !467
parents 1973f7e8 dd0ff4a6
Pipeline #78271 passed with stages
in 6 minutes and 8 seconds
......@@ -11,8 +11,8 @@ EXEC=$(SRC:.cpp=)
OBJ=$(SRC:.cpp=.o)
OTHER_SRC=$(wildcard unit-tests/*.cpp)
LIB=../core/vidjil.a ../lib/lib.a
SHOULD=$(wildcard should-get-tests/*.should-get) $(wildcard bugs/*.should-get)
SHOULD_LOG=$(SHOULD:.should-get=.tap)
SHOULD=$(wildcard should-get-tests/*.should) $(wildcard bugs/*.should)
SHOULD_LOG=$(SHOULD:.should=.tap)
SHOULD_VDJ=$(wildcard should-vdj-tests/*.should-vdj.fa)
SHOULD_VDJ_VDJ=$(SHOULD_VDJ:.should-vdj.fa=.1.vdj)
SHOULD_LOCUS=$(wildcard should-vdj-tests/*.should-locus.fa)
......
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -g $VIDJIL_DIR/germline/homo-sapiens.g $VIDJIL_DATA/3344-bad-filtering.fa
$ Check that proper filtering is used
1: IGHV4-31.02
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c designations -g $VIDJIL_DIR/germline/homo-sapiens.g $VIDJIL_DATA/3344-bad-filtering.fa
$ Check that proper filtering is used
1: IGHV4-31.02
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c clones -z 2 -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.tsv
!LAUNCH: $VIDJIL_DIR/$EXEC -c clones -z 2 -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.tsv
$ There are four lines, all with tabs
4:
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c clones -z 2 -2 -3 -r 1 -g $VIDJIL_DIR/germline/homo-sapiens.g ../should-vdj-tests/Demo-X5.should-vdj.fa > /dev/null ; cat out/Demo-X5.should-vdj.tsv
!LAUNCH: $VIDJIL_DIR/$EXEC -c clones -z 2 -2 -3 -r 1 -g $VIDJIL_DIR/germline/homo-sapiens.g ../should-vdj-tests/Demo-X5.should-vdj.fa > /dev/null ; cat out/Demo-X5.should-vdj.tsv
$ There are 15 = 1 + 14 lines, all with tabs
15:
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 1 -z 0 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -z 0 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python $VIDJIL_DIR/tools/format_json.py -1
$ Points list
1:"original_names": \[".*data//Stanford_S22.fasta", ".*data//Stanford_S22.fasta"\]
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -x 100 -r 1 -z 5 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/vidjil-to-fasta.py -o out/S22.fasta out/Stanford_S22.vidjil ;
!LAUNCH: $VIDJIL_DIR/$EXEC -x 100 -r 1 -z 5 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/vidjil-to-fasta.py -o out/S22.fasta out/Stanford_S22.vidjil ;
!OUTPUT_FILE: out/S22.fasta
$ 5 representative sequences in the FASTA output file
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --alternative-genes 3 -c designations -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC --alternative-genes 3 -c designations -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ Presence of alternative:
1: "3alt"
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 10 -z 0 -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -s "######-######" $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -e 10 -z 0 -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -s "######-######" $VIDJIL_DATA/Stanford_S22.fasta
$ Germlines are custom
1: custom germlines
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --analysis-cost '1, 2, 3, 4, 5' $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC --analysis-cost '1, 2, 3, 4, 5' $VIDJIL_DATA/Stanford_S22.fasta
!EXIT_CODE: 1
$Check that correct custom cost is used
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens-isoforms.g -r 1 -e 0.1 $VIDJIL_DATA/erg_debre.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens-isoforms.g -r 1 -e 0.1 $VIDJIL_DATA/erg_debre.fa
$ All sequences have been seen as different ERG recombinations
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens-isoforms.g -r 1 -e 0.1 $VIDJIL_DATA/ikaros_debre.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens-isoforms.g -r 1 -e 0.1 $VIDJIL_DATA/ikaros_debre.fa
$ All sequences have been seen as unique clone
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g -c clones --all -3 $VIDJIL_DATA/segment_lec.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g -c clones --all -3 $VIDJIL_DATA/segment_lec.fa
$ Extract up to 50bp windows (TRG)
1:windows up to 50bp
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 1e-2 -y 0 -g $VIDJIL_DIR/germline/homo-sapiens-cd.g $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -e 1e-2 -y 0 -g $VIDJIL_DIR/germline/homo-sapiens-cd.g $VIDJIL_DATA/Stanford_S22.fasta
$ Do not segment any read with SEG_METHOD_ONE on homo-sapiens-cd.g
1: CD .* -> .* 0 .* 0
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c designations -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/cdr3-stopcodon.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/cdr3-stopcodon.fa
!OUTPUT_FILE: out/cdr3-stopcodon.vidjil
$ Two identical junctions in JSON
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c designations -3 -E 1.0 -g $VIDJIL_DIR/germline ../should-vdj-tests/Demo-X5.should-vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -3 -E 1.0 -g $VIDJIL_DIR/germline ../should-vdj-tests/Demo-X5.should-vdj.fa
$ Detects a CDR3 on regular V(D)J recombinations
1: IGH SEG.* [{].*[}]
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline --all -2 $VIDJIL_DATA/2549.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline --all -2 $VIDJIL_DATA/2549.fa
$ The KmerSegmenter segments the chimera on xxx germline (-2)
1:unexpected .* -> .* 1
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake.fa
$ The KmerSegmenter segments the three chimera reads on PSEUDO_MAX12 germline (-2)
1:unexpected .* -> .* 3
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake-D.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake-D.fa
$ The KmerSegmenter segments the chimera reads on PSEUDO_MAX12 germline (-2)
f1:unexpected .* -> .* 2
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DATA/chimera-fake-VJ-trim.g $VIDJIL_DATA/chimera-fake-VJ.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DATA/chimera-fake-VJ-trim.g $VIDJIL_DATA/chimera-fake-VJ.fa
# Testing a custom (fake) .g with special parameters for the algorithm
$ The KmerSegmenter segments no read in Y because of the parameter
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DATA/chimera-fake-VJ.g $VIDJIL_DATA/chimera-fake-VJ.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DATA/chimera-fake-VJ.g $VIDJIL_DATA/chimera-fake-VJ.fa
# Testing a custom (fake) germlines.data
$ Report the species
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake-VJ.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake-VJ.fa
$ The KmerSegmenter segments the five chimera reads on PSEUDO_MAX12 germline (-2)
1:unexpected .* -> .* 5
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 100 --all -g $VIDJIL_DIR/germline -4 $VIDJIL_DATA/chimera-fake-half.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -e 100 --all -g $VIDJIL_DIR/germline -4 $VIDJIL_DATA/chimera-fake-half.fa
# TODO: a more precise modeling should give an e-value computation that could make this work even with -e 1
$ The KmerSegmenter segments the six chimera reads on PSEUDO_MAX1U germline (-4)
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 1e-2 -g $VIDJIL_DIR/germline $VIDJIL_DATA/chimera-fake.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -e 1e-2 -g $VIDJIL_DIR/germline $VIDJIL_DATA/chimera-fake.fa
$ Do not segment on any germline, even incomplete
1:junction detected in 0 reads
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -uU -g $VIDJIL_DIR/germline $VIDJIL_DATA/chimera-trg.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -uU -g $VIDJIL_DIR/germline $VIDJIL_DATA/chimera-trg.fa
$ Do not segment on IG/TR by chance
12:(IG|TR).* -> .* 0
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -k 14 -w 50 -c clones -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -y 3 -z 1 -r 1 $VIDJIL_DATA/clones_simul.fa
$ Junction extractions
1:found 25 windows in 66 reads
$ No clustering
1:==> 25 clones
$ Clone 1 output
1:Clone #001 .* 29 reads
$ Clone 2 output
1:Clone #002 .* 14 reads
$ Clone 3 output (sequencing error)
1:Clone #003 .* 1 reads
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -k 14 -w 50 -c clones -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -y 3 -z 0 -r 1 --cluster-epsilon 5 $VIDJIL_DATA/clones_simul.fa ; cat out/clones_simul.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -k 14 -w 50 -c clones -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -y 3 -z 1 -r 1 $VIDJIL_DATA/clones_simul.fa
$ Junction extractions
1:found 25 windows in 66 reads
$ No clustering
1:==> 25 clones
$ Clone 1 output
1:Clone #001 .* 29 reads
$ Clone 2 output
1:Clone #002 .* 14 reads
$ Clone 3 output (sequencing error)
1:Clone #003 .* 1 reads
!LAUNCH: $VIDJIL_DIR/$EXEC -k 14 -w 50 -c clones -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -y 3 -z 0 -r 1 --cluster-epsilon 5 $VIDJIL_DATA/clones_simul.fa ; cat out/clones_simul.vidjil
$ Window extractions
1:windows up to 50bp
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -K --all -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH+ -r 4 -b co $VIDJIL_DATA/D7-27--J1.fa ; cat out/co.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -K --all -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH+ -r 4 -b co $VIDJIL_DATA/D7-27--J1.fa ; cat out/co.vidjil
# Test D7-27 0/92/0 J1 non-recombination
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -x 2000 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --grep-reads GAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACT $VIDJIL_DATA/Stanford_S22.fasta ; cat out/seq/clone.fa-1
!LAUNCH: $VIDJIL_DIR/$EXEC -x 2000 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --grep-reads GAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACT $VIDJIL_DATA/Stanford_S22.fasta ; cat out/seq/clone.fa-1
# See also label-grep-reads.should
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -K --all -z 0 -s 10s -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa $VIDJIL_DATA/common-V-D.fa ; cat out/common-V-D.affects
!LAUNCH: $VIDJIL_DIR/$EXEC -K --all -z 0 -s 10s -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa $VIDJIL_DATA/common-V-D.fa ; cat out/common-V-D.affects
$ Segments the sequence
1: SEG .* -> .* 1
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --config ../data/config.vidjil $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --config ../data/config.vidjil $VIDJIL_DATA/Stanford_S22.fasta
$ Analyze 10 reads (--first-reads)
: in 10 reads
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 1 -x 10 -y 5 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -x 10 -y 5 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
# Testing -x/-y/-z options
......@@ -13,7 +13,7 @@ $ 1 clone is fully analyzed, and not more (-z 1)
2:clone-.* IGHV
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --config out/Stanford_S22.vidjil $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --config out/Stanford_S22.vidjil $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
$ Same tests as before, options taken from 'out/Stanford_S22.vidjil' that was just generated
1: ==> 10 clones
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG ../should-vdj-tests/ext-nucleotides-N.should-vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG ../should-vdj-tests/ext-nucleotides-N.should-vdj.fa
$ Segments on TRG
1: TRG .* -> .* 1
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -c clones -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/test_representatives.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -c clones -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/test_representatives.fa
$ Three clones should be found
1:3 clones
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 1 -k 4 -w 20 -z 0 -c clones -V $VIDJIL_DATA/toy_V.fa -J $VIDJIL_DATA/toy_J.fa $VIDJIL_DATA/ambiguous_representative.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -k 4 -w 20 -z 0 -c clones -V $VIDJIL_DATA/toy_V.fa -J $VIDJIL_DATA/toy_J.fa $VIDJIL_DATA/ambiguous_representative.fa
$ Short reads properly segmented
1:SEG_+.* -> .* 4
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 5 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/representative-few-reads.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -r 5 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/representative-few-reads.fa
$ Consensus sequence should span on the whole sequence
1: clone.*99% of 243
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-default.log
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG --consensus-on-random-sample $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-random.log
!LAUNCH: $VIDJIL_DIR/$EXEC -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-default.log
!LAUNCH: $VIDJIL_DIR/$EXEC -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG --consensus-on-random-sample $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-random.log
!NO_LAUNCHER:
!LAUNCH: diff consensus-default.log consensus-random.log
!EXIT_CODE: 1
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --header-sep FA -k 16 -z 0 -w 60 -r 5 -o out2 -uuu -U -v -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa $VIDJIL_DATA/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --header-sep FA -k 16 -z 0 -w 60 -r 5 -o out2 -uuu -U -v -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa $VIDJIL_DATA/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
# Testing uncommon and debug options
$ verbose (-v)
......
......@@ -7,9 +7,9 @@
# other reads. This is what is tested, we first put 10 sequences, then 5 and
# finally just the sequence of interest alone.
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -x 6 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -d -r 1 -w 60 -z 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC -d -r 1 -w 60 -z 100 -x 6 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC -d -r 1 -w 60 -z 100 -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa
$ Three times the same window
3: TGTGCGGGATCTTCGTCCTCTTATCATAATAATGGTTTTTTGGCGGGGGAGTCATGGGGC
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -v -g $VIDJIL_DIR/germline -c designations $VIDJIL_DATA/segment_simul.fa | grep '^[>#]'
!LAUNCH: $VIDJIL_DIR/$EXEC -v -g $VIDJIL_DIR/germline -c designations $VIDJIL_DATA/segment_simul.fa | grep '^[>#]'
$ First sequence, easy segmentation (no error, few deletions at the windows, small N)
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -c designations $VIDJIL_DATA/segment_S22.fa | grep '^>' ; cat out/segment_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -c designations $VIDJIL_DATA/segment_S22.fa | grep '^>' ; cat out/segment_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ First sequence Stanford
# 164 175 195 203
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c segment -aAtl reads 2>&1
!LAUNCH: $VIDJIL_DIR/$EXEC -c segment -aAtl reads 2>&1
!EXIT_CODE: 1
$ Deprecated options
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --hello reads 2>&1
!LAUNCH: $VIDJIL_DIR/$EXEC --hello reads 2>&1
!EXIT_CODE: 109
$ Unknown option
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --analysis-filter 10 --all -x 30 -v -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC --analysis-filter 10 --all -x 30 -v -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Clone 13 is correctly analyzed
1:FLN1FA001EP9M2.* IGHV2-26.* 2/GAT.*GCC/8 IGHJ2
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c designations -x 2 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -x 2 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Segments the good number of sequences in Stanford S22
2: >lcl
......
!LAUNCH: $VIDJIL_DIR/$EXEC -y 0 -x 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Analyze the good number of sequences in Stanford S22
1: found .* of 100 reads
!LAUNCH: $VIDJIL_DIR/$EXEC -y 0 -X 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Skip the good number of reads
1:Processing every 131th read
$ Analyze the good number of reads
1: found .* of 100 reads
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS $VIDJIL_DATA/Stanford_S22.fasta 2>&1
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DATA/Stanford_S22.fasta 2>&1
!EXIT_CODE: 1
$ Error, no germlines
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/Makefile $VIDJIL_DATA/Stanford_S22.fasta 2>&1
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/Makefile $VIDJIL_DATA/Stanford_S22.fasta 2>&1
!EXIT_CODE: 1
$ Error, incorrect *.g
......
!LAUNCH: ($LAUNCHER $VIDJIL_DIR/$EXEC $EXTRA $VIDJIL_DEFAULT_OPTIONS -c germlines -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRB,TRD,TRG,IGH,IGK,IGL --trim 100 -s '######-######' $VIDJIL_DATA/Stanford_S22.fasta)
!LAUNCH: ($LAUNCHER $VIDJIL_DIR/$EXEC $EXTRA -c germlines -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRB,TRD,TRG,IGH,IGK,IGL --trim 100 -s '######-######' $VIDJIL_DATA/Stanford_S22.fasta)
$ number of reads and kmers
1:13153 reads, 3020179 kmers
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline -g $VIDJIL_DIR/germline/homo-sapiens-isotypes.g $VIDJIL_DATA/isotypes.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline -g $VIDJIL_DIR/germline/homo-sapiens-isotypes.g $VIDJIL_DATA/isotypes.fa
$ Report the correct species
1: Homo sapiens .9606.
......
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/clones_simul.fa > out-fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -b clones_simul $VIDJIL_DATA/clones_simul.fa.gz > out-fa-gz
!NO_EXTRA:
diff -s -I '\#' -I 'index' -I 'Command line' out-fa out-fa-gz ; echo 'Diff: '$?; wc -l out-fa-gz
$ Identical output
1:Diff: 0
$ Testing out-fa-gz has approximately 90 lines
1: [89]. out-fa-gz
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/clones_simul.fa > out-fa ; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -b clones_simul $VIDJIL_DATA/clones_simul.fa.gz > out-fa-gz ; diff -s -I '\#' -I 'index' -I 'Command line' out-fa out-fa-gz ; echo 'Diff: '$?; wc -l out-fa-gz
$ Identical output
1:Diff: 0
$ Testing out-fa-gz has approximately 90 lines
1: [89]. out-fa-gz
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -h
!LAUNCH: $VIDJIL_DIR/$EXEC -h
$ License
1:vidjil-algo is free software
......@@ -16,3 +16,25 @@ $ Do not display advanced options
$ Correct number of regular options
24:^..-
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC -H
$ License
1:vidjil-algo is free software
$ Check default costs
1:analysis.* "4, -6, -10, -1, -10"
1:clustering .* "1, -4, -4, 0, 0"
$ Show seeds
1: 9c.#########
1: 13s.#######-######
$ Display advanced options
: , experimental options
: custom Cost
$ Correct number of options
54:^..-
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -3 --max-clones 10 -y 5 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 4 -b limits $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/limits.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -3 --max-clones 10 -y 5 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 4 -b limits $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/limits.vidjil
# Test limits in the .vidjil json output
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c clones --all -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/segment_lec.fq > /dev/null ; cat out/segment_lec.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -c clones --all -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/segment_lec.fq > /dev/null ; cat out/segment_lec.vidjil
$ Window
1:"id": "GGGGTCTATTACTGTGCCACCTGGGCCTTATTATAAGAAACTCTTTGGCA"
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -3 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -w 60 -r 5 -e 10 -b data $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/data.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC -3 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -w 60 -r 5 -e 10 -b data $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/data.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ From homo-sapiens.g
1:"ref": "http://www.vidjil.org/germlines/germline-.*.tar.gz"
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -y 0 -k 14 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -y 0 -k 14 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Find the good number of windows in Stanford S22 (contiguous seed 14)
1: found 10796 windows in 13152 reads
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -z 0 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -x 100 -r 5 --label-file $VIDJIL_DATA/Stanford_S22.label $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -z 0 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -x 100 -r 5 --label-file $VIDJIL_DATA/Stanford_S22.label $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vidjil
$ Some clone has only one read, bypassing the -r 5 option, and the good label
1: clone-00..*0001-.* my-clone
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 10 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --label ACCGGTATTACT --label CAGCTGCTCCCC --label TGGGCCACTC --label ATCAACGCTGGCAATGGTAACACTAAATATTCACAGAAGTTCCAGGGCAGAGTCACCATTACCAGGGACACATACGCGAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCTCTGTATTACTGTGCGAGAGTGCGCAGCAGCTGGTCTGATGCTTTTGATTATCTGG $VIDJIL_DATA/clones_simul.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -e 10 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --label ACCGGTATTACT --label CAGCTGCTCCCC --label TGGGCCACTC --label ATCAACGCTGGCAATGGTAACACTAAATATTCACAGAAGTTCCAGGGCAGAGTCACCATTACCAGGGACACATACGCGAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCTCTGTATTACTGTGCGAGAGTGCGCAGCAGCTGGTCTGATGCTTTTGATTATCTGG $VIDJIL_DATA/clones_simul.fa
$ ACCGGTATTACT is found (in window and representative and in the command line)
3:ACCGGTATTACT
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -x 100 -z 0 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 5 --label ACTGTGCGAGAGTTGGAATTAGTAGTGGCTGGCCTGATTCCTGGGGCCAG $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -x 100 -z 0 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 5 --label ACTGTGCGAGAGTTGGAATTAGTAGTGGCTGGCCTGATTCCTGGGGCCAG $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vidjil
$ Some clone has only one read, bypassing the -r 5 option, and the good label
1: clone-00..*0001-.* --label
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -x 2000 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --out-reads --label-filter --label GAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACT $VIDJIL_DATA/Stanford_S22.fasta ; cat out/seq/clone.fa-1
!LAUNCH: $VIDJIL_DIR/$EXEC -x 2000 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --out-reads --label-filter --label GAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACT $VIDJIL_DATA/Stanford_S22.fasta ; cat out/seq/clone.fa-1
# See also combo-grep-reads.should
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --max-consensus 0 --first-reads 10 --label-json ../data/labels-json.vidjil $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --max-consensus 0 --first-reads 10 --label-json ../data/labels-json.vidjil $VIDJIL_DATA/Stanford_S22.fasta
$ Labels are taken into account
: 2 labels
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 1 $VIDJIL_DATA/large_N.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 1 $VIDJIL_DATA/large_N.fa
$ Find a huge insertion in the segmentation
1:>clone-001.*/[ACGT]{100}
!LAUNCH: (i=1; while [ $i -le 100000 ]; do echo '>read' ; echo ccgtgtattactgtgcgagagagctgaatacttccagcactg ; i=$((i+1)); done ;) > same-igh-100k.fa ; $LAUNCHER $VIDJIL_DIR/$EXEC $EXTRA $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 5000 -w 15 same-igh-100k.fa; rm -f same-igh-100k.fa
!LAUNCH: (i=1; while [ $i -le 100000 ]; do echo '>read' ; echo ccgtgtattactgtgcgagagagctgaatacttccagcactg ; i=$((i+1)); done ;) > same-igh-100k.fa ; $LAUNCHER $VIDJIL_DIR/$EXEC $EXTRA -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 5000 -w 15 same-igh-100k.fa; rm -f same-igh-100k.fa
$ Find a unique clone with all reads
1:>clone-001--IGH--0100000--100.--window
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline $VIDJIL_DATA/multi-complete.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline $VIDJIL_DATA/multi-complete.fa
$ Segment all the seven reads
1:junction detected in 7 reads
......@@ -29,3 +29,31 @@ $ Compute the diversity. All windows have only one read, full diversity.
1: E = 1.000
1: Ds = 1.000
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline $VIDJIL_DATA/multi-short.fa
$ Segment all the seven reads
1:junction detected in 7 reads
$ Segment one read on TRA
1:TRA .* -> .* 1
$ Segment one read on TRB
1:TRB .* -> .* 1
$ Segment one read on TRG
1:TRG .* -> .* 1
$ Segment one read on TRD
1:TRD .* -> .* 1
$ Segment one read on IGH
1:IGH .* -> .* 1
$ Segment one read on IGK
1:IGK .* -> .* 1
$ Segment one read on IGL
1:IGL .* -> .* 1
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline $VIDJIL_DATA/multi-complete.fa
$ Segment all the seven reads
1:junction detected in 7 reads
$ Segment one read on TRA
1:TRA .* -> .* 1
$ Segment one read on TRB
1:TRB .* -> .* 1
$ Segment one read on TRG
1:TRG .* -> .* 1
$ Segment one read on TRD
1:TRD .* -> .* 1
$ Segment one read on IGH
1:IGH .* -> .* 1
$ Segment one read on IGK
1:IGK .* -> .* 1
$ Segment one read on IGL
1:IGL .* -> .* 1
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline $VIDJIL_DATA/multi-short.fa
$ Segment all the seven reads
1:junction detected in 7 reads
$ Segment one read on TRA
1:TRA .* -> .* 1
$ Segment one read on TRB
1:TRB .* -> .* 1
$ Segment one read on TRG
1:TRG .* -> .* 1
$ Segment one read on TRD
1:TRD .* -> .* 1
$ Segment one read on IGH
1:IGH .* -> .* 1
$ Segment one read on IGK
1:IGK .* -> .* 1
$ Segment one read on IGL
1:IGL .* -> .* 1
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH,IGK,IGL $VIDJIL_DATA/multi-complete.fa ; cat out/multi-complete.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH,IGK,IGL $VIDJIL_DATA/multi-complete.fa ; cat out/multi-complete.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ Segment the