Commit 06d307d5 authored by Mikaël Salson

Merge branch 'feature-a/3105-should' into 'dev'

Feature a/3105 should

Closes #3197 and #3167

See merge request !185
parents 69eb5123 6843a137
Pipeline #22996 failed with stages
in 49 seconds
......@@ -67,15 +67,8 @@ snapshot_diff_current:
-diff -u -I ".*20..-..-...*" snapshot-last $(SNAPSHOT_CURRENT)
should_raw: vidjil $(SHOULD_LOG)
should:
-$(MAKE) -k should_raw
python should-status.py
failed_should:
-sh failed-should-get-tests.sh
python should-status.py
should: vidjil
python3 should.py $(SHOULD)
shouldvdj_if_python:
if python ../../tools/check_python_version.py ; \
......@@ -124,9 +117,9 @@ curated-vdj.zip: $(SHOULD_VDJ_ARCHIVE)
%.tap: %.should-get force
./should-to-tap.sh $<
valgrind_should:
valgrind_should: vidjil
$(MAKE) -C "$(REPORTS_PATH)" clean_valgrind
LAUNCHER="$(VALGRIND_CMD)" $(MAKE) should
python3 should.py --launcher "$(VALGRIND_CMD)" $(SHOULD)
valgrind_%:
LAUNCHER="$(VALGRIND_CMD)" $(MAKE) $(patsubst valgrind_%,%,$@)
......
......@@ -6,5 +6,5 @@ $ Bug with J deletion being 122
# V10*01 ends at gtttactactgtgctgcgtgg
# JP1*01 starts at ataccactggttg
# N1 is G
1:Clone #001 – 4 reads – 100%
1:Clone #001 .* 4 reads .* 100%
!NO_LAUNCHER:
!LAUNCH: (cd $VIDJIL_DIR/germline ; md5sum {homo-sapiens,mus-musculus,rattus-norvegicus}/*.fa || md5 -r {homo-sapiens,mus-musculus,rattus-norvegicus}/*.fa)
!LAUNCH: (cd $VIDJIL_DIR/germline ; md5sum */*.fa || md5 -r */*.fa)
$ Check md5 in germline/, sequences split and processed from germline and other databases
1:b64be21f03b290c6850ce2cb2f1d6f02 homo-sapiens/IGHD.fa
......
!NO_LAUNCHER:
!LAUNCH: grep vidjil.org $VIDJIL_DIR/germline/get-saved-germline | sed "s/..germline_id./`cat $VIDJIL_DIR/germline/germline_id`/" | sed 's/.*http:..//' > url-1 \
&& grep vidjil.org $VIDJIL_DIR/germline/homo-sapiens.g | sed 's/.*http:..\\(.*\\)\\".*/\\1/' > url-2 \
&& diff url-1 url-2 ; echo 'Diff: '\\$? \
&& grep vidjil.org $VIDJIL_DIR/germline/homo-sapiens.g | sed 's/.*http:..\(.*\)\".*/\1/' > url-2 \
&& diff url-1 url-2 ; echo 'Diff: '$? \
&& wc -c url-1
$ Same url with "get-saved-germline" (using "germline_id") and inside "homo-sapiens.g"
......
......@@ -7,7 +7,7 @@ $ Window found
1:^CACCCCCCCCCTTTTTTTCT$
$ Representative computation
1:>clone-001--custom--0000004--100%--seq.-\\[8,49\\]
1:>clone-001--custom--0000004--100%--seq.-\[8,49\]
$ Representative sequence
1: TATTTGTATANCACCCCCCCCCTTTTTTTCTNGCGCGAGCGA
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c segment -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/cdr3-stopcodon.fa
!LOG: out/cdr3-stopcodon.vidjil
!OUTPUT_FILE: out/cdr3-stopcodon.vidjil
$ Two identical junctions in JSON
2: "CATWDRKNYYKKLF"
......
......@@ -13,7 +13,7 @@ $ Find the good length statistics
1: SEG -> .* 84.0
$ Four segmented on forward
1:SEG_\\+ -> 4
1:SEG_\+ -> 4
$ Three segmented on reverse (Junc#01 doesn't have its rc)
1:SEG_- -> 3
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -KA -z 0 -s \\\\#\\\\#\\\\#\\\\#\\\\#-\\\\#\\\\#\\\\#\\\\#\\\\# -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa $VIDJIL_DATA/common-V-D.fa ; cat out/common-V-D.affects
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -KA -z 0 -s "#####-#####" -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa $VIDJIL_DATA/common-V-D.fa ; cat out/common-V-D.affects
$ Segments the sequence
1: SEG .* -> .* 1
$ Find the good affects for the D segment, including a AMBIGUOUS k-mer, common with V
1:_ _ _ \\?.f.f.f.f.f.f _ _ _ _
1:_ _ _ \?.f.f.f.f.f.f _ _ _ _
$ The windows found by the KmerSegmenter has a fairly good position
2: 1 163 180 220
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/clones_simul.fa > out-fa ; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -b clones_simul $VIDJIL_DATA/clones_simul.fa.gz > out-fa-gz ; diff -s -I '\#' -I 'index' -I 'Command line' out-fa out-fa-gz ; echo 'Diff: '\\$?; wc -l out-fa-gz
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/clones_simul.fa > out-fa ; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -b clones_simul $VIDJIL_DATA/clones_simul.fa.gz > out-fa-gz ; diff -s -I '\#' -I 'index' -I 'Command line' out-fa out-fa-gz ; echo 'Diff: '$?; wc -l out-fa-gz
$ Identical output
1:Diff: 0
......
!LAUNCH: (for i in {1..100000}; do echo '>read' ; echo ccgtgtattactgtgcgagagagctgaatacttccagcactg ; done ;) > same-igh-100k.fa ; $LAUNCHER $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 5000 -w 15 same-igh-100k.fa; rm -f same-igh-100k.fa
!LAUNCH: (i=1; while [ $i -le 100000 ]; do echo '>read' ; echo ccgtgtattactgtgcgagagagctgaatacttccagcactg ; i=$((i+1)); done ;) > same-igh-100k.fa ; $LAUNCHER $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 5000 -w 15 same-igh-100k.fa; rm -f same-igh-100k.fa
$ Find a unique clone with all reads
1:>clone-001--IGH--0100000--100.--window
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c segment -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -A $VIDJIL_DATA/overlap-d-j.fa | grep -v web | tail -4 | tr -d '\\\\n' | wc -c
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c segment -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -A $VIDJIL_DATA/overlap-d-j.fa | grep -v web | tail -4 | tr -d '\n' | wc -c
$ Exported sequence has all the bases
1:116
......
......@@ -12,7 +12,7 @@ $ First sequence, easy segmentation (no error, few deletions at the windows, sma
# D3-16*01: gtattatgattacgtttgggggaGTTATGcttatacc
# J5*01 : 1 acaactggttcgactcctggggccaaggaaccctggtcaccgtctcctcag
# -2/CTTC/-3 -1/GTTA/5
1:^>seq1 \\+ VDJ 1 73 78 90 95 140 IGHV5-10-1.0[1-4] 2/CTTC/3 IGHD1-14.01 1/GTTA/5 IGHJ5.01
1:^>seq1 \+ VDJ 1 73 78 90 95 140 IGHV5-10-1.0[1-4] 2/CTTC/3 IGHD1-14.01 1/GTTA/5 IGHJ5.01
# Note that a second D (D3-16*01) can be detected (6 common nucleotides, or even 7 with an overlap on the first D)
# This is tested in segment_simul.should-vdj
......
......@@ -6,9 +6,9 @@
# other reads. This is what is tested, we first put 10 sequences, then 5 and
# finally just the sequence of interest alone.
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa; \
$VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -x 6 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa;\
$VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -x 6 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa
$ Three times the same window
3: TGTGCGGGATCTTCGTCCTCTTATCATAATAATGGTTTTTTGGCGGGGGAGTCATGGGGC
......
......@@ -2,10 +2,10 @@
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -z 0 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python $VIDJIL_DIR/tools/format_json.py -1
$ Points list
1:"original_names": [".*data//Stanford_S22.fasta", ".*data//Stanford_S22.fasta"]
1:"original_names": \[".*data//Stanford_S22.fasta", ".*data//Stanford_S22.fasta"\]
$ Most abundant window, twice, fused
1:"id": "CCACCTATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTGGGGCC".*"reads": \\[8, 8\\].*"top": 2
1:"id": "CCACCTATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTGGGGCC".*"reads": \[8, 8\].*"top": 2
# Fails since 49046ca6b97, no more 'others'
$ Windows that are not in the top 50
......
......@@ -7,13 +7,13 @@ $ From homo-sapiens.g
1:"species_taxon_id": 9606
$ Number of reads
e1:"total": [13153]
1:"total": \[13153\]
$ Number of segmented reads
e1:"segmented": [13152]
1:"segmented": \[13152\]
$ Most abundant window
1:"id": "CCACCTATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTGGGGCC".*"reads": \\[8\\]
1:"id": "CCACCTATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTGGGGCC".*"reads": \[8\]
$ Affect values are over all the sequence
1: "affectValues": .[^}]*"start": 1, "stop": 127
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 10 -z 0 -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -s \\\\#\\\\#\\\\#\\\\#\\\\#\\\\#-\\\\#\\\\#\\\\#\\\\#\\\\#\\\\# $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 10 -z 0 -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -s "######-######" $VIDJIL_DATA/Stanford_S22.fasta
$ Germlines are custom
1: custom germlines
......
!LAUNCH: ../../tools/vdj_assign - $VIDJIL_DIR/germline/homo-sapiens/IGH V J "IGHV3-15*01" "IGHJ4*02" <<< "CCGAGGACACAGCCGTGTATTTTTTCCCCTAGTGGTTGCCCCTTTGACTACTGGGGCCAGGGAACC"
!LAUNCH: echo "CCGAGGACACAGCCGTGTATTTTTTCCCCTAGTGGTTGCCCCTTTGACTACTGGGGCCAGGGAACC" | ../../tools/vdj_assign - $VIDJIL_DIR/germline/homo-sapiens/IGH V J "IGHV3-15*01" "IGHJ4*02"
$ Compute correct coordinates
1: read .* 1 .* 60
......
!NO_LAUNCHER:
!LAUNCH: (cd $VIDJIL_DIR ; ./$EXEC -h 2>&1 > /dev/null | grep '$EXEC -c' | sed 's/X 50/X 5/' | sed 's/demo.LIL-L4/-X 1000 demo\\/LIL-L4/' | sh)
!LAUNCH: (cd $VIDJIL_DIR ; ./$EXEC -h 2>&1 > /dev/null | grep '$EXEC -c' | sed 's/X 50/X 5/' | sed 's/demo.LIL-L4/-X 1000 demo\/LIL-L4/' | sh)
# Test examples embedded in './vidjil-algo -h'
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/long-segmentation.fa
$ Sequence should be segmented by k-mer segmenter
e1:SEG_+ -> 1
b:SEG_[+] -> 1
'''Get status from TAPS, output stats as well as FAILED_TESTS_SH'''
from __future__ import print_function
import sys
import os
import glob
from collections import defaultdict
# Glob pattern selecting the TAP reports to summarize.
TAPS = 'should-get-tests/*.tap'
# Extension stripped from a report path to recover the test's base name.
TAP_SUFFIX = '.tap'
# Shell script regenerated on every run; lists the commands re-running failed tests.
FAILED_TESTS_SH = './failed-should-get-tests.sh'
# Command template; %s receives the report path without its '.tap' extension.
TEST_COMMAND = 'sh should-to-tap.sh %s.should-get'


def tap_passed(tap_path):
    '''Return False as soon as the TAP file at tap_path shows a genuine failure.

    A line counts as a failure when it contains 'not ok' and carries neither
    a 'SKIP' nor a 'TODO' marker. The file is closed before returning.
    '''
    with open(tap_path) as tap_file:
        for line in tap_file:
            if 'not ok' in line and 'SKIP' not in line and 'TODO' not in line:
                return False
    return True


def main():
    '''Scan the reports matching TAPS, rewrite FAILED_TESTS_SH, print a summary.

    Exits with status 1 (after echoing FAILED_TESTS_SH) when at least one
    report contains a genuine failure; otherwise returns normally.
    '''
    stats = defaultdict(int)

    with open(FAILED_TESTS_SH, 'w') as failed:
        failed.write('#!/bin/sh\n\n')

        # Use the TAPS constant (the summary line below advertises it) and
        # sort so that the generated script is deterministic across runs.
        for tap in sorted(glob.glob(TAPS)):
            ok = tap_passed(tap)

            if not ok:
                # Strip only the trailing extension: str.replace would also
                # remove any '.tap' occurring earlier in the path.
                stem = tap[:-len(TAP_SUFFIX)] if tap.endswith(TAP_SUFFIX) else tap
                failed.write(TEST_COMMAND % stem + '\n')

            stats[ok] += 1

    print("=== %s " % TAPS,
          "--> %s ok, %s bad, %s total" % (stats[True], stats[False], stats[True] + stats[False]), end='')

    if stats[False]:
        print(" --> %s" % FAILED_TESTS_SH)
        # Echo the script through our own stdout rather than spawning 'cat',
        # so that it cannot be emitted ahead of the buffered summary line.
        with open(FAILED_TESTS_SH) as failed:
            sys.stdout.write(failed.read())
        sys.exit(1)

    print()


# The file is a script: run unconditionally, as the original did.
main()
--cd-same
--mod
rZb
--log
--tap
--var
VIDJIL_DIR=../../../
--var
VIDJIL_DATA=../data/
--var
EXEC=vidjil-algo
--var
VIDJIL_DEFAULT_OPTIONS=
../../tools/should.py
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment