Commit 55b80e0e authored by Mathieu Giraud's avatar Mathieu Giraud

merge - clean algo/tests directory structure

parents f62a73a1 605106b4
......@@ -144,9 +144,9 @@ cleanall: clean
RELEASE_TAG="notag"
RELEASE_H = $(VIDJIL_ALGO_SRC)/release.h
RELEASE_SOURCE = $(wildcard $(VIDJIL_ALGO_SRC)/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/tests/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/tests/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/cgi/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.hpp) $(wildcard tools/*.py)
RELEASE_SOURCE = $(wildcard $(VIDJIL_ALGO_SRC)/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/tests/unit-tests/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/tests/unit-tests/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/cgi/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.hpp) $(wildcard tools/*.py)
RELEASE_MAKE = ./Makefile $(VIDJIL_ALGO_SRC)/Makefile $(VIDJIL_ALGO_SRC)/core/Makefile $(VIDJIL_ALGO_SRC)/tests/Makefile $(VIDJIL_ALGO_SRC)/lib/Makefile germline/Makefile data/Makefile tools/tests/Makefile doc/Makefile
RELEASE_TESTS = doc/format-analysis.org data/get-sequences $(wildcard data/*.vidjil) $(wildcard data/*.analysis) $(wildcard data/*.data) $(wildcard data/*.fa) $(wildcard data/*.fq) $(VIDJIL_ALGO_SRC)/tests/should-vdj-to-tap.py $(wildcard $(VIDJIL_ALGO_SRC)/tests/should-vdj-tests/*.should-vdj.fa) $(wildcard $(VIDJIL_ALGO_SRC)/tests/should-vdj-tests/*.should-locus.fa) $(VIDJIL_ALGO_SRC)/tests/should-to-tap.sh $(wildcard $(VIDJIL_ALGO_SRC)/tests/*.should_get) $(wildcard $(VIDJIL_ALGO_SRC)/tests/bugs/*.fa) $(wildcard $(VIDJIL_ALGO_SRC)/tests/bugs/*.should_get) $(VIDJIL_ALGO_SRC)/tests/format-json.sh $(wildcard doc/analysis-example*.vidjil) $(wildcard tools/tests/*.should_get) tools/tests/should-to-tap.sh tools/diff_json.sh
RELEASE_TESTS = doc/format-analysis.org data/get-sequences $(wildcard data/*.vidjil) $(wildcard data/*.analysis) $(wildcard data/*.data) $(wildcard data/*.fa) $(wildcard data/*.fq) $(VIDJIL_ALGO_SRC)/tests/should-vdj-to-tap.py $(wildcard $(VIDJIL_ALGO_SRC)/tests/should-vdj-tests/*.should-vdj.fa) $(wildcard $(VIDJIL_ALGO_SRC)/tests/should-vdj-tests/*.should-locus.fa) $(VIDJIL_ALGO_SRC)/tests/should-to-tap.sh $(wildcard $(VIDJIL_ALGO_SRC)/tests/should-get-tests/*.should-get) $(wildcard $(VIDJIL_ALGO_SRC)/tests/bugs/*.fa) $(wildcard $(VIDJIL_ALGO_SRC)/tests/bugs/*.should_get) $(VIDJIL_ALGO_SRC)/tests/format-json.sh $(wildcard doc/analysis-example*.vidjil) $(wildcard tools/tests/*.should_get) tools/tests/should-to-tap.sh tools/diff_json.sh
RELEASE_GERMLINES = germline/germline_id germline/get-saved-germline germline/get-germline germline/split-from-imgt.py germline/germlines.data germline/revcomp-fasta.py germline/fasta.py
RELEASE_HELP = doc/algo.org doc/locus.org doc/dev.org doc/should-vdj.org doc/credits.org doc/CHANGELOG doc/LICENSE
RELEASE_FILES = $(RELEASE_SOURCE) $(RELEASE_TESTS) $(RELEASE_MAKE) $(RELEASE_GERMLINES) $(RELEASE_HELP) data/segmentation.fasta $(wildcard data/*.fa.gz) $(wildcard data/*.label)
......
......@@ -3,34 +3,36 @@ INCLUDE=-I ..
OPTIM=-g -O0 $(COVERAGE)
override CXXFLAGS += -std=c++11 -W -Wall $(OPTIM)
LDLIBS=-lz
SRC= tests.cpp
SRC= unit-tests/tests.cpp
EXEC=$(SRC:.cpp=)
OTHER_SRC=$(wildcard *.cpp)
LIB=../core/vidjil.a ../lib/lib.a
SHOULD=$(wildcard *.should_get)
SHOULD_LOG=$(SHOULD:.should_get=.tap)
SHOULD=$(wildcard should-get-tests/*.should-get)
SHOULD_LOG=$(SHOULD:.should-get=.tap)
SHOULD_VDJ=$(wildcard should-vdj-tests/*.should-vdj.fa)
SHOULD_LOCUS=$(wildcard should-vdj-tests/*.should-locus.fa)
REPORTS_PATH := $(patsubst %/Makefile,%,$(abspath $(lastword $(MAKEFILE_LIST))))/../../reports
VALGRIND=valgrind -v --tool=memcheck --leak-check=full --show-reachable=yes --undef-value-errors=yes --track-origins=yes --xml=yes --xml-file="$$(mktemp "$(REPORTS_PATH)/valgrind.xml.XXXXXX")"
DATE=$(shell date +%Y-%m-%d:%H:%M:%S)
SNAPSHOT=snapshot-log-$(DATE)
DATE=$(shell date +%Y-%m-%d--%H:%M:%S)
SNAPSHOTS_DIR=./snapshots
SNAPSHOT=$(SNAPSHOTS_DIR)/snapshot-log--$(DATE)
.PHONY: all force clean forcedep
test: all
@echo
@echo "*** Launching unit tests..."
./tests
cat tests.cpp.tap
./unit-tests/tests
cat ./unit-tests/tests.cpp.tap
@echo "*** All unit tests passed"
all: core $(EXEC)
@echo "All tests compiled."
snapshot:
grep . *.log should-vdj-tests/*.log > $(SNAPSHOT)
mkdir -p $(SNAPSHOTS_DIR)
grep . should-get-tests/*.log should-vdj-tests/*.log > $(SNAPSHOT)
touch snapshot-last ; mv snapshot-last snapshot-prev
ln -s $(SNAPSHOT) snapshot-last
diff -I "20..-..-.." snapshot-prev snapshot-last | diffstat -cC
......@@ -59,7 +61,7 @@ shouldvdj_with_rc_merged:
cat $(SHOULD_VDJ) > should-vdj-tests/should-vdj.merged.fa
python should-vdj-to-tap.py -r should-vdj-tests/should-vdj.merged.fa
%.tap: %.should_get force
%.tap: %.should-get force
./should-to-tap.sh $<
valgrind_should:
......
!NO_LAUNCHER:
!LAUNCH: ../../vidjil -G ../../germline/IGH ../../data/clones_simul.fa > out-fa ; ../../vidjil -G ../../germline/IGH -b clones_simul ../../data/clones_simul.fa.gz > out-fa-gz ; diff -s -I '\#' -I 'index' -I 'data/clones_simul' out-fa out-fa-gz ; echo 'Diff: '\\$?; wc -l out-fa-gz
$ Identical output
1:Diff: 0
$ Testing out-fa-gz has approximately 90 lines
1: [89]. out-fa-gz
!LAUNCH: ../../vidjil -c segment -G ../../germline/IGH -A ../../data/overlap-d-j.fa | tail -4 | tr -d '\\\\n' | wc -c
$ Exported sequence has all the bases
1:116
!NO_LAUNCHER:
!LAUNCH: (cd ../../data ; md5sum *.fasta || md5 -r *.fasta)
!LAUNCH: (cd $VIDJIL_DIR/data ; md5sum *.fasta || md5 -r *.fasta)
$ Check md5 in data/
1:985219d9156b6d9e8966807595beada3 Stanford_S22.fa
......
!NO_LAUNCHER:
!LAUNCH: (cd ../../germline ; md5sum *.fa || md5 -r *.fa)
!LAUNCH: (cd $VIDJIL_DIR/germline ; md5sum *.fa || md5 -r *.fa)
$ Check md5 in germline/, sequences split and processed from IMGT
1:3a655c9d99ca04907a120f1b69febf2e IGHD.fa
......
!LAUNCH: export REQUEST_METHOD=POST ; export CONTENT_TYPE=test ; $LAUNCHER ../../browser/cgi/align.cgi < ../../data/msa.fa
!LAUNCH: export REQUEST_METHOD=POST ; export CONTENT_TYPE=test ; $LAUNCHER $VIDJIL_DIR/browser/cgi/align.cgi < $VIDJIL_DIR/data/msa.fa
$ no spurious info
0: .* bp in .* sequences
......
!LAUNCH: ../../vidjil -A -t 0 -g ../../germline -2 -i ../../data/chimera-fake.fa
!LAUNCH: $VIDJIL_DIR/vidjil -A -t 0 -g $VIDJIL_DIR/germline -2 -i $VIDJIL_DIR/data/chimera-fake.fa
$ The KmerSegmenter segments the three chimera reads on PSEUDO_MAX12 germline (-2)
1:xxx .* -> .* 3
......
!LAUNCH: ../../vidjil -A -g ../../data/chimera-fake-VJ-germlines.data -i ../../data/chimera-fake-VJ.fa
!LAUNCH: $VIDJIL_DIR/vidjil -A -g $VIDJIL_DIR/data/chimera-fake-VJ-germlines.data -i $VIDJIL_DIR/data/chimera-fake-VJ.fa
# Testing a custom (fake) germlines.data
$ Loads from 'chimera-fake-VJ-germlines.data' a custom germline
......
!LAUNCH: ../../vidjil -A -g ../../germline -i -2 ../../data/chimera-fake-VJ.fa
!LAUNCH: $VIDJIL_DIR/vidjil -A -g $VIDJIL_DIR/germline -i -2 $VIDJIL_DIR/data/chimera-fake-VJ.fa
$ The KmerSegmenter segments the five chimera reads on PSEUDO_MAX12 germline (-2)
1:xxx .* -> .* 5
......
!LAUNCH: ../../vidjil -e 100 -A -t 0 -g ../../germline -4 -i ../../data/chimera-fake-half.fa
!LAUNCH: $VIDJIL_DIR/vidjil -e 100 -A -t 0 -g $VIDJIL_DIR/germline -4 -i $VIDJIL_DIR/data/chimera-fake-half.fa
# TODO: a more precise modeling should give a e-value computation that could make this work even with -e 1
$ The KmerSegmenter segments the six chimera reads on PSEUDO_MAX1U germline (-4)
......
!LAUNCH: ../../vidjil -g ../../germline -i ../../data/chimera-fake.fa
!LAUNCH: $VIDJIL_DIR/vidjil -g $VIDJIL_DIR/germline -i $VIDJIL_DIR/data/chimera-fake.fa
$ Do not segment on any germline, even incomplete
1:junction detected in 0 reads
......
!LAUNCH: ../../vidjil -A -uU -g ../../germline ../../data/chimera-trg.fa
!LAUNCH: $VIDJIL_DIR/vidjil -A -uU -g $VIDJIL_DIR/germline $VIDJIL_DIR/data/chimera-trg.fa
$ Do not segment on IGL by chance
1:IGL .* -> .* 0
......
!LAUNCH: ../../vidjil -G ../../germline/TRG -c clones -A -3 ../../data/segment_lec.fa
!LAUNCH: $VIDJIL_DIR/vidjil -G $VIDJIL_DIR/germline/TRG -c clones -A -3 $VIDJIL_DIR/data/segment_lec.fa
$ Extract 50bp windows (TRG)
1:found . 50-windows
......
!LAUNCH: ../../vidjil -k 14 -w 50 -c clones -G ../../germline/IGH -y 3 -z 1 -r 1 ../../data/clones_simul.fa
!LAUNCH: $VIDJIL_DIR/vidjil -k 14 -w 50 -c clones -G $VIDJIL_DIR/germline/IGH -y 3 -z 1 -r 1 $VIDJIL_DIR/data/clones_simul.fa
$ Junction extractions
1:found 25 50-windows in 66 reads
......
!LAUNCH: ../../vidjil -k 14 -w 50 -c clones -G ../../germline/IGH -y 3 -z 0 -r 1 -n 5 ../../data/clones_simul.fa ; cat out/clones_simul.vidjil
!LAUNCH: $VIDJIL_DIR/vidjil -k 14 -w 50 -c clones -G $VIDJIL_DIR/germline/IGH -y 3 -z 0 -r 1 -n 5 $VIDJIL_DIR/data/clones_simul.fa ; cat out/clones_simul.vidjil
$ Window extractions
2:found 25 50-windows in 66 reads
......
!LAUNCH: ../../vidjil -KA -z 0 -s \\\\#\\\\#\\\\#\\\\#\\\\#-\\\\#\\\\#\\\\#\\\\#\\\\# -G ../../germline/IGH ../../data/common-V-D.fa ; cat out/common-V-D.affects
!LAUNCH: $VIDJIL_DIR/vidjil -KA -z 0 -s \\\\#\\\\#\\\\#\\\\#\\\\#-\\\\#\\\\#\\\\#\\\\#\\\\# -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/common-V-D.fa ; cat out/common-V-D.affects
$ Segments the sequence
1: SEG .* -> .* 1
......
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/vidjil -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/clones_simul.fa > out-fa ; $VIDJIL_DIR/vidjil -G $VIDJIL_DIR/germline/IGH -b clones_simul $VIDJIL_DIR/data/clones_simul.fa.gz > out-fa-gz ; diff -s -I '\#' -I 'index' -I 'data/clones_simul' out-fa out-fa-gz ; echo 'Diff: '\\$?; wc -l out-fa-gz
$ Identical output
1:Diff: 0
$ Testing out-fa-gz has approximately 90 lines
1: [89]. out-fa-gz
!LAUNCH: ../../vidjil -G ../../germline/IGH -r 1 ../../data/large_N.fa
!LAUNCH: $VIDJIL_DIR/vidjil -G $VIDJIL_DIR/germline/IGH -r 1 $VIDJIL_DIR/data/large_N.fa
$ Find a huge insertion in the segmentation
1:>clone-001.*/[ACGT]{100}
!LAUNCH: (for i in {1..100000}; do echo '>read' ; echo ccgtgtattactgtgcgagagagctgaatacttccagcactg ; done ;) > same-igh-100k.fa ; $LAUNCHER ../../vidjil -G ../../germline/IGH -r 5000 -w 15 same-igh-100k.fa; rm -f same-igh-100k.fa
!LAUNCH: (for i in {1..100000}; do echo '>read' ; echo ccgtgtattactgtgcgagagagctgaatacttccagcactg ; done ;) > same-igh-100k.fa ; $LAUNCHER $VIDJIL_DIR/vidjil -G $VIDJIL_DIR/germline/IGH -r 5000 -w 15 same-igh-100k.fa; rm -f same-igh-100k.fa
$ Find a unique clone with all reads
1:>clone-001--IGH--0100000--100.--window
......
!LAUNCH: ../../vidjil -g ../../germline ../../data/multi-complete.fa
!LAUNCH: $VIDJIL_DIR/vidjil -g $VIDJIL_DIR/germline $VIDJIL_DIR/data/multi-complete.fa
$ Segment all the seven reads
1:junction detected in 7 reads
......
!LAUNCH: ../../vidjil -g ../../germline -i ../../data/multi-short.fa
!LAUNCH: $VIDJIL_DIR/vidjil -g $VIDJIL_DIR/germline -i $VIDJIL_DIR/data/multi-complete.fa
$ Segment all the seven reads
1:junction detected in 7 reads
......
!LAUNCH: ../../vidjil -g ../../germline ../../data/multi-short.fa
!LAUNCH: $VIDJIL_DIR/vidjil -g $VIDJIL_DIR/germline -i $VIDJIL_DIR/data/multi-short.fa
$ Segment all the seven reads
1:junction detected in 7 reads
......
!LAUNCH: ../../vidjil -r 5 -K -g ../../germline ../../data/multi-short.fa ; head -n 17 out/multi-short.affects
!LAUNCH: $VIDJIL_DIR/vidjil -r 5 -K -g $VIDJIL_DIR/germline $VIDJIL_DIR/data/multi-short.fa ; head -n 17 out/multi-short.affects
# Testing .affects output (-K)
$ First sequence (TRA), display sequence
......
!LAUNCH: ../../vidjil -g ../../germline -i ../../data/multi-complete.fa
!LAUNCH: $VIDJIL_DIR/vidjil -g $VIDJIL_DIR/germline $VIDJIL_DIR/data/multi-short.fa
$ Segment all the seven reads
1:junction detected in 7 reads
......
!LAUNCH: ../../vidjil -c segment -g ../../germline ../../data/multi-tiny.fa
!LAUNCH: $VIDJIL_DIR/vidjil -c segment -g $VIDJIL_DIR/germline $VIDJIL_DIR/data/multi-tiny.fa
$ Do not segment any of the seven reads
7:UNSEG
......
!LAUNCH: ../../vidjil -r 1 -k 4 -w 20 -z 0 -c clones -V ../../data/toy_V.fa -J ../../data/toy_J.fa ../../data/no_representative.fa
!LAUNCH: $VIDJIL_DIR/vidjil -r 1 -k 4 -w 20 -z 0 -c clones -V $VIDJIL_DIR/data/toy_V.fa -J $VIDJIL_DIR/data/toy_J.fa $VIDJIL_DIR/data/no_representative.fa
$ Short reads properly segmented
1:SEG_+.* -> .* 4
......
!LAUNCH: $VIDJIL_DIR/vidjil -c segment -G $VIDJIL_DIR/germline/IGH -A $VIDJIL_DIR/data/overlap-d-j.fa | tail -4 | tr -d '\\\\n' | wc -c
$ Exported sequence has all the bases
1:116
!LAUNCH: ../../vidjil -A -G ../../germline/TRG ./should-vdj-tests/ext-nucleotides-N.should-vdj.fa
!LAUNCH: $VIDJIL_DIR/vidjil -A -G $VIDJIL_DIR/germline/TRG ../should-vdj-tests/ext-nucleotides-N.should-vdj.fa
$ Segments on TRG
1: TRG .* -> .* 1
......
!LAUNCH: $LAUNCHER ../../vidjil -k 9 -G ../../germline/IGH -K -c clones ../../data/revcomp.fa ; grep 'X.X.X' out/revcomp.affects | sed 's/[^X]//g' | sort -u ; grep '#>' out/revcomp.affects | sed 's/.*SEG.../e-value:/' | cut -f 1 -d' '
!LAUNCH: $LAUNCHER $VIDJIL_DIR/vidjil -k 9 -G $VIDJIL_DIR/germline/IGH -K -c clones $VIDJIL_DIR/data/revcomp.fa ; grep 'X.X.X' out/revcomp.affects | sed 's/[^X]//g' | sort -u ; grep '#>' out/revcomp.affects | sed 's/.*SEG.../e-value:/' | cut -f 1 -d' '
$ Segments both reads, normal and reverse
1:junction detected in 2 reads
......
!LAUNCH: ../../vidjil -G ../../germline/IGH -c segment ../../data/segment_S22.fa | grep '^>'
!LAUNCH: $VIDJIL_DIR/vidjil -G $VIDJIL_DIR/germline/IGH -c segment $VIDJIL_DIR/data/segment_S22.fa | grep '^>'
$ First sequence Stanford
# 164 175 195 203
......
!LAUNCH: ../../vidjil -v -g ../../germline -c segment ../../data/segment_simul.fa | grep '^[>#]'
!LAUNCH: $VIDJIL_DIR/vidjil -v -g $VIDJIL_DIR/germline -c segment $VIDJIL_DIR/data/segment_simul.fa | grep '^[>#]'
$ First sequence, easy segmentation (no error, few deletions at the windows, small N)
......
!LAUNCH: ../../vidjil -y 0 -X 100 -G ../../germline/IGH ../../data/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/vidjil -y 0 -X 100 -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford_S22.fasta
$ Skip the good number of reads
1:Processing every 131th read
......
!LAUNCH: ../../vidjil -\# FA -k 16 -z 0 -w 60 -r 5 -o out2 -u -U -v -G ../../germline/IGH ../../data/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
!LAUNCH: $VIDJIL_DIR/vidjil -\# FA -k 16 -z 0 -w 60 -r 5 -o out2 -u -U -v -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
# Testing uncommon and debug options
$ verbose (-v)
......
!LAUNCH: ../../vidjil -z 2 -r 5 -a -G ../../germline/IGH ../../data/Stanford_S22.fasta ; cat out/seq/clone.fa-2
!LAUNCH: $VIDJIL_DIR/vidjil -z 2 -r 5 -a -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford_S22.fasta ; cat out/seq/clone.fa-2
# Testing detailed clone output (-a)
$ Detailed clone output (out/seq/clone.fa-2), germline
......
!REQUIRES: python ../../tools/check_python_version.py
!LAUNCH: ../../vidjil -z 0 -w 60 -G ../../germline/IGH ../../data/Stanford_S22.fasta ; python ../../tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python ../../tools/format_json.py -1
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/vidjil -z 0 -w 60 -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python $VIDJIL_DIR/tools/format_json.py -1
$ Points list
e1:"original_names": ["../../data/Stanford_S22.fasta", "../../data/Stanford_S22.fasta"]
e1:"original_names": ["../../..//data/Stanford_S22.fasta", "../../..//data/Stanford_S22.fasta"]
$ Most abundant window, twice, fused
1:"id": "CCACCTATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTGGGGCC".*"reads": \\[8, 8\\].*"top": 2
......
!LAUNCH: (cd ../.. ; $LAUNCHER ./vidjil -c germlines -t 100 -s '######-######' data/Stanford_S22.fasta)
!LAUNCH: (cd $VIDJIL_DIR ; $LAUNCHER ./vidjil -c germlines -t 100 -s '######-######' data/Stanford_S22.fasta)
$ number of reads and kmers
1:13153 reads, 3020179 kmers
......
!REQUIRES: python ../../tools/check_python_version.py
!LAUNCH: ../../vidjil -z 1 -G ../../germline/IGH -w 60 -r 5 -e 10 -b data ../../data/Stanford_S22.fasta > /dev/null ; cat out/data.vidjil | python ../../tools/format_json.py -1
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/vidjil -z 1 -G $VIDJIL_DIR/germline/IGH -w 60 -r 5 -e 10 -b data $VIDJIL_DIR/data/Stanford_S22.fasta > /dev/null ; cat out/data.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ Number of reads
e1:"total": [13153]
......
!LAUNCH: ../../vidjil -y 0 -k 14 -G ../../germline/IGH ../../data/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/vidjil -y 0 -k 14 -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford_S22.fasta
!LOG: stanford-k14.log
$ Find the good number of windows in Stanford S22 (contiguous seed 14)
......
!LAUNCH: ../../vidjil -t 0 -G ../../germline/IGH -FaW GAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACT ../../data/Stanford_S22.fasta ; cat out/seq/clone.fa-1
!LAUNCH: $VIDJIL_DIR/vidjil -t 0 -G $VIDJIL_DIR/germline/IGH -FaW GAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACT $VIDJIL_DIR/data/Stanford_S22.fasta ; cat out/seq/clone.fa-1
$ Keep only one windows, the one given by -W, with only 5 reads (it is actually the second clone in Stanford_S22.fasta)
1: keep 1 windows in 5 reads
......
!LAUNCH: ../../vidjil -z 0 -G ../../germline/IGH -r 5 -W GAGAGATGGACGGGATACGTAAAACGACATATGGTTCGGGGTTTGGTGCT ../../data/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/vidjil -z 0 -G $VIDJIL_DIR/germline/IGH -r 5 -W GAGAGATGGACGGGATACGTAAAACGACATATGGTTCGGGGTTTGGTGCT $VIDJIL_DIR/data/Stanford_S22.fasta
$ Some clone has only one read, bypassing the -r 5 option, and the good label
1: clone-00..*0001-.* -W
......
!LAUNCH: ../../vidjil -z 0 -G ../../germline/IGH -r 5 -l ../../data/Stanford_S22.label ../../data/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/vidjil -z 0 -G $VIDJIL_DIR/germline/IGH -r 5 -l $VIDJIL_DIR/data/Stanford_S22.label $VIDJIL_DIR/data/Stanford_S22.fasta
$ Some clone has only one read, bypassing the -r 5 option, and the good label
1: clone-00..*0001-.* my-clone
......
!LAUNCH: ../../vidjil -c segment -x 2 -G ../../germline/IGH ../../data/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/vidjil -c segment -x 2 -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford_S22.fasta
$ Segments the good number of sequences in Stanford S22
2: >lcl
......
!LAUNCH: ../../vidjil -e 10 -y 0 -s '#####-#####' -w 100 -G ../../germline/IGH ../../data/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/vidjil -e 10 -y 0 -s '#####-#####' -w 100 -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford_S22.fasta
!LOG: stanford-w100.log
$ Find the good number of "too short sequences" for windows of size 100
......
!LAUNCH: ../../vidjil -y 0 -x 100 -G ../../germline/IGH ../../data/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/vidjil -y 0 -x 100 -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford_S22.fasta
$ Analyze the good number of sequences in Stanford S22
1: found 99 ..-windows in 100 reads .100. of 100 reads
!LAUNCH: ../../vidjil -e 10 -z 0 -V ../../germline/IGHV.fa -D ../../germline/IGHD.fa -J ../../germline/IGHJ.fa -s \\\\#\\\\#\\\\#\\\\#\\\\#\\\\#-\\\\#\\\\#\\\\#\\\\#\\\\#\\\\# ../../data/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/vidjil -e 10 -z 0 -V $VIDJIL_DIR/germline/IGHV.fa -D $VIDJIL_DIR/germline/IGHD.fa -J $VIDJIL_DIR/germline/IGHJ.fa -s \\\\#\\\\#\\\\#\\\\#\\\\#\\\\#-\\\\#\\\\#\\\\#\\\\#\\\\#\\\\# $VIDJIL_DIR/data/Stanford_S22.fasta
$ Parses IGHV.fa germline
1: 102221 bp in 350 sequences
......
!LAUNCH: ../../vidjil -A -G ../../germline/TRB ../../data/trb-only-VJ.fa ; cat out/trb-only-VJ.vidjil
!LAUNCH: $VIDJIL_DIR/vidjil -A -G $VIDJIL_DIR/germline/TRB $VIDJIL_DIR/data/trb-only-VJ.fa ; cat out/trb-only-VJ.vidjil
$ Segments the read on TRB (the information is given twice, stdout + .vidjil)
2: TRB .* -> .* 1
......
!LAUNCH: ../../vidjil -A -g ../../germline ../../data/trd-dd2-dd3.fa
!LAUNCH: $VIDJIL_DIR/vidjil -A -g $VIDJIL_DIR/germline $VIDJIL_DIR/data/trd-dd2-dd3.fa
$ Segment only 2 reads, because there is no -i
1:junction detected in 2 reads .40..
......
!LAUNCH: ../../vidjil -w 10 -e 10 -A -i -g ../../germline ../../data/trd-dd2-dd3.fa
!LAUNCH: $VIDJIL_DIR/vidjil -w 10 -e 10 -A -i -g $VIDJIL_DIR/germline $VIDJIL_DIR/data/trd-dd2-dd3.fa
$ Segment all 5 reads, thanks to -i
1:junction detected in 5 reads .100..
......
!LAUNCH: ../../vidjil -e 10 -k 8 -w 20 -V ../../germline/TRDV.fa -V ../../germline/TRDD2_upstream.fa -J ../../germline/TRDD3_downstream.fa -J ../../germline/TRDJ.fa ../../data/trd-dd2-dd3.fa
!LAUNCH: $VIDJIL_DIR/vidjil -e 10 -k 8 -w 20 -V $VIDJIL_DIR/germline/TRDV.fa -V $VIDJIL_DIR/germline/TRDD2_upstream.fa -J $VIDJIL_DIR/germline/TRDD3_downstream.fa -J $VIDJIL_DIR/germline/TRDJ.fa $VIDJIL_DIR/data/trd-dd2-dd3.fa
$ Segment all 5 reads, thanks to TRDD2 and TRDD3
1: junction detected in 5 reads .100..
......
!LAUNCH: ../../vidjil -g ../../germline -u ../../data/segmentation-2.fa ; cat out/segmentation-2.unsegmented.vdj.fa
!LAUNCH: $VIDJIL_DIR/vidjil -g $VIDJIL_DIR/germline -u $VIDJIL_DIR/data/segmentation-2.fa ; cat out/segmentation-2.unsegmented.vdj.fa
$ Only one sequence is segmented, but it is too small for a window (too short w)
1: junction detected in 1 reads
......
!LAUNCH: ../../vidjil 2>&1
!LAUNCH: $VIDJIL_DIR/vidjil 2>&1
!EXIT_CODE: 1
$ Error, no germlines
......
!LAUNCH: ../../vidjil -w 10 -G ../../germline/IGH ../../data/Stanford-S22.fa 2>&1
!LAUNCH: $VIDJIL_DIR/vidjil -w 10 -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/Stanford-S22.fa 2>&1
!EXIT_CODE: 1
$ Error, too small -w
......
!LAUNCH: ../../vidjil -h 2> out-v ; cat out-v
!LAUNCH: $VIDJIL_DIR/vidjil -h 2> out-v ; cat out-v
$ License
1:Vidjil is free software
......
!LAUNCH: ../../vidjil -A -G ../../germline/IGH ../../data/toy_V.fa
!LAUNCH: $VIDJIL_DIR/vidjil -A -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/toy_V.fa
$ Warning, -A
1:WARNING
......
!NO_LAUNCHER:
!LAUNCH: ../../vidjil -z 200 -G ../../germline/IGH ../../data/trd-dd2-dd3.fa
!LAUNCH: $VIDJIL_DIR/vidjil -z 200 -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/trd-dd2-dd3.fa
$ Warning, -z
1:WARNING
......
!LAUNCH: ../../vidjil -G ../../germline/IGH ../../data/long-segmentation.fa
!LAUNCH: $VIDJIL_DIR/vidjil -G $VIDJIL_DIR/germline/IGH $VIDJIL_DIR/data/long-segmentation.fa
$ Sequence should be segmented by k-mer segmenter
e1:SEG_+ -> 1
!LAUNCH: ../../vidjil -i -g ../../germline/ -r 1 -c clones ../../data/revcomp-VdJa.fa
!LAUNCH: $VIDJIL_DIR/vidjil -i -g $VIDJIL_DIR/germline/ -r 1 -c clones $VIDJIL_DIR/data/revcomp-VdJa.fa
$ Just one window found
1:==> found 1 ..-windows
......
!LAUNCH: $LAUNCHER ../../vidjil -z 1 -k 9 -G ../../germline/IGH -% 0.001 -r 2 -x 1000 -y 1 -c clones ../../data/Stanford_S22.fasta | sed 's/--IGH--.*VDJ\\(.*\\).$/\\1/' | sed 's/IGH SEG_./IGH SEG_X/' > vidjil_s22.log && $LAUNCHER ../../vidjil -z 1 -k 9 -G ../../germline/IGH -% 0.001 -r 2 -x 1000 -y 1 -c clones ../../data/Stanford_S22.rc.fasta | sed 's/--IGH--.*VDJ\\(.*\\).$/\\1/' | sed 's/IGH SEG_./IGH SEG_X/' > vidjil_s22_rc.log && diff out/Stanford_S22{,.rc}.vidjil | grep GGG && diff vidjil_s22.log vidjil_s22_rc.log
!LAUNCH: $LAUNCHER $VIDJIL_DIR/vidjil -z 1 -k 9 -G $VIDJIL_DIR/germline/IGH -% 0.001 -r 2 -x 1000 -y 1 -c clones $VIDJIL_DIR/data/Stanford_S22.fasta | sed 's/--IGH--.*VDJ\\(.*\\).$/\\1/' | sed 's/IGH SEG_./IGH SEG_X/' > vidjil_s22.log && $LAUNCHER $VIDJIL_DIR/vidjil -z 1 -k 9 -G $VIDJIL_DIR/germline/IGH -% 0.001 -r 2 -x 1000 -y 1 -c clones $VIDJIL_DIR/data/Stanford_S22.rc.fasta | sed 's/--IGH--.*VDJ\\(.*\\).$/\\1/' | sed 's/IGH SEG_./IGH SEG_X/' > vidjil_s22_rc.log && diff out/Stanford_S22{,.rc}.vidjil | grep GGG && diff vidjil_s22.log vidjil_s22_rc.log
!EXIT_CODE: 1
$ Same number segmented
......
......@@ -73,6 +73,7 @@ DIR=$(dirname $file)
BASE=$(basename $file)
cd "$DIR"
VIDJIL_DIR=../../../
OUTPUT_DIR=.
TAP_FILE=${BASE%.*}.tap
LOG_FILE=${BASE%.*}.log
......
......@@ -115,7 +115,7 @@ void testFastaAddThrows() {
caught = false;
try {
fa1.add("testTools.cpp");
fa1.add("Makefile");
} catch (invalid_argument e) {
TAP_TEST(string(e.what()).find("The file seems to be malformed") != string::npos, TEST_FASTA_INVALID_FILE, "");
caught = true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment