Commit 5a1a72da authored by Mathieu Giraud's avatar Mathieu Giraud

Merge branch 'dev' into feature-a/2732-cli

parents fb6da11d 8680ec70
before_script:
- make demo
- make data
- make germline
- make -C browser sha1
- cp -p doc/analysis-example.vidjil browser/
stages:
- test_tools
- test_quality
- test_unit
- deploy_review
- test_functional
- test_functional_external
- test_shouldvdj
- prepare_release
- valgrind_unit
- valgrind_functional
- deploy_prod
# Tools
test_tools:
stage: test_tools
script: make -C tools/tests
# Algorithm
test_algo_unit:
......@@ -49,6 +58,8 @@ algo_valgrind_unit:
script: make -C algo valgrind_unit
only:
- /^feature-a\/.*$/
tags:
- valgrind
algo_valgrind_functional:
stage: valgrind_functional
......@@ -56,8 +67,15 @@ algo_valgrind_functional:
when: manual
only:
- /^feature-a\/.*$/
tags:
- valgrind
prepare_release:
stage: prepare_release
script: make -C algo release RELEASE_TAG='rc'
when: manual
only:
- /^feature-a\/.*$/
# Client
......@@ -72,6 +90,8 @@ test_browser_unit:
- /^hotfix-[cw]\/.*$/
- prod-client
- dev
tags:
- web
test_browser-functional:
stage: test_functional
......@@ -87,6 +107,8 @@ test_browser-functional:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
- prod-client
tags:
- web
test_browser-functional-external:
stage: test_functional_external
......@@ -103,6 +125,8 @@ test_browser-functional-external:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
- prod-client
tags:
- web
code_quality:
stage: test_quality
......@@ -112,6 +136,8 @@ code_quality:
- /^hotfix-[cw]\/.*$/
- prod-client
- dev
tags:
- web
deploy_review:
stage: deploy_review
......@@ -125,6 +151,8 @@ deploy_review:
only:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
tags:
- web
deploy_prod:
stage: deploy_prod
......@@ -136,7 +164,9 @@ deploy_prod:
url: http://app.vidjil.org/?data=analysis-example.vidjil
only:
- prod-client
tags:
- web
stop_deploy_review:
stage: deploy_review
variables:
......@@ -150,3 +180,5 @@ stop_deploy_review:
only:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
tags:
- web
\ No newline at end of file
......@@ -3,8 +3,8 @@
** Algorithm -- Installation instructions
#+BEGIN_SRC sh
make ; make germline ; make data
./vidjil -h
make
./vidjil-algo -h
#+END_SRC
Detailed compilation, installation and usage instructions
......
......@@ -5,6 +5,9 @@ VIDJIL_SERVER_SRC = server/
TEE = python tools/tee.py -v
algo:
$(MAKE) -C algo
test_browser: unit_browser functional_browser
test_server: unit_server
......@@ -41,7 +44,7 @@ unit_server:
data:
$(MAKE) -C algo/tests/data
germline browser server: %:
demo germline browser server: %:
$(MAKE) -C $@
cleanall: clean
......@@ -50,21 +53,9 @@ cleanall: clean
$(MAKE) -C $(VIDJIL_ALGO_SRC) cleanall
$(MAKE) -C server cleanall
.PHONY: all test should clean cleanall distrib data germline unit_coverage should_coverage coverage data germline browser server doc
.PHONY: all test should clean cleanall distrib data demo germline unit_coverage should_coverage coverage data germline browser server doc algo
RELEASE_TAG="notag"
RELEASE_H = $(VIDJIL_ALGO_SRC)/release.h
RELEASE_SOURCE = $(wildcard $(VIDJIL_ALGO_SRC)/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.hpp) $(wildcard $(VIDJIL_ALGO_SRC)/tests/unit-tests/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/core/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/tests/unit-tests/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/cgi/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/tools/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/tools/Makefile) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.cpp) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/lib/*.hpp) $(wildcard $(VIDJIL_ALGO_SRC)/lib/unbam/*.c) $(wildcard $(VIDJIL_ALGO_SRC)/lib/unbam/*.h) $(wildcard $(VIDJIL_ALGO_SRC)/lib/unbam/Makefile) $(wildcard $(VIDJIL_ALGO_SRC)/lib/unbam/LICENSE) $(wildcard tools/*.py)
RELEASE_MAKE = ./Makefile $(VIDJIL_ALGO_SRC)/Makefile $(VIDJIL_ALGO_SRC)/core/Makefile $(VIDJIL_ALGO_SRC)/tests/Makefile $(VIDJIL_ALGO_SRC)/lib/Makefile germline/Makefile data/Makefile tools/tests/Makefile doc/Makefile
RELEASE_TESTS = doc/format-analysis.org data/get-sequences $(wildcard data/*.vidjil) $(wildcard data/*.analysis) $(wildcard data/*.g) $(wildcard data/*.fa) $(wildcard data/*.fq) $(wildcard data/*.bam) $(VIDJIL_ALGO_SRC)/tests/repseq_vdj.py $(VIDJIL_ALGO_SRC)/tests/should-vdj-to-tap.py $(VIDJIL_ALGO_SRC)/tests/ansi.py $(wildcard $(VIDJIL_ALGO_SRC)/tests/should-vdj-tests/*.should-vdj.fa) $(wildcard $(VIDJIL_ALGO_SRC)/tests/should-vdj-tests/*.should-locus.fa) $(VIDJIL_ALGO_SRC)/tests/should-to-tap.sh $(wildcard $(VIDJIL_ALGO_SRC)/tests/should-get-tests/*.should-get) $(wildcard $(VIDJIL_ALGO_SRC)/tests/bugs/*.fa) $(wildcard $(VIDJIL_ALGO_SRC)/tests/bugs/*.should-get) $(VIDJIL_ALGO_SRC)/tests/format-json.sh $(wildcard doc/analysis-example.vidjil) $(wildcard tools/tests/*.should_get) tools/tests/should-to-tap.sh tools/diff_json.sh
RELEASE_GERMLINES = germline/germline_id germline/get-saved-germline germline/get-germline germline/split-from-imgt.py $(wildcard germline/*.g) germline/revcomp-fasta.py germline/fasta.py
RELEASE_HELP = doc/algo.org doc/locus.org doc/dev.org doc/should-vdj.org doc/credits.org doc/CHANGELOG doc/LICENSE README.org INSTALL.org
RELEASE_FILES = $(RELEASE_SOURCE) $(RELEASE_TESTS) $(RELEASE_MAKE) $(RELEASE_GERMLINES) $(RELEASE_HELP) data/segmentation.fasta $(wildcard data/*.fa.gz) $(wildcard data/*.label)
RELEASE_ARCHIVE = vidjil-$(RELEASE_TAG).tgz
CURRENT_DIR = vidjil
DIST_DIR=$(CURRENT_DIR)-$(RELEASE_TAG)
RELEASE_FILES_VID = $(RELEASE_FILES)
# Browser
RELEASE_JS = $(VIDJIL_BROWSER_SRC)/js/release.js
......@@ -73,40 +64,6 @@ DIST_BROWSER_DIR=vidjil-browser-$(RELEASE_TAG)
TEST_FILES_BROWSER= Makefile $(VIDJIL_BROWSER_SRC)/test/Makefile $(wildcard $(VIDJIL_BROWSER_SRC)/test/*.rb) $(wildcard $(VIDJIL_BROWSER_SRC)/test/QUnit/*) $(wildcard $(VIDJIL_BROWSER_SRC)/test/QUnit/testFiles/*.js)
RELEASE_FILES_BROWSER=$(TEST_FILES_BROWSER) $(wildcard $(VIDJIL_BROWSER_SRC)/*.html) $(wildcard $(VIDJIL_BROWSER_SRC)/js/*.js) $(wildcard $(VIDJIL_BROWSER_SRC)/js/lib/*.js) $(wildcard $(VIDJIL_BROWSER_SRC)/css/*.css)
# $(MAKE) distrib RELEASE_TAG=2013.04alpha
distrib:
$(info ==== Release $(RELEASE_TAG) ====)
# Tag the release
if test "$(RELEASE_TAG)" != "notag"; then \
git tag -f release-$(RELEASE_TAG); \
echo '#define RELEASE_TAG "$(RELEASE_TAG)"' > $(RELEASE_H); \
fi
mkdir -p release
rm -rf release/$(RELEASE_ARCHIVE) release/$(DIST_DIR)
mkdir -p release/$(DIST_DIR)
for file in $(RELEASE_FILES_VID); do\
dir=release/$(DIST_DIR)/`dirname "$$file"`; \
mkdir -p $$dir; \
cp "$$file" $$dir; \
done
make -C release/$(DIST_DIR)/doc html || true
cd release && tar cvzf $(RELEASE_ARCHIVE) $(DIST_DIR) \
&& rm -rf $(DIST_DIR)
# Untag the source
rm -f $(RELEASE_H) ; touch $(RELEASE_H)
# Check archive
cd release && tar xvfz $(RELEASE_ARCHIVE)
cd release/$(DIST_DIR) && $(MAKE)
cd release/$(DIST_DIR) && $(MAKE) germline
cd release/$(DIST_DIR) && $(MAKE) data
cd release/$(DIST_DIR) && $(MAKE) -C algo test
cd release/$(DIST_DIR) && $(MAKE) clean && $(MAKE) static && mv vidjil vidjil-$(RELEASE_TAG)_`uname -m`
release_browser:
$(info ==== Browser Release $(RELEASE_TAG) ====)
......
# Becomes ../Makefile in a release
.PHONY: all germline vidjil-algo demo test
all: germline vidjil-algo demo
germline:
$(MAKE) -C germline get-saved-data
vidjil-algo:
$(MAKE) -C src
demo:
cd demo && sh get-sequences
test:
$(MAKE) -C src/tests/data
$(MAKE) -C src test
......@@ -7,7 +7,7 @@
# https://coveralls.io/r/vidjil/vidjil http://img.shields.io/coveralls/vidjil/vidjil.svg
# Vidjil -- V(D)J recombinations analysis -- [[http://www.vidjil.org]]
# Copyright (C) 2011-2017 by Bonsai bioinformatics at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
# Copyright (C) 2011-2018 by Bonsai bioinformatics at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
# [[contact@vidjil.org]]
V(D)J recombinations in lymphocytes are essential for immunological
......@@ -17,9 +17,9 @@ patient follow-up.
High-throughput sequencing (NGS/HTS) now enables the deep sequencing
of a lymphoid population with dedicated [[http://omictools.com/rep-seq-c424-p1.html][Rep-Seq]] methods and softwares.
The Vidjil platform contains three components. The Vidjil algorithm
process high-througput sequencing data to *extract V(D)J
junctions and gather them into clones*. Vidjil starts
The Vidjil platform contains three components.
Vidjil-algo processes high-througput sequencing data to *extract V(D)J
junctions and gather them into clones*. Vidjil-algo starts
from a set of reads and detects "windows" overlapping the actual CDR3.
This is based on an fast and reliable seed-based heuristic and allows
to output all sequenced clones. The analysis is extremely fast
......@@ -40,7 +40,7 @@ can manage, upload, analyze and annotate their runs directly on the web applicai
* Vidjil components
** The algorithm
** Vidjil-algo
- Stable releases can be downloaded from http://bioinfo.lille.inria.fr/vidjil
- Development code is under [[algo/]]
......
......@@ -38,7 +38,7 @@ MAINCORE=$(wildcard *.cpp)
LIBCORE=core/vidjil.a lib/lib.a
BINDIR=../
CGIDIR=../browser/cgi/
CGIDIR=../tools/
TOOLDIR=tools/
VIDJIL=$(BINDIR)$(EXEC)
......@@ -148,7 +148,6 @@ test:
$(MAKE) -C tests cleantests
$(TEE) '$(MAKE) COVERAGE="$(COVERAGE)" unit' tests/out-unit.log
$(MAKE) functional
$(TEE) '$(MAKE) -C .. test_tools_if_python' tests/out-tools.log
@echo
@echo "*** All tests passed. Congratulations !"
@echo
......@@ -241,6 +240,94 @@ shouldvdj_generated_fine: base
python tests/tap-stats.py tests/data/gen/0-*.1.tap
python tests/tap-stats.py tests/data/gen/5-*.1.tap
###
# Release
###
# $(MAKE) release RELEASE_TAG=2013.04alpha
RELEASE_TAG = notag
RELEASE_H = release.h
GIT_VERSION_H = git-version.h
RELEASES_DIR = releases/
RELEASE_NAME = vidjil-$(RELEASE_TAG)
RELEASE_ARCHIVE_TAR = $(RELEASE_NAME).tar
RELEASE_ARCHIVE_TGZ = $(RELEASE_NAME).tar.gz
RELEASE_DIR_TMP = releases/tmp
RELEASE_ALGO = src/
RELEASE_TOOLS = $(wildcard ../tools/*.py)
RELEASE_MAKE = ../tools/tests/Makefile ../doc/Makefile
RELEASE_TESTS = ../doc/format-analysis.org $(wildcard ../doc/analysis-example.vidjil) $(wildcard ../tools/tests/*.should_get) ../tools/tests/should-to-tap.sh ../tools/diff_json.sh ../demo/get-sequences ../demo/Demo-X5.fa ../demo/Makefile
RELEASE_GERMLINES = ../germline/Makefile ../germline/germline_id ../germline/get-saved-germline ../germline/get-germline ../germline/split-from-imgt.py $(wildcard ../germline/*.g) ../germline/revcomp-fasta.py ../germline/fasta.py
RELEASE_HELP = ../doc/algo.org ../doc/locus.org ../doc/dev.org ../doc/should-vdj.org ../doc/credits.org ../doc/CHANGELOG ../doc/LICENSE ../README.org ../INSTALL.org
RELEASE_FILES = $(RELEASE_TOOLS) $(RELEASE_TESTS) $(RELEASE_MAKE) $(RELEASE_GERMLINES) $(RELEASE_HELP)
release: release_create release_check
release_create:
$(info ==== Release $(RELEASE_NAME) ====)
# Prepare directories
mkdir -p $(RELEASES_DIR)
rm -rf $(RELEASES_DIR)/$(RELEASE_ARCHIVE_TAR) $(RELEASES_DIR)/$(RELEASE_ARCHIVE_TGZ) $(RELEASE_DIR_TMP)/$(RELEASE_NAME)
mkdir -p $(RELEASE_DIR_TMP)/$(RELEASE_NAME)
mkdir -p $(RELEASE_DIR_TMP)/$(RELEASE_NAME)/$(RELEASE_ALGO)
# Do the archive
git archive --format=tar --prefix $(RELEASE_NAME)/$(RELEASE_ALGO) -o $(RELEASES_DIR)/$(RELEASE_ARCHIVE_TAR) HEAD .
# Tag the release
if test "$(RELEASE_TAG)" != "notag"; then \
git tag -f $(RELEASE_NAME); \
fi
# Create release.h and git-version.h
echo '#define RELEASE_TAG "$(RELEASE_TAG)"' > $(RELEASE_DIR_TMP)/$(RELEASE_NAME)/$(RELEASE_ALGO)/$(RELEASE_H); \
sh create-git-version-h.sh ; cp git-version.h $(RELEASE_DIR_TMP)/$(RELEASE_NAME)/$(RELEASE_ALGO)/$(GIT_VERSION_H)
# Store a global Makefile
cp ../Makefile.algo $(RELEASE_DIR_TMP)/$(RELEASE_NAME)/Makefile
# Store also other files in archive
for file in $(RELEASE_FILES); do\
dir=$(RELEASE_DIR_TMP)/$(RELEASE_NAME)/algo/`dirname "$$file"`/; \
mkdir -p $$dir; \
cp -v "$$file" $$dir/; \
done
# A spurious algo/ directory is created, remove it
rmdir $(RELEASE_DIR_TMP)/$(RELEASE_NAME)/algo
# Store these files in archive
cd $(RELEASE_DIR_TMP) ; tar rf ../$(RELEASE_ARCHIVE_TAR) $(RELEASE_NAME)/
# TODO: make -C $(RELEASE_NAME)/doc html || true
# Compress the archive
cd $(RELEASES_DIR) && gzip $(RELEASE_ARCHIVE_TAR)
release_check:
# Check archive
$(info)
$(info ==== Checking $(RELEASE_ARCHIVE_TGZ) ====)
cd $(RELEASES_DIR) && tar xvfz $(RELEASE_ARCHIVE_TGZ)
cd $(RELEASES_DIR)/$(RELEASE_NAME) && $(MAKE) germline
cd $(RELEASES_DIR)/$(RELEASE_NAME) && $(MAKE) vidjil-algo
cd $(RELEASES_DIR)/$(RELEASE_NAME) && $(MAKE) demo
cd $(RELEASES_DIR)/$(RELEASE_NAME) && $(MAKE) test
cd $(RELEASES_DIR)/$(RELEASE_NAME) && $(MAKE) -C src clean && $(MAKE) -C src static && mv $(EXEC) $(EXEC)-$(RELEASE_TAG)_`uname -m`
###
# dep.mk
###
......
......@@ -128,6 +128,7 @@ int main(int argc, char* argv[])
}
}
delete [] align_str;
}
}
......
......@@ -28,6 +28,6 @@ forcedep:
DEP=$(wildcard dep.mk)
ifeq (${DEP},)
$(shell $(CXX) $(CXXFLAGS) -M $(SRCCORE) > dep.mk)
$(shell $(CXX) $(CXXFLAGS) $(INC) -M $(SRCCORE) > dep.mk)
endif
include dep.mk
......@@ -59,6 +59,34 @@ int KmerAffectAnalyser::count(const KmerAffect &affect) const{
}
int KmerAffectAnalyser::minimize(const KmerAffect &affect, int margin, int width) const {
int i = margin ;
uint64_t val_max = 0 ;
int i_max = NO_MINIMIZING_POSITION ;
for (vector<KmerAffect>::const_iterator it = affectations.begin() + margin;
it < affectations.end() - margin && i <= (int) seq.length() - width;
it++, i++) {
if (*it == affect)
{
uint64_t val = dna_to_hash(&seq[i], width) ;
if (val > val_max) {
val_max = val ;
i_max = i ;
}
}
}
if (i_max == NO_MINIMIZING_POSITION)
return i_max ;
return i_max + (seq.length() - affectations.size() + 1) / 2;
}
const KmerAffect&KmerAffectAnalyser::getAffectation(int i) const{
assert(i >= 0 && i < count());
return affectations[i];
......
......@@ -11,6 +11,8 @@
#include <iostream>
#include <iomanip>
#define NO_MINIMIZING_POSITION -1
// Define two constant affectations: ambiguous and unknown.
/* Declaration of types */
......@@ -148,6 +150,15 @@ class KmerAffectAnalyser: public AffectAnalyser {
int count(const KmerAffect &affect) const;
/**
* @return a minimizing position on the matching affectations (taking into account the next affectations),
* or NO_MINIMIZING_POSITION if there is no such position
* @param affect: affectation to match
* @param margin: number of positions at both ends in which one does not take into account the matching affectation
* @param width: number of affectations to consider, starting from the matching affectation, for the minimization
*/
int minimize(const KmerAffect &affect, int margin, int width) const;
const KmerAffect &getAffectation(int i) const;
vector<KmerAffect> getAllAffectations(affect_options_t options) const;
......
......@@ -45,7 +45,7 @@ OnlineBAM::OnlineBAM(const string &input_filename,
:OnlineBioReader(input_filename, extract_field, extract_separator, nb_sequences_max, only_nth_sequence) {this->init();}
OnlineBAM::~OnlineBAM() {
free(bam_entry);
bam_destroy1(bam_entry);
bam_hdr_destroy(header);
if (input_allocated)
bgzf_close(input);
......
......@@ -164,7 +164,13 @@ void BioReader::add(const string &filename, bool verbose) {
if (verbose)
cout << " <== " << filename ;
add(*reader);
try {
add(*reader);
} catch (...) {
delete reader;
throw;
}
delete reader;
if (verbose)
......
......@@ -381,6 +381,7 @@ int DynProg::compute(bool onlyBottomTriangle, int onlyBottomTriangleShift)
}
// End. Find best_i and best_j, put FIN keywords where the backtrack should stop
if (mode == Local || mode == LocalEndWithSomeDeletions)
......
......@@ -240,7 +240,10 @@ MultiGermline::MultiGermline(IndexTypes indexType, bool _one_index_per_germline)
}
MultiGermline::~MultiGermline() {
for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
if (index && --(index->refs) == 0) {
delete index;
}
for (list<Germline*>::iterator it = germlines.begin(); it != germlines.end(); ++it)
{
delete *it ;
}
......@@ -373,6 +376,7 @@ void MultiGermline::build_with_one_index(string seed, bool set_index)
{
bool rc = true ;
index = KmerStoreFactory<KmerAffect>::createIndex(indexType, seed, rc);
index->refs = 1;
insert_in_one_index(index, set_index);
}
......
......@@ -21,6 +21,16 @@ LazyMsa::LazyMsa(int max, string reference)
LazyMsa::~LazyMsa()
{
for (int i = 0; i <= sizeUsed; i++) {
delete [] gapRef[i];
delete [] gapSeq[i];
delete [] link[i];
}
delete [] gapRef;
delete [] gapSeq;
delete [] link;
delete [] sequences;
}
void LazyMsa::add(string sequence){
......@@ -127,6 +137,8 @@ void LazyMsa::align(string *align){
align[i+1]=stream2.str();
}
delete [] maxGap;
}
......
......@@ -211,7 +211,7 @@ Sequence Segmenter::getSequence() const {
string Segmenter::getJunction(int l, int shift) {
assert(isSegmented());
shiftedJunction = false;
junctionChanged = false;
// '-w all'
if (l == NO_LIMIT_VALUE)
return getSequence().sequence;
......@@ -230,7 +230,7 @@ string Segmenter::getJunction(int l, int shift) {
if (length_shift.first < l || length_shift.second != 0) {
info += " w" + string_of_int(length_shift.first) + "/" + string_of_int(length_shift.second);
shiftedJunction = true;
junctionChanged = true;
}
// Window succesfully extracted
......@@ -265,8 +265,8 @@ bool Segmenter::isDSegmented() const {
return dSegmented;
}
bool Segmenter::isJunctionShifted() const {
return shiftedJunction;
bool Segmenter::isJunctionChanged() const {
return junctionChanged;
}
// E-values
......@@ -477,11 +477,17 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold,
return ;
}
int pos = kaa.minimize(kmer, DEFAULT_MINIMIZE_ONE_MARGIN, DEFAULT_MINIMIZE_WIDTH);
if (pos == NO_MINIMIZING_POSITION)
{
because = UNSEG_TOO_SHORT_FOR_WINDOW;
return ;
}
segmented = true ;
because = reversed ? SEG_MINUS : SEG_PLUS ;
int pos = sequence.size() / 2 ;
info = "=" + string_of_int(c) + " @" + string_of_int(pos) ;
// getJunction() will be centered on pos
......@@ -830,12 +836,13 @@ bool comp_pair (pair<int,int> i,pair<int,int> j)
* @param local: if true, Local alignment (D segment), otherwise LocalEndWithSomeDeletions and onlyBottomTriangle (V and J segments)
* @param box: the AligBox to fill
* @param segment_cost: the cost used by the dynamic programing
* @param banded_dp: should we perform a banded DP?
* @post box is filled
*/
void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id,
bool reverse_ref, bool reverse_both, bool local,
AlignBox *box, Cost segment_cost)
AlignBox *box, Cost segment_cost, bool banded_dp)
{
int best_score = MINUS_INF ;
......@@ -852,6 +859,7 @@ void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id
// With reverse_ref, the read is reversed to prevent calling revcomp on each reference sequence
string sequence_or_rc = revcomp(read, reverse_ref);
bool onlyBottomTriangle = !local && banded_dp ;
for (int r = 0 ; r < rep.size() ; r++)
{
......@@ -864,7 +872,6 @@ void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id
reverse_both, reverse_both,
rep.read(r).marked_pos);
bool onlyBottomTriangle = !local ;
int score = dp.compute(onlyBottomTriangle, BOTTOM_TRIANGLE_SHIFT);
if (local==true){
......@@ -895,6 +902,14 @@ void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id
#endif
}
if (onlyBottomTriangle && (rep.sequence(box->ref_nb).size() - best_best_j) > BOTTOM_TRIANGLE_SHIFT) {
// Too many deletions, let's do a full DP
align_against_collection(read, rep, forbidden_rep_id, reverse_ref, reverse_both,
local, box, segment_cost, false);
return;
}
sort(score_r.begin(),score_r.end(),comp_pair);
box->ref = rep.sequence(box->ref_nb);
......
......@@ -46,6 +46,14 @@
be overriden by the command line by
providing a shorter -w*/
#define DEFAULT_MINIMIZE_WIDTH 12 /* Number of nucleotides on which the minimization integer is computed */
#define DEFAULT_MINIMIZE_ONE_MARGIN 20 /* Number of nucleotides on ends of the reads excluded from the minimization in SEG_METHOD_ONE.
- A read smaller than (DEFAULT_MINIMIZE_ONE_MARGIN*2 + max(k, DEFAULT_MINIMIZE_WIDTH)), that is 52,
will trigger UNSEG_TOO_SHORT_FOR_WINDOW.
- Margins above the half of the window length may produce shortened or shifted windows
*/
using namespace std;
using json = nlohmann::json;
......@@ -53,7 +61,7 @@ using json = nlohmann::json;
enum SEGMENTED { NOT_PROCESSED,
TOTAL_SEG_AND_WINDOW,
SEG_PLUS, SEG_MINUS,
SEG_SHORTER_WINDOW,
SEG_CHANGED_WINDOW,
UNSEG_TOO_SHORT, UNSEG_STRAND_NOT_CONSISTENT,
UNSEG_TOO_FEW_ZERO, UNSEG_ONLY_V, UNSEG_ONLY_J,
UNSEG_BAD_DELTA_MIN, UNSEG_AMBIGUOUS,
......@@ -163,7 +171,7 @@ protected:
string CDR3aa;
bool reversed, segmented, dSegmented;
bool shiftedJunction;
bool junctionChanged;
int because;
/**
......@@ -209,7 +217,7 @@ protected:
* The junction is revcomp-ed if the original string comes from reverse
* strand.
* @post If the size or position of the window had to be dynamically adapted to fit
* in the read, isJunctionShifted() will return true
* in the read, isJunctionChanged() will return true
*/
string getJunction(int l, int shift=0);
......@@ -252,7 +260,7 @@ protected:
* @return true iff the junction has been shifted or shortened
* dynamically to fit into the sequence
*/
bool isJunctionShifted() const;
bool isJunctionChanged() const;
/**
* @return the status of the segmentation. Tells if the Sequence has been segmented
......@@ -382,6 +390,6 @@ class FineSegmenter : public Segmenter
void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id,
bool reverse_ref, bool reverse_both, bool local,
AlignBox *box, Cost segment_cost);
AlignBox *box, Cost segment_cost, bool banded_dp=true);
#endif
......@@ -137,6 +137,17 @@ int dna_to_int(const string &word, int size) {
return index_word;
}
uint64_t dna_to_hash(const string &word, int size) {
// djb2-xor hash
// no collision on 12-char strings on "ACGT" (nor on 8-char strings on "ACGTacgt")
// min/max for 12-char strings on "AGCT" are "AAGAACCAACCC"/"TGAACCAACCAA"
uint64_t hash = 5381;
for(int i = 0 ; i < size ; i++){
hash = ((hash << 5) + hash) ^ word[i];
}
return hash;
}
string nuc_to_aa(const string &word) {
string aa;
int index_word = 0;
......@@ -401,3 +412,12 @@ void output_label_average(ostream &out, string label, long long int nb, double a
else
out << "-" ;
}
void json_add_warning(json &clone, string code, string msg)
{
if (!clone.count("warn"))
clone["warn"] = {} ;
clone["warn"] += { {"code", code}, {"msg", msg} } ;
}
......@@ -27,6 +27,8 @@
#include <cassert>
#include <vector>
#include "bioreader.hpp"
#include "../lib/json.hpp"
using json = nlohmann::json;
using namespace std;
#define PRINT_VAR(v) cerr << #v << " = " << v << endl
......@@ -134,9 +136,10 @@ inline int nuc_to_int(char nuc) {
}
/**
* Convert size nucleotides from a DNA string to an integer.
* Convert size nucleotides from a DNA string to an integer or to an hash.
*/