Commit eedefd37 authored by Thonier Florian

Merge branch 'dev' of gitlab.inria.fr:vidjil/vidjil into feature-s/3555-erreur-si-clones-null

parents f11ae76e 1fcf5a1c
Pipeline #72285 passed with stages
in 9 minutes and 6 seconds
...@@ -223,7 +223,7 @@ test_browser-functional: ...@@ -223,7 +223,7 @@ test_browser-functional:
- make -C browser - make -C browser
- source /etc/profile.d/rvm.sh - source /etc/profile.d/rvm.sh
- rvm use 2.6.1 - rvm use 2.6.1
- HEADLESS=1 make -C browser/test functional - HEADLESS=1 make -C browser/test functional BROWSERS=--browsers-from-file
artifacts: artifacts:
paths: paths:
- browser/ - browser/
......
# Becomes ../Makefile in a release # algo/Makefile.algo (git)
# /Makefile (in a release)
.PHONY: all germline vidjil-algo demo test .PHONY: all germline vidjil-algo demo test
......
...@@ -61,12 +61,13 @@ int KmerAffectAnalyser::count(const KmerAffect &affect) const{ ...@@ -61,12 +61,13 @@ int KmerAffectAnalyser::count(const KmerAffect &affect) const{
int KmerAffectAnalyser::minimize(const KmerAffect &affect, int margin, int width) const { int KmerAffectAnalyser::minimize(const KmerAffect &affect, int margin, int width) const {
int i = margin ; int i = margin ;
int i_stop = MIN(affectations.size() - margin - kms.getS(), seq.length() - width);
uint64_t val_max = 0 ; uint64_t val_max = 0 ;
int i_max = NO_MINIMIZING_POSITION ; int i_max = NO_MINIMIZING_POSITION ;
for (vector<KmerAffect>::const_iterator it = affectations.begin() + margin; for (vector<KmerAffect>::const_iterator it = affectations.begin() + margin;
it < affectations.end() - margin && i <= (int) seq.length() - width; i <= i_stop;
it++, i++) { it++, i++) {
...@@ -83,7 +84,7 @@ int KmerAffectAnalyser::minimize(const KmerAffect &affect, int margin, int width ...@@ -83,7 +84,7 @@ int KmerAffectAnalyser::minimize(const KmerAffect &affect, int margin, int width
if (i_max == NO_MINIMIZING_POSITION) if (i_max == NO_MINIMIZING_POSITION)
return i_max ; return i_max ;
return i_max + (seq.length() - affectations.size() + 1) / 2; return i_max + kms.getS() / 2;
} }
...@@ -217,10 +218,16 @@ affect_infos KmerAffectAnalyser::getMaximum(const KmerAffect &before, ...@@ -217,10 +218,16 @@ affect_infos KmerAffectAnalyser::getMaximum(const KmerAffect &before,
results.nb_before_right++; results.nb_before_right++;
} }
left_evalue = kms.getProbabilityAtLeastOrAbove(before, KmerAffect left_affect = before;
KmerAffect right_affect = after;
if (kms.multiple_in_one) {
left_affect = AFFECT_NOT_UNKNOWN;
right_affect = AFFECT_NOT_UNKNOWN;
}
left_evalue = kms.getProbabilityAtLeastOrAbove(left_affect,
results.nb_before_left, results.nb_before_left,
1 + results.last_pos_max); 1 + results.last_pos_max);
right_evalue = kms.getProbabilityAtLeastOrAbove(after, right_evalue = kms.getProbabilityAtLeastOrAbove(right_affect,
results.nb_after_right, results.nb_after_right,
seq.size() - 1 - results.first_pos_max); seq.size() - 1 - results.first_pos_max);
...@@ -244,7 +251,11 @@ affect_infos KmerAffectAnalyser::getMaximum(const KmerAffect &before, ...@@ -244,7 +251,11 @@ affect_infos KmerAffectAnalyser::getMaximum(const KmerAffect &before,
double KmerAffectAnalyser::getProbabilityAtLeastOrAbove(const KmerAffect &kmer, int at_least) const { double KmerAffectAnalyser::getProbabilityAtLeastOrAbove(const KmerAffect &kmer, int at_least) const {
return kms.getProbabilityAtLeastOrAbove(kmer, at_least, seq.size()); KmerAffect affect = kmer;
if (kms.multiple_in_one) {
affect = AFFECT_NOT_UNKNOWN;
}
return kms.getProbabilityAtLeastOrAbove(affect, at_least, seq.size());
} }
pair <double, double> KmerAffectAnalyser::getLeftRightProbabilityAtLeastOrAbove() const { pair <double, double> KmerAffectAnalyser::getLeftRightProbabilityAtLeastOrAbove() const {
......
...@@ -169,12 +169,14 @@ void PointerACAutomaton<Info>::insert(const seqtype &seq, Info info) { ...@@ -169,12 +169,14 @@ void PointerACAutomaton<Info>::insert(const seqtype &seq, Info info) {
pointer_state<Info> *state = getInitialState(); pointer_state<Info> *state = getInitialState();
size_t seq_length = seq.length(); size_t seq_length = seq.length();
size_t i; size_t i;
bool existing_final = true;
for (i = 0; i < seq_length && state->transition(seq[i]) != NULL; i++) { for (i = 0; i < seq_length && state->transition(seq[i]) != NULL; i++) {
state = state->transition(seq[i]); state = state->transition(seq[i]);
} }
if (i < seq_length) { if (i < seq_length) {
existing_final = false;
// Need to create more states // Need to create more states
for (; i < seq_length; i++) { for (; i < seq_length; i++) {
pointer_state<Info> *new_state = new pointer_state<Info>(); pointer_state<Info> *new_state = new pointer_state<Info>();
...@@ -183,16 +185,14 @@ void PointerACAutomaton<Info>::insert(const seqtype &seq, Info info) { ...@@ -183,16 +185,14 @@ void PointerACAutomaton<Info>::insert(const seqtype &seq, Info info) {
} }
} }
state->is_final = true; state->is_final = true;
if (state->informations.front().isNull()) { if (! existing_final) {
this->nb_kmers_inserted++; this->nb_kmers_inserted++;
this->kmers_inserted[info]++; this->kmers_inserted[info]++;
state->informations.front() += info;
} else {
if (this->multiple_info)
state->informations.push_back(info);
else
state->informations.front() += info;
} }
if (state->informations.front().isNull() || ! this->multiple_info)
state->informations.front() += info;
else
state->informations.push_back(info);
} }
template <class Info> template <class Info>
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
...@@ -387,6 +387,7 @@ void MultiGermline::build_with_one_index(string seed, bool set_index) ...@@ -387,6 +387,7 @@ void MultiGermline::build_with_one_index(string seed, bool set_index)
index = KmerStoreFactory<KmerAffect>::createIndex(indexType, expand_seed(seed), rc); index = KmerStoreFactory<KmerAffect>::createIndex(indexType, expand_seed(seed), rc);
index->refs = 1; index->refs = 1;
insert_in_one_index(index, set_index); insert_in_one_index(index, set_index);
index->multiple_in_one = true;
} }
void MultiGermline::finish() { void MultiGermline::finish() {
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
......
...@@ -82,6 +82,7 @@ public: ...@@ -82,6 +82,7 @@ public:
static int last_id; static int last_id;
int id; // id of this index int id; // id of this index
int refs; // number of germlines using this index int refs; // number of germlines using this index
bool multiple_in_one;
list< pair <T, BioReader> > labels; list< pair <T, BioReader> > labels;
...@@ -205,6 +206,7 @@ IKmerStore<T>::IKmerStore() { ...@@ -205,6 +206,7 @@ IKmerStore<T>::IKmerStore() {
id = ++last_id; id = ++last_id;
refs = 0; refs = 0;
finished_building = false; finished_building = false;
multiple_in_one = false;
} }
template<class T> int IKmerStore<T>::last_id = 0; template<class T> int IKmerStore<T>::last_id = 0;
......
...@@ -4,6 +4,35 @@ ...@@ -4,6 +4,35 @@
#include <cstdlib> #include <cstdlib>
#include "tools.h" #include "tools.h"
#include "lib/json.hpp"
using nlohmann::json;
/**
 * Fill a map with the labels described in a JSON file.
 *
 * The entries are read from "config" -> "labels"; for each entry, its "name"
 * is stored in the_map under the key given by its "sequence".
 * A line with the file name and a line with the number of labels are printed
 * on the standard output.
 *
 * @param the_map: map to fill (sequence -> name)
 * @param json_file: path of the JSON file; when empty, nothing is read
 * @return the "config"/"labels" JSON value, or an empty-initialized json
 *         when json_file is empty
 */
json load_into_map_from_json(map <string, string> &the_map, string json_file)
{
  if (json_file.empty())
    return {};

  cout << " <== " << json_file << endl ;

  // NOTE(review): the stream state is not checked after opening -- confirm
  // how an unreadable file is expected to be reported to the caller.
  std::ifstream input(json_file);
  json document;
  input >> document;

  json labels = document["config"]["labels"] ;

  int label_count = 0;
  for (auto &entry : labels) {
    // "name" is read before "sequence", as both lookups may throw
    string name = entry["name"].get<std::string>();
    string sequence = entry["sequence"].get<std::string>();

    the_map[sequence] = name;
    label_count++ ;
  }

  cout << " ==> " << label_count << " labels" << endl;

  return labels;
}
void load_into_map(map <string, string> &the_map, string map_file, string default_value) void load_into_map(map <string, string> &the_map, string map_file, string default_value)
{ {
......
...@@ -7,4 +7,4 @@ ...@@ -7,4 +7,4 @@
#include "bioreader.hpp" #include "bioreader.hpp"
void load_into_map(map <string, string> &the_map, string map_file, string default_value); void load_into_map(map <string, string> &the_map, string map_file, string default_value);
json load_into_map_from_json(map <string, string> &the_map, string json_file);
...@@ -105,6 +105,7 @@ SampleOutput::~SampleOutput() ...@@ -105,6 +105,7 @@ SampleOutput::~SampleOutput()
void SampleOutput::out(ostream &s) void SampleOutput::out(ostream &s)
{ {
UNUSED(s);
} }
void SampleOutput::addClone(junction junction, CloneOutput *clone) void SampleOutput::addClone(junction junction, CloneOutput *clone)
......
...@@ -101,3 +101,15 @@ float ReadQualityScore::getScore(const Sequence &sequence) const { ...@@ -101,3 +101,15 @@ float ReadQualityScore::getScore(const Sequence &sequence) const {
percent_quality = GOOD_QUALITY; percent_quality = GOOD_QUALITY;
return percent_quality * sequence.sequence.size() / GOOD_QUALITY; return percent_quality * sequence.sequence.size() / GOOD_QUALITY;
} }
////////////////////////////////////////////////////////////////////////////////
////////////////////////////// RandomScore ///////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// The generator is seeded with a fixed value: this ensures a deterministic output
RandomScore::RandomScore()
{
  srand(1);
}
// Empty destructor body
RandomScore::~RandomScore()
{
}
/**
 * @param sequence: not used, the score does not depend on the read
 * @return the next value of rand(), taken modulo 500
 */
float RandomScore::getScore(const Sequence &sequence) const
{
  UNUSED(sequence);

  const int random_score = rand() % 500;
  return static_cast<float>(random_score);
}
...@@ -91,4 +91,17 @@ class ReadQualityScore: public VirtualReadScore { ...@@ -91,4 +91,17 @@ class ReadQualityScore: public VirtualReadScore {
*/ */
float getScore(const Sequence &sequence) const; float getScore(const Sequence &sequence) const;
}; };
/**
 * A very simple implementation of VirtualReadScore.
 * The score is random: it does not depend on the scored sequence.
 */
class RandomScore: public VirtualReadScore {
public:
// Constructor and destructor take no parameter
RandomScore();
~RandomScore();
/**
 * @param sequence: the sequence to score
 * @return the score given to the sequence
 */
float getScore(const Sequence &sequence) const;
};
#endif #endif
...@@ -81,7 +81,7 @@ KmerRepresentativeComputer::KmerRepresentativeComputer(list<Sequence> &r, ...@@ -81,7 +81,7 @@ KmerRepresentativeComputer::KmerRepresentativeComputer(list<Sequence> &r,
string seed) string seed)
:RepresentativeComputer(r),seed(seed),stability_limit(DEFAULT_STABILITY_LIMIT){} :RepresentativeComputer(r),seed(seed),stability_limit(DEFAULT_STABILITY_LIMIT){}
void KmerRepresentativeComputer::compute(bool try_hard) { void KmerRepresentativeComputer::compute(VirtualReadScore &readScorer, bool try_hard) {
assert(coverage_reference_length > 0); assert(coverage_reference_length > 0);
assert(required.length() > 0); assert(required.length() > 0);
is_computed = false; is_computed = false;
...@@ -114,10 +114,8 @@ void KmerRepresentativeComputer::compute(bool try_hard) { ...@@ -114,10 +114,8 @@ void KmerRepresentativeComputer::compute(bool try_hard) {
index[i]->insert(it->sequence, it->label, false, 0, seeds[i]); index[i]->insert(it->sequence, it->label, false, 0, seeds[i]);
} }
// Create a read chooser to have the sequences sorted by length // Create a read chooser to have the sequences sorted on the criteria we want
ReadQualityScore *rlc = new ReadQualityScore(); ReadChooser rc(sequences, readScorer);
ReadChooser rc(sequences, *rlc);
delete rlc;
// Traverse the sequences to get the desired representative // Traverse the sequences to get the desired representative
size_t pos_longest_run = 0; size_t pos_longest_run = 0;
...@@ -219,7 +217,7 @@ void KmerRepresentativeComputer::compute(bool try_hard) { ...@@ -219,7 +217,7 @@ void KmerRepresentativeComputer::compute(bool try_hard) {
if (coverage < THRESHOLD_BAD_COVERAGE && ! try_hard) { if (coverage < THRESHOLD_BAD_COVERAGE && ! try_hard) {
compute(true); compute(readScorer, true);
delete index[0]; delete index[0];
if (cover_longest_run) if (cover_longest_run)
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <list> #include <list>
#include "bioreader.hpp" #include "bioreader.hpp"
#include "kmerstore.h" #include "kmerstore.h"
#include "read_score.h"
using namespace std; using namespace std;
...@@ -13,6 +14,8 @@ using namespace std; ...@@ -13,6 +14,8 @@ using namespace std;
#define THRESHOLD_BAD_COVERAGE .5 /* Threshold below which the representatie #define THRESHOLD_BAD_COVERAGE .5 /* Threshold below which the representatie
coverage is considered bad */ coverage is considered bad */
static ReadQualityScore DEFAULT_READ_SCORE;
/** /**
* Compute a representative sequence from a list of sequences. * Compute a representative sequence from a list of sequences.
* The sequences are supposed to share a common junction. * The sequences are supposed to share a common junction.
...@@ -57,7 +60,8 @@ public: ...@@ -57,7 +60,8 @@ public:
* Compute the representative depending on the parameters set by the functions * Compute the representative depending on the parameters set by the functions
* @pre setRequiredSequence() must have been called (with a non-empty string). * @pre setRequiredSequence() must have been called (with a non-empty string).
*/ */
virtual void compute(bool try_hard=false) = 0; virtual void compute(VirtualReadScore & readScorer = DEFAULT_READ_SCORE,
bool try_hard=false) = 0;
/** /**
* @param min_cover: minimal number of reads supporting each position of the * @param min_cover: minimal number of reads supporting each position of the
...@@ -129,7 +133,8 @@ public: ...@@ -129,7 +133,8 @@ public:
/** /**
* @pre setCoverageReferenceLength() must have been called previously * @pre setCoverageReferenceLength() must have been called previously
*/ */
void compute(bool try_hard = false); void compute(VirtualReadScore & readScorer = DEFAULT_READ_SCORE,
bool try_hard = false);
private: private:
......
/* /*
This file is part of Vidjil <http://www.vidjil.org> This file is part of Vidjil <http://www.vidjil.org>
Copyright (C) 2011-2017 by Bonsai bioinformatics Copyright (C) 2011-2019 by VidjilNet consortium and Bonsai bioinformatics
at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille at CRIStAL (UMR CNRS 9189, Université Lille) and Inria Lille
Contributors: Contributors:
Mathieu Giraud <mathieu.giraud@vidjil.org> Mathieu Giraud <mathieu.giraud@vidjil.org>
...@@ -361,7 +361,7 @@ string Segmenter::getInfoLine() const ...@@ -361,7 +361,7 @@ string Segmenter::getInfoLine() const
{ {
string s = "" ; string s = "" ;
s += (segmented ? "" : "! ") + info ; s += (segmented ? "" : "\t ! ") + info ;
s += " " + info_extra ; s += " " + info_extra ;
s += " " + segmented_germline->code ; s += " " + segmented_germline->code ;
s += " " + string(segmented_mesg[because]) ; s += " " + string(segmented_mesg[because]) ;
...@@ -759,7 +759,7 @@ void Segmenter::setSegmentationStatus(int status) { ...@@ -759,7 +759,7 @@ void Segmenter::setSegmentationStatus(int status) {
string check_and_resolve_overlap(string seq, int seq_begin, int seq_end, string check_and_resolve_overlap(string seq, int seq_begin, int seq_end,
AlignBox *box_left, AlignBox *box_right, AlignBox *box_left, AlignBox *box_right,
Cost segment_cost) Cost segment_cost, bool reverse_V, bool reverse_J)
{ {
// Overlap size // Overlap size
int overlap = box_left->end - box_right->start + 1; int overlap = box_left->end - box_right->start + 1;
...@@ -773,7 +773,7 @@ string check_and_resolve_overlap(string seq, int seq_begin, int seq_end, ...@@ -773,7 +773,7 @@ string check_and_resolve_overlap(string seq, int seq_begin, int seq_end,
int score_l[overlap+1]; int score_l[overlap+1];
//LEFT //LEFT