Commit 4635afff authored by Cyprien Borée, committed by Mikaël Salson

Add boolean parameter to build automaton

A boolean parameter has been added to the Germline constructors to control
whether the automaton is constructed.
For example, with -Z all, the automaton is not constructed.

For more information, see #3268
parent 9601d8c8
......@@ -6,7 +6,7 @@
void Germline::init(string _code, char _shortcut,
string seed,
int max_indexing)
int max_indexing, bool build_automaton)
{
seg_method = SEG_METHOD_53 ;
code = _code ;
......@@ -24,20 +24,20 @@ void Germline::init(string _code, char _shortcut,
affect_5 = string(1, toupper(shortcut)) + "-" + code + "V";
affect_4 = string(1, 14 + shortcut) + "-" + code + "D";
affect_3 = string(1, tolower(shortcut)) + "-" + code + "J";
automaton_5 = buildACAutomatonToFilterBioReader(rep_5, seed);
automaton_5 = build_automaton ? buildACAutomatonToFilterBioReader(rep_5, seed) : nullptr;
}
Germline::Germline(string _code, char _shortcut,
string seed, int max_indexing)
string seed, int max_indexing, bool build_automaton)
{
init(_code, _shortcut, seed, max_indexing);
init(_code, _shortcut, seed, max_indexing, build_automaton);
}
Germline::Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
string seed, int max_indexing)
string f_rep_5, string f_rep_4, string f_rep_3,
string seed, int max_indexing, bool build_automaton)
{
f_reps_5.push_back(f_rep_5);
......@@ -48,8 +48,8 @@ Germline::Germline(string _code, char _shortcut,
rep_5 = BioReader(f_rep_5, 2, "|");
rep_4 = BioReader(f_rep_4, 2, "|");
rep_3 = BioReader(f_rep_3, 2, "|");
init(_code, _shortcut, seed, max_indexing);
init(_code, _shortcut, seed, max_indexing, build_automaton);
if (rep_4.size())
seg_method = SEG_METHOD_543 ;
......@@ -57,8 +57,8 @@ Germline::Germline(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
string seed, int max_indexing)
list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
string seed, int max_indexing, bool build_automaton)
{
f_reps_5 = _f_reps_5 ;
......@@ -73,9 +73,9 @@ Germline::Germline(string _code, char _shortcut,
for (list<string>::const_iterator it = f_reps_5.begin(); it != f_reps_5.end(); ++it)
rep_5.add(*it);
init(_code, _shortcut, seed, max_indexing);
init(_code, _shortcut, seed, max_indexing, build_automaton);
for (list<string>::const_iterator it = f_reps_4.begin(); it != f_reps_4.end(); ++it)
rep_4.add(*it);
......@@ -89,20 +89,20 @@ Germline::Germline(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
BioReader _rep_5, BioReader _rep_4, BioReader _rep_3,
string seed, int max_indexing)
string seed, int max_indexing, bool build_automaton)
{
rep_5 = _rep_5 ;
rep_4 = _rep_4 ;
rep_3 = _rep_3 ;
init(_code, _shortcut, seed, max_indexing);
init(_code, _shortcut, seed, max_indexing, build_automaton);
if (rep_4.size())
seg_method = SEG_METHOD_543 ;
}
Germline::Germline(string code, char shortcut, string path, json json_recom,
string seed, int max_indexing)
string seed, int max_indexing, bool build_automaton)
{
bool regular = (code.find("+") == string::npos);
......@@ -118,9 +118,9 @@ Germline::Germline(string code, char shortcut, string path, json json_recom,
f_reps_5.push_back(path + filename);
rep_5.add(path + filename);
}
init(code, shortcut, seed, max_indexing);
init(code, shortcut, seed, max_indexing, build_automaton);
if (json_recom.find("4") != json_recom.end()) {
for (json::iterator it = json_recom["4"].begin();
it != json_recom["4"].end(); ++it)
......@@ -279,7 +279,7 @@ void MultiGermline::add_germline(Germline *germline)
}
void MultiGermline::build_from_json(string path, string json_filename_and_filter, int filter,
string default_seed, int default_max_indexing)
string default_seed, int default_max_indexing, bool build_automaton)
{
//extract json_filename and systems_filter
......@@ -362,14 +362,14 @@ void MultiGermline::build_from_json(string path, string json_filename_and_filter
seedMap["9s"] = SEED_9;
seed = (default_seed.size() == 0) ? seedMap[seed] : default_seed;
//for each set of recombination 3/4/5
for (json::iterator it2 = recom.begin(); it2 != recom.end(); ++it2) {
add_germline(new Germline(code, shortcut, path + "/", *it2,
seed, max_indexing));
seed, max_indexing, build_automaton));
}
}
}
/* if 'one_index_per_germline' was not set, this should be called once all germlines have been loaded */
......
......@@ -44,7 +44,7 @@ class Germline {
int max_indexing;
void init(string _code, char _shortcut,
string seed, int max_indexing);
string seed, int max_indexing, bool build_automaton=false);
public:
/*
......@@ -53,21 +53,21 @@ class Germline {
Germline(string _code, char _shortcut,
list <string> f_rep_5, list <string> f_rep_4, list <string> f_rep_3,
string seed="", int max_indexing=0);
string seed="", int max_indexing=0, bool build_automaton=false);
Germline(string _code, char _shortcut,
Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
string seed="", int max_indexing=0);
string seed="", int max_indexing=0, bool build_automaton=false);
Germline(string _code, char _shortcut,
Germline(string _code, char _shortcut,
BioReader _rep_5, BioReader _rep_4, BioReader _rep_3,
string seed="", int max_indexing=0);
string seed="", int max_indexing=0, bool build_automaton=false);
Germline(string _code, char _shortcut,
string seed="", int max_indexing=0);
string seed="", int max_indexing=0, bool build_automaton=false);
Germline(string _code, char shortcut, string path, json json_recom,
string seed="", int max_indexing=0);
string seed="", int max_indexing=0, bool build_automaton=false);
~Germline();
......@@ -156,7 +156,7 @@ class MultiGermline {
* max_indexing:
*/
void build_from_json(string path, string json_filename_and_filter, int filter,
string default_seed="", int default_max_indexing=0);
string default_seed="", int default_max_indexing=0, bool build_automaton=false);
/**
* Finishes the construction of the multi germline so that it can be used
......
......@@ -839,7 +839,7 @@ int main (int argc, char **argv)
try {
multigermline->build_from_json(path_file.first, path_file.second, GERMLINES_REGULAR,
FIRST_IF_UNCHANGED("", seed, seed_changed),
FIRST_IF_UNCHANGED(0, trim_sequences, trim_sequences_changed));
FIRST_IF_UNCHANGED(0, trim_sequences, trim_sequences_changed), (kmer_threshold != NO_LIMIT_VALUE));
} catch (std::exception& e) {
cerr << ERROR_STRING << PROGNAME << " cannot properly read " << path_file.first << "/" << path_file.second << ": " << e.what() << endl;
delete multigermline;
......@@ -852,8 +852,8 @@ int main (int argc, char **argv)
// Custom germline
Germline *germline;
germline = new Germline("custom", 'X',
f_reps_V, f_reps_D, f_reps_J,
seed, trim_sequences);
f_reps_V, f_reps_D, f_reps_J,
seed, trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
germline->new_index(indexType);
......@@ -874,14 +874,14 @@ int main (int argc, char **argv)
}
if (multi_germline_unexpected_recombinations_12) {
Germline *pseudo = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", trim_sequences);
Germline *pseudo = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
pseudo->seg_method = SEG_METHOD_MAX12 ;
pseudo->set_index(multigermline->index);
multigermline->germlines.push_back(pseudo);
}
if (multi_germline_unexpected_recombinations_1U) {
Germline *pseudo_u = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", trim_sequences);
Germline *pseudo_u = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
pseudo_u->seg_method = SEG_METHOD_MAX1U ;
// TODO: there should be more up/downstream regions for the PSEUDO_UNEXPECTED germline. And/or smaller seeds ?
pseudo_u->set_index(multigermline->index);
......@@ -893,7 +893,7 @@ int main (int argc, char **argv)
for (pair <string, string> path_file: multi_germline_paths_and_files)
multigermline->build_from_json(path_file.first, path_file.second, GERMLINES_INCOMPLETE,
FIRST_IF_UNCHANGED("", seed, seed_changed),
FIRST_IF_UNCHANGED(0, trim_sequences, trim_sequences_changed));
FIRST_IF_UNCHANGED(0, trim_sequences, trim_sequences_changed), (kmer_threshold != NO_LIMIT_VALUE));
if ((! multigermline->one_index_per_germline) && (command != CMD_GERMLINES)) {
multigermline->insert_in_one_index(multigermline->index, true);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment