Commit 14595f9c authored by Mathieu Giraud's avatar Mathieu Giraud

Merge branch 'feature-a/remove-delta-min' into 'dev'

merge -- remove obsolete delta_min "-m" option

Closes #1674

See merge request !69
parents 0cb291e2 3e42c5b4
......@@ -5,7 +5,7 @@
#include <ctype.h>
void Germline::init(string _code, char _shortcut,
int _delta_min, string seed,
string seed,
int max_indexing)
{
seg_method = SEG_METHOD_53 ;
......@@ -24,25 +24,21 @@ void Germline::init(string _code, char _shortcut,
affect_5 = string(1, toupper(shortcut)) + "-" + code + "V";
affect_4 = string(1, 14 + shortcut) + "-" + code + "D";
affect_3 = string(1, tolower(shortcut)) + "-" + code + "J";
delta_min = _delta_min ;
}
Germline::Germline(string _code, char _shortcut,
int _delta_min, string seed,
int max_indexing)
string seed, int max_indexing)
{
init(_code, _shortcut, _delta_min, seed, max_indexing);
init(_code, _shortcut, seed, max_indexing);
}
Germline::Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
int _delta_min, string seed,
int max_indexing)
string seed, int max_indexing)
{
init(_code, _shortcut, _delta_min, seed, max_indexing);
init(_code, _shortcut, seed, max_indexing);
f_reps_5.push_back(f_rep_5);
f_reps_4.push_back(f_rep_4);
......@@ -60,10 +56,9 @@ Germline::Germline(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
int _delta_min, string seed,
int max_indexing)
string seed, int max_indexing)
{
init(_code, _shortcut, _delta_min, seed, max_indexing);
init(_code, _shortcut, seed, max_indexing);
f_reps_5 = _f_reps_5 ;
f_reps_4 = _f_reps_4 ;
......@@ -91,10 +86,9 @@ Germline::Germline(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
BioReader _rep_5, BioReader _rep_4, BioReader _rep_3,
int _delta_min, string seed,
int max_indexing)
string seed, int max_indexing)
{
init(_code, _shortcut, _delta_min, seed, max_indexing);
init(_code, _shortcut, seed, max_indexing);
rep_5 = _rep_5 ;
rep_4 = _rep_4 ;
......@@ -105,17 +99,9 @@ Germline::Germline(string _code, char _shortcut,
}
Germline::Germline(string code, char shortcut, string path, json json_recom,
int delta_min, string seed, int max_indexing)
string seed, int max_indexing)
{
if (delta_min == UNSET_DELTA_MIN) {
delta_min = DEFAULT_DELTA_MIN;
if (json_recom.find("4") != json_recom.end()) {
delta_min = DEFAULT_DELTA_MIN_D;
}
}
init(code, shortcut, delta_min, seed, max_indexing);
init(code, shortcut, seed, max_indexing);
bool regular = (code.find("+") == string::npos);
......@@ -213,7 +199,6 @@ Germline::~Germline()
ostream &operator<<(ostream &out, const Germline &germline)
{
out << setw(5) << left << germline.code << right << " '" << germline.shortcut << "' "
<< setw(3) << germline.delta_min
<< " ";
size_t seed_span = germline.seed.size();
......@@ -260,7 +245,7 @@ void MultiGermline::add_germline(Germline *germline)
germlines.push_back(germline);
}
void MultiGermline::build_from_json(string path, string json_filename_and_filter, int filter, int default_delta_min,
void MultiGermline::build_from_json(string path, string json_filename_and_filter, int filter,
string default_seed, int default_max_indexing)
{
......@@ -301,7 +286,6 @@ void MultiGermline::build_from_json(string path, string json_filename_and_filter
//for each germline
for (json::iterator it = j.begin(); it != j.end(); ++it) {
int delta_min = default_delta_min;
int max_indexing = default_max_indexing;
json json_value = it.value();
......@@ -311,11 +295,6 @@ void MultiGermline::build_from_json(string path, string json_filename_and_filter
json json_parameters = json_value["parameters"];
string seed = json_parameters["seed"];
if (default_delta_min == UNSET_DELTA_MIN) {
if (json_parameters.count("delta_min") > 0) {
delta_min = json_parameters["delta_min"];
}
}
if (default_max_indexing == 0) {
if (json_parameters.count("trim_sequences") > 0) {
max_indexing = json_parameters["trim_sequences"];
......@@ -353,8 +332,8 @@ void MultiGermline::build_from_json(string path, string json_filename_and_filter
//for each set of recombination 3/4/5
for (json::iterator it2 = recom.begin(); it2 != recom.end(); ++it2) {
add_germline(new Germline(code, shortcut, path + "/", *it2 , delta_min, seed,
max_indexing));
add_germline(new Germline(code, shortcut, path + "/", *it2,
seed, max_indexing));
}
}
......
......@@ -14,9 +14,6 @@
#include "bioreader.hpp"
#include <climits>
#define DEFAULT_DELTA_MIN -10
#define DEFAULT_DELTA_MIN_D 0
#define UNSET_DELTA_MIN INT_MAX
#define DEFAULT_GERMLINE_SEED SEED_S10
enum SEGMENTATION_METHODS {
......@@ -45,38 +42,30 @@ class Germline {
int max_indexing;
void init(string _code, char _shortcut,
int _delta_min, string seed,
int max_indexing);
string seed, int max_indexing);
public:
/*
* @param delta_min: the minimal distance between the right bound and the left bound
* so that the segmentation is accepted
* (left bound: end of V, right bound : start of J)
* @param max_indexing: maximal length of the sequence to be indexed (0: all)
*/
Germline(string _code, char _shortcut,
list <string> f_rep_5, list <string> f_rep_4, list <string> f_rep_3,
int _delta_min, string seed="",
int max_indexing=0);
string seed="", int max_indexing=0);
Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
int _delta_min, string seed="",
int max_indexing=0);
string seed="", int max_indexing=0);
Germline(string _code, char _shortcut,
BioReader _rep_5, BioReader _rep_4, BioReader _rep_3,
int _delta_min, string seed="",
int max_indexing=0);
string seed="", int max_indexing=0);
Germline(string _code, char _shortcut,
int _delta_min, string seed="",
int max_indexing=0);
string seed="", int max_indexing=0);
Germline(string _code, char shortcut, string path, json json_recom,
int delta_min=UNSET_DELTA_MIN, string seed="", int max_indexing=0);
string seed="", int max_indexing=0);
~Germline();
......@@ -124,8 +113,6 @@ class Germline {
BioReader rep_4 ;
BioReader rep_3 ;
IKmerStore<KmerAffect> *index;
int delta_min;
};
ostream &operator<<(ostream &out, const Germline &germline);
......@@ -163,7 +150,7 @@ class MultiGermline {
* filter: see GERMLINES_FILTER
* max_indexing:
*/
void build_from_json(string path, string json_filename_and_filter, int filter, int default_delta_min=UNSET_DELTA_MIN,
void build_from_json(string path, string json_filename_and_filter, int filter,
string default_seed="", int default_max_indexing=0);
/**
......
......@@ -7,13 +7,13 @@
using namespace std;
void testSegmentationBug1(IndexTypes index, int delta_min) {
void testSegmentationBug1(IndexTypes index) {
string buggy_sequences = "bugs/kmersegment.fa";
BioReader seqV("../../germline/homo-sapiens/TRGV.fa");
BioReader seqJ("../../germline/homo-sapiens/TRGJ.fa");
Germline *germline ;
germline = new Germline("custom", 'x', seqV, seqV, seqJ, delta_min, "#############");
germline = new Germline("custom", 'x', seqV, seqV, seqJ, "#############");
germline->new_index(index);
germline->finish();
......@@ -54,6 +54,6 @@ void testSegmentationBug1(IndexTypes index, int delta_min) {
}
void testBugs() {
testSegmentationBug1(KMER_INDEX, -10);
testSegmentationBug1(AC_AUTOMATON, -10);
testSegmentationBug1(KMER_INDEX);
testSegmentationBug1(AC_AUTOMATON);
}
......@@ -53,7 +53,7 @@ void testFineSegment(IndexTypes index)
data.next();
Germline *germline ;
germline = new Germline("IGH", 'G', seqV, seqD, seqJ, 0, "########");
germline = new Germline("IGH", 'G', seqV, seqD, seqJ, "########");
germline->new_index(index);
germline->finish();
......@@ -105,11 +105,11 @@ void testSegmentOverlap(IndexTypes index)
BioReader data("../../data/bug-segment-overlap.fa", 1, " ");
Germline *germline1 ;
germline1 = new Germline("TRG", 'G', seqV, BioReader(), seqJ, -50, "##########");
germline1 = new Germline("TRG", 'G', seqV, BioReader(), seqJ, "##########");
germline1->new_index(index);
Germline *germline2 ;
germline2 = new Germline("TRG2", 'G', seqV, BioReader(), seqJ, -50, "##########");
germline2 = new Germline("TRG2", 'G', seqV, BioReader(), seqJ, "##########");
germline2->new_index(index);
germline1->finish();
......@@ -141,7 +141,7 @@ void testSegmentationCause(IndexTypes index) {
BioReader data("../../data/segmentation.fasta", 1, " ");
Germline *germline ;
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, 0, "##########");
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, "##########");
germline->new_index(index);
germline->finish();
......@@ -267,7 +267,7 @@ void testBug2224(IndexTypes index) {
Germline *germline ;
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, 0, "###########");
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, "###########");
germline->new_index(index);
germline->finish();
......@@ -289,7 +289,7 @@ void testExtractor(IndexTypes index) {
OnlineFasta data("../../data/segmentation.fasta", 1, " ");
Germline *germline ;
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, 0, "##########");
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, "##########");
germline->new_index(index);
MultiGermline *multi ;
......@@ -372,7 +372,7 @@ void testProbability(IndexTypes index) {
V.add(v);
J.add(j);
}
Germline germline("Test", 'T', V, BioReader(), J, 0, "####");
Germline germline("Test", 'T', V, BioReader(), J, "####");
germline.new_index(index);
germline.finish();
......
......@@ -7,7 +7,7 @@ void testWSAdd() {
map<string, string> labels;
WindowsStorage ws(labels);
Sequence seq = {"label", "l", "GATACATTAGACAGCT", "", NULL, 0};
Germline germline("Test", 't', "../../data/small_V.fa", "", "../../data/small_J.fa", -10, "");
Germline germline("Test", 't', "../../data/small_V.fa", "", "../../data/small_J.fa", "");
TAP_TEST(ws.size() == 0, TEST_WS_SIZE_NONE, "");
......@@ -49,7 +49,7 @@ void testWSAdd() {
TAP_TEST(it->label_full == "other", TEST_WS_GET_READS, "");
TAP_TEST(it->sequence == "TAAGATTAGCCACGGACT", TEST_WS_GET_READS, "");
Germline germline2("Other test", 'o', "../../data/small_V.fa", "", "../../data/small_J.fa", -20, "");
Germline germline2("Other test", 'o', "../../data/small_V.fa", "", "../../data/small_J.fa", "");
// Insert a sequence from another germline 2 times
for (int i = 0; i < 2; i++) {
ws.add("CATT", seq, SEG_MINUS, &germline2);
......@@ -59,7 +59,7 @@ void testWSAdd() {
TAP_TEST(ws.getGermline("ATTAG") == &germline,TEST_WS_GET_GERMLINE, "");
TAP_TEST(ws.getGermline("CATT") == &germline2,TEST_WS_GET_GERMLINE, "");
Germline germline3("Another test", 'a', "../../data/small_V.fa", "", "../../data/small_J.fa", -52, "");
Germline germline3("Another test", 'a', "../../data/small_V.fa", "", "../../data/small_J.fa", "");
// Insert a sequence from another germline 6 times
for (int i = 0; i < 6; i++) {
ws.add("ATAGCAT", seq, SEG_MINUS, &germline3);
......@@ -114,7 +114,7 @@ void testWSAddWithLimit() {
ws.setBinParameters(1, 20);
Sequence seq = {"label", "l", "GATACATTAGACAGCT", "", NULL, 0};
Sequence seq_long = {"label", "l", "GATACATTAGACAGCTTATATATATATTTATAT", "", NULL, 0};
Germline germline("Test", 't', "../../data/small_V.fa", "", "../../data/small_J.fa", -10, "");
Germline germline("Test", 't', "../../data/small_V.fa", "", "../../data/small_J.fa", "");
ws.add("ATTAG", seq, SEG_PLUS, &germline);
ws.add("ATTAG", seq, SEG_PLUS, &germline);
......
......@@ -218,7 +218,6 @@ void usage(char *progname, bool advanced)
if (advanced)
cerr << "Fine segmentation options (second pass)" << endl
<< " -f <string> use custom Cost for fine segmenter : format \"match, subst, indels, homo, del_end\" (default "<<VDJ<<" )"<< endl
<< " -m <int> minimal admissible delta between the end of the V and the start of the J (default: " << DEFAULT_DELTA_MIN << ") (default with -D: " << DEFAULT_DELTA_MIN_D << ")" << endl
<< " -E <float> maximal e-value for determining if a D segment can be trusted (default: " << THRESHOLD_NB_EXPECTED_D << ")" << endl
<< endl ;
......@@ -355,11 +354,8 @@ int main (int argc, char **argv)
float ratio_representative = DEFAULT_RATIO_REPRESENTATIVE;
unsigned int max_auditionned = DEFAULT_MAX_AUDITIONED;
// Admissible delta between left and right segmentation points
int delta_min = DEFAULT_DELTA_MIN ; // Kmer+Fine
int trim_sequences = DEFAULT_TRIM;
bool delta_min_changed = false;
bool trim_sequences_changed = false;
bool output_sequences_by_cluster = false;
......@@ -398,7 +394,7 @@ int main (int argc, char **argv)
//$$ options: getopt
while ((c = getopt(argc, argv, "A!x:X:hHadI124g:V:D:J:k:r:vw:e:E:C:f:W:l:Fc:m:N:s:b:Sn:o:L%:y:z:uUK3E:t:#:q")) != EOF)
while ((c = getopt(argc, argv, "A!x:X:hHadI124g:V:D:J:k:r:vw:e:E:C:f:W:l:Fc:N:s:b:Sn:o:L%:y:z:uUK3E:t:#:q")) != EOF)
switch (c)
{
......@@ -440,7 +436,6 @@ int main (int argc, char **argv)
case 'D':
f_reps_D.push_back(optarg);
delta_min = DEFAULT_DELTA_MIN_D ;
break;
case 'J':
......@@ -514,11 +509,6 @@ int main (int argc, char **argv)
wmer_size = atoi_NO_LIMIT(optarg);
break;
case 'm':
delta_min = atoi(optarg);
delta_min_changed = true;
break;
case '!':
keep_unsegmented_as_clone = true;
break;
......@@ -838,7 +828,6 @@ int main (int argc, char **argv)
{
try {
multigermline->build_from_json(path_file.first, path_file.second, GERMLINES_REGULAR,
FIRST_IF_UNCHANGED(UNSET_DELTA_MIN, delta_min, delta_min_changed),
FIRST_IF_UNCHANGED("", seed, seed_changed),
FIRST_IF_UNCHANGED(0, trim_sequences, trim_sequences_changed));
} catch (std::exception& e) {
......@@ -853,7 +842,7 @@ int main (int argc, char **argv)
Germline *germline;
germline = new Germline("custom", 'X',
f_reps_V, f_reps_D, f_reps_J,
delta_min, seed, trim_sequences);
seed, trim_sequences);
germline->new_index(indexType);
......@@ -874,14 +863,14 @@ int main (int argc, char **argv)
}
if (multi_germline_unexpected_recombinations_12) {
Germline *pseudo = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, DEFAULT_DELTA_MIN, "", trim_sequences);
Germline *pseudo = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", trim_sequences);
pseudo->seg_method = SEG_METHOD_MAX12 ;
pseudo->index = multigermline->index ;
multigermline->germlines.push_back(pseudo);
}
if (multi_germline_unexpected_recombinations_1U) {
Germline *pseudo_u = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, DEFAULT_DELTA_MIN, "", trim_sequences);
Germline *pseudo_u = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", trim_sequences);
pseudo_u->seg_method = SEG_METHOD_MAX1U ;
// TODO: there should be more up/downstream regions for the PSEUDO_UNEXPECTED germline. And/or smaller seeds ?
pseudo_u->index = multigermline->index ;
......@@ -892,7 +881,6 @@ int main (int argc, char **argv)
{
for (pair <string, string> path_file: multi_germline_paths_and_files)
multigermline->build_from_json(path_file.first, path_file.second, GERMLINES_INCOMPLETE,
FIRST_IF_UNCHANGED(UNSET_DELTA_MIN, delta_min, delta_min_changed),
FIRST_IF_UNCHANGED("", seed, seed_changed),
FIRST_IF_UNCHANGED(0, trim_sequences, trim_sequences_changed));
if ((! multigermline->one_index_per_germline) && (command != CMD_GERMLINES)) {
......@@ -1601,7 +1589,7 @@ int main (int argc, char **argv)
int nb_segmented = 0 ;
map <string, int> nb_segmented_by_germline ;
Germline *not_segmented = new Germline(PSEUDO_NOT_ANALYZED, PSEUDO_NOT_ANALYZED_CODE, 0);
Germline *not_segmented = new Germline(PSEUDO_NOT_ANALYZED, PSEUDO_NOT_ANALYZED_CODE);
while (reads->hasNext())
{
......
......@@ -434,11 +434,6 @@ and when the sequence does not contain any in-frame stop codons.
The advanced =-f= option sets the parameters used in the comparisons between
the clone sequence and the V(D)J germline genes. The default values should work.
The advanced =-m= option controls the minimum difference of positions between the end
of the V and the start of the J. Note that it is even possible to set =-m -10=
(meaning that V and J could overlap 10 bp). This is the default for VJ recombinations
(except when using a =germline/*.g= file).
The e-value set by =-e= is also applied to the V/J designation.
The =-E= option further sets the e-value for the detection of D segments.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment