Commit 41b4f071 authored by Mathieu Giraud's avatar Mathieu Giraud Committed by Vidjil Team

core/segment.cpp: UNSEG_AMBIGUOUS of the previous commits becomes a 'detected' flag

AMBIGUOUS_THRESHOLD becomes DETECT_THRESHOLD; such reads are flagged 'detected'

This is cleaner: on one side we have the cause of unsegmentation, on the other we have the
flag 'detected' if there were enough V+J (now total V+J, we don't need to have both V and J).
Currently, the flag can be set when the cause is SEG_+/-, TOO_FEW_V/J, or DELTA_MIN/MAX (but not STRAND).

We do not continue to the remaining germlines when 'detected' or STRAND.
parent b3bcd31c
......@@ -215,6 +215,9 @@ KmerSegmenter::KmerSegmenter(Sequence seq, MultiGermline *multigermline)
strand = 2;
}
// Are there enough V/J to assert that this was the correct germline (and thus that we won't test other ones) ?
detected = (nb_strand[0] + nb_strand[1] >= DETECT_THRESHOLD);
computeSegmentation(strand, germline);
if (segmented)
......@@ -234,8 +237,8 @@ KmerSegmenter::KmerSegmenter(Sequence seq, MultiGermline *multigermline)
return ;
}
// If a read is AMBIGUOUS, do not test other germlines
if (because == UNSEG_AMBIGUOUS)
// If the germline was detected, do not test other germlines
if (detected)
return ;
} // end for (Germlines)
......@@ -275,20 +278,13 @@ void KmerSegmenter::computeSegmentation(int strand, Germline* germline) {
if (! max.max_found) {
if ((strand == 1 && max.nb_before_left == 0)
|| (strand == -1 && max.nb_after_right == 0))
because = UNSEG_TOO_FEW_V ;
because = detected ? UNSEG_AMBIGUOUS : UNSEG_TOO_FEW_V ;
else if ((strand == 1 && max.nb_after_right == 0)
|| (strand == -1 && max.nb_before_left == 0))
{
because = UNSEG_TOO_FEW_J ;
because = detected ? UNSEG_AMBIGUOUS : UNSEG_TOO_FEW_J ;
} else
because = UNSEG_AMBIGUOUS;
// The sequence is not segmented.
// We labeled it AMBIGUOUS if there were both enough affect_5 and enough affect_3
if ((max.nb_before_left + max.nb_before_right >= AMBIGUOUS_THRESHOLD)
&& (max.nb_after_left + max.nb_after_right >= AMBIGUOUS_THRESHOLD))
because = UNSEG_AMBIGUOUS;
} else {
Vend = max.first_pos_max;
Jstart = max.last_pos_max + 1;
......
......@@ -17,9 +17,11 @@
strand and the other, to safely attribute a
segment to a given strand */
#define AMBIGUOUS_THRESHOLD 4 /* If the number of both V and J affectations
is above this threshold, then a non-segmented
sequence will be labeled as AMBIGUOUS */
#define DETECT_THRESHOLD 10 /* If the total number of V/J affectations
is at least this threshold, then the sequence
will be labeled as 'detected', and, if it is
not segmented, the remaining germlines will
not be tested */
#define JSON_REMEMBER_BEST 4 /* The number of V/D/J predictions to keep */
......@@ -131,6 +133,7 @@ ostream &operator<<(ostream &out, const Segmenter &s);
class KmerSegmenter : public Segmenter
{
private:
int detected;
int because;
KmerAffectAnalyser *kaa;
protected:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment