Commit 2d2a340d authored by Mathieu Giraud's avatar Mathieu Giraud
Browse files

Merge branch 'feature-a/heuristic-bug' into 'dev'

Resolve heuristic bug with overlapping k-mers

Closes #3296

See merge request !215
parents aa1d9bb8 2bd73afd
Pipeline #29359 passed with stages
in 49 seconds
......@@ -140,6 +140,8 @@ affect_infos KmerAffectAnalyser::getMaximum(const KmerAffect &before,
results.first_pos_max = results.last_pos_max = 0;
results.nb_before_left = results.nb_before_right = results.nb_after_right = results.nb_after_left = 0;
currentValue = 0;
int nb_during_max = 0;
bool continue_max;
for (int i = 0; i < min(length,span - maxOverlap); i++) {
if (affectations[i] == after) {
......@@ -151,7 +153,8 @@ affect_infos KmerAffectAnalyser::getMaximum(const KmerAffect &before,
for (int i = span - maxOverlap; i < length; i++) {
/* i - span + maxOverlap, to avoir overlapping k-mers */
continue_max = false;
/* Read the current affectations, and store them both in currentValue and at the right of the previous maximum.
The affectation of 'before' is interpreted relatively to span and maxOverlap */
......@@ -168,18 +171,39 @@ affect_infos KmerAffectAnalyser::getMaximum(const KmerAffect &before,
/* If we raise above the max, or if we continue a previous maximum (even from a distant position), store in results */
if (currentValue >= results.max_value) {
if (currentValue > results.max_value)
if (currentValue > results.max_value) {
results.first_pos_max = i;
// We are above the previous max. We reaffect the ignored affectations
// during the previous plateau.
results.nb_after_left += nb_during_max;
results.nb_before_left += nb_during_max;
nb_during_max = 0;
} else if (results.last_pos_max == i - 1) {
if (affectations[i] == after) {
// in such a case we also have affectations[i - span + maxOverlap] == before
// because we are still on a maximum this means currentValue hasn't changed
assert (affectations[i - span + maxOverlap] == before);
nb_during_max++;
continue_max = true;
}
}
results.max_value = currentValue;
results.last_pos_max = i;
/* What was at the right of the previous maximum is now at the left of the current maximum */
results.nb_after_left += results.nb_after_right;
results.nb_before_left += results.nb_before_right;
if (! continue_max) {
/* What was at the right of the previous maximum is now at the left of
* the current maximum. But we only count them if we are not on a
* plateau. If we later reach a higher maximum they will be counted
* back (see above) */
results.nb_after_left += results.nb_after_right;
results.nb_before_left += results.nb_before_right;
}
results.nb_after_right = 0;
results.nb_before_right = 0;
}
#ifdef DEBUG_GET_MAXIMUM
cout << setw(3) << i
<< " " << affectations[i - span + maxOverlap] << ((affectations[i - span + maxOverlap] == before)?"!":" ")
......
......@@ -22,15 +22,25 @@ typedef enum affect_options_e {
} affect_options_t;
/* Stores results during .getMaximum() computation */
/*
Stores results during .getMaximum() computation
See examples in tests/unit-tests/testAffectAnalyser.cpp:testGetMaximum()
*/
typedef struct affect_infos_s {
// Position of the plateau
int first_pos_max; /* First position of maximum */
int last_pos_max; /* Last position of maximum */
// Value on the plateau
int max_value; /* Maximal value */
// Number of complete affectations at both sides of the plateau
int nb_before_right; /* Nb of “before” right of the maximum */
int nb_after_right; /* Same with “after” */
int nb_before_left; /* Nb of “before” left of the maximum */
int nb_after_left; /* Same with “after” */
bool max_found; /* We have found a maximum */
} affect_infos;
......
......@@ -245,6 +245,7 @@ void testGetMaximum() {
KmerAffect a[] = {AFFECT_J_BWD, AFFECT_J_BWD, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_V_BWD, AFFECT_V_BWD, AFFECT_V_BWD, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_J_BWD, AFFECT_J_BWD};
// 0 1 2 3 4 5 6 7 8 9 11 13
// J-J- _ _ _ _V-V-V- _ _ _J-J-
// ^^^^^^^^^^
vector<KmerAffect> affectations(a, a+sizeof(a)/sizeof(KmerAffect));
KmerAffectAnalyser kaa(*index, "", affectations);
......@@ -282,7 +283,8 @@ void testGetMaximum() {
AFFECT_J, AFFECT_J, AFFECT_J,
AFFECT_V, AFFECT_V, AFFECT_V};
// 0 1 2 3 4 5 6 7 8 9 11 13 15
// V+V+V+V+V+V+V+V+V+V+J+J+J+V+V+V+
// V V V V V V V V V V J J J V V V
// ^^^^^^^^^^^^
vector<KmerAffect> affectations2(a2, a2+sizeof(a2)/sizeof(KmerAffect));
KmerAffectAnalyser kaa2(*index, "", affectations2);
results = kaa2.getMaximum(AFFECT_V, AFFECT_J, 2., 0);
......@@ -322,14 +324,19 @@ void testGetMaximum() {
AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN,
AFFECT_J, AFFECT_J, AFFECT_V, AFFECT_J};
// 0 1 2 3 4 5 6 7 8 9 11 13 15 17 19
// V+V+V+V+V+V+V+V+V+V+ _ _ _ _ _ _J-J-V+J-
// V V V V V V V V V V _ _ _ _ _ _ J J V J
//0 0 0 0 0 1 2 3 4 5 6 7 8 9101010 9 8 7 6
// ^^^^^^
// V V V V V V J J J // nb_before_left (6), nb_after_right (3)
// V // nb_before_right (1)
vector<KmerAffect> affectations3(a3, a3+sizeof(a3)/sizeof(KmerAffect));
KmerAffectAnalyser kaa3(*index, "", affectations3);
// span = 4, maxOverlap = 0
results = kaa3.getMaximum(AFFECT_V, AFFECT_J, 2., 0);
TAP_TEST(results.max_found, TEST_AA_GET_MAXIMUM_MAX_FOUND,
"max_found = " << results.max_found);
TAP_TEST(results.max_value, TEST_AA_GET_MAXIMUM_VALUE,
TAP_TEST(results.max_value == 10, TEST_AA_GET_MAXIMUM_VALUE,
"max = " << results.max_value);
TAP_TEST(results.first_pos_max == 13 && results.last_pos_max == 15,
TEST_AA_GET_MAXIMUM_POSITIONS,
......@@ -343,6 +350,98 @@ void testGetMaximum() {
<< results.nb_after_left << ", right: "
<< results.nb_after_right);
KmerAffect a4[] = {AFFECT_V, AFFECT_V, AFFECT_V, AFFECT_V, AFFECT_V,
AFFECT_V, AFFECT_V, AFFECT_V, AFFECT_V, AFFECT_V,
AFFECT_J, AFFECT_J, AFFECT_J, AFFECT_J, AFFECT_J,
AFFECT_J, AFFECT_J, AFFECT_J, AFFECT_J, AFFECT_J};
// 0 1 2 3 4 5 6 7 8 9 11 13 15 17 19 // i
// V V V V V V V V V V J J J J J J J J J J
//0 0 0 0 0 1 2 3 4 5 6 6 6 6 6 5 4 3 2 1 0 // currentValue, after iteration i
// ^^^^^^^^^
// V V V V V V J J J J J J // nb_before_left (6), nb_after_right (6)
vector<KmerAffect> affectations4(a4, a4+sizeof(a4)/sizeof(KmerAffect));
KmerAffectAnalyser kaa4(*index, "", affectations4);
// span = 4, maxOverlap = 0
results = kaa4.getMaximum(AFFECT_V, AFFECT_J, 2., 0);
TAP_TEST(results.max_found, TEST_AA_GET_MAXIMUM_MAX_FOUND,
"max_found = " << results.max_found);
TAP_TEST(results.max_value == 6, TEST_AA_GET_MAXIMUM_VALUE,
"max = " << results.max_value);
TAP_TEST(results.first_pos_max == 9 && results.last_pos_max == 13,
TEST_AA_GET_MAXIMUM_POSITIONS,
"first = " << results.first_pos_max
<< ", last = " << results.last_pos_max);
TAP_TEST(results.nb_before_left == 6 && results.nb_before_right == 0
&& results.nb_after_left == 0 && results.nb_after_right == 6,
TEST_AA_GET_MAXIMUM_COUNTS,
"before:: left: " << results.nb_before_left <<", right: "
<< results.nb_before_right << "\nafter:: left: "
<< results.nb_after_left << ", right: "
<< results.nb_after_right);
KmerAffect a5[] = {AFFECT_V, AFFECT_V, AFFECT_V, AFFECT_V, AFFECT_J,
AFFECT_J, AFFECT_J, AFFECT_V, AFFECT_V, AFFECT_V,
AFFECT_V, AFFECT_V, AFFECT_J, AFFECT_J, AFFECT_J,
AFFECT_J, AFFECT_J, AFFECT_J, AFFECT_J, AFFECT_J};
// 0 1 2 3 4 5 6 7 8 9 11 13 15 17 19 // i
// V V V V J J J V V V V V J J J J J J J J
//0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 1 0-1-2 // currentValue, after iteration i
// ^^^^^^^^^
// V V V V V J J J J // nb_before_left (5), nb_after_right (4)
// J J J // nb_after_left (3)
vector<KmerAffect> affectations5(a5, a5+sizeof(a5)/sizeof(KmerAffect));
KmerAffectAnalyser kaa5(*index, "", affectations5);
results = kaa5.getMaximum(AFFECT_V, AFFECT_J, 2., 0);
TAP_TEST(! results.max_found, TEST_AA_GET_MAXIMUM_MAX_FOUND,
"max_found = " << results.max_found);
TAP_TEST(results.max_value == 2, TEST_AA_GET_MAXIMUM_VALUE,
"max = " << results.max_value);
TAP_TEST(results.first_pos_max == 11 && results.last_pos_max == 15,
TEST_AA_GET_MAXIMUM_POSITIONS,
"first = " << results.first_pos_max
<< ", last = " << results.last_pos_max);
TAP_TEST(results.nb_before_left == 5 && results.nb_before_right == 0
&& results.nb_after_left == 3 && results.nb_after_right == 4,
TEST_AA_GET_MAXIMUM_COUNTS,
"before:: left: " << results.nb_before_left <<", right: "
<< results.nb_before_right << "\nafter:: left: "
<< results.nb_after_left << ", right: "
<< results.nb_after_right);
KmerAffect a6[] = {AFFECT_J_BWD, AFFECT_J_BWD, AFFECT_J_BWD,
AFFECT_J_BWD, AFFECT_J_BWD, AFFECT_J_BWD, AFFECT_J_BWD, AFFECT_J_BWD,
AFFECT_V_BWD, AFFECT_V_BWD, AFFECT_V_BWD,
AFFECT_V_BWD, AFFECT_V_BWD, AFFECT_J_BWD,
AFFECT_J_BWD, AFFECT_J_BWD, AFFECT_V_BWD, AFFECT_V_BWD, AFFECT_V_BWD, AFFECT_V_BWD};
// 0 1 2 3 4 5 6 7 8 9 11 13 15 17 19
// J-J-J-J-J-J-J-J-V-V-V-V-V-J-J-J-V-V-V-V-
// ^^^^^^^^^^
vector<KmerAffect> affectations6(a6, a6+sizeof(a6)/sizeof(KmerAffect));
KmerAffectAnalyser kaa6(*index, "", affectations6);
results = kaa6.getMaximum(AFFECT_J_BWD, AFFECT_V_BWD, 2., 0);
TAP_TEST(! results.max_found, TEST_AA_GET_MAXIMUM_MAX_FOUND,
"max_found = " << results.max_found);
TAP_TEST(results.max_value == 4, TEST_AA_GET_MAXIMUM_VALUE,
"max = " << results.max_value);
TAP_TEST(results.first_pos_max == 7 && results.last_pos_max == 11,
TEST_AA_GET_MAXIMUM_POSITIONS,
"first = " << results.first_pos_max
<< ", last = " << results.last_pos_max);
TAP_TEST(results.nb_before_left == 4 && results.nb_before_right == 3
&& results.nb_after_left == 0 && results.nb_after_right == 5,
TEST_AA_GET_MAXIMUM_COUNTS,
"before:: left: " << results.nb_before_left <<", right: "
<< results.nb_before_right << "\nafter:: left: "
<< results.nb_after_left << ", right: "
<< results.nb_after_right);
delete index;
}
......@@ -425,5 +524,6 @@ void testAffectAnalyser() {
testAffectAnalyser2<PointerACAutomaton>();
testAffectAnalyserMaxes<PointerACAutomaton>();
testBugAffectAnalyser<PointerACAutomaton>();
testGetMaximum<PointerACAutomaton>();
testAffectAnalyserSpaced<PointerACAutomaton>();
}
......@@ -216,11 +216,11 @@ void testSegmentationCause(IndexTypes index) {
TAP_TEST_EQUAL(ks.getSegmentationStatus(), UNSEG_AMBIGUOUS, TEST_KMER_SEGMENTATION_CAUSE, ks.getInfoLineWithAffects());
nb_checked++;
} else if (data.read(i).label == "seq-delta-min") {
// This test was a test for delta_min but the read is now reported as ambiguous,
// as they are, at the left of the segmentation point, not twice more "V" k-mers than "J" k-mers.
// This test was a test for delta_min but the read is now reported as UNSEG_ONLY_J,
// as they are, at the left of the segmentation point, much not enough Vs
// We keep the test, but change it.
TAP_TEST(! ks.isSegmented(), TEST_KMER_IS_SEGMENTED, "delta-min: " << ks.getInfoLineWithAffects());
TAP_TEST_EQUAL(ks.getSegmentationStatus(), UNSEG_AMBIGUOUS, TEST_KMER_SEGMENTATION_CAUSE, ks.getInfoLineWithAffects());
TAP_TEST_EQUAL(ks.getSegmentationStatus(), UNSEG_ONLY_J, TEST_KMER_SEGMENTATION_CAUSE, ks.getInfoLineWithAffects());
nb_checked++;
} else if (data.read(i).label == "seq-delta-min-padded") {
// This one has enough k-mers to be segmented
......@@ -316,10 +316,10 @@ void testExtractor(IndexTypes index) {
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_TOO_SHORT), 1, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_STRAND_NOT_CONSISTENT), 1, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_TOO_FEW_ZERO), 1, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_ONLY_J), 3, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_ONLY_J), 4, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_ONLY_V), 3, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_BAD_DELTA_MIN), 0, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_AMBIGUOUS), 2, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_AMBIGUOUS), 1, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(UNSEG_TOO_SHORT_FOR_WINDOW), 1, TEST_EXTRACTOR_NB_SEGMENTED, "");
TAP_TEST_EQUAL(we.getNbSegmented(TOTAL_SEG_AND_WINDOW), 3, TEST_EXTRACTOR_NB_SEGMENTED, "");
......@@ -328,9 +328,9 @@ void testExtractor(IndexTypes index) {
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_TOO_SHORT), 4, TEST_EXTRACTOR_AVG_LENGTH, "");
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_STRAND_NOT_CONSISTENT), 36, TEST_EXTRACTOR_AVG_LENGTH, "");
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_TOO_FEW_ZERO), 36, TEST_EXTRACTOR_AVG_LENGTH, "");
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_ONLY_J), 46, TEST_EXTRACTOR_AVG_LENGTH, "average: " << we.getAverageSegmentationLength(UNSEG_ONLY_J));
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_ONLY_J), 42.75, TEST_EXTRACTOR_AVG_LENGTH, "average: " << we.getAverageSegmentationLength(UNSEG_ONLY_J));
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_ONLY_V), 48, TEST_EXTRACTOR_AVG_LENGTH, "average: " << we.getAverageSegmentationLength(UNSEG_ONLY_V));
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_AMBIGUOUS), 52.5, TEST_EXTRACTOR_AVG_LENGTH, "average: " << we.getAverageSegmentationLength(UNSEG_AMBIGUOUS));
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_AMBIGUOUS), 72, TEST_EXTRACTOR_AVG_LENGTH, "average: " << we.getAverageSegmentationLength(UNSEG_AMBIGUOUS));
TAP_TEST_EQUAL(we.getAverageSegmentationLength(UNSEG_TOO_SHORT_FOR_WINDOW), 20, TEST_EXTRACTOR_AVG_LENGTH, "average: " << we.getAverageSegmentationLength(UNSEG_TOO_SHORT_FOR_WINDOW));
TAP_TEST_EQUAL(we.getAverageSegmentationLength(TOTAL_SEG_AND_WINDOW), 71, TEST_EXTRACTOR_AVG_LENGTH, "average: " << we.getAverageSegmentationLength(TOTAL_SEG_AND_WINDOW));
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment