From a6ea37f479921c91eb8141ca14c8b9f461c5e800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cyprien=20Bor=C3=A9e?= Date: Thu, 14 Jun 2018 15:52:44 +0200 Subject: [PATCH 1/6] Complete test for ex aequo K-mers In order to be sure that the filter function returns the correct sequences, every sequence is checked, along with the number of sequences returned. --- algo/tests/unit-tests/testFilter.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/algo/tests/unit-tests/testFilter.cpp b/algo/tests/unit-tests/testFilter.cpp index 90645ae31..63334a145 100644 --- a/algo/tests/unit-tests/testFilter.cpp +++ b/algo/tests/unit-tests/testFilter.cpp @@ -385,6 +385,7 @@ void testExAequoKmersWhenSignificantParameter(){ seqtype seq; pair*, AbstractACAutomaton*>* p; string BIOREADER_EXAEQUO = "BioReader doesn't have ex-aequo"; + string SIZE_BIOREADER = "BioReader doesn't contain the good amount of sequences"; Sequence sequences[13]; sequences[0] = {"seq1-full_name", "seq-01*01", "AGCTAGCTA","", NULL, 0}; sequences[1] = {"seq1-full_name", "seq-01*02", "AGCTAGCTT", "", NULL, 0}; @@ -418,18 +419,29 @@ void testExAequoKmersWhenSignificantParameter(){ while(j < filtered.size() && extractGeneName(filtered.label(j)) != extractGeneName(testedBioReader.label(10))){ ++j; } + /* Check that filtered BioReader contains sequence n°11 which is the most present in the sequence. 
*/ + int k = 0; + while(k < filtered.size() && extractGeneName(filtered.label(k)) != extractGeneName(testedBioReader.label(11))){ + ++k; + } + + /* Even though the filtered function got 2 as a parameter, since there are two ex-aequo the size is 3 */ + TAP_TEST(filtered.size() == 3, TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, SIZE_BIOREADER); TAP_TEST(i < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); TAP_TEST(j < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); + TAP_TEST(k < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); /* Add a third ex-aequo: k-mer belonging to sequence n°12 appearing 29 times */ seq += "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"; delete p->first; delete p->second; delete p; p = buildACAutomatonToFilterBioReader(testedBioReader, "####"); filtered = filterBioReaderWithACAutomaton(p, testedBioReader, seq, 2); - int k = 0; - while(k < filtered.size() && extractGeneName(filtered.label(k)) != extractGeneName(testedBioReader.label(12))){ - ++k; + int l = 0; + while(l < filtered.size() && extractGeneName(filtered.label(l)) != extractGeneName(testedBioReader.label(12))){ + ++l; } - TAP_TEST(k < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); + /* Even though the filtered function got 2 as a parameter, since there are three ex-aequo the size is 4 */ + TAP_TEST(filtered.size() == 4, TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, SIZE_BIOREADER); + TAP_TEST(l < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); delete p->first; delete p->second; delete p; } -- GitLab From 077c61de139ec078292c37e05de7f5a761000d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cyprien=20Bor=C3=A9e?= Date: Thu, 14 Jun 2018 14:13:19 +0200 Subject: [PATCH 2/6] Change way to compare ex-aequo K-mer occurences in filter Since the last way to compare produced segfault (due to undefined behaviour of the advance method), to bypass this problem we just look at 
the previous number of occurrences to see if it's the same as the last -1. For more information, see #3279. --- algo/core/filter.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/algo/core/filter.cpp b/algo/core/filter.cpp index 41227348f..330a45fdb 100644 --- a/algo/core/filter.cpp +++ b/algo/core/filter.cpp @@ -90,19 +90,13 @@ BioReader filterBioReaderWithACAutomaton( set, Comparator> setOfWords(mapAho.begin(), mapAho.end(), compFunctor); set, Comparator>::iterator setIt = setOfWords.begin(); // Iterate over the pair and not the map - int nbKmers = 0; + int nbKmers = 0, previousOccurences = 0; for(pair element : setOfWords){ // Add corresponding sequences to the BioReader - if(nbKmers < kmer_threshold){ + if(nbKmers <= kmer_threshold){ nbKmers++; - /* Check if next K-mer has same occurence */ - if(nbKmers == kmer_threshold){ - std::advance(setIt, nbKmers); - pair pNext = *setIt; - int nextKmerOccurs = pNext.second; - if(nextKmerOccurs == element.second){ - nbKmers--; - } + if(nbKmers == kmer_threshold && previousOccurences == element.second){ + nbKmers--; } tmpKmer = element.first; asciiChar = tmpKmer.getLabel().at(0); @@ -113,6 +107,7 @@ BioReader filterBioReaderWithACAutomaton( for(int i = indexes->at(asciiNum - 1); i < indexes->at(asciiNum); ++i){ result.add(origin.read(i)); } + previousOccurences = element.second; } else{ /* Enough K-mers used for filtering, no need to go further */ -- GitLab From 0dc21dcede60ca48d6c0c9aba90a1d6131e76892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cyprien=20Bor=C3=A9e?= Date: Thu, 14 Jun 2018 16:03:09 +0200 Subject: [PATCH 3/6] Fix filter method in algo/filter.cpp For more information, see issue #3279 --- algo/core/filter.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/algo/core/filter.cpp b/algo/core/filter.cpp index 330a45fdb..3ecf76ff4 100644 --- a/algo/core/filter.cpp +++ b/algo/core/filter.cpp @@ -93,10 +93,12 @@ BioReader filterBioReaderWithACAutomaton( int 
nbKmers = 0, previousOccurences = 0; for(pair element : setOfWords){ // Add corresponding sequences to the BioReader - if(nbKmers <= kmer_threshold){ - nbKmers++; if(nbKmers == kmer_threshold && previousOccurences == element.second){ - nbKmers--; + //Keep the same amount of genes + }else if(nbKmers < kmer_threshold){ + nbKmers++; + }else{ + break; } tmpKmer = element.first; asciiChar = tmpKmer.getLabel().at(0); @@ -108,11 +110,6 @@ BioReader filterBioReaderWithACAutomaton( result.add(origin.read(i)); } previousOccurences = element.second; - } - else{ - /* Enough K-mers used for filtering, no need to go further */ - break; - } } } return (result.size() == 0) ? origin : result; -- GitLab From 53b44e2c47ade78115af3d96562c486af68260db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cyprien=20Bor=C3=A9e?= Date: Fri, 15 Jun 2018 15:10:35 +0200 Subject: [PATCH 4/6] Complete filtered test while testing ex-aequo In the second part of the test, the program verifies that previously filtered sequences are still in the BioReader when one more ex-aequo is found. For more information about it, see #3279. 
--- algo/tests/unit-tests/testFilter.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/algo/tests/unit-tests/testFilter.cpp b/algo/tests/unit-tests/testFilter.cpp index 63334a145..c6880c641 100644 --- a/algo/tests/unit-tests/testFilter.cpp +++ b/algo/tests/unit-tests/testFilter.cpp @@ -435,12 +435,28 @@ void testExAequoKmersWhenSignificantParameter(){ delete p->first; delete p->second; delete p; p = buildACAutomatonToFilterBioReader(testedBioReader, "####"); filtered = filterBioReaderWithACAutomaton(p, testedBioReader, seq, 2); + /* Check that BioReader contains previous sequences and n°12 */ + i = 0; + while(i < filtered.size() && extractGeneName(filtered.label(i)) != extractGeneName(testedBioReader.label(5))){ + ++i; + } + j = 0; + while(j < filtered.size() && extractGeneName(filtered.label(j)) != extractGeneName(testedBioReader.label(10))){ + ++j; + } + k = 0; + while(k < filtered.size() && extractGeneName(filtered.label(k)) != extractGeneName(testedBioReader.label(11))){ + ++k; + } int l = 0; while(l < filtered.size() && extractGeneName(filtered.label(l)) != extractGeneName(testedBioReader.label(12))){ ++l; } /* Even though the filtered function got 2 as a parameter, since there are three ex-aequo the size is 4 */ TAP_TEST(filtered.size() == 4, TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, SIZE_BIOREADER); + TAP_TEST(i < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); + TAP_TEST(j < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); + TAP_TEST(k < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); TAP_TEST(l < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); delete p->first; delete p->second; delete p; } -- GitLab From 9c30e441ed499acc234de37abd1ddf808487d6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cyprien=20Bor=C3=A9e?= Date: Fri, 15 Jun 2018 15:16:46 +0200 Subject: [PATCH 5/6] Revert "Complete filtered test while testing ex-aequo" 
This reverts commit 53b44e2c47ade78115af3d96562c486af68260db. --- algo/tests/unit-tests/testFilter.cpp | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/algo/tests/unit-tests/testFilter.cpp b/algo/tests/unit-tests/testFilter.cpp index c6880c641..63334a145 100644 --- a/algo/tests/unit-tests/testFilter.cpp +++ b/algo/tests/unit-tests/testFilter.cpp @@ -435,28 +435,12 @@ void testExAequoKmersWhenSignificantParameter(){ delete p->first; delete p->second; delete p; p = buildACAutomatonToFilterBioReader(testedBioReader, "####"); filtered = filterBioReaderWithACAutomaton(p, testedBioReader, seq, 2); - /* Check that BioReader contains previous sequences and n°12 */ - i = 0; - while(i < filtered.size() && extractGeneName(filtered.label(i)) != extractGeneName(testedBioReader.label(5))){ - ++i; - } - j = 0; - while(j < filtered.size() && extractGeneName(filtered.label(j)) != extractGeneName(testedBioReader.label(10))){ - ++j; - } - k = 0; - while(k < filtered.size() && extractGeneName(filtered.label(k)) != extractGeneName(testedBioReader.label(11))){ - ++k; - } int l = 0; while(l < filtered.size() && extractGeneName(filtered.label(l)) != extractGeneName(testedBioReader.label(12))){ ++l; } /* Even though the filtered function got 2 as a parameter, since there are three ex-aequo the size is 4 */ TAP_TEST(filtered.size() == 4, TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, SIZE_BIOREADER); - TAP_TEST(i < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); - TAP_TEST(j < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); - TAP_TEST(k < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); TAP_TEST(l < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); delete p->first; delete p->second; delete p; } -- GitLab From 201de9c6c1a4c87baad5fd46b0da96f96f97f43e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cyprien=20Bor=C3=A9e?= Date: Fri, 15 Jun 2018 15:19:44 +0200 Subject: 
[PATCH 6/6] Complete test in testFilter : testExAequo The second part of the test now verifies that the previous sequences are still in the filtered BioReader even though a new ex-aequo has been found. For more information, see #3284 --- algo/tests/unit-tests/testFilter.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/algo/tests/unit-tests/testFilter.cpp b/algo/tests/unit-tests/testFilter.cpp index 63334a145..fc3ba609e 100644 --- a/algo/tests/unit-tests/testFilter.cpp +++ b/algo/tests/unit-tests/testFilter.cpp @@ -435,12 +435,28 @@ void testExAequoKmersWhenSignificantParameter(){ delete p->first; delete p->second; delete p; p = buildACAutomatonToFilterBioReader(testedBioReader, "####"); filtered = filterBioReaderWithACAutomaton(p, testedBioReader, seq, 2); + /* Check that filtered BioReader contains previous sequences and n°12 */ + i = 0; + while(i < filtered.size() && extractGeneName(filtered.label(i)) != extractGeneName(testedBioReader.label(5))){ + ++i; + } + j = 0; + while(j < filtered.size() && extractGeneName(filtered.label(j)) != extractGeneName(testedBioReader.label(10))){ + ++j; + } + k = 0; + while(k < filtered.size() && extractGeneName(filtered.label(k)) != extractGeneName(testedBioReader.label(11))){ + ++k; + } int l = 0; while(l < filtered.size() && extractGeneName(filtered.label(l)) != extractGeneName(testedBioReader.label(12))){ ++l; } /* Even though the filtered function got 2 as a parameter, since there are three ex-aequo the size is 4 */ TAP_TEST(filtered.size() == 4, TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, SIZE_BIOREADER); + TAP_TEST(i < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); + TAP_TEST(j < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); + TAP_TEST(k < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); TAP_TEST(l < filtered.size(), TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, BIOREADER_EXAEQUO); delete p->first; delete p->second; delete 
p; } -- GitLab