Commit a951046f authored by Cyprien Borée

add public attribute for original BioReader in Filter.cpp and update function calls

Since the original BioReader is used by every function of the Filter class, it
makes more sense to store it as an attribute of that class.

For more information, see #3259.
parent 1644b431
#include "filter.h"
FilterWithACAutomaton::FilterWithACAutomaton(BioReader &origin, string seed){
FilterWithACAutomaton::FilterWithACAutomaton(BioReader &origin, string seed) : originalBioReader(origin){
this->filtered_sequences_nb = 0;
this->filtered_sequences_calls = 0;
buildACAutomatonToFilterBioReader(origin, seed);
buildACAutomatonToFilterBioReader(seed);
}
FilterWithACAutomaton::~FilterWithACAutomaton(){
......@@ -15,14 +15,13 @@ FilterWithACAutomaton::~FilterWithACAutomaton(){
}
}
void FilterWithACAutomaton::buildACAutomatonToFilterBioReader
(BioReader &origin, string seed){
void FilterWithACAutomaton::buildACAutomatonToFilterBioReader(string seed){
char asciiChar;
int asciiNumber;
string currentLabel;
string previousLabel;
if(origin.size() < 1){
if(originalBioReader.size() < 1){
automaton = nullptr;
indexes = nullptr;
return;
......@@ -30,11 +29,11 @@ void FilterWithACAutomaton::buildACAutomatonToFilterBioReader
automaton = new PointerACAutomaton<KmerAffect>(seed, false, true);
indexes = new vector<int>();
asciiNumber = SPECIFIC_KMERS_NUMBER;
automaton->insert(origin.sequence(0),std::string("") + char(asciiNumber), true, 0, seed);
automaton->insert(originalBioReader.sequence(0),std::string("") + char(asciiNumber), true, 0, seed);
indexes->push_back(0);
previousLabel = extractGeneName(origin.label(0));
for(int i = 1;i < origin.size(); ++i){
currentLabel = extractGeneName(origin.label(i));
previousLabel = extractGeneName(originalBioReader.label(0));
for(int i = 1;i < originalBioReader.size(); ++i){
currentLabel = extractGeneName(originalBioReader.label(i));
if(currentLabel != previousLabel){
indexes->push_back(i);
asciiNumber++;
......@@ -46,10 +45,10 @@ void FilterWithACAutomaton::buildACAutomatonToFilterBioReader
return;
}
asciiChar = char(asciiNumber);
automaton->insert(origin.sequence(i),std::string("") + asciiChar, true, 0, seed);
automaton->insert(originalBioReader.sequence(i),std::string("") + asciiChar, true, 0, seed);
previousLabel = currentLabel;
}
indexes->push_back(origin.size());
indexes->push_back(originalBioReader.size());
automaton->build_failure_functions();
}
......@@ -58,15 +57,14 @@ void FilterWithACAutomaton::buildACAutomatonToFilterBioReader
based on it.
*/
BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
BioReader &origin, seqtype &seq,
int kmer_threshold){
seqtype &seq, int kmer_threshold){
BioReader result;
map<KmerAffect, int> mapAho;
this->filtered_sequences_calls += 1;
if(!automaton || !indexes || kmer_threshold < 0){
this->filtered_sequences_nb += origin.size();
return origin;
this->filtered_sequences_nb += originalBioReader.size();
return originalBioReader;
}
mapAho = automaton->getMultiResults(seq);
......@@ -74,7 +72,7 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
if(kmer_threshold == ALL_KMERS_VALUE || kmer_threshold > (int)mapAho.size()){
for(auto const mx: mapAho){
if(mx.first.isGeneric()){
transferBioReaderSequences(origin, result, mx.first);
transferBioReaderSequences(originalBioReader, result, mx.first);
}
}
/* The most significant k-mers selected : iterate over a portion of the
......@@ -101,12 +99,12 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
}else{
break;
}
transferBioReaderSequences(origin, result, element.first);
transferBioReaderSequences(originalBioReader, result, element.first);
previousOccurences = element.second;
}
}
this->filtered_sequences_nb += (result.size () == 0) ? origin.size() : result.size();
return (result.size() == 0) ? origin : result;
this->filtered_sequences_nb += (result.size () == 0) ? originalBioReader.size() : result.size();
return (result.size() == 0) ? originalBioReader : result;
}
void FilterWithACAutomaton::transferBioReaderSequences(const BioReader &src, BioReader &dst, KmerAffect k) const{
......
......@@ -10,6 +10,7 @@ class FilterWithACAutomaton {
AbstractACAutomaton<KmerAffect>* automaton;
public:
BioReader &originalBioReader;
/* The number of times filterBioReaderWithACAutomaton is called. */
int filtered_sequences_calls;
......@@ -40,8 +41,7 @@ class FilterWithACAutomaton {
significant K-mers returned by getMultiResults.
*/
BioReader filterBioReaderWithACAutomaton(
BioReader &origin, seqtype &seq,
int kmer_threshold = NO_LIMIT_VALUE);
seqtype &seq, int kmer_threshold = NO_LIMIT_VALUE);
/*
This function takes a BioReader as a parameter and returns
a couple containing an int vector pointer and an automaton
......@@ -90,7 +90,7 @@ class FilterWithACAutomaton {
The param "seed" is used while inserting sequences in the automaton. By default
the seed has a size of 10.
*/
void buildACAutomatonToFilterBioReader(BioReader &origin, string seed);
void buildACAutomatonToFilterBioReader(string seed);
/**
* Return the vector of indexes used while building the automaton.
......
......@@ -1051,7 +1051,7 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,
/* Regular 53 Segmentation */
if(kmer_threshold != NO_LIMIT_VALUE){
FilterWithACAutomaton* f = germline->getFilter_5();
BioReader filtered = f->filterBioReaderWithACAutomaton(germline->rep_5, sequence_or_rc, kmer_threshold);
BioReader filtered = f->filterBioReaderWithACAutomaton(sequence_or_rc, kmer_threshold);
align_against_collection(sequence_or_rc, filtered, NO_FORBIDDEN_ID, reverse_V, reverse_V, false,
box_V, segment_cost);
}else{
......
......@@ -288,9 +288,9 @@ void testFilterBioReaderWithACAutomaton(){
a2 = f2->getAutomaton();
a3 = f3->getAutomaton();
filteredBioReader1 = f1->filterBioReaderWithACAutomaton(testedBioReader1, sequence1);
filteredBioReader2 = f2->filterBioReaderWithACAutomaton(testedBioReader2, sequence2);
filteredBioReader3 = f3->filterBioReaderWithACAutomaton(testedBioReader3, sequence3);
filteredBioReader1 = f1->filterBioReaderWithACAutomaton(sequence1);
filteredBioReader2 = f2->filterBioReaderWithACAutomaton(sequence2);
filteredBioReader3 = f3->filterBioReaderWithACAutomaton(sequence3);
//check filteredBioReader size
TAP_TEST(filteredBioReader1.size() <= testedBioReader1.size(),
......@@ -353,7 +353,7 @@ void testGetNSignicativeKmers(){
for(int i = 0; i < seqV.size(); ++i){
Sequence seq = seqV.read(i);
FilterWithACAutomaton *f = new FilterWithACAutomaton(seqV, "########");
filtered = f->filterBioReaderWithACAutomaton(seqV, seq.sequence, 1);
filtered = f->filterBioReaderWithACAutomaton(seq.sequence, 1);
delete f;
int j = 0;
while(j < filtered.size()){
......@@ -406,7 +406,7 @@ void testExAequoKmersWhenSignificantParameter(){
/* Filter using the 2 most significant k-mers: the first one belongs to
sequence n°11 (with more than 60 occurrences) and the second place is tied
between sequences n°5 and n°10, which both appear 29 times. */
filtered = f->filterBioReaderWithACAutomaton(testedBioReader, seq, 2);
filtered = f->filterBioReaderWithACAutomaton(seq, 2);
/* Check that filtered BioReader contains sequence n°5 and sequence n°10 which are ex-aequo. */
int i = 0;
while(i < filtered.size() && extractGeneName(filtered.label(i)) != extractGeneName(testedBioReader.label(5))){
......@@ -431,7 +431,7 @@ void testExAequoKmersWhenSignificantParameter(){
seq += "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC";
delete f;
f = new FilterWithACAutomaton(testedBioReader, "####");
filtered = f->filterBioReaderWithACAutomaton(testedBioReader, seq, 2);
filtered = f->filterBioReaderWithACAutomaton(seq, 2);
k = 0;
while(k < filtered.size() && extractGeneName(filtered.label(k)) != extractGeneName(testedBioReader.label(12))){
++k;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment