Commit a2f04e45 authored by Cyprien Borée, committed by Mathieu Giraud

Factorize filterBioReaderWithACAutomaton with transferBioReaderSequences and...

Factorize filterBioReaderWithACAutomaton with transferBioReaderSequences and use isGeneric/SPECIFIC_KMERS_NUMBER

For more information, see #3282.
parent 36072a47
...@@ -27,12 +27,11 @@ void FilterWithACAutomaton::buildACAutomatonToFilterBioReader ...@@ -27,12 +27,11 @@ void FilterWithACAutomaton::buildACAutomatonToFilterBioReader
} }
automaton = new PointerACAutomaton<KmerAffect>(seed, false, true); automaton = new PointerACAutomaton<KmerAffect>(seed, false, true);
indexes = new vector<int>(); indexes = new vector<int>();
automaton->insert(origin.sequence(0),std::string("") + char(1), true, 0, seed); asciiNumber = SPECIFIC_KMERS_NUMBER;
asciiNumber = 1; automaton->insert(origin.sequence(0),std::string("") + char(asciiNumber), true, 0, seed);
indexes->push_back(0); indexes->push_back(0);
previousLabel = extractGeneName(origin.label(0)); previousLabel = extractGeneName(origin.label(0));
int i; for(int i = 1;i < origin.size(); ++i){
for(i = 1;i < origin.size(); ++i){
currentLabel = extractGeneName(origin.label(i)); currentLabel = extractGeneName(origin.label(i));
if(currentLabel != previousLabel){ if(currentLabel != previousLabel){
indexes->push_back(i); indexes->push_back(i);
...@@ -62,9 +61,6 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton( ...@@ -62,9 +61,6 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
BioReader result; BioReader result;
map<KmerAffect, int> mapAho; map<KmerAffect, int> mapAho;
KmerAffect tmpKmer;
unsigned int asciiNum;
char asciiChar;
if(!automaton || !indexes || kmer_threshold < 0){ if(!automaton || !indexes || kmer_threshold < 0){
return origin; return origin;
} }
...@@ -91,6 +87,9 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton( ...@@ -91,6 +87,9 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
int nbKmers = 0, previousOccurences = 0; int nbKmers = 0, previousOccurences = 0;
for(pair<KmerAffect, int> element : setOfWords){ for(pair<KmerAffect, int> element : setOfWords){
// Add corresponding sequences to the BioReader // Add corresponding sequences to the BioReader
if(!element.first.isGeneric()){
continue;
}
if(nbKmers == kmer_threshold && previousOccurences == element.second){ if(nbKmers == kmer_threshold && previousOccurences == element.second){
//Keep the same amount of genes //Keep the same amount of genes
}else if(nbKmers < kmer_threshold){ }else if(nbKmers < kmer_threshold){
...@@ -98,21 +97,25 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton( ...@@ -98,21 +97,25 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
}else{ }else{
break; break;
} }
tmpKmer = element.first; transferBioReaderSequences(origin, result, element.first);
asciiChar = tmpKmer.getLabel().at(0);
asciiNum = int(asciiChar);
if(asciiNum > indexes->size() - 1){
break;
}
for(int i = indexes->at(asciiNum - 1); i < indexes->at(asciiNum); ++i){
result.add(origin.read(i));
}
previousOccurences = element.second; previousOccurences = element.second;
} }
} }
return (result.size() == 0) ? origin : result; return (result.size() == 0) ? origin : result;
} }
/**
 * Copy into dst every sequence of src that belongs to the group designated
 * by the k-mer k.
 *
 * The first character of k's label is read as a group number; group numbers
 * start at SPECIFIC_KMERS_NUMBER. For the group of rank j (group number minus
 * SPECIFIC_KMERS_NUMBER), the sequences copied are those at positions
 * [indexes->at(j), indexes->at(j + 1)) in src.
 * NOTE(review): this relies on `indexes` holding one more entry than there
 * are groups (a final end boundary) -- confirm against the function that
 * fills `indexes`.
 *
 * @param src  reader the sequences are read from
 * @param dst  reader the sequences are appended to
 * @param k    k-mer whose label designates the group to transfer
 * @throws invalid_argument if k is not generic, or if its label does not
 *         designate a group whose both boundaries are present in `indexes`
 */
void FilterWithACAutomaton::transferBioReaderSequences(const BioReader &src, BioReader &dst, KmerAffect k) const{
if(!k.isGeneric()){
throw invalid_argument("Incorrect K-mer transmitted.");
}
// Go through unsigned char: where plain char is signed, labels above 127
// would otherwise be sign-extended into huge unsigned values.
const size_t asciiNum = static_cast<unsigned char>(k.getLabel().at(0));
const size_t firstLabel = SPECIFIC_KMERS_NUMBER;
// Both boundaries (rank and rank + 1) must exist in indexes; checking
// asciiNum >= firstLabel first keeps the subtraction from wrapping around.
if(asciiNum < firstLabel || asciiNum - firstLabel + 1 >= indexes->size()){
throw invalid_argument("Incorrect K-mer transmitted.");
}
const size_t rank = asciiNum - firstLabel;
const int first = indexes->at(rank);
const int last = indexes->at(rank + 1);
for(int i = first; i < last; ++i){
dst.add(src.read(i));
}
}
vector<int>* FilterWithACAutomaton::getIndexes() const{ vector<int>* FilterWithACAutomaton::getIndexes() const{
return this->indexes; return this->indexes;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment