From a2f04e452aa1f914e07dfac567146c22ff890cf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cyprien=20Bor=C3=A9e?= Date: Tue, 3 Jul 2018 18:13:12 +0200 Subject: [PATCH] Factorize filterBioReaderWithACAutomaton with transferBioReaderSequences and use isGeneric/SPECIFIC_KMERS_NUMBER For more information, see #3282. --- algo/core/filter.cpp | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/algo/core/filter.cpp b/algo/core/filter.cpp index 2318dee8a..8230ce591 100644 --- a/algo/core/filter.cpp +++ b/algo/core/filter.cpp @@ -27,12 +27,11 @@ void FilterWithACAutomaton::buildACAutomatonToFilterBioReader } automaton = new PointerACAutomaton(seed, false, true); indexes = new vector(); - automaton->insert(origin.sequence(0),std::string("") + char(1), true, 0, seed); - asciiNumber = 1; + asciiNumber = SPECIFIC_KMERS_NUMBER; + automaton->insert(origin.sequence(0),std::string("") + char(asciiNumber), true, 0, seed); indexes->push_back(0); previousLabel = extractGeneName(origin.label(0)); - int i; - for(i = 1;i < origin.size(); ++i){ + for(int i = 1;i < origin.size(); ++i){ currentLabel = extractGeneName(origin.label(i)); if(currentLabel != previousLabel){ indexes->push_back(i); @@ -62,9 +61,6 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton( BioReader result; map mapAho; - KmerAffect tmpKmer; - unsigned int asciiNum; - char asciiChar; if(!automaton || !indexes || kmer_threshold < 0){ return origin; } @@ -91,6 +87,9 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton( int nbKmers = 0, previousOccurences = 0; for(pair element : setOfWords){ // Add corresponding sequences to the BioReader + if(!element.first.isGeneric()){ + continue; + } if(nbKmers == kmer_threshold && previousOccurences == element.second){ //Keep the same amount of genes }else if(nbKmers < kmer_threshold){ @@ -98,21 +97,25 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton( }else{ break; } - tmpKmer = element.first; 
- asciiChar = tmpKmer.getLabel().at(0); - asciiNum = int(asciiChar); - if(asciiNum > indexes->size() - 1){ - break; - } - for(int i = indexes->at(asciiNum - 1); i < indexes->at(asciiNum); ++i){ - result.add(origin.read(i)); - } + transferBioReaderSequences(origin, result, element.first); previousOccurences = element.second; } } return (result.size() == 0) ? origin : result; } +void FilterWithACAutomaton::transferBioReaderSequences(const BioReader &src, BioReader &dst, KmerAffect k) const{ + char asciiChar = k.getLabel().at(0); + unsigned int asciiNum = int(asciiChar); + + if(asciiNum > indexes->size() || !k.isGeneric()){ + throw invalid_argument("Incorrect K-mer transmitted."); + } + for(int i = indexes->at(asciiNum - SPECIFIC_KMERS_NUMBER); i < indexes->at(asciiNum - SPECIFIC_KMERS_NUMBER + 1); ++i){ + dst.add(src.read(i)); + } +} + vector* FilterWithACAutomaton::getIndexes() const{ return this->indexes; } -- GitLab