Commit b73b4903 authored by Mathieu Giraud
Browse files

core/filter.{h,cpp}: keys_compress

see #4660
parent fbbfb919
#include "filter.h"
#include "math.hpp"
FilterWithACAutomaton::FilterWithACAutomaton(BioReader &origin, string seed) : originalBioReader(origin){
FilterWithACAutomaton::FilterWithACAutomaton(BioReader &origin, string seed, float keys_compress) : originalBioReader(origin){
this->filtered_sequences_nb = 0;
this->filtered_sequences_calls = 0;
buildACAutomatonToFilterBioReader(seed);
buildACAutomatonToFilterBioReader(seed, keys_compress);
}
FilterWithACAutomaton::~FilterWithACAutomaton(){
......@@ -16,7 +16,7 @@ FilterWithACAutomaton::~FilterWithACAutomaton(){
}
}
void FilterWithACAutomaton::buildACAutomatonToFilterBioReader(string seed){
void FilterWithACAutomaton::buildACAutomatonToFilterBioReader(string seed, float keys_compress){
char asciiChar;
int asciiNumber;
string currentLabel;
......@@ -32,12 +32,22 @@ void FilterWithACAutomaton::buildACAutomatonToFilterBioReader(string seed){
asciiNumber = SPECIFIC_KMERS_NUMBER;
automaton->insert(originalBioReader.sequence(0),std::string("") + char(asciiNumber), true, 0, seed);
indexes->push_back(0);
int previousAsciiNumber = asciiNumber;
int rawNumber = 0;
previousLabel = extractGeneName(originalBioReader.label(0));
for(int i = 1;i < originalBioReader.size(); ++i){
currentLabel = extractGeneName(originalBioReader.label(i));
if(currentLabel != previousLabel){
asciiNumber = SPECIFIC_KMERS_NUMBER + 1 + (int) rawNumber / keys_compress;
rawNumber++;
}
if (asciiNumber > previousAsciiNumber)
{
indexes->push_back(i);
asciiNumber++;
previousAsciiNumber = asciiNumber;
}
if(asciiNumber > 127){
cerr << WARNING_STRING << "Pre-filtering disabled" << endl;
......
......@@ -19,7 +19,7 @@ class FilterWithACAutomaton {
/* The size of the BioReader returned after filtering.*/
int filtered_sequences_nb;
FilterWithACAutomaton(BioReader &origin, string seed);
FilterWithACAutomaton(BioReader &origin, string seed, float keys_compress=1.0);
~FilterWithACAutomaton();
......@@ -93,7 +93,7 @@ class FilterWithACAutomaton {
The param "seed" is used while inserting sequences in the automaton. By default
the seed has a size of 10.
*/
void buildACAutomatonToFilterBioReader(string seed);
void buildACAutomatonToFilterBioReader(string seed, float keys_compress);
/**
* Return the vector of indexes used while building the automaton.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment