Commit 7cf92edc authored by Mathieu Giraud's avatar Mathieu Giraud

Merge branch...

Merge branch 'feature-a/3282-repetitive-filterbioreaderwithacautomaton-parts-to-reusable-function' into 'dev'

Feature a/3282 repetitive filterbioreaderwithacautomaton parts to reusable function

Closes #3282, #3312, and #3299

See merge request !228
parents df17a08c 1719e236
Pipeline #31554 failed with stages
in 6 minutes and 14 seconds
......@@ -27,12 +27,11 @@ void FilterWithACAutomaton::buildACAutomatonToFilterBioReader
}
automaton = new PointerACAutomaton<KmerAffect>(seed, false, true);
indexes = new vector<int>();
automaton->insert(origin.sequence(0),std::string("") + char(1), true, 0, seed);
asciiNumber = 1;
asciiNumber = SPECIFIC_KMERS_NUMBER;
automaton->insert(origin.sequence(0),std::string("") + char(asciiNumber), true, 0, seed);
indexes->push_back(0);
previousLabel = extractGeneName(origin.label(0));
int i;
for(i = 1;i < origin.size(); ++i){
for(int i = 1;i < origin.size(); ++i){
currentLabel = extractGeneName(origin.label(i));
if(currentLabel != previousLabel){
indexes->push_back(i);
......@@ -62,9 +61,6 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
BioReader result;
map<KmerAffect, int> mapAho;
KmerAffect tmpKmer;
unsigned int asciiNum;
char asciiChar;
if(!automaton || !indexes || kmer_threshold < 0){
return origin;
}
......@@ -73,14 +69,8 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
//All k-mers selected : iterate over all map
if(kmer_threshold == ALL_KMERS_VALUE || kmer_threshold > (int)mapAho.size()){
for(auto const mx: mapAho){
tmpKmer = mx.first;
asciiChar = tmpKmer.getLabel().at(0);
asciiNum = int(asciiChar);
if(asciiNum > indexes->size() - 1){
break;
}
for(int i = indexes->at(asciiNum - 1); i < indexes->at(asciiNum); ++i){
result.add(origin.read(i));
if(mx.first.isGeneric()){
transferBioReaderSequences(origin, result, mx.first);
}
}
/* The most significant k-mers selected : iterate over a portion of the
......@@ -97,6 +87,9 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
int nbKmers = 0, previousOccurences = 0;
for(pair<KmerAffect, int> element : setOfWords){
// Add corresponding sequences to the BioReader
if(!element.first.isGeneric()){
continue;
}
if(nbKmers == kmer_threshold && previousOccurences == element.second){
//Keep the same amount of genes
}else if(nbKmers < kmer_threshold){
......@@ -104,21 +97,25 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
}else{
break;
}
tmpKmer = element.first;
asciiChar = tmpKmer.getLabel().at(0);
asciiNum = int(asciiChar);
if(asciiNum > indexes->size() - 1){
break;
}
for(int i = indexes->at(asciiNum - 1); i < indexes->at(asciiNum); ++i){
result.add(origin.read(i));
}
transferBioReaderSequences(origin, result, element.first);
previousOccurences = element.second;
}
}
return (result.size() == 0) ? origin : result;
}
/**
 * Append to dst the sequences of src selected by the label of k.
 *
 * The first character of k's label is read as a number. Subtracting
 * SPECIFIC_KMERS_NUMBER from it gives a position p in `indexes`, and every
 * src.read(i) with i in [indexes[p], indexes[p + 1]) is added to dst.
 *
 * @param src The BioReader the sequences are read from.
 * @param dst The BioReader that receives the selected sequences.
 * @param k   The k-mer whose label selects the sequences; it must be generic.
 * @throws invalid_argument if no index table is stored, if k is not generic,
 *         or if its label does not designate a valid range of `indexes`.
 */
void FilterWithACAutomaton::transferBioReaderSequences(const BioReader &src, BioReader &dst, KmerAffect k) const{
  // Validate the k-mer (and the presence of the index table) before touching its label.
  if(!indexes || !k.isGeneric()){
    throw invalid_argument("Incorrect K-mer transmitted.");
  }
  const auto label = k.getLabel();
  if(label.empty()){
    throw invalid_argument("Incorrect K-mer transmitted.");
  }
  const char asciiChar = label.at(0);
  const unsigned int asciiNum = int(asciiChar);
  // The lower bound is checked explicitly: without it the unsigned subtraction
  // below would wrap around and the error would surface as an out_of_range
  // thrown by vector::at() instead of the invalid_argument used here.
  if(asciiNum < SPECIFIC_KMERS_NUMBER || asciiNum > indexes->size()){
    throw invalid_argument("Incorrect K-mer transmitted.");
  }
  const unsigned int group = asciiNum - SPECIFIC_KMERS_NUMBER;
  // Both bounds are loop-invariant, so they are read only once.
  const int first = indexes->at(group);
  const int last = indexes->at(group + 1);
  for(int i = first; i < last; ++i){
    dst.add(src.read(i));
  }
}
/* Accessor returning the pointer held in the `indexes` member, unchanged. */
vector<int>* FilterWithACAutomaton::getIndexes() const{
  return indexes;
}
......
......@@ -92,5 +92,16 @@ class FilterWithACAutomaton {
* Return the automaton stored.
*/
AbstractACAutomaton<KmerAffect>* getAutomaton() const;
/**
 * Transfer sequences from one BioReader to another.
 * @param src The BioReader the sequences are taken from.
 * @param dst The BioReader that will receive the sequences.
 * @param k The k-mer that indicates which sequences will be transferred.
 * The label stored in the k-mer is used to select the sequences. For more information
 * about how the label is used, see buildACAutomatonToFilterBioReader's doc.
 * @throws invalid_argument if the k-mer is not generic or if the number read
 * from its label exceeds the size of the stored indexes.
 */
void transferBioReaderSequences(const BioReader &src, BioReader &dst, const KmerAffect k) const;
};
#endif
......@@ -178,6 +178,10 @@ bool KmerAffect::isUnknown() const {
return affect.c == (int) AFFECT_UNKNOWN_CHAR;
}
/* A k-mer is generic when it is neither unknown nor ambiguous. */
bool KmerAffect::isGeneric() const {
  return !isUnknown() && !isAmbiguous();
}
/* Textual form of this k-mer: delegates to the global toString() applied to `affect`. */
string KmerAffect::toString() const {
  string representation = ::toString(affect);
  return representation;
}
......
......@@ -152,6 +152,11 @@ public:
* @return true iff the value is the same as the one given by default constructor
*/
bool isNull() const;
/**
* @return true iff the k-mer is neither unknown nor ambiguous
*/
bool isGeneric() const;
string toString() const;
string toStringValues()const;
......@@ -184,6 +189,9 @@ ostream &operator<<(ostream &os, const KmerAffect &kmer);
/* Define what an unknown KmerAffect looks like in a string */
#define AFFECT_UNKNOWN_TO_STRING "_"
/* Define how many specific k-mers exist. For now there are only two: ambiguous and unknown. */
#define SPECIFIC_KMERS_NUMBER 2
/**
* Constant defining any not-unknown affectation
* Could be used by .getIndexLoad(), but now any non-AFFECT_UNKNOWN kmer will work.
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment