Commit 9b256a64 authored by Mikaël Salson's avatar Mikaël Salson

IKmerStore: getIndexLoad depends on the type of index.

Some indexes store global information about all the kmers (ArrayKmerStore and MapKmerStore).
Other indexes have a more detailed view of the kmers that are stored (Aho-Corasick automaton).
Such indexes can give a more accurate index load for each affectation.
parent 7f60e487
......@@ -44,6 +44,11 @@ public:
*/
float getIndexLoad(Info kmer) const;
/**
* @inherited from IKmerStore
*/
bool hasDifferentKmerTypes() const;
/**
* @return the information stored for this state
*/
......
......@@ -24,11 +24,15 @@ float AbstractACAutomaton<Info>::getIndexLoad(Info kmer) const {
for(auto iter: kmers_inserted) {
load += getIndexLoad(iter.first);
}
return load;
return (kmer.isUnknown()) ? 1 - load : load;
} else {
load = kmers_inserted.at(kmer) / pow(4.0, kmer.getLength());
return kmers_inserted.at(kmer) / pow(4.0, kmer.getLength());
}
return (kmer.isUnknown()) ? 1 - load : load;
}
/**
 * The automaton's getIndexLoad() looks up a per-kmer count in
 * kmers_inserted, so the load it reports depends on the kmer given.
 * @return always true: this index differentiates kmer types
 */
template <class Info>
bool AbstractACAutomaton<Info>::hasDifferentKmerTypes() const {
  return true;
}
template <class Info>
......
......@@ -127,7 +127,10 @@ public:
virtual T& get(seqtype &word) = 0;
/**
* @return the percentage of kmers that are set in the index
* @return the percentage of kmers that are set in the index.
* When ! hasDifferentKmerTypes(), the index load is always the same
* (apart from the unknown kmer).
* When kmer.isUnknown(), we return the load of all the other kmers.
*/
virtual float getIndexLoad(T kmer) const;
......@@ -163,6 +166,11 @@ public:
*/
Fasta getLabel(T kmer) const;
/**
* @return whether the index differentiates kmer types
*/
virtual bool hasDifferentKmerTypes() const;
/**
* @param seq: a sequence
* @param no_revcomp: force not to revcomp the sequence, even if
......@@ -395,6 +403,11 @@ Fasta IKmerStore<T>::getLabel(T kmer) const {
return FASTA_AMBIGUOUS ;
}
/**
 * Base implementation of the virtual hasDifferentKmerTypes().
 * Returning false means the store does not differentiate kmer types:
 * getIndexLoad() then gives the same load for every kmer, apart from
 * the unknown one. AbstractACAutomaton provides its own version that
 * returns true.
 * @return always false
 */
template<class T>
bool IKmerStore<T>::hasDifferentKmerTypes() const {
return false;
}
// .getResults()
template<class T>
vector<T> MapKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp, string seed) {
......
......@@ -338,9 +338,13 @@ void testProbability() {
germline.new_index();
germline.finish();
TAP_TEST(germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, 4)) == .5, TEST_GET_INDEX_LOAD, "");
TAP_TEST(germline.index->getIndexLoad(AFFECT_NOT_UNKNOWN) == .75, TEST_GET_INDEX_LOAD, ".getIndexLoad with AFFECT_NOT_UNKNOWN");
TAP_TEST(germline.index->getIndexLoad(AFFECT_UNKNOWN) == .25, TEST_GET_INDEX_LOAD, ".getIndexLoad with AFFECT_UNKNOWN");
if (! germline.index->hasDifferentKmerTypes()) {
TAP_TEST(germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, 4)) == .75, TEST_GET_INDEX_LOAD, "index load = " << germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, 4)));
} else {
TAP_TEST(germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, 4)) == 58./256, TEST_GET_INDEX_LOAD, "index load = " << germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, 4)));
}
TAP_TEST(germline.index->getIndexLoad(AFFECT_NOT_UNKNOWN) == .75, TEST_GET_INDEX_LOAD, ".getIndexLoad with AFFECT_NOT_UNKNOWN = " << germline.index->getIndexLoad(AFFECT_NOT_UNKNOWN));
TAP_TEST(germline.index->getIndexLoad(AFFECT_UNKNOWN) == .25, TEST_GET_INDEX_LOAD, ".getIndexLoad with AFFECT_UNKNOWN : " << germline.index->getIndexLoad(AFFECT_UNKNOWN));
Sequence seq = {"to_segment", "to_segment", "TATCG", "", NULL, 0};
KmerSegmenter kseg(seq, &germline);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment