Commit bd3525c3 authored by Mikaël Salson

CountKmerAffectAnalyser: By default, don't allow overlap between two affectations.

When searching for the maximum, don't allow overlaps (this is the default, but it can be changed).
That way, when we have something like this:
+V+V+V+V _ _ _ _ _ _ _ _ _ _ +V _ _ _ +J+J+J+J+J
the +V in the middle is ignored, which is what we want to achieve.
parent e4e59d75
......@@ -95,7 +95,7 @@ class AffectAnalyser {
template <class T>
class KmerAffectAnalyser: public AffectAnalyser<T> {
private:
protected:
IKmerStore<T> &kms;
const string &seq;
vector<T> affectations;
......@@ -135,6 +135,7 @@ template <class T>
class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
private:
map<T, int* >counts;
int overlap;
public:
CountKmerAffectAnalyser(IKmerStore<T> &kms, const string &seq);
......@@ -161,22 +162,39 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
/**
* @return the first position pos in the sequence such that
* countBefore(before, pos) + countAfter(after, pos) is maximal
* countBefore(before, pos - kms.getS() + 1 + overlap)
+ countAfter(after, pos) is maximal
* and pos >= start, and the maximum is greater than min;
* or -1 if such a position doesn't exist
* or -1 if such a position doesn't exist.
* Where overlap is getAllowedOverlap().
* @complexity linear in getSequence().size()
*/
int firstMax(const T&before, const T&after, int start=0, int min=-1) const;
/**
* @return the last position pos in the sequence such that
* countBefore(before, pos) + countAfter(after, pos) is maximal
* countBefore(before, pos - kms.getS() + 1 + overlap)
* + countAfter(after, pos) is maximal
* and pos <= end (if end == -1 considers end of sequence), and the
* maximum is greater than min; or -1 if such a position doesn't exist.
* Where overlap is getAllowedOverlap().
* @complexity linear in getSequence().size()
*/
int lastMax(const T&before, const T&after, int end=-1, int min=-1) const;
/**
* @return the allowed overlap between two k-mers with distinct affectations
* (default is 0)
*/
int getAllowedOverlap();
/**
* Set the overlap allowed between two k-mers with two different affectations,
* when looking for the maximum.
* The overlap should not be greater than the span of the seed used.
*/
void setAllowedOverlap(int overlap);
private:
/**
* Build the counts map.
......@@ -285,6 +303,7 @@ string KmerAffectAnalyser<T>::toString() const{
/**
 * Construct a counting analyser for the sequence seq using the k-mer store kms.
 * Both arguments are forwarded to the KmerAffectAnalyser<T> base constructor.
 * The allowed overlap is initialised to 0 in the member-initializer list so
 * that it already holds a defined value while buildCounts() runs.
 * @param kms the k-mer store handed to the base class
 * @param seq the sequence handed to the base class
 */
template <class T>
CountKmerAffectAnalyser<T>::CountKmerAffectAnalyser(IKmerStore<T> &kms, const string &seq)
  : KmerAffectAnalyser<T>(kms, seq), overlap(0) {
  // Fill the counts map once the base class and overlap are initialised.
  buildCounts();
}
template <class T>
......@@ -341,13 +360,28 @@ int CountKmerAffectAnalyser<T>::lastMax(const T&before, const T&after,
return searchMax(before, after, end, 0, -1, min);
}
/**
 * @return the overlap currently stored in this analyser
 *         (set to 0 by the constructor, changed through setAllowedOverlap()).
 */
template <class T>
int CountKmerAffectAnalyser<T>::getAllowedOverlap() {
  const int allowed = this->overlap;
  return allowed;
}
/**
 * Store a new allowed overlap in this analyser.
 * No validation is performed on the value.
 * @param overlap the new value; it replaces the one previously stored
 */
template <class T>
void CountKmerAffectAnalyser<T>::setAllowedOverlap(int overlap) {
  // The parameter shadows the member, so the member is named explicitly.
  CountKmerAffectAnalyser<T>::overlap = overlap;
}
template <class T>
int CountKmerAffectAnalyser<T>::searchMax(const T&before, const T& after,
int start, int end, int iter, int min) const {
int first_pos_max = -1;
int max_value = min;
for (int i = start; i*iter <= end; i+=iter) {
int value = countBefore(before, i) + countAfter(after, i);
int shift = KmerAffectAnalyser<T>::kms.getS() - overlap - 1;
for (int i = start; i*iter <= iter*end; i+=iter) {
int value;
if (iter*(i - shift) >= iter*start && iter*(i - shift) <= iter*end)
value= countBefore(before, i - shift) + countAfter(after, i);
else
value = countAfter(after, i);
if (value > max_value) {
max_value = value;
first_pos_max = i;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment