Commit bd3525c3 authored by Mikaël Salson's avatar Mikaël Salson

CountKmerAffectAnalyser: By default, don't allow overlap between two affectations.

When searching for the maximum, don't allow overlaps (this is the default, but it can be changed),
so that when we have something like this:
+V+V+V+V _ _ _ _ _ _ _ _ _ _ +V _ _ _ +J+J+J+J+J
the +V in the middle is ignored, which is what we want to achieve.
parent e4e59d75
...@@ -95,7 +95,7 @@ class AffectAnalyser { ...@@ -95,7 +95,7 @@ class AffectAnalyser {
template <class T> template <class T>
class KmerAffectAnalyser: public AffectAnalyser<T> { class KmerAffectAnalyser: public AffectAnalyser<T> {
private: protected:
IKmerStore<T> &kms; IKmerStore<T> &kms;
const string &seq; const string &seq;
vector<T> affectations; vector<T> affectations;
...@@ -135,6 +135,7 @@ template <class T> ...@@ -135,6 +135,7 @@ template <class T>
class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> { class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
private: private:
map<T, int* >counts; map<T, int* >counts;
int overlap;
public: public:
CountKmerAffectAnalyser(IKmerStore<T> &kms, const string &seq); CountKmerAffectAnalyser(IKmerStore<T> &kms, const string &seq);
...@@ -161,22 +162,39 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> { ...@@ -161,22 +162,39 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
/** /**
* @return the first position pos in the sequence such that * @return the first position pos in the sequence such that
* countBefore(before, pos) + countAfter(after, pos) is maximal * countBefore(before, pos - kms.getS() + 1 + overlap)
* + countAfter(after, pos) is maximal
* and pos >= start, and the maximum is greater than min; * and pos >= start, and the maximum is greater than min;
* or -1 if such a position doesn't exist * or -1 if such a position doesn't exist.
* Where overlap is getAllowedOverlap().
* @complexity linear in getSequence().size() * @complexity linear in getSequence().size()
*/ */
int firstMax(const T&before, const T&after, int start=0, int min=-1) const; int firstMax(const T&before, const T&after, int start=0, int min=-1) const;
/** /**
* @return the last position pos in the sequence such that * @return the last position pos in the sequence such that
* countBefore(before, pos) + countAfter(after, pos) is maximal * countBefore(before, pos - kms.getS() + 1 + overlap)
* + countAfter(after, pos) is maximal
* and pos <= end (if end == -1 considers end of sequence), and the * and pos <= end (if end == -1 considers end of sequence), and the
* maximum is greater than min; or -1 if such a position doesn't exist. * maximum is greater than min; or -1 if such a position doesn't exist.
* Where overlap is getAllowedOverlap().
* @complexity linear in getSequence().size() * @complexity linear in getSequence().size()
*/ */
int lastMax(const T&before, const T&after, int end=-1, int min=-1) const; int lastMax(const T&before, const T&after, int end=-1, int min=-1) const;
/**
* @return the allowed overlap between two k-mers with distinct affectations
* (default is 0)
*/
int getAllowedOverlap();
/**
* Set the overlap allowed between two k-mers with two different affectations,
* when looking for the maximum.
* The overlap should not be greater than the span of the seed used.
*/
void setAllowedOverlap(int overlap);
private: private:
/** /**
* Build the counts map. * Build the counts map.
...@@ -285,6 +303,7 @@ string KmerAffectAnalyser<T>::toString() const{ ...@@ -285,6 +303,7 @@ string KmerAffectAnalyser<T>::toString() const{
template <class T> template <class T>
CountKmerAffectAnalyser<T>::CountKmerAffectAnalyser(IKmerStore<T> &kms, const string &seq): KmerAffectAnalyser<T>(kms, seq) { CountKmerAffectAnalyser<T>::CountKmerAffectAnalyser(IKmerStore<T> &kms, const string &seq): KmerAffectAnalyser<T>(kms, seq) {
buildCounts(); buildCounts();
overlap=0;
} }
template <class T> template <class T>
...@@ -341,13 +360,28 @@ int CountKmerAffectAnalyser<T>::lastMax(const T&before, const T&after, ...@@ -341,13 +360,28 @@ int CountKmerAffectAnalyser<T>::lastMax(const T&before, const T&after,
return searchMax(before, after, end, 0, -1, min); return searchMax(before, after, end, 0, -1, min);
} }
/**
 * Accessor for the overlap currently allowed between two k-mers that carry
 * distinct affectations.
 * @return the value stored in the overlap member (0 unless it was changed
 *         through setAllowedOverlap()).
 */
template <class T>
int CountKmerAffectAnalyser<T>::getAllowedOverlap() {
  return this->overlap;
}
/**
 * Store the overlap allowed between two k-mers with different affectations;
 * the stored value is the one reported by getAllowedOverlap().
 * @param overlap the new allowed overlap (stored as is, no validation is
 *        performed here).
 */
template <class T>
void CountKmerAffectAnalyser<T>::setAllowedOverlap(int overlap) {
  // The parameter shadows the member of the same name, so the member is
  // designated through its class-qualified name.
  CountKmerAffectAnalyser<T>::overlap = overlap;
}
template <class T> template <class T>
int CountKmerAffectAnalyser<T>::searchMax(const T&before, const T& after, int CountKmerAffectAnalyser<T>::searchMax(const T&before, const T& after,
int start, int end, int iter, int min) const { int start, int end, int iter, int min) const {
int first_pos_max = -1; int first_pos_max = -1;
int max_value = min; int max_value = min;
for (int i = start; i*iter <= end; i+=iter) { int shift = KmerAffectAnalyser<T>::kms.getS() - overlap - 1;
int value = countBefore(before, i) + countAfter(after, i); for (int i = start; i*iter <= iter*end; i+=iter) {
int value;
if (iter*(i - shift) >= iter*start && iter*(i - shift) <= iter*end)
value= countBefore(before, i - shift) + countAfter(after, i);
else
value = countAfter(after, i);
if (value > max_value) { if (value > max_value) {
max_value = value; max_value = value;
first_pos_max = i; first_pos_max = i;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment