From bd3525c3e490c5d7d731e21692b4b4eb2543f90d Mon Sep 17 00:00:00 2001
From: Mikael Salson
Date: Tue, 15 Apr 2014 17:08:18 +0200
Subject: [PATCH] CountKmerAffectAnalyser: By default, don't allow overlap
between two affectations.
When searching for the maximum, don't allow overlaps (this is the default, but it can be changed).
That way, when we have something like this:
+V+V+V+V _ _ _ _ _ _ _ _ _ _ +V _ _ _ +J+J+J+J+J
the +V in the middle is ignored, which is what we want to achieve.
---
algo/core/affectanalyser.h | 46 +++++++++++++++++++++++++++++++++-----
1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/algo/core/affectanalyser.h b/algo/core/affectanalyser.h
index 03970d328..1b79d84ce 100644
--- a/algo/core/affectanalyser.h
+++ b/algo/core/affectanalyser.h
@@ -95,7 +95,7 @@ class AffectAnalyser {
template
class KmerAffectAnalyser: public AffectAnalyser {
- private:
+ protected:
IKmerStore &kms;
const string &seq;
vector affectations;
@@ -135,6 +135,7 @@ template
class CountKmerAffectAnalyser: public KmerAffectAnalyser {
private:
mapcounts;
+ int overlap;
public:
CountKmerAffectAnalyser(IKmerStore &kms, const string &seq);
@@ -161,22 +162,39 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser {
/**
* @return the first position pos in the sequence such that
- * countBefore(before, pos) + countAfter(after, pos) is maximal
+ * countBefore(before, pos - kms.getS() + 1 + overlap)
+ * + countAfter(after, pos) is maximal
* and pos >= start, and the maximum is greater than min;
- * or -1 if such a position doesn't exist
+ * or -1 if such a position doesn't exist.
+ * Where overlap is getAllowedOverlap().
* @complexity linear in getSequence().size()
*/
int firstMax(const T&before, const T&after, int start=0, int min=-1) const;
/**
* @return the last position pos in the sequence such that
- * countBefore(before, pos) + countAfter(after, pos) is maximal
+ * countBefore(before, pos - kms.getS() + 1 + overlap)
+ * + countAfter(after, pos) is maximal
* and pos <= end (if end == -1 considers end of sequence), and the
* maximum is greater than min; or -1 if such a position doesn't exist.
+ * Where overlap is getAllowedOverlap().
* @complexity linear in getSequence().size()
*/
int lastMax(const T&before, const T&after, int end=-1, int min=-1) const;
+ /**
+ * @return the allowed overlap between two k-mers with distinct affectations
+ * (default is 0)
+ */
+ int getAllowedOverlap();
+
+ /**
+ * Set the overlap allowed between two k-mers with two different affectations,
+ * when looking for the maximum.
+ * The overlap should not be greater than the span of the seed used.
+ */
+ void setAllowedOverlap(int overlap);
+
private:
/**
* Build the counts map.
@@ -285,6 +303,7 @@ string KmerAffectAnalyser::toString() const{
template
CountKmerAffectAnalyser::CountKmerAffectAnalyser(IKmerStore &kms, const string &seq): KmerAffectAnalyser(kms, seq) {
buildCounts();
+ overlap=0;
}
template
@@ -341,13 +360,28 @@ int CountKmerAffectAnalyser::lastMax(const T&before, const T&after,
return searchMax(before, after, end, 0, -1, min);
}
+template
+int CountKmerAffectAnalyser::getAllowedOverlap() {
+ return overlap;
+}
+
+template
+void CountKmerAffectAnalyser::setAllowedOverlap(int overlap) {
+ this->overlap = overlap;
+}
+
template
int CountKmerAffectAnalyser::searchMax(const T&before, const T& after,
int start, int end, int iter, int min) const {
int first_pos_max = -1;
int max_value = min;
- for (int i = start; i*iter <= end; i+=iter) {
- int value = countBefore(before, i) + countAfter(after, i);
+ int shift = KmerAffectAnalyser::kms.getS() - overlap - 1;
+ for (int i = start; i*iter <= iter*end; i+=iter) {
+ int value;
+ if (iter*(i - shift) >= iter*start && iter*(i - shift) <= iter*end)
+ value= countBefore(before, i - shift) + countAfter(after, i);
+ else
+ value = countAfter(after, i);
if (value > max_value) {
max_value = value;
first_pos_max = i;
--
2.24.1