Commit 68671cfd authored by Mathieu Giraud's avatar Mathieu Giraud

core/affectanalyser.{h,cpp}: De-templatization of KmerAffectAnalyser

As KmerAffectAnalyser is only used with KmerAffect, we do not need to keep the templates.
The code should be easier to read for C++ newbies such as the author of this commit.
parent b0882b22
......@@ -10,3 +10,323 @@ bool operator==(const affect_infos &ai1, const affect_infos &ai2) {
&& ai1.nb_after_left == ai2.nb_after_left
&& ai1.max_found == ai2.max_found;
}
KmerAffectAnalyser::KmerAffectAnalyser(IKmerStore<KmerAffect> &kms,
const string &seq)
:kms(kms), seq(seq) {
assert(seq.length() >= (size_t)kms.getS());
affectations = kms.getResults(seq, true);
}
KmerAffectAnalyser::KmerAffectAnalyser(IKmerStore<KmerAffect> &kms,
const string &seq,
vector <KmerAffect> a):
kms(kms), seq(seq), affectations(a){}
KmerAffectAnalyser::~KmerAffectAnalyser(){}
int KmerAffectAnalyser::count() const{
return affectations.size();
}
int KmerAffectAnalyser::count(const KmerAffect &affect) const{
int count = 0;
for (vector<KmerAffect>::const_iterator it = affectations.begin();
it < affectations.end(); it++) {
if (*it == affect)
count++;
}
return count;
}
const KmerAffect&KmerAffectAnalyser::getAffectation(int i) const{
assert(i >= 0 && i < count());
return affectations[i];
}
vector<KmerAffect> KmerAffectAnalyser::getAllAffectations(affect_options_t options) const{
if (options == AO_NONE)
return affectations;
vector<KmerAffect> result;
KmerAffect previous = affectations[0];
result.push_back(previous);
for (size_t i = 1; i < affectations.size(); i++) {
if (! (previous == affectations[i])) {
result.push_back(affectations[i]);
previous = affectations[i];
}
}
return result;
}
set<KmerAffect> KmerAffectAnalyser::getDistinctAffectations() const{
set<KmerAffect> result;
for (size_t i = 0; i < affectations.size(); i++) {
result.insert(affectations[i]);
}
return result;
}
affect_infos KmerAffectAnalyser::getMaximum(const KmerAffect &before,
const KmerAffect &after,
float ratioMin,
int maxOverlap) const {
/* currentValue is the { affectations[t] == before | t \in 1..i } - | { affectations[i] == after | t \in 1..i } */
int currentValue;
int span = kms.getS();
int length = count();
affect_infos results;
if (maxOverlap > span)
maxOverlap = span;
/* Initialize results */
results.max_found = false;
results.max_value = 0;
results.first_pos_max = results.last_pos_max = -1;
results.nb_before_left = results.nb_before_right = results.nb_after_right = results.nb_after_left = 0;
currentValue = 0;
for (int i = 0; i < min(length,span - maxOverlap); i++) {
if (affectations[i] == after) {
currentValue--;
results.nb_after_right++;
}
}
for (int i = span - maxOverlap; i < length; i++) {
/* i - span + maxOverlap, to avoir overlapping k-mers */
/* Read the current affectations, and store them both in currentValue and at the right of the previous maximum.
The affectation of 'before' is interpreted relatively to span and maxOverlap */
if (affectations[i - span + maxOverlap] == before) {
currentValue++;
results.nb_before_right++;
}
if (affectations[i] == after) {
currentValue--;
results.nb_after_right++;
}
/* Now currentValue = | { affectations[t - span + maxOverlap] == 'before' | t \in span-maxOverlap..i } | - | { affectations[i] == 'after' | t \in 0..i } | */
/* If we raise above the max, or if we continue a previous maximum (even from a distant position), store in results */
if (currentValue >= results.max_value) {
if (currentValue > results.max_value)
results.first_pos_max = i;
results.max_value = currentValue;
results.last_pos_max = i;
/* What was at the right of the previous maximum is now at the left of the current maximum */
results.nb_after_left += results.nb_after_right;
results.nb_before_left += results.nb_before_right;
results.nb_after_right = 0;
results.nb_before_right = 0;
}
}
for (int i = length - span + maxOverlap; i < length && i >= 0; i++) {
if (affectations[i] == before)
results.nb_before_right++;
}
/* Main test:
1) do we have enough affectations in good positions ('before' at the left and 'after' at the right) ?
We tolerate some of them in bad positions, but there must be 'ratioMin' more in good positions
2) there should be at least one 'before' and one 'after' (? CHECK ?)
*/
if (results.nb_after_right >= results.nb_before_right*ratioMin
&& (results.nb_after_right > 0 || results.nb_before_right == 0)
&& currentValue < results.max_value
&& results.max_value > 0) {
results.max_found = true;
return results;
}
return results;
}
const string &KmerAffectAnalyser::getSequence() const{
return seq;
}
int KmerAffectAnalyser::first(const KmerAffect &affect) const{
for (size_t i = 0; i < affectations.size(); i++)
if (affect == affectations[i])
return i;
return (int) string::npos;
}
int KmerAffectAnalyser::last(const KmerAffect &affect) const{
for (size_t i = affectations.size(); i > 0; i--)
if (affect == affectations[i-1])
return i-1;
return (int) string::npos;
}
string KmerAffectAnalyser::toString() const{
string kmer;
for (size_t i = 0; i < affectations.size(); i++) {
kmer += affectations[i].toString();
#ifdef DEBUG_KMERS
kmer += ": "+spaced(seq.substr(i,kms.getS()), kms.getSeed())+"\n";
#endif
}
return kmer;
}
/* CountKmerAffectAnalyser */
CountKmerAffectAnalyser::CountKmerAffectAnalyser(IKmerStore<KmerAffect> &kms, const string &seq): KmerAffectAnalyser(kms, seq) {
buildCounts();
overlap=0;
}
CountKmerAffectAnalyser::~CountKmerAffectAnalyser() {
set<KmerAffect> affects = this->getDistinctAffectations();
/* Initialize each key with a 0-integer array */
for (set<KmerAffect>::iterator it = affects.begin();
it != affects.end(); it++) {
delete [] counts[*it];
}
}
int CountKmerAffectAnalyser::count() const {
return KmerAffectAnalyser::count();
}
int CountKmerAffectAnalyser::count(const KmerAffect &affect) const {
if (counts.count(affect) == 0)
return 0;
return counts.find(affect)->second[KmerAffectAnalyser::count() - 1];
}
KmerAffect CountKmerAffectAnalyser::max(const set<KmerAffect> forbidden) const {
map<KmerAffect, int* >::const_iterator it = counts.begin();
KmerAffect max_affect = KmerAffect::getUnknown();
int max_count = -1;
for (; it != counts.end(); it++) {
if (forbidden.count(it->first) == 0) {
int current_count = count(it->first);
if (current_count > max_count) {
max_affect = it->first;
max_count = current_count;
}
}
}
return max_affect;
}
int CountKmerAffectAnalyser::countBefore(const KmerAffect&affect, int pos) const {
if (pos == 0 || counts.count(affect) == 0)
return 0;
return counts.find(affect)->second[pos-1];
}
int CountKmerAffectAnalyser::countAfter(const KmerAffect&affect, int pos) const {
if (counts.count(affect) == 0)
return 0;
int length = KmerAffectAnalyser::count();
map<KmerAffect, int*>::const_iterator it = counts.find(affect);
return it->second[length-1] - it->second[pos];
}
int CountKmerAffectAnalyser::firstMax(const KmerAffect&before, const KmerAffect&after,
int start, int min) const {
return searchMax(before, after, start, KmerAffectAnalyser::count()-1,1, min);
}
int CountKmerAffectAnalyser::lastMax(const KmerAffect&before, const KmerAffect&after,
int end, int min) const {
if (end == -1)
end = KmerAffectAnalyser::count()-1;
return searchMax(before, after, end, 0, -1, min);
}
int CountKmerAffectAnalyser::getAllowedOverlap() {
return overlap;
}
void CountKmerAffectAnalyser::setAllowedOverlap(int overlap) {
this->overlap = overlap;
}
int CountKmerAffectAnalyser::searchMax(const KmerAffect&before, const KmerAffect& after,
int start, int end, int iter, int min) const {
if (count(before) == 0 || count(after) == 0)
return -1;
int first_pos_max = -1;
int max_value = min;
int shift = KmerAffectAnalyser::kms.getS() - overlap - 1;
int shiftedStart = start, shiftedEnd = end;
if (iter == 1)
shiftedStart += shift;
else
shiftedEnd += shift;
for (int i = shiftedStart; (i)*iter <= iter*shiftedEnd; i+=iter) {
int valueBefore = countBefore(before, i - shift);
int valueAfter = countAfter(after, i);
if (valueAfter + valueBefore > max_value
&& valueAfter > 0 && valueBefore > 0) {
max_value = valueAfter + valueBefore;
first_pos_max = i;
}
}
return first_pos_max;
}
void CountKmerAffectAnalyser::buildCounts() {
int length = KmerAffectAnalyser::count();
set<KmerAffect> affects = this->getDistinctAffectations();
for (set<KmerAffect>::iterator it = affects.begin();
it != affects.end(); it++) {
int *array = new int[length];
/* Initialize each key with a 0-integer array */
array[0] = (this->getAffectation(0) == *it) ? 1 : 0;
/* Fill the array with actual values */
for (int i = 1; i < length; i++) {
KmerAffect current = this->getAffectation(i);
int value = (current == *it) ? 1 : 0;
array[i] = array[i-1]+value;
}
counts[*it] = array;
}
}
#ifndef AFFECT_ANALYSER_H
#define AFFECT_ANALYSER_H
#include "kmerstore.h"
#include "kmeraffect.h"
#include <set>
#include <vector>
#include <cassert>
......@@ -41,7 +43,7 @@ bool operator==(const affect_infos &ai1, const affect_infos &ai2);
* Input: Index that constitutes the k-mer sequence repertoire
* Input: Sequence whose k-mers must be affected
*/
template<class T>
class AffectAnalyser {
public:
......@@ -58,14 +60,14 @@ class AffectAnalyser {
* @param affect: An affectation
* @return the number of times this affectation has been given in the read.
*/
virtual int count(const T &affect) const = 0;
virtual int count(const KmerAffect &affect) const = 0;
/**
* @param i: the position to consider
* @pre i >= 0 && i < count()
* @return the affectation of the k-mer at position i.
*/
virtual const T&getAffectation(int i) const = 0;
virtual const KmerAffect &getAffectation(int i) const = 0;
/**
* @param options: options can either be AO_NONE or AO_NO_CONSECUTIVE
......@@ -74,12 +76,12 @@ class AffectAnalyser {
* will be different (we remove consecutive
* duplicates)
*/
virtual vector<T> getAllAffectations(affect_options_t options) const = 0;
virtual vector<KmerAffect> getAllAffectations(affect_options_t options) const = 0;
/**
* @return the distinct affectations
*/
virtual set<T> getDistinctAffectations() const = 0;
virtual set<KmerAffect> getDistinctAffectations() const = 0;
/**
* @return the sequence we are analysing
......@@ -93,7 +95,7 @@ class AffectAnalyser {
* @post getAffectation(first(affect)) == affect
* ==> getAffectation(1...first(affect)-1) != affect
*/
virtual int first(const T &affect) const = 0;
virtual int first(const KmerAffect &affect) const = 0;
/**
* @param affect: an affectation
......@@ -102,7 +104,7 @@ class AffectAnalyser {
* @post getAffectation(last(affect)) == affect
* ==> getAffectation(last(affect)+1 ... count() -1) != affect
*/
virtual int last(const T &affect) const = 0;
virtual int last(const KmerAffect &affect) const = 0;
/**
* @return a string representation of the object
......@@ -110,19 +112,19 @@ class AffectAnalyser {
virtual string toString() const = 0;
};
template <class T>
class KmerAffectAnalyser: public AffectAnalyser<T> {
class KmerAffectAnalyser: public AffectAnalyser {
protected:
IKmerStore<T> &kms;
IKmerStore<KmerAffect> &kms;
const string &seq;
vector<T> affectations;
vector<KmerAffect> affectations;
public:
/**
* @param kms: the index storing the affectation for the k-mers
* (parameter is not copied)
* @param seq: the sequence to analyse (parameter is not copied)
*/
KmerAffectAnalyser(IKmerStore<T> &kms, const string &seq);
KmerAffectAnalyser(IKmerStore<KmerAffect> &kms, const string &seq);
/**
* This constructor must be seen as a “toy” constructor, used for
......@@ -133,19 +135,19 @@ class KmerAffectAnalyser: public AffectAnalyser<T> {
* @param seq: basically not used in the class
* @param a: the affectation we must use.
*/
KmerAffectAnalyser(IKmerStore<T> &kms, const string &seq, vector<T> a);
KmerAffectAnalyser(IKmerStore<KmerAffect> &kms, const string &seq, vector<KmerAffect> a);
~KmerAffectAnalyser();
int count() const;
int count(const T &affect) const;
int count(const KmerAffect &affect) const;
const T&getAffectation(int i) const;
const KmerAffect &getAffectation(int i) const;
vector<T> getAllAffectations(affect_options_t options) const;
vector<KmerAffect> getAllAffectations(affect_options_t options) const;
set<T> getDistinctAffectations() const;
set<KmerAffect> getDistinctAffectations() const;
/**
* @param maxOverlap: if greater than kms.getS(), it is automatically set
......@@ -162,15 +164,15 @@ class KmerAffectAnalyser: public AffectAnalyser<T> {
*
* @complexity time: linear in count(), space: constant
*/
affect_infos getMaximum(const T &before, const T &after,
affect_infos getMaximum(const KmerAffect &before, const KmerAffect &after,
float ratioMin=2.,
int maxOverlap=1) const;
const string &getSequence() const;
int first(const T &affect) const;
int first(const KmerAffect &affect) const;
int last(const T &affect) const ;
int last(const KmerAffect &affect) const ;
string toString() const;
};
......@@ -179,14 +181,14 @@ class KmerAffectAnalyser: public AffectAnalyser<T> {
* Class that allows to count in constant time the number of affectations
* before or after a given point.
*/
template <class T>
class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
class CountKmerAffectAnalyser: public KmerAffectAnalyser {
private:
map<T, int* >counts;
map<KmerAffect, int* >counts;
int overlap;
public:
CountKmerAffectAnalyser(IKmerStore<T> &kms, const string &seq);
CountKmerAffectAnalyser(IKmerStore<KmerAffect> &kms, const string &seq);
~CountKmerAffectAnalyser();
int count() const;
......@@ -194,19 +196,19 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
/**
* @complexity constant time
*/
int count(const T &affect) const;
int count(const KmerAffect &affect) const;
/**
* Count the number of an affectation before (strictly) than a position
* @complexity constant time
*/
int countBefore(const T&affect, int pos) const;
int countBefore(const KmerAffect &affect, int pos) const;
/**
* Count the number of an affectation after (strictly) than a position)
* @complexity constant time
*/
int countAfter(const T&affect, int pos) const;
int countAfter(const KmerAffect &affect, int pos) const;
/**
* @return the first position pos in the sequence such that
......@@ -217,7 +219,7 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
* Where s is kms.getS() - getAllowedOverlap() - 1.
* @complexity linear in getSequence().size()
*/
int firstMax(const T&before, const T&after, int start=0, int min=-1) const;
int firstMax(const KmerAffect &before, const KmerAffect &after, int start=0, int min=-1) const;
/**
* @return the last position pos in the sequence such that
......@@ -228,7 +230,7 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
* Where s is kms.getS() - getAllowedOverlap() - 1.
* @complexity linear in getSequence().size()
*/
int lastMax(const T&before, const T&after, int end=-1, int min=-1) const;
int lastMax(const KmerAffect &before, const KmerAffect &after, int end=-1, int min=-1) const;
/**
* @return the allowed overlap between two k-mers with distinct affectations
......@@ -245,7 +247,7 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
* taken apart the forbidden ones.
* @complexity Time: linear on the number of distinct affectations
*/
T max(const set<T> forbidden = set<T>()) const;
KmerAffect max(const set<KmerAffect> forbidden = set<KmerAffect>()) const;
/**
* Set the overlap allowed between two k-mers with two different affectations,
......@@ -264,325 +266,8 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser<T> {
* Search the maximum. Used by firstMax and lastMax.
* @param iter: should be either 1 or -1
*/
int searchMax(const T&before, const T &after,
int searchMax(const KmerAffect &before, const KmerAffect &after,
int start, int end, int iter, int min) const;
};
template <class T>
KmerAffectAnalyser<T>::KmerAffectAnalyser(IKmerStore<T> &kms,
const string &seq)
:kms(kms), seq(seq) {
assert(seq.length() >= (size_t)kms.getS());
affectations = kms.getResults(seq, true);
}
template <class T>
KmerAffectAnalyser<T>::KmerAffectAnalyser(IKmerStore<T> &kms,
const string &seq,
vector <T> a):
kms(kms), seq(seq), affectations(a){}
template <class T>
KmerAffectAnalyser<T>::~KmerAffectAnalyser(){}
template <class T>
int KmerAffectAnalyser<T>::count() const{
return affectations.size();
}
template <class T>
int KmerAffectAnalyser<T>::count(const T &affect) const{
int count = 0;
for (typename vector<T>::const_iterator it = affectations.begin();
it < affectations.end(); it++) {
if (*it == affect)
count++;
}
return count;
}
template <class T>
const T&KmerAffectAnalyser<T>::getAffectation(int i) const{
assert(i >= 0 && i < count());
return affectations[i];
}
template <class T>
vector<T> KmerAffectAnalyser<T>::getAllAffectations(affect_options_t options) const{
if (options == AO_NONE)
return affectations;
vector<T> result;
T previous = affectations[0];
result.push_back(previous);
for (size_t i = 1; i < affectations.size(); i++) {
if (! (previous == affectations[i])) {
result.push_back(affectations[i]);
previous = affectations[i];
}
}
return result;
}
template <class T>
set<T> KmerAffectAnalyser<T>::getDistinctAffectations() const{
set<T> result;
for (size_t i = 0; i < affectations.size(); i++) {
result.insert(affectations[i]);
}
return result;
}
template <class T>
affect_infos KmerAffectAnalyser<T>::getMaximum(const T &before,
const T &after,
float ratioMin,
int maxOverlap) const {
/* currentValue is the { affectations[t] == before | t \in 1..i } - | { affectations[i] == after | t \in 1..i } */
int currentValue;
int span = kms.getS();
int length = count();
affect_infos results;
if (maxOverlap > span)
maxOverlap = span;
/* Initialize results */
results.max_found = false;
results.max_value = 0;
results.first_pos_max = results.last_pos_max = -1;
results.nb_before_left = results.nb_before_right = results.nb_after_right = results.nb_after_left = 0;
currentValue = 0;
for (int i = 0; i < min(length,span - maxOverlap); i++) {
if (affectations[i] == after) {
currentValue--;
results.nb_after_right++;
}
}
for (int i = span - maxOverlap; i < length; i++) {
/* i - span + maxOverlap, to avoir overlapping k-mers */
/* Read the current affectations, and store them both in currentValue and at the right of the previous maximum.
The affectation of 'before' is interpreted relatively to span and maxOverlap */
if (affectations[i - span + maxOverlap] == before) {
currentValue++;
results.nb_before_right++;
}
if (affectations[i] == after) {
currentValue--;
results.nb_after_right++;
}
/* Now currentValue = | { affectations[t - span + maxOverlap] == 'before' | t \in span-maxOverlap..i } | - | { affectations[i] == 'after' | t \in 0..i } | */
/* If we raise above the max, or if we continue a previous maximum (even from a distant position), store in results */
if (currentValue >= results.max_value) {
if (currentValue > results.max_value)
results.first_pos_max = i;
results.max_value = currentValue;
results.last_pos_max = i;
/* What was at the right of the previous maximum is now at the left of the current maximum */
results.nb_after_left += results.nb_after_right;
results.nb_before_left += results.nb_before_right;
results.nb_after_right = 0;
results.nb_before_right = 0;
}
}
for (int i = length - span + maxOverlap; i < length && i >= 0; i++) {
if (affectations[i] == before)
results.nb_before_right++;
}
/* Main test:
1) do we have enough affectations in good positions ('before' at the left and 'after' at the right) ?
We tolerate some of them in bad positions, but there must be 'ratioMin' more in good positions
2) there should be at least one 'before' and one 'after' (? CHECK ?)
*/
if (results.nb_after_right >= results.nb_before_right*ratioMin
&& (results.nb_after_right > 0 || results.nb_before_right == 0)
&& currentValue < results.max_value
&& results.max_value > 0) {
results.max_found = true;
return results;
}
return results;
}
template <class T>
const string &KmerAffectAnalyser<T>::getSequence() const{
return seq;
}
template <class T>
int KmerAffectAnalyser<T>::first(const T &affect) const{
for (size_t i = 0; i < affectations.size(); i++)
if (affect == affectations[i])
return i;
return (int) string::npos;
}
template <class T>
int KmerAffectAnalyser<T>::last(const T &affect) const{
for (size_t i = affectations.size(); i > 0; i--)
if (affect == affectations[i-1])
return i-1;
return (int) string::npos;
}
template <class T>
string KmerAffectAnalyser<T>::toString() const{
string kmer;
for (size_t i = 0; i < affectations.size(); i++) {
kmer += affectations[i].toString();
#ifdef DEBUG_KMERS
kmer += ": "+spaced(seq.substr(i,kms.getS()), kms.getSeed())+"\n";
#endif
}
return kmer;
}
/* CountKmerAffectAnalyser */
template <class T>
CountKmerAffectAnalyser<T>::CountKmerAffectAnalyser(IKmerStore<T> &kms, const string &seq): KmerAffectAnalyser<T>(kms, seq) {
buildCounts();
overlap=0;
}
template <class T>