Commit ddd48c77 authored by Mathieu Giraud's avatar Mathieu Giraud

merge -- improve representative computation, 'N' for ambiguous positions

A great new feature by @m-salson: Now the generated representative sequences
can span over some ambiguous positions (including some sequencing errors).
parents 1bd6d4c2 623347e8
......@@ -59,16 +59,18 @@ public:
/**
* @param input: A single FASTA file
* @param label: label that must be associated to the given files
* @param seed: the seed to use for indexing. By default it will be the seed of the index
* @post All the sequences in the FASTA files have been indexed, and the label is stored in the list of labels
*/
void insert(Fasta& input, const string& label="", int keep_only = 0);
void insert(Fasta& input, const string& label="", int keep_only = 0, string seed = "");
/**
* @param input: A list of FASTA files
* @param label: label that must be associated to the given files
* @param seed: the seed to use for indexing. By default it will be the seed of the index
* @post All the sequences in the FASTA files have been indexed, and the label is stored in the list of labels
*/
void insert(list<Fasta>& input, const string& label="", int keep_only = 0);
void insert(list<Fasta>& input, const string& label="", int keep_only = 0, string seed = "");
/**
* @param input: A sequence to be cut in k-mers
......@@ -77,11 +79,13 @@ public:
* of the sequence. if < 0 will keep at most the first
* keep_only nucleotides of the sequence. if == 0,
* will keep all the sequence.
* @param seed: the seed to use for indexing. By default it will be the seed of the index
* @post All the k-mers in the sequence have been indexed.
*/
void insert(const seqtype &sequence,
const string &label,
bool ignore_extended_nucleotides=true, int keep_only = 0);
bool ignore_extended_nucleotides=true, int keep_only = 0,
string seed="");
/**
* @param word: a k-mer
......@@ -134,7 +138,7 @@ public:
* @return a vector of length seq.length() - getK() + 1 containing
* for each k-mer the corresponding value in the index.
*/
virtual vector<T> getResults(const seqtype &seq, bool no_revcomp=false) = 0;
virtual vector<T> getResults(const seqtype &seq, bool no_revcomp=false, string seed="") = 0;
/**
* @return true iff the revcomp is indexed
......@@ -165,7 +169,7 @@ public:
MapKmerStore(string seed, bool=false);
MapKmerStore(int k, bool=false);
vector<T> getResults(const seqtype &seq, bool no_revcomp=false);
vector<T> getResults(const seqtype &seq, bool no_revcomp=false, string seed="");
T& get(seqtype &word);
T& operator[](seqtype & word);
......@@ -196,7 +200,7 @@ public:
ArrayKmerStore(int k, bool=false);
~ArrayKmerStore();
vector<T> getResults(const seqtype &seq, bool no_revcomp=false);
vector<T> getResults(const seqtype &seq, bool no_revcomp=false, const string seed="");
T& get(seqtype &word);
T& operator[](seqtype & word);
T& operator[](int word);
......@@ -211,18 +215,20 @@ IKmerStore<T>::~IKmerStore(){}
template<class T>
void IKmerStore<T>::insert(list<Fasta>& input,
const string &label,
int keep_only){
int keep_only,
string seed){
for(list<Fasta>::iterator it = input.begin() ; it != input.end() ; it++){
insert(*it, label, keep_only);
insert(*it, label, keep_only, seed);
}
}
template<class T>
void IKmerStore<T>::insert(Fasta& input,
const string &label,
int keep_only){
int keep_only,
string seed){
for (int r = 0; r < input.size(); r++) {
insert(input.sequence(r), label, true, keep_only);
insert(input.sequence(r), label, true, keep_only, seed);
}
labels.push_back(make_pair(T(label, 1), input)) ;
......@@ -236,7 +242,8 @@ template<class T>
void IKmerStore<T>::insert(const seqtype &sequence,
const string &label,
bool ignore_extended_nucleotides,
int keep_only){
int keep_only,
string seed){
size_t start_indexing = 0;
size_t end_indexing = sequence.length();
if (keep_only > 0 && sequence.length() > (size_t)keep_only) {
......@@ -245,13 +252,17 @@ void IKmerStore<T>::insert(const seqtype &sequence,
end_indexing = -keep_only;
}
if (seed.empty())
seed = this->seed;
size_t seed_span = seed.length();
size_t size_indexing = end_indexing - start_indexing;
if (size_indexing > max_size_indexing) {
max_size_indexing = size_indexing;
}
for(size_t i = start_indexing ; i + s < end_indexing + 1 ; i++) {
seqtype substr = sequence.substr(i, s);
for(size_t i = start_indexing ; i + seed_span < end_indexing + 1 ; i++) {
seqtype substr = sequence.substr(i, seed_span);
seqtype kmer = spaced(substr, seed);
if (ignore_extended_nucleotides && has_extended_nucleotides(kmer))
......@@ -343,16 +354,19 @@ Fasta IKmerStore<T>::getLabel(T kmer) const {
// .getResults()
template<class T>
vector<T> MapKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp) {
vector<T> MapKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp, string seed) {
int s = IKmerStore<T>::getS();
string local_seed = seed;
if (! seed.size())
local_seed = IKmerStore<T>::seed;
int s = local_seed.size();
if ((int)seq.length() < s - 1) {
return vector<T>(0);
}
vector<T> result(seq.length() - s + 1);
for (size_t i=0; i + s < seq.length() + 1; i++) {
seqtype kmer = spaced(seq.substr(i, s), IKmerStore<T>::seed);
seqtype kmer = spaced(seq.substr(i, s), local_seed);
// seqtype kmer = seq.substr(i, s);
// cout << kmer << endl << kmer0 << endl << endl ;
if (IKmerStore<T>::revcomp_indexed && no_revcomp) {
......@@ -365,9 +379,12 @@ vector<T> MapKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp) {
}
template<class T>
vector<T> ArrayKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp) {
int s = IKmerStore<T>::getS();
vector<T> ArrayKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp, string seed) {
string local_seed = seed;
if (! seed.size())
local_seed = IKmerStore<T>::seed;
int s = local_seed.size();
int N = (int)seq.length();
......@@ -385,7 +402,7 @@ vector<T> ArrayKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp) {
/* Compute results */
for (size_t i=0; (int) i+s < N+1; i++) {
int kmer = spaced_int(intseq + i, IKmerStore<T>::seed);
int kmer = spaced_int(intseq + i, local_seed);
if (IKmerStore<T>::revcomp_indexed && no_revcomp) {
result[i] = store[kmer]; // getfromint(kmer); // store[kmer];
// cout << i << "/" << N << " " << kmer << result[i] << endl ;
......
......@@ -20,7 +20,7 @@ ReadChooser::ReadChooser(list<Sequence> &r, VirtualReadScore &scorer) {
size_t i = 0;
for (list <Sequence>::iterator it = r.begin(); it != r.end(); ++it) {
reads[i].score = scorer.getScore(it->sequence);
reads[i].score = scorer.getScore(*it);
reads[i].seq = &(*it);
i++;
}
......
#include "read_score.h"
#include <cstdlib>
#include <cstring>
KmerAffectReadScore::KmerAffectReadScore(IKmerStore<KmerAffect> &idx,
float unambiguous_score,
......@@ -9,8 +11,8 @@ KmerAffectReadScore::KmerAffectReadScore(IKmerStore<KmerAffect> &idx,
KmerAffectReadScore::~KmerAffectReadScore() {}
float KmerAffectReadScore::getScore(const string &sequence) const {
vector<KmerAffect> answers = index.getResults(sequence);
float KmerAffectReadScore::getScore(const Sequence &sequence) const {
vector<KmerAffect> answers = index.getResults(sequence.sequence);
float score = 0;
for (size_t i = 0; i < answers.size(); i++) {
if (answers[i].affect == AFFECT_AMBIGUOUS)
......@@ -62,6 +64,40 @@ void KmerAffectReadScore::setUnknownScore(float score) {
ReadLengthScore::ReadLengthScore(){}
ReadLengthScore::~ReadLengthScore(){}
float ReadLengthScore::getScore(const string &sequence) const {
return sequence.size();
float ReadLengthScore::getScore(const Sequence &sequence) const {
return sequence.sequence.size();
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////// ReadQualityScore ///////////////////////////////
////////////////////////////////////////////////////////////////////////////////
ReadQualityScore::ReadQualityScore(){}
ReadQualityScore::~ReadQualityScore(){}
size_t ReadQualityScore::qualities[MAX_QUALITY];
float ReadQualityScore::getScore(const Sequence &sequence) const {
memset(qualities, 0, MAX_QUALITY * sizeof(size_t));
for (size_t i = 0; i < sequence.quality.size(); i++) {
int current_quality = (sequence.quality[i]) - ' ';
if(current_quality >= MAX_QUALITY)
current_quality = MAX_QUALITY - 1;
else if (current_quality < 0)
current_quality = 0;
qualities[current_quality]++;
}
int max_percentile = (int) round(sequence.quality.size()*1. / 100);
int percent_quality = 0;
// Computes the percentile of the quality
for (size_t i = 0; i < MAX_QUALITY; i++) {
max_percentile -= qualities[i];
if (max_percentile < 0) {
percent_quality = i;
break;
}
}
if (! percent_quality)
percent_quality = GOOD_QUALITY;
return percent_quality * sequence.sequence.size() / GOOD_QUALITY;
}
......@@ -5,6 +5,9 @@
#include "kmerstore.h"
#include "kmeraffect.h"
#define MAX_QUALITY 50 /* Maximal value for the quality */
#define GOOD_QUALITY 30 /* Min value considered as a good quality */
/**
* This virtual class contains a single method that allows to compute a score
* for a string.
......@@ -18,7 +21,7 @@ class VirtualReadScore {
* @return the score associated to the sequence.
* getScore(a) > getScore(b) ==> a is better than b
*/
virtual float getScore(const string &sequence)const = 0;
virtual float getScore(const Sequence &sequence)const = 0;
};
/**
......@@ -42,7 +45,7 @@ public:
* The score is computed using the affectation in the sequence and the scores
* that have been attributed (or the default ones).
*/
float getScore(const string &sequence) const;
float getScore(const Sequence &sequence) const;
// Getters
float getAmbiguousScore() const;
......@@ -69,6 +72,23 @@ class ReadLengthScore: public VirtualReadScore {
/**
* @return the sequence length
*/
float getScore(const string &sequence) const;
float getScore(const Sequence &sequence) const;
};
/**
* A simple implementation of VirtualReadScore.
* The score is a trade-off between quality and length of the read
*/
class ReadQualityScore: public VirtualReadScore {
private:
static size_t qualities[MAX_QUALITY];
public:
ReadQualityScore();
~ReadQualityScore();
/**
* @return the sequence quality
*/
float getScore(const Sequence &sequence) const;
};
#endif
......@@ -17,6 +17,7 @@ BinReadStorage::BinReadStorage()
nb_inserted(0), nb_stored(0), smallest_bin_not_empty(~0),label() {}
void BinReadStorage::init(size_t nb_bins, size_t max_score, const VirtualReadScore *vrs, bool no_list) {
this->all_read_lengths = 0;
this->nb_bins = nb_bins;
this->max_score = max_score;
if (no_list)
......@@ -44,7 +45,7 @@ BinReadStorage::~BinReadStorage() {
}
void BinReadStorage::addScore(Sequence &s) {
addScore(scorer->getScore(s.sequence));
addScore(scorer->getScore(s));
}
void BinReadStorage::addScore(float score) {
......@@ -58,9 +59,10 @@ void BinReadStorage::addScore(size_t bin, float score) {
}
void BinReadStorage::add(Sequence &s) {
float score = scorer->getScore(s.sequence);
float score = scorer->getScore(s);
size_t bin = scoreToBin(score);
addScore(bin, score);
all_read_lengths += s.sequence.length();
if (nb_stored < getMaxNbReadsStored()) {
bins[bin].push_back(s);
nb_stored++;
......@@ -79,6 +81,10 @@ void BinReadStorage::add(Sequence &s) {
nb_inserted++;
}
list<Sequence> BinReadStorage::getBin(size_t bin) const{
return bins[bin];
}
size_t BinReadStorage::getNbBins() const {
return nb_bins;
}
......@@ -88,13 +94,17 @@ size_t BinReadStorage::getNbInserted() const {
}
double BinReadStorage::getAverageScoreBySeq(Sequence &s) {
return getAverageScoreByScore(scorer->getScore(s.sequence));
return getAverageScoreByScore(scorer->getScore(s));
}
double BinReadStorage::getAverageScoreByScore(float score) {
return getAverageScore(scoreToBin(score));
}
double BinReadStorage::getAverageLength() const{
return all_read_lengths *1. / getNbScores();
}
double BinReadStorage::getAverageScore(size_t bin) {
return getScore(bin) / getNbScores(bin);
}
......@@ -103,7 +113,7 @@ double BinReadStorage::getInvertedAverageScore(size_t bin) {
}
double BinReadStorage::getScoreBySeq(Sequence &s) {
return getScoreByScore(scorer->getScore(s.sequence));
return getScoreByScore(scorer->getScore(s));
}
double BinReadStorage::getScoreByScore(float score) {
......@@ -143,6 +153,29 @@ list<Sequence> BinReadStorage::getReads() const {
return results;
}
list<Sequence> BinReadStorage::getBestReads(size_t max_nb, size_t min_score) const {
list<Sequence>best_reads;
size_t smallest_interesting_bin = max(smallest_bin_not_empty, scoreToBin(min_score));
for (size_t i = nb_bins+1; i > smallest_interesting_bin; i--) {
size_t j = i-1;
if (bins[j].size() > 0) {
if (bins[j].size() <= max_nb) {
best_reads.insert(best_reads.end(), bins[j].begin(), bins[j].end());
max_nb -= bins[j].size();
} else {
for (list<Sequence>::iterator it = bins[j].begin(); max_nb > 0 && it != bins[j].end();
it++) {
best_reads.push_back(*it);
max_nb--;
}
}
}
}
return best_reads;
}
string BinReadStorage::getLabel() const {
return label;
}
......@@ -159,7 +192,7 @@ void BinReadStorage::out_average_scores(ostream &out, bool inversed) {
output_label_average(out, getLabel(), getNbScores(), inversed ? getInvertedAverageScore() : getAverageScore(), inversed ? 3 : 1);
}
size_t BinReadStorage::scoreToBin(float score) {
size_t BinReadStorage::scoreToBin(float score) const{
assert(score >= 0);
if (score > max_score)
return nb_bins;
......
......@@ -14,7 +14,7 @@ class VirtualReadStorage {
protected:
size_t maxNbStored;
const VirtualReadScore *scorer;
int64_t all_read_lengths;
public:
virtual ~VirtualReadStorage() {}
......@@ -31,6 +31,11 @@ class VirtualReadStorage {
*/
void setMaxNbReadsStored(size_t nb);
/**
* @return the average read length of all the sequences that have been added
*/
virtual double getAverageLength() const = 0;
/**
* @return the maximal number of reads stored
*/
......@@ -50,6 +55,12 @@ class VirtualReadStorage {
* @return all the stored reads
*/
virtual list<Sequence> getReads() const = 0;
/**
* @return at most max_nb reads whose score >= min_score
*/
virtual list<Sequence> getBestReads(size_t max_nb, size_t min_score=0) const = 0;
};
/**
......@@ -86,6 +97,8 @@ public:
void add(Sequence &s);
list<Sequence> getBin(size_t bin) const;
/**
* @return the number of bins requested by the used. Note that an additional
* bin is created for the values greater than the provided max value.
......@@ -113,6 +126,11 @@ public:
*/
void addScore(size_t bin, float score);
/**
* @inherited from VirtualReadScore
*/
double getAverageLength() const;
/**
* @return the average score stored in the bin corresponding to the score
* obtained for the provided sequence.
......@@ -170,6 +188,14 @@ public:
list<Sequence> getReads() const;
/**
* @inherited from VirtualReadScore
* The implementation does not guarantee that no sequence will be below min_score.
* As the implementation relies on bins, the score will be inferred depending on the bin
* the sequence belongs to.
*/
list<Sequence> getBestReads(size_t max_nb, size_t min_score=0) const;
/**
* Set the label of the statistics
*/
......@@ -181,7 +207,7 @@ public:
/**
* @return the bin a sequence of the given score must lie.
*/
size_t scoreToBin(float score);
size_t scoreToBin(float score) const;
/**
* Search for a largest value such that the bin is not empty.
......
......@@ -3,6 +3,7 @@
#include "read_score.h"
#include "read_chooser.h"
#include "stats.h"
#include "tools.h"
#include <cstring>
#include <iostream>
......@@ -80,38 +81,59 @@ KmerRepresentativeComputer::KmerRepresentativeComputer(list<Sequence> &r,
string seed)
:RepresentativeComputer(r),seed(seed),stability_limit(DEFAULT_STABILITY_LIMIT){}
void KmerRepresentativeComputer::compute() {
void KmerRepresentativeComputer::compute(bool try_hard) {
assert(coverage_reference_length > 0);
assert(required.length() > 0);
is_computed = false;
string seed = getSeed();
if (seed.size() == 0)
seed = "##########";
assert(seed.find('-') == string::npos);
string seeds[] = {seed, // The first seed should be a contiguous seed.
"##-##-##-##-##-",
"#-##-##-##-##-#",
"-##-##-##-##-##",
"--#--#--#--#--#--#--#--#--#--#"};
size_t nb_seeds = 5;
if (! try_hard)
nb_seeds = 1;
// First create an index on the set of reads
IKmerStore<Kmer> *index = new MapKmerStore<Kmer>(getSeed(), revcomp);
IKmerStore<Kmer> *index[nb_seeds];
for (size_t i = 0; i < nb_seeds; i++) {
index[i] = new MapKmerStore<Kmer>(getSeed(), revcomp);
}
// Add sequences to the index, allowing extended nucleotides (false)
for (list<Sequence>::iterator it=sequences.begin(); it != sequences.end(); ++it) {
index->insert(it->sequence, it->label, false);
for (size_t i = 0; i < nb_seeds; i++)
index[i]->insert(it->sequence, it->label, false, 0, seeds[i]);
}
size_t max = sequences.size();
// Create a read chooser to have the sequences sorted by length
ReadLengthScore *rlc = new ReadLengthScore();
ReadQualityScore *rlc = new ReadQualityScore();
ReadChooser rc(sequences, *rlc);
delete rlc;
delete rlc;
// Traverse the sequences to get the desired representative
size_t pos_longest_run = 0;
size_t length_longest_run = 0;
size_t seq_index_longest_run = 1;
size_t length_longest_cover = 0;
Sequence sequence_longest_run;
bool *cover_longest_run = NULL;
int sequence_used_for_quality = 0;
int window_quality_sum [required.length()];
memset(window_quality_sum, 0, required.length()*sizeof(int));
size_t k = getSeed().length();
for (size_t seq = 1; seq <= sequences.size() && seq <= seq_index_longest_run + stability_limit ; seq++) {
Sequence sequence = rc.getithBest(seq);
size_t length_cover = 0;
// Break as soon as the sequences are too small
if (sequence.sequence.size() < required.size()) {
break;
......@@ -135,50 +157,89 @@ void KmerRepresentativeComputer::compute() {
// When sequences are smaller than length_longest_run,
// they are used only for the above quality computation
if (sequence.sequence.size() <= length_longest_run) {
if (sequence.sequence.size() <= length_longest_cover) {
continue;
}
bool cover[sequence.sequence.size()];
memset(cover, false, sequence.sequence.size()*sizeof(bool));
size_t pos_end_required = pos_required + required.length();
vector<Kmer> counts = index->getResults(sequence.sequence);
memset(&cover[pos_required], true, required.length()*sizeof(bool));
length_cover = required.length();
vector<Kmer> *counts = new vector<Kmer>[nb_seeds];
for (size_t i = 0; i < nb_seeds; i++)
counts[i] = index[i]->getResults(sequence.sequence, false, seeds[i]);
size_t length_run = 0;
size_t i = pos_required;
bool was_extended = true;
// Extend to the left, starting from 'pos_required'
while (i > 0 && isSufficienlyExpressed(counts[i-1].count, max)) {
while (i > 0 && was_extended) {
was_extended = tryToExtendRepresentative(counts, seeds, nb_seeds, i, cover, length_cover, -1);
if (was_extended) {
i--;
length_run++;
}
}
// Extend to the right, starting from 'pos_required'
for (i = pos_required; i < counts.size(); i++) {
while (i < counts.size() &&
isSufficienlyExpressed(counts[i].count, max)) {
length_run++;
// Extend to the right, starting from 'pos_end_required'
i = pos_end_required-1;
length_run += pos_end_required - pos_required;
was_extended = true;
while (i < sequence.sequence.size() && was_extended) {
was_extended = tryToExtendRepresentative(counts, seeds, nb_seeds, i, cover, length_cover, 1);
if (was_extended) {
i++;
length_run++;
}
if (length_run)
// Take into account the whole k-mer, not just the starting positions
length_run += k - 1;
if (length_run > length_longest_run) {
}
if (length_cover > length_longest_cover
|| ((length_cover == length_longest_cover) && (length_run > length_longest_run))) {
length_longest_run = length_run;
pos_longest_run = i - (length_run - k + 1);
pos_longest_run = i - (length_run - 1);
sequence_longest_run = sequence;
length_longest_cover = length_cover;
seq_index_longest_run = seq;
if (cover_longest_run)
delete [] cover_longest_run;
cover_longest_run = new bool[sequence.sequence.size()];
memcpy(cover_longest_run, cover, sizeof(bool) * sequence.sequence.size());
}
// We have a requirement (ie. a non empty string). We reached it, exit.
if (pos_required != pos_end_required)
break;
length_run = 0;
}
delete [] counts;
}
coverage = (float) length_longest_run / coverage_reference_length;
if (coverage < THRESHOLD_BAD_COVERAGE && ! try_hard) {
compute(true);
delete index[0];
return;
}
if (length_longest_run) {
is_computed = true;
representative = sequence_longest_run;
if (nb_seeds > 1) {
size_t last_pos_covered = 0;
for (size_t i = 0; i < sequence_longest_run.sequence.size(); i++) {
if (!cover_longest_run[i])
representative.sequence[i] = 'N';
else
last_pos_covered = i;
}
// Update length_longest_run with its actual value
length_longest_run = last_pos_covered - pos_longest_run + 1;
trimSequence(representative.sequence, pos_longest_run, length_longest_run);
}
delete [] cover_longest_run;
representative.sequence = representative.sequence.substr(pos_longest_run, length_longest_run);
representative.label = representative.label + "-[" + string_of_int(pos_longest_run) + ","
+ string_of_int(pos_longest_run + length_longest_run - 1) + "]"
......@@ -204,12 +265,53 @@ void KmerRepresentativeComputer::compute() {
}
}
coverage = (float) length_longest_run / coverage_reference_length;
coverage_info = string_of_int(length_longest_run) + " bp"
+ " (" + string_of_int(100 * coverage) + "% of " + fixed_string_of_float(coverage_reference_length, 1) + " bp)";
representative.label += " - " + coverage_info ;
representative.label += " - " + coverage_info;
}
delete index;
for (size_t i = 0; i < nb_seeds; i++)
delete index[i];
}
bool KmerRepresentativeComputer::tryToExtendRepresentative(const vector<Kmer> counts[],
string seeds[],
size_t nb_seeds,
size_t i,
bool *cover,
size_t &length_cover,
int direction) {
bool was_extended = false;
for (size_t current_seed = 0; current_seed < nb_seeds && ! was_extended; current_seed++) {
int pos_of_interest;
if (direction == 1) {
pos_of_interest = (i+1) - seeds[current_seed].size() + 1;
}
else
pos_of_interest = i - 1;
if (pos_of_interest < (int) counts[current_seed].size() && pos_of_interest >= 0) {
if (isSufficienlyExpressed(counts[current_seed][pos_of_interest].count, sequences.size())) {
i += direction;
if (nb_seeds > 1) {
size_t seed_length = seeds[current_seed].size();
for (size_t pos = 0; pos < seed_length; pos++) {
bool previous_cover = cover[pos_of_interest+pos];
cover[pos_of_interest+pos] |= (seeds[current_seed][pos] == '#');
if (! previous_cover && cover[pos_of_interest+pos])
length_cover++;
}
} else {
int pos_modif = (direction == -1) ? pos_of_interest : i;
if (!cover[pos_modif])
length_cover++;
cover[pos_modif] = true;
}
was_extended = true;
}
}
}
return was_extended;
}
......@@ -4,11 +4,15 @@
#include <cassert>
#include <list>
#include "fasta.h"
#include "kmerstore.h"
using namespace std;
#define DEFAULT_STABILITY_LIMIT 30
#define THRESHOLD_BAD_COVERAGE .5 /* Threshold below which the representatie
coverage is considered bad */
/**