Commit a9257568 authored by Mikaël Salson's avatar Mikaël Salson

Windows: Setters and getters for the maximal number of reads stored.

By default this number is set to max_auditionned, since we need at most that many sequences
(unless we want to see all the sequences).
parent 6b4bd2a4
#include "windowExtractor.h"
#include "segment.h"
// Default constructor: the three output pointers start as NULL.
WindowExtractor::WindowExtractor(): out_segmented(NULL), out_unsegmented(NULL), out_affects(NULL){}
/**
 * Default constructor: the three output pointers start as NULL and the
 * per-window read limit starts at ~0 converted to size_t (all bits set).
 */
WindowExtractor::WindowExtractor()
  : out_segmented(NULL),
    out_unsegmented(NULL),
    out_affects(NULL),
    max_reads_per_window(static_cast<size_t>(~0))
{
}
WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *multigermline,
size_t w,
......@@ -10,7 +10,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult
init_stats();
WindowsStorage *windowsStorage = new WindowsStorage(windows_labels);
windowsStorage->setMaximalNbReadsPerWindow(max_reads_per_window);
for (list<Germline*>::const_iterator it = multigermline->germlines.begin(); it != multigermline->germlines.end(); ++it)
{
Germline *germline = *it ;
......@@ -82,6 +82,10 @@ float WindowExtractor::getAverageSegmentationLength(SEGMENTED seg) {
return stats[seg].getAverage();
}
size_t WindowExtractor::getMaximalNbReadsPerWindow() {
return max_reads_per_window;
}
size_t WindowExtractor::getNbReads() {
return nb_reads;
}
......@@ -94,6 +98,10 @@ size_t WindowExtractor::getNbReadsGermline(string germline) {
return nb_reads_germline[germline];
}
void WindowExtractor::setMaximalNbReadsPerWindow(size_t max_reads) {
max_reads_per_window = max_reads;
}
/**
 * Stores the stream pointer used for the segmented output.
 * @param out: the stream pointer to remember (stored as given, not copied).
 */
void WindowExtractor::setSegmentedOutput(ostream *out) {
  this->out_segmented = out;
}
......
......@@ -26,7 +26,7 @@ class WindowExtractor {
ostream *out_affects;
Stats stats[STATS_SIZE];
size_t max_reads_per_window;
public:
WindowExtractor();
......@@ -56,6 +56,11 @@ class WindowExtractor {
*/
float getAverageSegmentationLength(SEGMENTED seg);
/**
* @return the maximal number of reads per window that extract() passes on
* to the WindowsStorage it builds.
* cf. WindowsStorage::getMaximalNbReadsPerWindow()
*/
size_t getMaximalNbReadsPerWindow();
/**
* @return Total number of processed reads by the previous call to extract()
*/
......@@ -74,7 +79,12 @@ class WindowExtractor {
* @pre extract() must have been launched.
*/
size_t getNbReadsGermline(string germline_code);
/**
* Sets the maximal number of reads per window that extract() passes on to
* the WindowsStorage it builds. The value is read when extract() creates
* the storage, so it only affects subsequent calls to extract().
* @param max_reads: maximal number of reads stored for a window.
* cf. WindowsStorage::setMaximalNbReadsPerWindow()
*/
void setMaximalNbReadsPerWindow(size_t max_reads);
/**
* Defines the output stream where the segmented sequences will be output.
* Otherwise no output will be given.
......
......@@ -29,6 +29,10 @@ Germline *WindowsStorage::getGermline(junction window) {
return result->second;
}
size_t WindowsStorage::getMaximalNbReadsPerWindow() {
return max_reads_per_window;
}
JsonList WindowsStorage::statusToJson(junction window) {
JsonList result;
......@@ -57,6 +61,7 @@ Sequence WindowsStorage::getRepresentative(junction window,
float percent_cover,
size_t nb_sampled,
size_t nb_buckets) {
assert(! hasLimitForReadsPerWindow() || nb_sampled <= getMaximalNbReadsPerWindow());
list<Sequence> auditioned_sequences
= getSample(window,nb_sampled, nb_buckets);
KmerRepresentativeComputer repComp(auditioned_sequences, seed);
......@@ -97,6 +102,10 @@ set<Germline *> WindowsStorage::getTopGermlines(size_t top, size_t min_reads) {
return top_germlines;
}
/**
 * @return true iff the stored limit differs from the "no limit" value,
 * i.e. ~0 converted to size_t (all bits set).
 */
bool WindowsStorage::hasLimitForReadsPerWindow() {
  const size_t no_limit = static_cast<size_t>(~0);
  return !(max_reads_per_window == no_limit);
}
bool WindowsStorage::hasWindow(junction window) {
map<junction, Germline *>::iterator result = germline_by_window.find(window);
return (result != germline_by_window.end());
......@@ -171,6 +180,10 @@ pair <int, size_t> WindowsStorage::keepInterestingWindows(size_t min_reads_windo
return make_pair(removes, nb_reads);
}
void WindowsStorage::setMaximalNbReadsPerWindow(size_t max_reads){
max_reads_per_window = max_reads;
}
void WindowsStorage::sort() {
sort_all_windows.clear();
for (map <junction, list<Sequence> >::const_iterator it = seqs_by_window.begin();
......
......@@ -32,6 +32,7 @@ class WindowsStorage {
map<string, string> windows_labels;
list<pair <junction, size_t> > sort_all_windows;
map<junction, int> id_by_window;
size_t max_reads_per_window;
public:
/**
* Build an empty storage, with the labels that correspond to specific
......@@ -48,6 +49,11 @@ class WindowsStorage {
JsonList statusToJson(junction window);
/**
* @return the maximal number of reads that can be stored for a window.
* The value (size_t)~0 means that no limit has been set
* (see hasLimitForReadsPerWindow()).
*/
size_t getMaximalNbReadsPerWindow();
/**
* @pre hasWindow(window)
* @return the total number of reads supporting a window.
......@@ -68,6 +74,7 @@ class WindowsStorage {
* Sampling sequences allow to have a more time efficient
* algorithm.
* @param nb_buckets: Number of buckets for sampling (see SequenceSampler)
* @pre nb_sampled <= getMaximalNbReadsPerWindow() if hasLimitForReadsPerWindow()
* @return the representative sequence of a window or NULL_SEQUENCE if we
* cannot find any representative
*/
......@@ -88,6 +95,12 @@ class WindowsStorage {
*/
void fillStatsClones();
/**
* @return true iff a limit has been set for the maximal number of reads per
* window, i.e. iff getMaximalNbReadsPerWindow() differs from (size_t)~0.
*/
bool hasLimitForReadsPerWindow();
/**
* @return true iff the window has been reported.
*/
......@@ -115,6 +128,21 @@ class WindowsStorage {
*/
void setIdToAll();
/**
* Sets the maximal number of reads actually stored for each window.
* The limit only applies to reads added from now on, not to reads that have
* already been added. In other words, if for some window w,
* getReads(w).size() > max_reads, no read is removed, but no further read
* is added for that window. getNbReads() still returns the real number of
* reads for a given window, not the number of reads stored for that window.
* Once the limit is reached, better reads are preferred over worse ones:
* stored reads may be replaced so that the list contains the best ones.
* @param max_reads: Maximal number of reads stored for a window.
* ~0 for no limit.
*/
void setMaximalNbReadsPerWindow(size_t max_reads);
/**
* Add a new window with its sequence.
* @param window: the window to add
......
......@@ -903,6 +903,8 @@ int main (int argc, char **argv)
ofstream *out_affects = NULL;
WindowExtractor we;
if (! output_sequences_by_cluster)
we.setMaximalNbReadsPerWindow(max_auditionned);
if (output_segmented) {
string f_segmented = out_dir + f_basename + SEGMENTED_FILENAME ;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment