Commit 1c48a2f8 authored by Mikaël Salson's avatar Mikaël Salson

windowExtractor.{h,cpp}: Stats are centralized in WindowExtractor.

Some statistics were stored in Germline while those statistics were filled in
WindowExtractor and WindowsStorage. Now they are stored and filled in
WindowExtractor.

We take advantage of this change to use BinReadStorage. It will give us more
detailed statistics on read lengths and clones/reads (the information is
collected, but it is not output yet).

In addition, the function fillStatsClones is moved from WindowsStorage to WindowExtractor.
parent 539f5527
......@@ -21,9 +21,6 @@ void Germline::init(string _code, char _shortcut,
delta_min = _delta_min ;
delta_max = _delta_max ;
stats_reads.setLabel(code);
stats_clones.setLabel("");
}
......
......@@ -80,9 +80,6 @@ class Germline {
int delta_min;
int delta_max;
Stats stats_reads;
Stats stats_clones;
};
......
......@@ -6,6 +6,13 @@
#define PROGRESS_LINE 40
/*
 * Build an extractor working on the given MultiGermline.
 * The three output streams start unset (NULL) and max_reads_per_window is
 * initialised with ~0. For every germline of the MultiGermline, one entry is
 * created in stats_reads (labelled with the germline code) and one in
 * stats_clones, each initialised through BinReadStorage::init().
 */
WindowExtractor::WindowExtractor(MultiGermline *multigermline)
  : out_segmented(NULL),
    out_unsegmented(NULL),
    out_affects(NULL),
    max_reads_per_window(~0),
    multigermline(multigermline)
{
  typedef list<Germline*>::const_iterator GermlineIterator;

  for (GermlineIterator current = multigermline->germlines.begin();
       current != multigermline->germlines.end();
       ++current)
    {
      Germline *entry = *current;

      // Statistics on reads, keyed by germline code
      BinReadStorage &reads_of_germline = stats_reads[entry->code];
      reads_of_germline.init(NB_BINS, MAX_VALUE_BINS, NULL, true);
      reads_of_germline.setLabel(entry->code);

      // Statistics on clones, keyed by germline code (no label is set)
      BinReadStorage &clones_of_germline = stats_clones[entry->code];
      clones_of_germline.init(NB_BINS_CLONES, MAX_VALUE_BINS_CLONES, NULL, true);
    }
}
WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
......@@ -17,11 +24,6 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
WindowsStorage *windowsStorage = new WindowsStorage(windows_labels);
windowsStorage->setMaximalNbReadsPerWindow(max_reads_per_window);
for (list<Germline*>::const_iterator it = multigermline->germlines.begin(); it != multigermline->germlines.end(); ++it)
{
Germline *germline = *it ;
nb_reads_germline[germline->code] = 0;
}
int nb_reads_all = 0;
unsigned long long int bp_total = 0;
......@@ -66,9 +68,8 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
windowsStorage->add(junc, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
// Update stats
seg->segmented_germline->stats_reads.insert(read_length);
stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ;
nb_reads_germline[seg->system]++;
stats_reads[seg->system].addScore(read_length);
if (out_segmented) {
*out_segmented << *seg ; // KmerSegmenter output (V/N/J)
......@@ -107,6 +108,8 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
cout << endl ;
fillStatsClones(windowsStorage);
return windowsStorage;
}
......@@ -127,7 +130,7 @@ size_t WindowExtractor::getNbSegmented(SEGMENTED seg) {
}
/*
 * @param germline: the code of a germline
 * @return the number of scores recorded in the stats_reads entry of that
 *         germline (one score is added per read in extract()).
 * Note: the lookup uses map::operator[], so asking for a code that has no
 * entry yet creates a default-constructed one.
 */
size_t WindowExtractor::getNbReadsGermline(string germline) {
  // Single source of truth: the per-germline BinReadStorage. The former
  // return on nb_reads_germline made this statement unreachable.
  return stats_reads[germline].getNbScores();
}
void WindowExtractor::setMaximalNbReadsPerWindow(size_t max_reads) {
......@@ -146,6 +149,20 @@ void WindowExtractor::setAffectsOutput(ostream *out) {
out_affects = out;
}
/*
 * Walk through every window of the storage and add the value returned by
 * getNbInserted() for that window to the stats_clones entry of the window's
 * germline (as given by storage->getGermline()).
 * @param storage: the windows to take the read counts from
 */
void WindowExtractor::fillStatsClones(WindowsStorage *storage)
{
  typedef map<junction, BinReadStorage>::iterator WindowIterator;

  WindowIterator window = storage->begin();
  while (window != storage->end())
    {
      junction key = window->first;
      int reads_in_window = window->second.getNbInserted();

      Germline *owner = storage->getGermline(key);
      stats_clones[owner->code].addScore(reads_in_window);

      ++window;
    }
}
void WindowExtractor::init_stats() {
for (int i = 0; i < STATS_SIZE; i++) {
stats[i].label = segmented_mesg[i];
......
......@@ -9,6 +9,10 @@
#include "kmerstore.h"
#include "kmeraffect.h"
#include "windows.h"
#include "read_storage.h"
// Values passed to BinReadStorage::init() for the per-germline clone
// statistics (stats_clones) in the WindowExtractor constructor.
// NOTE(review): presumably the number of bins and the largest binned value
// (reads per clone) -- confirm against read_storage.h
#define NB_BINS_CLONES 10
#define MAX_VALUE_BINS_CLONES 1000
using namespace std;
......@@ -19,7 +23,8 @@ using namespace std;
class WindowExtractor {
private:
size_t nb_reads;
map<string, size_t> nb_reads_germline;
map<string, BinReadStorage> stats_reads;
map<string, BinReadStorage> stats_clones;
ostream *out_segmented;
ostream *out_unsegmented;
......@@ -122,6 +127,11 @@ class WindowExtractor {
* Initialize the statistics (put 0 everywhere).
*/
void init_stats();
/*
 * Fill the per-germline clone statistics (stats_clones) of this extractor:
 * for each window of the storage, the number of reads inserted for that
 * window is added to the statistics of the window's germline.
 * @param storage: the windows whose read counts are recorded
 */
void fillStatsClones(WindowsStorage *storage);
};
#endif
......@@ -152,20 +152,6 @@ void WindowsStorage::add(junction window, Sequence sequence, int status, Germlin
germline_by_window[window] = germline;
}
void WindowsStorage::fillStatsClones()
{
for (map <junction, BinReadStorage >::iterator it = seqs_by_window.begin();
it != seqs_by_window.end();
it++)
{
junction junc = it->first;
int nb_reads = it->second.getNbInserted();
Germline *germline = germline_by_window[junc];
germline->stats_clones.insert(nb_reads);
}
}
pair <int, size_t> WindowsStorage::keepInterestingWindows(size_t min_reads_window) {
int removes = 0 ;
size_t nb_reads = 0 ;
......
......@@ -106,11 +106,6 @@ class WindowsStorage {
size_t nb_buckets=HISTOGRAM_SIZE_AUDITIONED);
/*
* Fill the stats_clone member of the different Germlines
*/
void fillStatsClones();
/**
* @return true iff a limit has been set for the maximal number of reads per
* window
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment