Commit 003789c0 authored by Mathieu Giraud's avatar Mathieu Giraud

core/segment.{h,cpp}, core/windowExtractor.{cpp.h}: e-value check depends on the number of reads

parent 1e0503be
...@@ -326,7 +326,8 @@ KmerSegmenter::~KmerSegmenter() { ...@@ -326,7 +326,8 @@ KmerSegmenter::~KmerSegmenter() {
delete kaa; delete kaa;
} }
KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, double threshold) KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented,
double threshold, int nb_reads_for_evalue)
{ {
int best_score_seg = 0 ; // Best score, segmented sequences int best_score_seg = 0 ; // Best score, segmented sequences
int best_score_unseg = 0 ; // Best score, unsegmented sequences int best_score_unseg = 0 ; // Best score, unsegmented sequences
...@@ -396,9 +397,9 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin ...@@ -396,9 +397,9 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin
if (threshold_nb_expected > NO_LIMIT_VALUE) if (threshold_nb_expected > NO_LIMIT_VALUE)
if (the_kseg->isSegmented()) { if (the_kseg->isSegmented()) {
// the_kseg->evalue also depends on the number of germlines from the *Multi*KmerSegmenter // the_kseg->evalue also depends on the number of germlines from the *Multi*KmerSegmenter
the_kseg->evalue = getNbExpected(); the_kseg->evalue = getNbExpected(nb_reads_for_evalue);
pair <double, double> p = getNbExpectedLeftRight(); pair <double, double> p = getNbExpectedLeftRight(nb_reads_for_evalue);
the_kseg->evalue_left = p.first; the_kseg->evalue_left = p.first;
the_kseg->evalue_right = p.second; the_kseg->evalue_right = p.second;
...@@ -414,14 +415,14 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin ...@@ -414,14 +415,14 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin
} }
} }
double KmerMultiSegmenter::getNbExpected() const { double KmerMultiSegmenter::getNbExpected(int multiplier) const {
pair <double, double> p = getNbExpectedLeftRight(); pair <double, double> p = getNbExpectedLeftRight(multiplier);
return (p.first + p.second); return (p.first + p.second);
} }
pair<double,double> KmerMultiSegmenter::getNbExpectedLeftRight() const { pair<double,double> KmerMultiSegmenter::getNbExpectedLeftRight(int multiplier) const {
pair <double, double> p = the_kseg->getKmerAffectAnalyser()->getLeftRightProbabilityAtLeastOrAbove(); pair <double, double> p = the_kseg->getKmerAffectAnalyser()->getLeftRightProbabilityAtLeastOrAbove();
return pair<double, double>(p.first * multi_germline->germlines.size(), p.second * multi_germline->germlines.size()); return pair<double, double>(p.first * multiplier * multi_germline->germlines.size(), p.second * multiplier * multi_germline->germlines.size());
} }
KmerMultiSegmenter::~KmerMultiSegmenter() { KmerMultiSegmenter::~KmerMultiSegmenter() {
......
...@@ -207,18 +207,18 @@ class KmerMultiSegmenter ...@@ -207,18 +207,18 @@ class KmerMultiSegmenter
* @param threshold: threshold of randomly expected segmentation * @param threshold: threshold of randomly expected segmentation
*/ */
KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented,
double threshold = THRESHOLD_NB_EXPECTED); double threshold = THRESHOLD_NB_EXPECTED, int nb_reads_for_evalue = 1);
/** /**
* @return expected number of Segmenter that would have yield the maximum score by chance * @return expected number of Segmenter that would have yield the maximum score by chance
*/ */
double getNbExpected() const; double getNbExpected(int multiplier) const;
/** /**
* @return expected number of Segmenter that would have yield the maximum score by chance * @return expected number of Segmenter that would have yield the maximum score by chance
* on the left part of the read and on the right part of the read respectively. * on the left part of the read and on the right part of the read respectively.
*/ */
pair<double,double> getNbExpectedLeftRight() const; pair<double,double> getNbExpectedLeftRight(int multiplier) const;
~KmerMultiSegmenter(); ~KmerMultiSegmenter();
......
...@@ -11,7 +11,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult ...@@ -11,7 +11,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult
size_t w, size_t w,
map<string, string> &windows_labels, map<string, string> &windows_labels,
int stop_after, int only_nth_read, bool keep_unsegmented_as_clone, int stop_after, int only_nth_read, bool keep_unsegmented_as_clone,
double nb_expected) { double nb_expected, int nb_reads_for_evalue) {
init_stats(); init_stats();
WindowsStorage *windowsStorage = new WindowsStorage(windows_labels); WindowsStorage *windowsStorage = new WindowsStorage(windows_labels);
...@@ -38,7 +38,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult ...@@ -38,7 +38,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult
*out_affects << reads->getSequence(); *out_affects << reads->getSequence();
} }
KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected); KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected, nb_reads_for_evalue);
KmerSegmenter *seg = kmseg.the_kseg ; KmerSegmenter *seg = kmseg.the_kseg ;
int read_length = seg->getSequence().sequence.length(); int read_length = seg->getSequence().sequence.length();
......
...@@ -40,6 +40,7 @@ class WindowExtractor { ...@@ -40,6 +40,7 @@ class WindowExtractor {
* @param w: length of the window * @param w: length of the window
* @param windows_labels: Windows that must be kept and registered as such. * @param windows_labels: Windows that must be kept and registered as such.
* @param nb_expected: maximal e-value of the segmentation * @param nb_expected: maximal e-value of the segmentation
* @param nb_reads_for_evalue: number of reads, used for e-value computation. Can be approximate or faked.
* @return a pointer to a WindowsStorage that will contain all the windows. * @return a pointer to a WindowsStorage that will contain all the windows.
* It is a pointer so that the WindowsStorage is not duplicated. * It is a pointer so that the WindowsStorage is not duplicated.
* @post Statistics on segmentation will be provided through the getSegmentationStats() methods * @post Statistics on segmentation will be provided through the getSegmentationStats() methods
...@@ -49,7 +50,7 @@ class WindowExtractor { ...@@ -49,7 +50,7 @@ class WindowExtractor {
size_t w, size_t w,
map<string, string> &windows_labels, map<string, string> &windows_labels,
int stop_after=-1, int only_nth_reads=1, bool keep_unsegmented_as_clone=false, int stop_after=-1, int only_nth_reads=1, bool keep_unsegmented_as_clone=false,
double nb_expected = THRESHOLD_NB_EXPECTED); double nb_expected = THRESHOLD_NB_EXPECTED, int nb_reads_for_evalue = 1);
/** /**
* @return the average length of sequences whose segmentation has been classified as seg * @return the average length of sequences whose segmentation has been classified as seg
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment