From 003789c0b22c681f8255f50723dbcd4a3563dea1 Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Wed, 8 Apr 2015 08:46:04 +0200 Subject: [PATCH] core/segment.{h,cpp}, core/windowExtractor.{cpp.h}: e-value check depends on the number of reads --- algo/core/segment.cpp | 15 ++++++++------- algo/core/segment.h | 6 +++--- algo/core/windowExtractor.cpp | 4 ++-- algo/core/windowExtractor.h | 3 ++- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/algo/core/segment.cpp b/algo/core/segment.cpp index 3b5fd5c43..7c84cc579 100644 --- a/algo/core/segment.cpp +++ b/algo/core/segment.cpp @@ -326,7 +326,8 @@ KmerSegmenter::~KmerSegmenter() { delete kaa; } -KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, double threshold) +KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, + double threshold, int nb_reads_for_evalue) { int best_score_seg = 0 ; // Best score, segmented sequences int best_score_unseg = 0 ; // Best score, unsegmented sequences @@ -396,9 +397,9 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin if (threshold_nb_expected > NO_LIMIT_VALUE) if (the_kseg->isSegmented()) { // the_kseg->evalue also depends on the number of germlines from the *Multi*KmerSegmenter - the_kseg->evalue = getNbExpected(); + the_kseg->evalue = getNbExpected(nb_reads_for_evalue); - pair p = getNbExpectedLeftRight(); + pair p = getNbExpectedLeftRight(nb_reads_for_evalue); the_kseg->evalue_left = p.first; the_kseg->evalue_right = p.second; @@ -414,14 +415,14 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin } } -double KmerMultiSegmenter::getNbExpected() const { - pair p = getNbExpectedLeftRight(); +double KmerMultiSegmenter::getNbExpected(int multiplier) const { + pair p = getNbExpectedLeftRight(multiplier); return (p.first + p.second); } -pair KmerMultiSegmenter::getNbExpectedLeftRight() const { +pair KmerMultiSegmenter::getNbExpectedLeftRight(int multiplier) const { pair p = the_kseg->getKmerAffectAnalyser()->getLeftRightProbabilityAtLeastOrAbove(); - return pair(p.first * multi_germline->germlines.size(), p.second * multi_germline->germlines.size()); + return pair(p.first * multiplier * multi_germline->germlines.size(), p.second * multiplier * multi_germline->germlines.size()); } KmerMultiSegmenter::~KmerMultiSegmenter() { diff --git a/algo/core/segment.h b/algo/core/segment.h index ff5a4ca27..8e7618869 100644 --- a/algo/core/segment.h +++ b/algo/core/segment.h @@ -207,18 +207,18 @@ class KmerMultiSegmenter * @param threshold: threshold of randomly expected segmentation */ KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, - double threshold = THRESHOLD_NB_EXPECTED); + double threshold = THRESHOLD_NB_EXPECTED, int nb_reads_for_evalue = 1); /** * @return expected number of Segmenter that would have yield the maximum score by chance */ - double getNbExpected() const; + double getNbExpected(int multiplier) const; /** * @return expected number of Segmenter that would have yield the maximum score by chance * on the left part of the read and on the right part of the read respectively. */ - pair getNbExpectedLeftRight() const; + pair getNbExpectedLeftRight(int multiplier) const; ~KmerMultiSegmenter(); diff --git a/algo/core/windowExtractor.cpp b/algo/core/windowExtractor.cpp index a955e11cf..92de05c1b 100644 --- a/algo/core/windowExtractor.cpp +++ b/algo/core/windowExtractor.cpp @@ -11,7 +11,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult size_t w, map &windows_labels, int stop_after, int only_nth_read, bool keep_unsegmented_as_clone, - double nb_expected) { + double nb_expected, int nb_reads_for_evalue) { init_stats(); WindowsStorage *windowsStorage = new WindowsStorage(windows_labels); @@ -38,7 +38,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult *out_affects << reads->getSequence(); } - KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected); + KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected, nb_reads_for_evalue); KmerSegmenter *seg = kmseg.the_kseg ; int read_length = seg->getSequence().sequence.length(); diff --git a/algo/core/windowExtractor.h b/algo/core/windowExtractor.h index c65b73c3b..6c834b583 100644 --- a/algo/core/windowExtractor.h +++ b/algo/core/windowExtractor.h @@ -40,6 +40,7 @@ class WindowExtractor { * @param w: length of the window * @param windows_labels: Windows that must be kept and registered as such. * @param nb_expected: maximal e-value of the segmentation + * @param nb_reads_for_evalue: number of reads, used for e-value computation. Can be approximate or faked. * @return a pointer to a WindowsStorage that will contain all the windows. * It is a pointer so that the WindowsStorage is not duplicated. * @post Statistics on segmentation will be provided through the getSegmentationStats() methods @@ -49,7 +50,7 @@ class WindowExtractor { size_t w, map &windows_labels, int stop_after=-1, int only_nth_reads=1, bool keep_unsegmented_as_clone=false, - double nb_expected = THRESHOLD_NB_EXPECTED); + double nb_expected = THRESHOLD_NB_EXPECTED, int nb_reads_for_evalue = 1); /** * @return the average length of sequences whose segmentation has been classified as seg -- GitLab