diff --git a/algo/core/segment.cpp b/algo/core/segment.cpp index 3b5fd5c435a1270aaf8a2b6b23dd5d4355096c81..7c84cc57942d236215dee3e3d67bff64f1d44232 100644 --- a/algo/core/segment.cpp +++ b/algo/core/segment.cpp @@ -326,7 +326,8 @@ KmerSegmenter::~KmerSegmenter() { delete kaa; } -KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, double threshold) +KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, + double threshold, int nb_reads_for_evalue) { int best_score_seg = 0 ; // Best score, segmented sequences int best_score_unseg = 0 ; // Best score, unsegmented sequences @@ -396,9 +397,9 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin if (threshold_nb_expected > NO_LIMIT_VALUE) if (the_kseg->isSegmented()) { // the_kseg->evalue also depends on the number of germlines from the *Multi*KmerSegmenter - the_kseg->evalue = getNbExpected(); + the_kseg->evalue = getNbExpected(nb_reads_for_evalue); - pair p = getNbExpectedLeftRight(); + pair p = getNbExpectedLeftRight(nb_reads_for_evalue); the_kseg->evalue_left = p.first; the_kseg->evalue_right = p.second; @@ -414,14 +415,14 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin } } -double KmerMultiSegmenter::getNbExpected() const { - pair p = getNbExpectedLeftRight(); +double KmerMultiSegmenter::getNbExpected(int multiplier) const { + pair p = getNbExpectedLeftRight(multiplier); return (p.first + p.second); } -pair KmerMultiSegmenter::getNbExpectedLeftRight() const { +pair KmerMultiSegmenter::getNbExpectedLeftRight(int multiplier) const { pair p = the_kseg->getKmerAffectAnalyser()->getLeftRightProbabilityAtLeastOrAbove(); - return pair(p.first * multi_germline->germlines.size(), p.second * multi_germline->germlines.size()); + return pair(p.first * multiplier * multi_germline->germlines.size(), p.second * multiplier * multi_germline->germlines.size()); } KmerMultiSegmenter::~KmerMultiSegmenter() { diff --git a/algo/core/segment.h b/algo/core/segment.h index ff5a4ca27986bd35150c228d793c8c4ab7932927..8e7618869026da984fe47712011f35ce6ef3bb2b 100644 --- a/algo/core/segment.h +++ b/algo/core/segment.h @@ -207,18 +207,18 @@ class KmerMultiSegmenter * @param threshold: threshold of randomly expected segmentation */ KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, - double threshold = THRESHOLD_NB_EXPECTED); + double threshold = THRESHOLD_NB_EXPECTED, int nb_reads_for_evalue = 1); /** * @return expected number of Segmenter that would have yield the maximum score by chance */ - double getNbExpected() const; + double getNbExpected(int multiplier) const; /** * @return expected number of Segmenter that would have yield the maximum score by chance * on the left part of the read and on the right part of the read respectively. */ - pair getNbExpectedLeftRight() const; + pair getNbExpectedLeftRight(int multiplier) const; ~KmerMultiSegmenter(); diff --git a/algo/core/windowExtractor.cpp b/algo/core/windowExtractor.cpp index a955e11cf8e7850e93aee7e8c26856a692ee7fe2..92de05c1b25b1f3b1ead7287d6431dcec15cdca7 100644 --- a/algo/core/windowExtractor.cpp +++ b/algo/core/windowExtractor.cpp @@ -11,7 +11,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult size_t w, map &windows_labels, int stop_after, int only_nth_read, bool keep_unsegmented_as_clone, - double nb_expected) { + double nb_expected, int nb_reads_for_evalue) { init_stats(); WindowsStorage *windowsStorage = new WindowsStorage(windows_labels); @@ -38,7 +38,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult *out_affects << reads->getSequence(); } - KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected); + KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected, nb_reads_for_evalue); KmerSegmenter *seg = kmseg.the_kseg ; int read_length = seg->getSequence().sequence.length(); diff --git a/algo/core/windowExtractor.h b/algo/core/windowExtractor.h index c65b73c3b867578d28b86bed1bea618614dba78e..6c834b5831c769645c2cef066ef6b1c0e57eb0f8 100644 --- a/algo/core/windowExtractor.h +++ b/algo/core/windowExtractor.h @@ -40,6 +40,7 @@ class WindowExtractor { * @param w: length of the window * @param windows_labels: Windows that must be kept and registered as such. * @param nb_expected: maximal e-value of the segmentation + * @param nb_reads_for_evalue: number of reads, used for e-value computation. Can be approximate or faked. * @return a pointer to a WindowsStorage that will contain all the windows. * It is a pointer so that the WindowsStorage is not duplicated. * @post Statistics on segmentation will be provided through the getSegmentationStats() methods @@ -49,7 +50,7 @@ class WindowExtractor { size_t w, map &windows_labels, int stop_after=-1, int only_nth_reads=1, bool keep_unsegmented_as_clone=false, - double nb_expected = THRESHOLD_NB_EXPECTED); + double nb_expected = THRESHOLD_NB_EXPECTED, int nb_reads_for_evalue = 1); /** * @return the average length of sequences whose segmentation has been classified as seg