Commit 003789c0 authored by Mathieu Giraud's avatar Mathieu Giraud

core/segment.{h,cpp}, core/windowExtractor.{cpp.h}: e-value check depends on the number of reads

parent 1e0503be
......@@ -326,7 +326,8 @@ KmerSegmenter::~KmerSegmenter() {
delete kaa;
}
KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented, double threshold)
KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented,
double threshold, int nb_reads_for_evalue)
{
int best_score_seg = 0 ; // Best score, segmented sequences
int best_score_unseg = 0 ; // Best score, unsegmented sequences
......@@ -396,9 +397,9 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin
if (threshold_nb_expected > NO_LIMIT_VALUE)
if (the_kseg->isSegmented()) {
// the_kseg->evalue also depends on the number of germlines from the *Multi*KmerSegmenter
the_kseg->evalue = getNbExpected();
the_kseg->evalue = getNbExpected(nb_reads_for_evalue);
pair <double, double> p = getNbExpectedLeftRight();
pair <double, double> p = getNbExpectedLeftRight(nb_reads_for_evalue);
the_kseg->evalue_left = p.first;
the_kseg->evalue_right = p.second;
......@@ -414,14 +415,14 @@ KmerMultiSegmenter::KmerMultiSegmenter(Sequence seq, MultiGermline *multigermlin
}
}
double KmerMultiSegmenter::getNbExpected() const {
pair <double, double> p = getNbExpectedLeftRight();
double KmerMultiSegmenter::getNbExpected(int multiplier) const {
pair <double, double> p = getNbExpectedLeftRight(multiplier);
return (p.first + p.second);
}
pair<double,double> KmerMultiSegmenter::getNbExpectedLeftRight() const {
pair<double,double> KmerMultiSegmenter::getNbExpectedLeftRight(int multiplier) const {
pair <double, double> p = the_kseg->getKmerAffectAnalyser()->getLeftRightProbabilityAtLeastOrAbove();
return pair<double, double>(p.first * multi_germline->germlines.size(), p.second * multi_germline->germlines.size());
return pair<double, double>(p.first * multiplier * multi_germline->germlines.size(), p.second * multiplier * multi_germline->germlines.size());
}
KmerMultiSegmenter::~KmerMultiSegmenter() {
......
......@@ -207,18 +207,18 @@ class KmerMultiSegmenter
* @param threshold: threshold of randomly expected segmentation
*/
KmerMultiSegmenter(Sequence seq, MultiGermline *multigermline, ostream *out_unsegmented,
double threshold = THRESHOLD_NB_EXPECTED);
double threshold = THRESHOLD_NB_EXPECTED, int nb_reads_for_evalue = 1);
/**
* @return expected number of Segmenter that would have yield the maximum score by chance
*/
double getNbExpected() const;
double getNbExpected(int multiplier) const;
/**
* @return expected number of Segmenter that would have yield the maximum score by chance
* on the left part of the read and on the right part of the read respectively.
*/
pair<double,double> getNbExpectedLeftRight() const;
pair<double,double> getNbExpectedLeftRight(int multiplier) const;
~KmerMultiSegmenter();
......
......@@ -11,7 +11,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult
size_t w,
map<string, string> &windows_labels,
int stop_after, int only_nth_read, bool keep_unsegmented_as_clone,
double nb_expected) {
double nb_expected, int nb_reads_for_evalue) {
init_stats();
WindowsStorage *windowsStorage = new WindowsStorage(windows_labels);
......@@ -38,7 +38,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult
*out_affects << reads->getSequence();
}
KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected);
KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected, nb_reads_for_evalue);
KmerSegmenter *seg = kmseg.the_kseg ;
int read_length = seg->getSequence().sequence.length();
......
......@@ -40,6 +40,7 @@ class WindowExtractor {
* @param w: length of the window
* @param windows_labels: Windows that must be kept and registered as such.
* @param nb_expected: maximal e-value of the segmentation
* @param nb_reads_for_evalue: number of reads, used for e-value computation. Can be approximate or faked.
* @return a pointer to a WindowsStorage that will contain all the windows.
* It is a pointer so that the WindowsStorage is not duplicated.
* @post Statistics on segmentation will be provided through the getSegmentationStats() methods
......@@ -49,7 +50,7 @@ class WindowExtractor {
size_t w,
map<string, string> &windows_labels,
int stop_after=-1, int only_nth_reads=1, bool keep_unsegmented_as_clone=false,
double nb_expected = THRESHOLD_NB_EXPECTED);
double nb_expected = THRESHOLD_NB_EXPECTED, int nb_reads_for_evalue = 1);
/**
* @return the average length of sequences whose segmentation has been classified as seg
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment