Commit aaf2e250 authored by Mikaël Salson's avatar Mikaël Salson

algo/core/segment: Add e-value parameter to align_against_collection

Allow it to assess the significance of an alignment.
parent a42f8c39
...@@ -845,7 +845,8 @@ bool comp_pair (pair<int,int> i,pair<int,int> j) ...@@ -845,7 +845,8 @@ bool comp_pair (pair<int,int> i,pair<int,int> j)
void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id, void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id,
bool reverse_ref, bool reverse_both, bool local, bool reverse_ref, bool reverse_both, bool local,
AlignBox *box, Cost segment_cost, bool banded_dp) AlignBox *box, Cost segment_cost, bool banded_dp,
double evalue_threshold)
{ {
int best_score = MINUS_INF ; int best_score = MINUS_INF ;
...@@ -921,7 +922,7 @@ void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id ...@@ -921,7 +922,7 @@ void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id
if (onlyBottomTriangle && best_score < score_with_limit_number_of_indels) { if (onlyBottomTriangle && best_score < score_with_limit_number_of_indels) {
// Too many indels/mismatches, let's do a full DP // Too many indels/mismatches, let's do a full DP
align_against_collection(read, rep, forbidden_rep_id, reverse_ref, reverse_both, align_against_collection(read, rep, forbidden_rep_id, reverse_ref, reverse_both,
local, box, segment_cost, false); local, box, segment_cost, false,evalue_threshold);
return; return;
} }
...@@ -1027,13 +1028,15 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c, ...@@ -1027,13 +1028,15 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,
sequence_or_rc = revcomp(sequence, reversed); // sequence, possibly reversed sequence_or_rc = revcomp(sequence, reversed); // sequence, possibly reversed
// the threshold is lowered by the number of independent tests made
double standardised_threshold_evalue = threshold / multiplier;
/* Read mapping */ /* Read mapping */
if (germline->seg_method == SEG_METHOD_ONE) if (germline->seg_method == SEG_METHOD_ONE)
{ {
align_against_collection(sequence_or_rc, germline->rep_4, NO_FORBIDDEN_ID, false, false, align_against_collection(sequence_or_rc, germline->rep_4, NO_FORBIDDEN_ID, false, false,
true, // local true, // local
box_D, segment_cost); box_D, segment_cost, false, standardised_threshold_evalue);
segmented = true ; segmented = true ;
because = reversed ? SEG_MINUS : SEG_PLUS ; because = reversed ? SEG_MINUS : SEG_PLUS ;
...@@ -1055,13 +1058,13 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c, ...@@ -1055,13 +1058,13 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,
FilterWithACAutomaton* f = germline->getFilter_5(); FilterWithACAutomaton* f = germline->getFilter_5();
BioReader filtered = f->filterBioReaderWithACAutomaton(sequence_or_rc, kmer_threshold); BioReader filtered = f->filterBioReaderWithACAutomaton(sequence_or_rc, kmer_threshold);
align_against_collection(sequence_or_rc, filtered, NO_FORBIDDEN_ID, reverse_V, reverse_V, false, align_against_collection(sequence_or_rc, filtered, NO_FORBIDDEN_ID, reverse_V, reverse_V, false,
box_V, segment_cost); box_V, segment_cost, false, standardised_threshold_evalue);
}else{ }else{
align_against_collection(sequence_or_rc, germline->rep_5, NO_FORBIDDEN_ID, reverse_V, reverse_V, false, align_against_collection(sequence_or_rc, germline->rep_5, NO_FORBIDDEN_ID, reverse_V, reverse_V, false,
box_V, segment_cost); box_V, segment_cost, false, standardised_threshold_evalue);
} }
align_against_collection(sequence_or_rc, germline->rep_3, NO_FORBIDDEN_ID, reverse_J, !reverse_J, false, align_against_collection(sequence_or_rc, germline->rep_3, NO_FORBIDDEN_ID, reverse_J, !reverse_J, false,
box_J, segment_cost); box_J, segment_cost, false, standardised_threshold_evalue);
// J was run with '!reverseJ', we copy the box informations from right to left // J was run with '!reverseJ', we copy the box informations from right to left
// Should this directly be handled in align_against_collection() ? // Should this directly be handled in align_against_collection() ?
box_J->start = box_J->end ; box_J->start = box_J->end ;
...@@ -1141,7 +1144,7 @@ bool FineSegmenter::FineSegmentD(Germline *germline, ...@@ -1141,7 +1144,7 @@ bool FineSegmenter::FineSegmentD(Germline *germline,
// Align // Align
align_against_collection(str, germline->rep_4, forbidden_id, false, false, true, align_against_collection(str, germline->rep_4, forbidden_id, false, false, true,
box_DD, segment_cost); box_DD, segment_cost, false, evalue_standardised_threshold_evalue);
box_DD->start += l ; box_DD->start += l ;
box_DD->end += l ; box_DD->end += l ;
......
...@@ -359,6 +359,7 @@ class FineSegmenter : public Segmenter ...@@ -359,6 +359,7 @@ class FineSegmenter : public Segmenter
* Build a fineSegmenter based on KmerSegmentation * Build a fineSegmenter based on KmerSegmentation
* @param seq: An object read from a FASTA/FASTQ file * @param seq: An object read from a FASTA/FASTQ file
* @param germline: germline used * @param germline: germline used
* @param threshold: threshold of randomly expected segmentation
* @param kmer_threshold: This threshold is used while filtering the V * @param kmer_threshold: This threshold is used while filtering the V
* BioReader in Germline. If this value is 0, every K-mer from getMultiResults * BioReader in Germline. If this value is 0, every K-mer from getMultiResults
* is used for the filtering. Otherwise if N > 0, the N best K-mers are used * is used for the filtering. Otherwise if N > 0, the N best K-mers are used
...@@ -394,9 +395,14 @@ class FineSegmenter : public Segmenter ...@@ -394,9 +395,14 @@ class FineSegmenter : public Segmenter
}; };
/**
* @param segment_cost: cost used
* @param banded_dp: Should we use banded dynamic programming?
* @param evalue_threshold: threshold for randomly expected segmentation (evalue)
*/
void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id, void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id,
bool reverse_ref, bool reverse_both, bool local, bool reverse_ref, bool reverse_both, bool local,
AlignBox *box, Cost segment_cost, bool banded_dp=true); AlignBox *box, Cost segment_cost, bool banded_dp=true,
double evalue_threshold=1.);
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment