Commit 14602c22 authored by Mikaël Salson's avatar Mikaël Salson Committed by Mathieu Giraud

algo/{vidjil, core/{windows, windowExtractor}}: Add sequences of interest

DNA sequences that should be found in the window (or windows that should
be found into it) can be kept whatever their concentration is. We
therefore extend what is already done for labelled windows.

Now the sequences do not need to be the length of the window to be found
but they can be of any length. However we require that the sequence of
interest contains the window (or is contained in it).
parent 71902aa5
......@@ -67,7 +67,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
if (seg->isSegmented()) {
// Filter
if (!only_labeled_windows || (windows_labels.find(junc) != windows_labels.end()))
if (!only_labeled_windows || windowsStorage->isInterestingJunction(junc))
// Store the window
windowsStorage->add(junc, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
......
......@@ -78,7 +78,7 @@ KmerRepresentativeComputer WindowsStorage::getRepresentativeComputer(junction wi
= getSample(window,nb_sampled, nb_buckets);
KmerRepresentativeComputer repComp(auditioned_sequences, seed);
repComp.setRevcomp(true);
repComp.setMinCover((windows_labels.find(window) == windows_labels.end()) ? min_cover : 1);
repComp.setMinCover((! isInterestingJunction(window)) ? min_cover : 1);
repComp.setPercentCoverage(percent_cover);
repComp.setRequiredSequence(window);
repComp.setCoverageReferenceLength(getAverageLength(window));
......@@ -126,6 +126,23 @@ bool WindowsStorage::hasWindow(junction window) {
return (result != germline_by_window.end());
}
bool WindowsStorage::isInterestingJunction(junction window) {
bool found = false;
for (auto it: windows_labels) {
string sequence_of_interest = it.first;
if (sequence_of_interest.size() < window.size()) {
found = window.find(sequence_of_interest) != string::npos
|| window.find(revcomp(sequence_of_interest)) != string::npos;
} else {
found = sequence_of_interest.find(window) != string::npos
|| sequence_of_interest.find(revcomp(window)) != string::npos;
}
if (found)
return true;
}
return false;
}
size_t WindowsStorage::size() {
return seqs_by_window.size();
}
......@@ -170,7 +187,7 @@ pair <int, size_t> WindowsStorage::keepInterestingWindows(size_t min_reads_windo
// Is it not supported by enough reads?
if (!(nb_reads_this_window >= min_reads_window)
// Is it not a labelled junction?
&& (windows_labels.find(junc) == windows_labels.end()))
&& ! isInterestingJunction(junc))
{
map <junction, BinReadStorage >::iterator toBeDeleted = it;
it++;
......
......@@ -121,6 +121,11 @@ class WindowsStorage {
*/
bool hasWindow(junction window);
/**
* @return true iff the window is contained (or contains) a sequence of interest
*/
bool isInterestingJunction(junction window);
/**
* @return a list of windows together with the number of reads they appear in.
* @pre sort() must have been called at least once and must have been called
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment