Commit 56bcf19f authored by Mathieu Giraud
Browse files

core/windowExtractor.{cpp,h}, vidjil.cpp: option '-!', keep unsegmented reads as clones

With this debug option, every unsegmented read whose length is at least w is kept,
using its complete sequence as a fake window.
These reads can then be seen in the browser.
parent c9ce0c4f
......@@ -5,7 +5,8 @@ WindowExtractor::WindowExtractor(): out_segmented(NULL), out_unsegmented(NULL),
WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *multigermline,
size_t w,
map<string, string> &windows_labels, int stop_after) {
map<string, string> &windows_labels,
int stop_after, bool keep_unsegmented_as_clone) {
init_stats();
WindowsStorage *windowsStorage = new WindowsStorage(windows_labels);
......@@ -51,8 +52,16 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *mult
nb_reads_germline[seg->system]++;
} else if (out_unsegmented) {
} else {
if (keep_unsegmented_as_clone && (reads->getSequence().sequence.length() >= w))
{
// Keep the unsegmented read, taking the full sequence as the junction
windowsStorage->add(reads->getSequence().sequence, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
}
if (out_unsegmented) {
*out_unsegmented << *seg ;
}
}
// Last line of detailed affects output
......
......@@ -46,7 +46,8 @@ class WindowExtractor {
*/
WindowsStorage *extract(OnlineFasta *reads, MultiGermline *multigermline,
size_t w,
map<string, string> &windows_labels, int stop_after=-1);
map<string, string> &windows_labels,
int stop_after=-1, bool keep_unsegmented_as_clone=false);
/**
* @return the average length of sequences whose segmentation has been classified as seg
......
......@@ -161,6 +161,7 @@ void usage(char *progname)
<< "Experimental options (do not use)" << endl
<< " -I ignore k-mers common to different germline systems (experimental, must be used with -g, do not use)" << endl
<< " -1 use a unique index for all germline systems (experimental, must be used with -g, do not use)" << endl
<< " -! keep unsegmented reads as clones, taking for junction the complete sequence, to be used on very small datasets (for example -!AX 20)" << endl
<< endl
<< "Window prediction" << endl
......@@ -302,6 +303,8 @@ int main (int argc, char **argv)
bool output_segmented = false;
bool output_unsegmented = false;
bool output_affects = false;
bool keep_unsegmented_as_clone = false;
bool multi_germline = false;
bool multi_germline_incomplete = false;
bool multi_germline_mark = false;
......@@ -322,7 +325,7 @@ int main (int argc, char **argv)
//$$ options: getopt
while ((c = getopt(argc, argv, "AX:haiI1g:G:V:D:J:k:r:vw:e:C:f:l:c:m:M:N:s:b:Sn:o:L%:y:z:uUK3")) != EOF)
while ((c = getopt(argc, argv, "A!X:haiI1g:G:V:D:J:k:r:vw:e:C:f:l:c:m:M:N:s:b:Sn:o:L%:y:z:uUK3")) != EOF)
switch (c)
{
......@@ -432,6 +435,10 @@ int main (int argc, char **argv)
delta_max = atoi(optarg);
break;
case '!':
keep_unsegmented_as_clone = true;
break;
// Output
case 'o':
......@@ -906,7 +913,7 @@ int main (int argc, char **argv)
we.setAffectsOutput(out_affects);
}
WindowsStorage *windowsStorage = we.extract(reads, multigermline, w, windows_labels, max_reads_processed);
WindowsStorage *windowsStorage = we.extract(reads, multigermline, w, windows_labels, max_reads_processed, keep_unsegmented_as_clone);
windowsStorage->setIdToAll();
size_t nb_total_reads = we.getNbReads();
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment