Commit f8c82aff authored by Mathieu Giraud

vidjil.cpp, windowExtractor.{h,cpp}: new/updated -u/-uu/-uuu unsegmentation options

Especially with RNAseq data, one may want to output reads with V/J similarities,
but not random reads from other parts of the genome.
The new behavior of the '-u' option is to return these interesting reads.
parent 6e48f2f5
...@@ -91,6 +91,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, ...@@ -91,6 +91,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
*out_unsegmented << *seg ; *out_unsegmented << *seg ;
} }
if (out_unsegmented_detail && (seg->getSegmentationStatus() >= STATS_FIRST_UNSEG)) { if (out_unsegmented_detail && (seg->getSegmentationStatus() >= STATS_FIRST_UNSEG)) {
if (unsegmented_detail_full || (seg->getSegmentationStatus() != UNSEG_TOO_FEW_ZERO && seg->getSegmentationStatus() != UNSEG_TOO_SHORT))
*out_unsegmented_detail[seg->getSegmentationStatus()] << *seg ; *out_unsegmented_detail[seg->getSegmentationStatus()] << *seg ;
} }
} }
...@@ -153,8 +154,9 @@ void WindowExtractor::setUnsegmentedOutput(ostream *out) { ...@@ -153,8 +154,9 @@ void WindowExtractor::setUnsegmentedOutput(ostream *out) {
out_unsegmented = out; out_unsegmented = out;
} }
void WindowExtractor::setUnsegmentedDetailOutput(ofstream **outs) { void WindowExtractor::setUnsegmentedDetailOutput(ofstream **outs, bool unsegmented_detail_full) {
out_unsegmented_detail = outs; out_unsegmented_detail = outs;
this->unsegmented_detail_full = unsegmented_detail_full;
} }
void WindowExtractor::setAffectsOutput(ostream *out) { void WindowExtractor::setAffectsOutput(ostream *out) {
......
...@@ -29,6 +29,7 @@ class WindowExtractor { ...@@ -29,6 +29,7 @@ class WindowExtractor {
ostream *out_segmented; ostream *out_segmented;
ostream *out_unsegmented; ostream *out_unsegmented;
ofstream **out_unsegmented_detail; ofstream **out_unsegmented_detail;
bool unsegmented_detail_full;
ostream *out_affects; ostream *out_affects;
Stats stats[STATS_SIZE]; Stats stats[STATS_SIZE];
...@@ -113,9 +114,10 @@ class WindowExtractor { ...@@ -113,9 +114,10 @@ class WindowExtractor {
/** /**
* Defines the output streams where the unsegmented sequences will be output, split by unsegmentation cause. * Defines the output streams where the unsegmented sequences will be output, split by unsegmentation cause.
* Otherwise no output will be given. * Otherwise no output will be given.
* @param out: The output streams * @param outs: The output streams
* @param unsegmented_detail_full: Whether we should also output UNSEG_TOO_FEW_ZERO and UNSEG_TOO_SHORT reads
*/ */
void setUnsegmentedDetailOutput(ofstream **outs); void setUnsegmentedDetailOutput(ofstream **outs, bool unsegmented_detail_full=false);
/** /**
* Defines the output stream where the detailed affects will be output. * Defines the output stream where the detailed affects will be output.
......
...@@ -240,7 +240,8 @@ void usage(char *progname, bool advanced) ...@@ -240,7 +240,8 @@ void usage(char *progname, bool advanced)
cerr << "Detailed output per read (generally not recommended, large files, but may be used for filtering, as in -uu -X 1000)" << endl cerr << "Detailed output per read (generally not recommended, large files, but may be used for filtering, as in -uu -X 1000)" << endl
<< " -U output segmented reads (in " << SEGMENTED_FILENAME << " file)" << endl << " -U output segmented reads (in " << SEGMENTED_FILENAME << " file)" << endl
<< " -u output unsegmented reads (in " << UNSEGMENTED_FILENAME << " file)" << endl << " -u output unsegmented reads (in " << UNSEGMENTED_FILENAME << " file)" << endl
<< " -uu output unsegmented reads, gathered by unsegmentation cause (in *" << UNSEGMENTED_DETAIL_FILENAME << " files)" << endl << " -uu output unsegmented reads, gathered by unsegmentation cause, except for very short and 'too few V/J' reads (in *" << UNSEGMENTED_DETAIL_FILENAME << " files)" << endl
<< " -uuu output unsegmented reads, gathered by unsegmentation cause, all reads (in *" << UNSEGMENTED_DETAIL_FILENAME << " files) (use only for debug)" << endl
<< " -K output detailed k-mer affectation on all reads (in " << AFFECTS_FILENAME << " file) (use only for debug, for example -KX 100)" << endl << " -K output detailed k-mer affectation on all reads (in " << AFFECTS_FILENAME << " file) (use only for debug, for example -KX 100)" << endl
<< endl << endl
...@@ -361,6 +362,7 @@ int main (int argc, char **argv) ...@@ -361,6 +362,7 @@ int main (int argc, char **argv)
bool output_segmented = false; bool output_segmented = false;
bool output_unsegmented = false; bool output_unsegmented = false;
bool output_unsegmented_detail = false; bool output_unsegmented_detail = false;
bool output_unsegmented_detail_full = false;
bool output_affects = false; bool output_affects = false;
bool keep_unsegmented_as_clone = false; bool keep_unsegmented_as_clone = false;
...@@ -648,8 +650,9 @@ int main (int argc, char **argv) ...@@ -648,8 +650,9 @@ int main (int argc, char **argv)
break; break;
case 'u': case 'u':
output_unsegmented_detail = output_unsegmented; // -uu output_unsegmented = output_unsegmented_detail_full ; // -uuu
output_unsegmented = true; output_unsegmented_detail_full = output_unsegmented_detail; // -uu
output_unsegmented_detail = true; // -u
break; break;
case 'U': case 'U':
output_segmented = true; output_segmented = true;
...@@ -1067,7 +1070,7 @@ int main (int argc, char **argv) ...@@ -1067,7 +1070,7 @@ int main (int argc, char **argv)
out_unsegmented_detail[i] = new ofstream(f_unsegmented_detail.c_str()); out_unsegmented_detail[i] = new ofstream(f_unsegmented_detail.c_str());
} }
we.setUnsegmentedDetailOutput(out_unsegmented_detail); we.setUnsegmentedDetailOutput(out_unsegmented_detail, output_unsegmented_detail_full);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment