Commit f8c82aff authored by Mathieu Giraud

vidjil.cpp, windowExtractor.{h,cpp}: new/updated -u/-uu/-uuu unsegmentation options

Especially with RNA-seq data, one may want to output reads with V/J similarities,
but not random reads from other parts of the genome.
The new behavior of the '-u' option is to return these interesting reads.
parent 6e48f2f5
......@@ -91,6 +91,7 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
*out_unsegmented << *seg ;
}
if (out_unsegmented_detail && (seg->getSegmentationStatus() >= STATS_FIRST_UNSEG)) {
if (unsegmented_detail_full || (seg->getSegmentationStatus() != UNSEG_TOO_FEW_ZERO && seg->getSegmentationStatus() != UNSEG_TOO_SHORT))
*out_unsegmented_detail[seg->getSegmentationStatus()] << *seg ;
}
}
......@@ -153,8 +154,9 @@ void WindowExtractor::setUnsegmentedOutput(ostream *out) {
out_unsegmented = out;
}
void WindowExtractor::setUnsegmentedDetailOutput(ofstream **outs) {
// Stores the array of per-cause output streams for unsegmented reads, together
// with the flag telling whether the reads that are filtered out by default
// (UNSEG_TOO_FEW_ZERO and UNSEG_TOO_SHORT) should be written as well.
// The array is indexed by segmentation status when reads are written out.
void WindowExtractor::setUnsegmentedDetailOutput(ofstream **outs, bool unsegmented_detail_full) {
out_unsegmented_detail = outs;
// 'this->' is required here: the parameter shadows the member of the same name.
this->unsegmented_detail_full = unsegmented_detail_full;
}
void WindowExtractor::setAffectsOutput(ostream *out) {
......
......@@ -29,6 +29,7 @@ class WindowExtractor {
ostream *out_segmented;
ostream *out_unsegmented;
ofstream **out_unsegmented_detail;
bool unsegmented_detail_full;
ostream *out_affects;
Stats stats[STATS_SIZE];
......@@ -113,9 +114,10 @@ class WindowExtractor {
/**
* Defines the output streams where the unsegmented sequences will be output, split by unsegmentation cause.
* Otherwise no output will be given.
* @param out: The output streams
* @param outs: The output streams
* @param unsegmented_detail_full: Whether we should also output UNSEG_TOO_FEW_ZERO and UNSEG_TOO_SHORT reads
*/
void setUnsegmentedDetailOutput(ofstream **outs);
void setUnsegmentedDetailOutput(ofstream **outs, bool unsegmented_detail_full=false);
/**
* Defines the output stream where the detailed affects will be output.
......
......@@ -240,7 +240,8 @@ void usage(char *progname, bool advanced)
cerr << "Detailed output per read (generally not recommended, large files, but may be used for filtering, as in -uu -X 1000)" << endl
<< " -U output segmented reads (in " << SEGMENTED_FILENAME << " file)" << endl
<< " -u output unsegmented reads (in " << UNSEGMENTED_FILENAME << " file)" << endl
<< " -uu output unsegmented reads, gathered by unsegmentation cause (in *" << UNSEGMENTED_DETAIL_FILENAME << " files)" << endl
<< " -uu output unsegmented reads, gathered by unsegmentation cause, except for very short and 'too few V/J' reads (in *" << UNSEGMENTED_DETAIL_FILENAME << " files)" << endl
<< " -uuu output unsegmented reads, gathered by unsegmentation cause, all reads (in *" << UNSEGMENTED_DETAIL_FILENAME << " files) (use only for debug)" << endl
<< " -K output detailed k-mer affectation on all reads (in " << AFFECTS_FILENAME << " file) (use only for debug, for example -KX 100)" << endl
<< endl
......@@ -361,6 +362,7 @@ int main (int argc, char **argv)
bool output_segmented = false;
bool output_unsegmented = false;
bool output_unsegmented_detail = false;
bool output_unsegmented_detail_full = false;
bool output_affects = false;
bool keep_unsegmented_as_clone = false;
......@@ -648,8 +650,9 @@ int main (int argc, char **argv)
break;
case 'u':
output_unsegmented_detail = output_unsegmented; // -uu
output_unsegmented = true;
output_unsegmented = output_unsegmented_detail_full ; // -uuu
output_unsegmented_detail_full = output_unsegmented_detail; // -uu
output_unsegmented_detail = true; // -u
break;
case 'U':
output_segmented = true;
......@@ -1067,7 +1070,7 @@ int main (int argc, char **argv)
out_unsegmented_detail[i] = new ofstream(f_unsegmented_detail.c_str());
}
we.setUnsegmentedDetailOutput(out_unsegmented_detail);
we.setUnsegmentedDetailOutput(out_unsegmented_detail, output_unsegmented_detail_full);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment