Commit f37be391 authored by Mathieu Giraud

vidjil.cpp, core/windowExtractor.{h,cpp}: new option '-uu', split reads...

vidjil.cpp, core/windowExtractor.{h,cpp}: new option '-uu', split reads according to their unsegmentation cause
parent 27c9d54c
......@@ -5,7 +5,8 @@
#define PROGRESS_POINT 25000
#define PROGRESS_LINE 40
WindowExtractor::WindowExtractor(MultiGermline *multigermline): out_segmented(NULL), out_unsegmented(NULL), out_affects(NULL), max_reads_per_window(~0), multigermline(multigermline){
WindowExtractor::WindowExtractor(MultiGermline *multigermline): out_segmented(NULL), out_unsegmented(NULL), out_unsegmented_detail(NULL), out_affects(NULL),
max_reads_per_window(~0), multigermline(multigermline){
for (list<Germline*>::const_iterator it = multigermline->germlines.begin(); it != multigermline->germlines.end(); ++it)
{
Germline *germline = *it ;
......@@ -89,6 +90,9 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
if (out_unsegmented) {
*out_unsegmented << *seg ;
}
if (out_unsegmented_detail) {
*out_unsegmented_detail[seg->getSegmentationStatus()] << *seg ;
}
}
// Last line of detailed affects output
......@@ -149,6 +153,10 @@ void WindowExtractor::setUnsegmentedOutput(ostream *out) {
out_unsegmented = out;
}
// Registers the array of output streams that extract() indexes by
// segmentation status when writing unsegmented reads.
void WindowExtractor::setUnsegmentedDetailOutput(ofstream **outs)
{
  this->out_unsegmented_detail = outs;
}
// Registers the stream that receives the detailed affects output.
void WindowExtractor::setAffectsOutput(ostream *out)
{
  this->out_affects = out;
}
......
......@@ -28,6 +28,7 @@ class WindowExtractor {
ostream *out_segmented;
ostream *out_unsegmented;
ofstream **out_unsegmented_detail;
ostream *out_affects;
Stats stats[STATS_SIZE];
......@@ -109,6 +110,13 @@ class WindowExtractor {
*/
void setUnsegmentedOutput(ostream *out);
/**
* Defines the output streams where the unsegmented sequences will be output, split by unsegmentation cause.
* Otherwise no output will be given.
* @param outs: The output streams, one per unsegmentation cause
*/
void setUnsegmentedDetailOutput(ofstream **outs);
/**
* Defines the output stream where the detailed affects will be output.
* Otherwise no output will be given.
......
......@@ -94,6 +94,7 @@ enum { CMD_WINDOWS, CMD_CLONES, CMD_SEGMENT, CMD_GERMLINES } ;
// Names / suffixes of the files written by the program.
#define WINDOWS_FILENAME ".windows.fa"
// Per-read outputs selected by options -U and -u (see usage()).
#define SEGMENTED_FILENAME ".segmented.vdj.fa"
#define UNSEGMENTED_FILENAME ".unsegmented.vdj.fa"
// Option -uu: one file per unsegmentation cause. The file name is built as
// <out_dir><basename>.<sanitized segmented_mesg[i]> followed by this suffix.
#define UNSEGMENTED_DETAIL_FILENAME ".fa"
// Option -K: detailed k-mer affectation on all reads.
#define AFFECTS_FILENAME ".affects"
#define EDGES_FILENAME ".edges"
#define COMP_FILENAME "comp.vidjil"
......@@ -229,6 +230,7 @@ void usage(char *progname, bool advanced)
cerr << "Detailed output per read (not recommended, large files)" << endl
<< " -U output segmented reads (in " << SEGMENTED_FILENAME << " file)" << endl
<< " -u output unsegmented reads (in " << UNSEGMENTED_FILENAME << " file)" << endl
<< " -uu output unsegmented reads, gathered by unsegmentation cause (in *" << UNSEGMENTED_DETAIL_FILENAME << " files)" << endl
<< " -K output detailed k-mer affectation on all reads (in " << AFFECTS_FILENAME << " file) (use only for debug, for example -KX 100)" << endl
<< endl
......@@ -347,6 +349,7 @@ int main (int argc, char **argv)
bool output_sequences_by_cluster = false;
bool output_segmented = false;
bool output_unsegmented = false;
bool output_unsegmented_detail = false;
bool output_affects = false;
bool keep_unsegmented_as_clone = false;
......@@ -631,6 +634,7 @@ int main (int argc, char **argv)
break;
case 'u':
output_unsegmented_detail = output_unsegmented; // -uu
output_unsegmented = true;
break;
case 'U':
......@@ -1006,6 +1010,7 @@ int main (int argc, char **argv)
ofstream *out_segmented = NULL;
ofstream *out_unsegmented = NULL;
ofstream *out_unsegmented_detail[STATS_SIZE];
ofstream *out_affects = NULL;
WindowExtractor we(multigermline);
......@@ -1026,6 +1031,25 @@ int main (int argc, char **argv)
we.setUnsegmentedOutput(out_unsegmented);
}
if (output_unsegmented_detail) {
for (int i=0; i<STATS_SIZE; i++)
{
// Sanitize segmented_mesg[i]
string s = segmented_mesg[i] ;
replace(s.begin(), s.end(), '?', '_');
replace(s.begin(), s.end(), ' ', '_');
replace(s.begin(), s.end(), '/', '_');
replace(s.begin(), s.end(), '<', '_');
string f_unsegmented_detail = out_dir + f_basename + "." + s + UNSEGMENTED_DETAIL_FILENAME ;
cout << " ==> " << f_unsegmented_detail << endl ;
out_unsegmented_detail[i] = new ofstream(f_unsegmented_detail.c_str());
}
we.setUnsegmentedDetailOutput(out_unsegmented_detail);
}
if (output_affects) {
string f_affects = out_dir + f_basename + AFFECTS_FILENAME ;
cout << " ==> " << f_affects << endl ;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment