Commit bd1eadc7 authored by Mikaël Salson, committed by Mathieu Giraud

core/{windowExtractor,windows,segment}: Short/shifted windows

Warn when a read is segmented with a short or shifted window.

This requires adding a new segmentation status. The problem is that
Segment::getSegmentationStatus() returns just a single integer, which makes it
impossible to report several statuses at once (which could be convenient). A
solution would be to use bit masks for the statuses (i.e. for a status s we
would store 1 << s, so that several statuses can be combined in one integer).
Since this would require quite a lot of code modification, I instead added
some extra tests (that's bad) to deal with the case where the segmentation is
achieved with a shorter or shifted window. In such a case we warn the user in
the JSON output (a warning on the CLI should be added too).

See #2913, #2916.
parent 57d31ed2
......@@ -53,6 +53,7 @@ using json = nlohmann::json;
enum SEGMENTED { NOT_PROCESSED,
TOTAL_SEG_AND_WINDOW,
SEG_PLUS, SEG_MINUS,
SEG_SHORTER_WINDOW,
UNSEG_TOO_SHORT, UNSEG_STRAND_NOT_CONSISTENT,
UNSEG_TOO_FEW_ZERO, UNSEG_ONLY_V, UNSEG_ONLY_J,
UNSEG_BAD_DELTA_MIN, UNSEG_AMBIGUOUS,
......@@ -65,6 +66,7 @@ enum SEGMENTED { NOT_PROCESSED,
const char* const segmented_mesg[] = { "?",
"SEG",
"SEG_+", "SEG_-",
"SEG short w",
"UNSEG too short", "UNSEG strand",
"UNSEG too few V/J", "UNSEG only V/5'", "UNSEG only J/3'",
"UNSEG < delta_min", "UNSEG ambiguous",
......
......@@ -67,13 +67,19 @@ WindowsStorage *WindowExtractor::extract(OnlineBioReader *reads,
if (seg->isSegmented()) {
// Filter
if (!only_labeled_windows || windowsStorage->isInterestingJunction(junc))
if (!only_labeled_windows || windowsStorage->isInterestingJunction(junc)) {
// Store the window
windowsStorage->add(junc, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
// Store the window
if (seg->isJunctionShifted())
windowsStorage->add(junc, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline, {SEG_SHORTER_WINDOW});
else
windowsStorage->add(junc, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
}
// Update stats
stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ;
if (seg->isJunctionShifted())
stats[SEG_SHORTER_WINDOW].insert(read_length);
stats_reads[seg->system].addScore(read_length);
if (out_segmented) {
......
......@@ -152,7 +152,7 @@ void WindowsStorage::setIdToAll() {
}
}
void WindowsStorage::add(junction window, Sequence sequence, int status, Germline *germline) {
void WindowsStorage::add(junction window, Sequence sequence, int status, Germline *germline, list<int> extra_statuses) {
if (! hasWindow(window)) {
// First time we see that window: init
status_by_window[window].resize(STATS_SIZE);
......@@ -163,6 +163,9 @@ void WindowsStorage::add(junction window, Sequence sequence, int status, Germlin
seqs_by_window[window].add(sequence);
status_by_window[window][status]++;
for (int extra: extra_statuses)
status_by_window[window][extra]++;
germline_by_window[window] = germline;
}
......@@ -277,6 +280,10 @@ json WindowsStorage::sortedWindowsToJson(map <junction, json> json_data_segment)
json reads = {it->second};
windowsList["id"] = it->first;
if (status_by_window[it->first][SEG_SHORTER_WINDOW])
windowsList["warn"] = "Short or shifted window";
windowsList["reads"] = reads;
windowsList["top"] = top++;
windowsList["germline"] = germline_by_window[it->first]->code;
......
......@@ -180,7 +180,8 @@ class WindowsStorage {
* @param status: the segmentation status
* @param germline: the germline where this sequence has been segmented
*/
void add(junction window, Sequence sequence, int status, Germline *germline);
void add(junction window, Sequence sequence, int status, Germline *germline,
list<int> extra_statuses = {});
/**
* @pre should be called before keepInterestingWindows()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment