Commit 7d09552a authored by Mikaël Salson

Merge branch 'feature-a/4599-unproductive-causes' into 'dev'

Unproductivity causes

Closes #4599

See merge request !860
parents 187ad4af add143db
Pipeline #199628 passed with stages
in 9 minutes and 15 seconds
......@@ -1311,9 +1311,12 @@ void FineSegmenter::findCDR3(){
return ;
}
// Now a junction is detected. Is it productive?
JUNCTIONproductive = false ;
// We require at least one more nucleotide to export a CDR3
if (JUNCTIONend - JUNCTIONstart + 1 < 7) {
JUNCTIONproductive = false ;
JUNCTIONunproductive = UNPROD_TOO_SHORT;
return ;
}
......@@ -1326,10 +1329,25 @@ void FineSegmenter::findCDR3(){
if (CDR3nuc.length() % 3 == 0)
{
CDR3aa = nuc_to_aa(CDR3nuc);
string sequence_startV_stopJ = subsequence(getSequence().sequence, box_V->start+1, box_J->end+1);
int frame = (JUNCTIONstart-1 - box_V->start) % 3;
if (hasInFrameStopCodon(sequence_startV_stopJ, frame))
{
// Non-productive CDR3
JUNCTIONunproductive = UNPROD_STOP_CODON;
}
else
{
// Productive CDR3
JUNCTIONproductive = true;
}
}
else
{
// Non-productive CDR3
JUNCTIONunproductive = UNPROD_OUT_OF_FRAME;
// We want to output a '#' somewhere around the end of the N, and then restart
// at the start of the first codon fully included in the germline J
int CDR3startJfull = JUNCTIONend - ((JUNCTIONend - box_J->start) / 3) * 3 + 1 ;
......@@ -1342,11 +1360,7 @@ void FineSegmenter::findCDR3(){
JUNCTIONaa = nuc_to_aa(subsequence(getSequence().sequence, JUNCTIONstart, CDR3start-1))
+ CDR3aa + nuc_to_aa(subsequence(getSequence().sequence, CDR3end+1, JUNCTIONend));
string sequence_startV_stopJ = subsequence(getSequence().sequence, box_V->start+1, box_J->end+1);
int frame = (JUNCTIONstart-1 - box_V->start) % 3;
// Reminder: JUNCTIONstart is 1-based
JUNCTIONproductive = (CDR3nuc.length() % 3 == 0) && (!hasInFrameStopCodon(sequence_startV_stopJ, frame));
}
void FineSegmenter::checkWarnings(CloneOutput *clone, bool phony)
......@@ -1412,6 +1426,10 @@ void FineSegmenter::toOutput(CloneOutput *clone){
{"aa", JUNCTIONaa},
{"productive", JUNCTIONproductive}
});
if (JUNCTIONunproductive.length())
{
clone->set(KEY_SEG, "junction", "unproductive", JUNCTIONunproductive);
}
}
}
}
......
......@@ -83,7 +83,10 @@ const char* const segmented_mesg[] = { "?",
"UNSEG too short w",
} ;
// Causes of unproductivity.
// FineSegmenter::findCDR3() stores one of these strings in JUNCTIONunproductive
// when a junction is detected but is not productive; FineSegmenter::toOutput()
// then exports it as the "unproductive" field of the junction when it is non-empty.

// The junction spans fewer than 7 positions (JUNCTIONend - JUNCTIONstart + 1 < 7).
#define UNPROD_TOO_SHORT     "too-short"
// The length of the CDR3 nucleotide sequence is not a multiple of 3.
#define UNPROD_OUT_OF_FRAME  "out-of-frame"
// The CDR3 is in frame, but hasInFrameStopCodon() reports a stop codon
// in the sequence running from the start of V to the end of J.
#define UNPROD_STOP_CODON    "stop-codon"
/**
* An alignment box (AlignBox) gathers all parameters for a recombined gene segment (V, D, J, other D...)
......@@ -190,6 +193,7 @@ protected:
int JUNCTIONstart, JUNCTIONend;
string JUNCTIONaa;
bool JUNCTIONproductive;
string JUNCTIONunproductive;
int CDR3start, CDR3end;
string CDR3nuc;
......
......@@ -7,3 +7,13 @@ gaattattataagaaactctttggcagtggaacaacactggttgtcacag
>TRGV1*01 0/AA/0 TRGJ1*01 [TRG] {CATWDRKNYYKKLF}
ctgcaaaatctaattaaaaaTGAttctgggttctattactgtgccacctgggacaggAA
gaattattataagaaactctttggcagtggaacaacactggttgtcacag
# Not in-frame
>TRGV1*01 0/AAG/0 TRGJ1*01 [TRG] {CATWDRK#NYYKKLF}
ctgcaaaatctaattaaaaaTGAttctgggttctattactgtgccacctgggacaggAAG
gaattattataagaaactctttggcagtggaacaacactggttgtcacag
# Too short
>TRGV1*01 15//19 TRGJ1*01 [TRG] {}
ctgcaaaatctaattaaaaaTGAttctgggttctattactgt
tttggcagtggaacaacactggttgtcacag
\ No newline at end of file
!OUTPUT_FILE: out/cdr3-stopcodon.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/cdr3-stopcodon.fa
$ Two identical junctions in JSON
2: "CATWDRKNYYKKLF"
$ But only one productive
1: "productive": true
!OUTPUT_FILE: out/productive_stop_outframe.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/productive_stop_outframe.fa
!OPTIONS: --mod jR
$ Two identical junctions, and one with a spurious nucleotide
:clones[0].seg.junction.aa: CATWDRKNYYKKLF
:clones[1].seg.junction.aa: CATWDRKNYYKKLF
:clones[2].seg.junction.aa: CATWDRK#NYYKKLF
$ But only one productive
:clones[0].seg.junction.productive: False
:clones[1].seg.junction.productive: True
:clones[2].seg.junction.productive: False
:clones[3].seg.junction.productive: False
$ Cause of unproductivity
:clones[0].seg.junction.unproductive: stop-codon
0:clones[1].seg.junction.unproductive
:clones[2].seg.junction.unproductive: out-of-frame
:clones[3].seg.junction.unproductive: too-short
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment