Une MAJ de sécurité est nécessaire sur notre version actuelle. Elle sera effectuée lundi 02/08 entre 12h30 et 13h. L'interruption de service devrait durer quelques minutes (probablement moins de 5 minutes).

Commit 5d910513 authored by Mikaël Salson's avatar Mikaël Salson
Browse files

segment.cpp: Ensure that no in-frame stop codon prevents productivity

Refining productivity computation. In-frames stop codons will be detected.
However if a sequence goes until the stop codon, it is normal to have an
in-frame stop codon. But our sequence (usually?) don't go after the J gene.

See #1824
parent def760d8
......@@ -1208,7 +1208,8 @@ void FineSegmenter::findCDR3(){
JUNCTIONaa = nuc_to_aa(subsequence(getSequence().sequence, JUNCTIONstart, CDR3start-1))
+ CDR3aa + nuc_to_aa(subsequence(getSequence().sequence, CDR3end+1, JUNCTIONend));
JUNCTIONproductive = (CDR3nuc.length() % 3 == 0) && (JUNCTIONaa.find('*') == string::npos);
JUNCTIONproductive = (CDR3nuc.length() % 3 == 0) && (! hasInFrameStopCodon(getSequence().sequence, (JUNCTIONstart-1)%3));
// Reminder: JUNCTIONstart is 1-based
}
json FineSegmenter::toJson(){
......
......@@ -288,6 +288,20 @@ int revcomp_int(int word, int size) {
return revcomp;
}
bool hasInFrameStopCodon(const string &sequence, int frame) {
list<string> stop_codons {"TAG", "TAA", "TGA"};
for (auto codon: stop_codons) {
size_t pos_codon = sequence.find(codon);
while (pos_codon != string::npos) {
if (pos_codon % 3 == (size_t) frame)
return true;
pos_codon = sequence.find(codon, pos_codon+1);
}
}
return false;
}
string reverse(const string &text) {
return string(text.rbegin(), text.rend());
}
......
......@@ -195,6 +195,15 @@ int revcomp_int(int word, int size);
*/
string reverse(const string &text);
/**
* @param sequence is a DNA sequence in the correct orientation in uppercase
* (ie. no revcomp will be tried)
* @param frame (0, 1 or 2) depending on where the position of the first codon
* in the sequence starts
* @return true iff a stop codon is in-frame.
*/
bool hasInFrameStopCodon(const string &sequence, int frame);
/**
* @return a Sequence whose fields are given by the parameters
*/
......
......@@ -379,6 +379,36 @@ void testNChooseK() {
TAP_TEST(nChoosek(8, 4) == 70, TEST_N_CHOOSE_K, "");
}
void testIsStopCodon() {
TAP_TEST(hasInFrameStopCodon("CATCATCATTAGCATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("CATCATCATTAACATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("CATCATCATTGACATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("TAGCATCATCATCATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("TAACATCATCATCATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("TGACATCATCATCATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(! hasInFrameStopCodon("ACATCATCATTAGCATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(! hasInFrameStopCodon("ACATCATCATTAACATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(! hasInFrameStopCodon("ACATCATCATTGACATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(! hasInFrameStopCodon("ATAGCATCATCATCATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(! hasInFrameStopCodon("ATAACATCATCATCATCG", 0), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("ACATCATCATTAGCATCG", 1), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("ACATCATCATTAACATCG", 1), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("ACATCATCATTGACATCG", 1), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("ATAGCATCATCATCATCG", 1), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("ATAACATCATCATCATCG", 1), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("ATGACATCATCATCATCG", 1), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("AACATCATCATTAGCATCG", 2), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("AACATCATCATTAACATCG", 2), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("AACATCATCATTGACATCG", 2), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("AATAGCATCATCATCATCG", 2), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("AATAACATCATCATCATCG", 2), TEST_IS_STOP_CODON, "");
TAP_TEST(hasInFrameStopCodon("AATGACATCATCATCATCG", 2), TEST_IS_STOP_CODON, "");
}
void testTrimSequence() {
string seq = "NNNNNAATAGTAGACTANNNNN";
size_t start = 0;
......@@ -494,4 +524,5 @@ void testTools() {
testNChooseK();
testGenerateAllSeeds();
testTrimSequence();
testIsStopCodon();
}
......@@ -18,6 +18,7 @@ enum {
TEST_FASTA_OUT,
TEST_FASTA_NB_SEQUENCES,
TEST_FASTA_MARK,
TEST_IS_STOP_CODON,
TEST_CREATE_SEQUENCE_LABEL_FULL,
TEST_CREATE_SEQUENCE_LABEL,
......@@ -191,6 +192,7 @@ inline void declare_tests() {
RECORD_TAP_TEST(TEST_FASTA_INVALID_FILE, "Fasta with invalid file");
RECORD_TAP_TEST(TEST_FASTA_OUT, "Test operator<< with Fasta");
RECORD_TAP_TEST(TEST_FASTA_NB_SEQUENCES, "Nb sequences in Fasta");
RECORD_TAP_TEST(TEST_IS_STOP_CODON, "Check if is stop codon");
RECORD_TAP_TEST(TEST_CREATE_SEQUENCE_LABEL_FULL, "create_sequence: label_full field");
RECORD_TAP_TEST(TEST_CREATE_SEQUENCE_LABEL, "create_sequence: label field");
RECORD_TAP_TEST(TEST_CREATE_SEQUENCE_SEQUENCE, "create_sequence: sequence field");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment