diff --git a/algo/core/segment.cpp b/algo/core/segment.cpp
index 329b7163a6f960ea7ab5b1c79e06583bdbf53b2c..5082142c0023a65281d5f1d9172507a6310e8dd1 100644
--- a/algo/core/segment.cpp
+++ b/algo/core/segment.cpp
@@ -249,6 +249,10 @@ int Segmenter::getRight() const {
   return box_J->start;
 }
 
+int Segmenter::getMidLength() const {
+  return box_J->start - box_V->end - 1;
+}
+
 int Segmenter::getLeftD() const {
   return box_D->start;
 }
@@ -1313,6 +1317,27 @@ void FineSegmenter::findCDR3(){
   // Reminder: JUNCTIONstart is 1-based
 }
 
+/**
+ * Adds to json_clone the warnings that apply to the current segmentation.
+ * Nothing is added when the sequence is not segmented.
+ */
+void FineSegmenter::checkWarnings(json &json_clone)
+{
+  if (isSegmented())
+    {
+      // W61, non-recombined D7-27/J1 sequence: box_V labelled IGHD7-27,
+      // box_J labelled IGHJ1, and 90 to 94 positions (inclusive) in between.
+      // Both bounds must hold: joined with '||' the test is true for any length.
+      const int mid_length = getMidLength();
+      if ((box_V->ref_label.find("IGHD7-27") != string::npos)
+          && (box_J->ref_label.find("IGHJ1") != string::npos)
+          && (mid_length >= 90) && (mid_length <= 94))
+        {
+          json_add_warning(json_clone, "W61", "Non-recombined D7-27/J1 sequence", LEVEL_ERROR);
+        }
+    }
+}
+
 json FineSegmenter::toJson(){
   json seg;
 
diff --git a/algo/core/segment.h b/algo/core/segment.h
index fbf2e940576a0a9db7004c98bd4fa87ada4457c1..e4b644777b0513c8f6df6749f37cdba8a8807d1e 100644
--- a/algo/core/segment.h
+++ b/algo/core/segment.h
@@ -231,7 +231,14 @@ protected:
    * @return the right position (on forward strand) of the segmentation
    */
   int getRight() const;
+
+  /**
+   * @return the number of positions between the left and the right positions
+   */
+  int getMidLength() const;
+
 
+
   /**
    * @return the left position (on forward strand) of the D segmentation.
*/ @@ -390,6 +397,7 @@ class FineSegmenter : public Segmenter */ void findCDR3(); + void checkWarnings(json &json_clone); json toJson(); }; diff --git a/algo/tests/data/D7-27--J1.fa b/algo/tests/data/D7-27--J1.fa new file mode 100644 index 0000000000000000000000000000000000000000..c3d4e184708503b63a9d30a7161b9530dec89bae --- /dev/null +++ b/algo/tests/data/D7-27--J1.fa @@ -0,0 +1,7 @@ +>NC_000014.9:c105865580-105865420 Homo sapiens chromosome 14, GRCh38.p12 Primary Assembly +TGAGCTGAGAACCACTGTGCTAACTGGGGACACAGTGATTGGCAGCTCTACAAAAACCATGCTCCCCCGG +GACCCCGGGCTGTGGGTTTCTGTAGCCCCTGGCTCAGGGCTGACTCACCGTGGCTGAATACTTCCAGCAC +TGGGGCCAGGGCACCCTGGTC + + + diff --git a/algo/tests/should-get-tests/colinear-D7-27--J1.should-get b/algo/tests/should-get-tests/colinear-D7-27--J1.should-get new file mode 100644 index 0000000000000000000000000000000000000000..10e07d3d975ff335e797744353c1dcde7c16b0b1 --- /dev/null +++ b/algo/tests/should-get-tests/colinear-D7-27--J1.should-get @@ -0,0 +1,15 @@ + +!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -KA -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH+ -r 4 -b co $VIDJIL_DATA/D7-27--J1.fa ; cat out/co.vidjil + +# Test D7-27 0/92/0 J1 non-recombination + +$ Segmented on IGH+ +1: "germline": "IGH\+" + +$ Correct (non-)recombination +1: "name": "IGHD7-27.01 0/[ACGT]{92}/0 IGHJ1.01" + +$ Warning +1: "code": "W61" +1: "msg": "Non-recombined D7-27/J1 sequence" + diff --git a/algo/tests/unit-tests/testSegment.cpp b/algo/tests/unit-tests/testSegment.cpp index 2f3ddaf27b757cd6f3a003f0306ca17bb0f84435..d629fc46de942ba70405bdc2e6226887f056e8d8 100644 --- a/algo/tests/unit-tests/testSegment.cpp +++ b/algo/tests/unit-tests/testSegment.cpp @@ -159,6 +159,7 @@ void testSegmentationCause(IndexTypes index) { TEST_KMER_JUNCTION, ks.getInfoLineWithAffects()); TAP_TEST_EQUAL(ks.getLeft(), 17, TEST_KMER_LEFT, "left = " << ks.getLeft()); TAP_TEST_EQUAL(ks.getRight(), 18, TEST_KMER_RIGHT, "right = " << ks.getRight()); + TAP_TEST_EQUAL(ks.getMidLength(), 0, 
TEST_KMER_RIGHT, "mid length = " << ks.getMidLength()); ks.setSegmentationStatus(NOT_PROCESSED); TAP_TEST(! ks.isSegmented(), TEST_SET_SEGMENTATION_CAUSE, ks.getInfoLineWithAffects()); diff --git a/algo/vidjil.cpp b/algo/vidjil.cpp index 378f3cb5338e0f75fd2e04f24ba7093d76421671..961c18179e51167d9f24203f0e135c3d47d2580f 100644 --- a/algo/vidjil.cpp +++ b/algo/vidjil.cpp @@ -1424,6 +1424,8 @@ int main (int argc, char **argv) out_clone << endl; } // end if (seg.isSegmented()) + + seg.checkWarnings(json_clone); json_data_segment[it->first] = json_clone; if (output_sequences_by_cluster) // -a option, output all sequences diff --git a/doc/warnings.md b/doc/warnings.md index a79665a9193c74d6323552bae5cfcf935691e537..219d2d4b1ccd2639d54dc5b43c3a15a591e09cf0 100644 --- a/doc/warnings.md +++ b/doc/warnings.md @@ -42,8 +42,8 @@ Warnings which were implemented ([x]) have a fixed code that should not be chang - [ ] Possible strand ambiguity *Strange recombination (FineSegmenter in vidjil-algo), V and J genes* -- [ ] W6x Co-linear recombination D7-J1 #2232 - [ ] Potential co-linear genome match (pos xxxxxx) #1664 #1629 +- [x] W61 Non-recombined D7-27/J1 sequence #2232 - [ ] Potential di-mer #2820 - [ ] Very large deletion (xxx bp) #2909 - [ ] Unexpected recombination