Commit 095e9b95 authored by Mathieu Giraud's avatar Mathieu Giraud

merge: Analyzing recombinations with several D

Launching vidjil with '-d' now enables to detect some recombinations with several D.
The method is not optimal (see 41d08568), but should anyway recognize most VDDJ recombinations.
This was developed and discussed by @flothoni, @mikael-s, and @magiraud.
parents e365691f c706dcba
......@@ -29,6 +29,7 @@
#include <cstring>
#include <string>
#define NO_FORBIDDEN_ID (-1)
AlignBox::AlignBox() {
del_left = 0 ;
......@@ -662,7 +663,8 @@ bool comp_pair (pair<int,int> i,pair<int,int> j)
* @post box is filled
*/
void align_against_collection(string &read, Fasta &rep, bool reverse_ref, bool reverse_both, bool local,
void align_against_collection(string &read, Fasta &rep, int forbidden_rep_id,
bool reverse_ref, bool reverse_both, bool local,
AlignBox *box, Cost segment_cost)
{
......@@ -683,6 +685,9 @@ void align_against_collection(string &read, Fasta &rep, bool reverse_ref, bool r
for (int r = 0 ; r < rep.size() ; r++)
{
if (r == forbidden_rep_id)
continue;
DynProg dp = DynProg(sequence_or_rc, rep.sequence(r),
dpMode, // DynProg::SemiGlobalTrans,
segment_cost, // DNA
......@@ -725,7 +730,7 @@ void align_against_collection(string &read, Fasta &rep, bool reverse_ref, bool r
box->score = score_r;
#ifdef DEBUG_SEGMENT
cout << "best: " << best_labels << " " << best_score ;
cout << "best: " << box->ref_label << " " << best_score ;
cout << "del/del2/begin:" << (box->del_right) << "/" << (box->del_left) << "/" << (box->start) << endl;
cout << endl;
#endif
......@@ -814,10 +819,10 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,
/* Segmentation */
align_against_collection(sequence_or_rc, germline->rep_5, reverse_V, reverse_V, false,
align_against_collection(sequence_or_rc, germline->rep_5, NO_FORBIDDEN_ID, reverse_V, reverse_V, false,
box_V, segment_cost);
align_against_collection(sequence_or_rc, germline->rep_3, reverse_J, !reverse_J, false,
align_against_collection(sequence_or_rc, germline->rep_3, NO_FORBIDDEN_ID, reverse_J, !reverse_J, false,
box_J, segment_cost);
// J was run with '!reverseJ', we copy the box informations from right to left
......@@ -875,14 +880,16 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,
bool FineSegmenter::FineSegmentD(Germline *germline,
AlignBox *box_Y, AlignBox *box_DD, AlignBox *box_Z,
int forbidden_id,
int extend_DD_on_Y, int extend_DD_on_Z,
double evalue_threshold, int multiplier){
// Create a zone where to look for D, adding at most EXTEND_D_ZONE nucleotides at each side
int l = box_Y->end - EXTEND_D_ZONE;
// Create a zone where to look for D, adding some nucleotides on both sides
int l = box_Y->end - extend_DD_on_Y;
if (l<0)
l=0 ;
int r = box_Z->start + EXTEND_D_ZONE;
int r = box_Z->start + extend_DD_on_Z;
string seq = getSequence().sequence; // segmented sequence, possibly rev-comped
......@@ -892,7 +899,7 @@ bool FineSegmenter::FineSegmentD(Germline *germline,
string str = seq.substr(l, r-l);
// Align
align_against_collection(str, germline->rep_4, false, false, true,
align_against_collection(str, germline->rep_4, forbidden_id, false, false, true,
box_DD, segment_cost);
box_DD->start += l ;
......@@ -914,20 +921,54 @@ bool FineSegmenter::FineSegmentD(Germline *germline,
return true;
}
void FineSegmenter::FineSegmentD(Germline *germline, double evalue_threshold, int multiplier){
void FineSegmenter::FineSegmentD(Germline *germline, bool several_D,
double evalue_threshold, int multiplier){
if (segmented){
dSegmented = FineSegmentD(germline,
box_V, box_D, box_J,
NO_FORBIDDEN_ID,
EXTEND_D_ZONE, EXTEND_D_ZONE,
evalue_threshold, multiplier);
if (!dSegmented)
return ;
AlignBox *box_D1 = new AlignBox();
AlignBox *box_D2 = new AlignBox();
#define DD_MIN_SEARCH 5
vector <AlignBox*> boxes ;
boxes.push_back(box_V);
if (several_D && (box_D->start - box_V->end >= DD_MIN_SEARCH))
{
bool d1 = FineSegmentD(germline,
box_V, box_D1, box_D,
box_D->ref_nb,
EXTEND_D_ZONE, 0,
evalue_threshold, multiplier);
if (d1)
boxes.push_back(box_D1);
}
boxes.push_back(box_D);
if (several_D && (box_J->start - box_D->end >= DD_MIN_SEARCH))
{
bool d2 = FineSegmentD(germline,
box_D, box_D2, box_J,
box_D->ref_nb,
0, EXTEND_D_ZONE,
evalue_threshold, multiplier);
if (d2)
boxes.push_back(box_D2);
}
boxes.push_back(box_J);
code = codeFromBoxes(boxes, sequence_or_rc);
......
......@@ -306,11 +306,13 @@ class FineSegmenter : public Segmenter
* extend segmentation from VJ to VDJ
* @param germline: germline used
*/
void FineSegmentD(Germline *germline,
void FineSegmentD(Germline *germline, bool several_D,
double threshold = THRESHOLD_NB_EXPECTED_D, int multiplier=1);
bool FineSegmentD(Germline *germline,
AlignBox *box_Y, AlignBox *box_DD, AlignBox *box_Z,
int forbidden_id,
int extend_DD_on_Y, int extend_DD_on_Z,
double threshold = THRESHOLD_NB_EXPECTED_D, int multiplier=1);
void findCDR3();
......
......@@ -8,9 +8,14 @@ $ First sequence, easy segmentation (no error, few deletions at the windows, sma
# |||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||
# V5-10-1: 280 attactgtgcgaga |||||||||||||
# D1-14*01: 1 ggtataaccggaaccac
# |||||||
# D3-16*01: gtattatgattacgtttgggggaGTTATGcttatacc
# J5*01 : 1 acaactggttcgactcctggggccaaggaaccctggtcaccgtctcctcag
# -2/CTTC/-3 -1/GTTA/5
1:^>seq1 \\+ VDJ 0 72 77 89 94 139 IGHV5-10-1.0[1-4] 2/CTTC/3 IGHD1-14.01 1/GTTA/5 IGHJ5.01
# Note that a second D (D3-16*01) can be detected (6 common nucleotides, or even 7 with an overlap on the first D)
# This is tested in segment_simul.should-vdj
$ seq1_polyT is the same as seq1 but starts and ends with 10Ts, the recombination is in the middle. start position should not be 0 and end position should not be length-1
f1:^>seq1_polyT \\+ VDJ 10 82 87 99 104 149 IGHV5-10-1.0[1-4] 2/CTTC/3 IGHD1-14.01 1/GTTA/5 IGHJ5.01
######################################
# VDDJ non detecte
# /9 TRDD3 1/ -> 13-9-1 = 3nt, too short
>TRDV2 7/2/4 TRDD2 0/0/9 TRDD3 1/1/31 TRAJ29 [TRD] TODO
AcgagaaaaggacatctatgGCCCTGGTTTCAAagacAATTTCCAAGGTGACATTGATATTGCAAAGAACCTGGCTGTACTTAAGATACTTGCACCATCAGAGAGAGATGAAGGGTCTTACTACTGTGCCTGGGCCTACTACCaaagggcacaagactttctgtgattgcaagtaagtgtttctagccatccttgattttgatcagcaatggcttcttcccttgaattatttttcagtgtacctagaatgccttttgcccccaagaaaggtttggaaggagctgggtcattagcattgcgcaggaaattacaggttattctgttataattcgaagccactggacagtcatgatgcacacagatctggcctgcattgcacctggctagtgctcagtgtgtagctatctgagctctgatgtaagt
# /3 TRDD2 3/ -> 9-3-3 = 3nt, too short
>TRDV2 3/0/3 TRDD2 3/2/0 TRDD3 1/2/3 TRAJ29 [TRD] TODO
TGGTTTNAAgacaaTTTCcaAGGTGACATTGATATTGCAAAGAACCTGGCTGTACTtaAgATACTTGCaCCATCAgAgAGAGATgAaGGGTCTtACtaCTGTGCCTGTGACtCCCCACtGGGGGATACCTATTCAGGAAACACACCTcTTGTcTTTGGAAaGGGCACAAgAcTTTCTGTGATTGCAAGTAAgTGTTTCTAgCcaTCCTTGATTTTGATcagCAatGGCTTCTTCCCTTGAATTATTTTTCaGTGTACCTAgaATGCtTTTgCCA
......
>TRDV1*01 2/TCTCGAT/2 TRDD2*01 0/CGT/0 TRDD3*01 3/CTCGGG/0 TRDJ1*01 [TRD]
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAATCTCGATTTCCTACCGTACTGGGGGATCTCGGGACACCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1*01 1//1 TRDD2*01 0/G/2 TRDD3*01 0/AGGGTTGGGAT/4 TRDJ1*01 [TRD]
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACCTTCCTACGTGGGGGATACGAGGGTtgGGATCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
# Is such a recombination with Dd3 then Dd2 possible ?
>TRDV1*01 1/GAAAAGGAA/2 TRDD3*01 1//2 TRDD2*01 0//2 TRDJ1*01 [TRD] TODO
nnnCAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACGAAAAGGAATGGGGGATACTTCCTACACCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAACnn
# A short Dd1 with 4nt "AAAT" could also be inserted just after the TRDV1
>TRDV1*01 0/AAATTTCGTGATT/2 TRDD3*01 1/CTTGGG/10 TRDJ1*01 [TRD]
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACTAAATTTCGTGATTTGGGGGATACCTTGGGACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
# A short Dd1 could also be inserted after the TRDV1
>TRDV1*01 10/CTCCCGCAAAATTCA/2 TRDD2*01 0/TGCC/5 TRDD3*01 3/GCACTTTTCCTT/27 TRDJ1*01 [TRD]
nnCAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCCTCCCGCAAAATTCATTCCTACTGCCGGGATGCACTTTTCCTTGAACCCGTGTGACTGTGGAACANNNNNN
>TRDV1*01 1/C/0 TRDD2*01 2//1 TRDD3*01 1/AGGGT/0 TRDJ1*01 [TRD]
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGCaCCCCTTCCTCTGGGGGATACAGGGTACACCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
# only 2 nt (TRDV1), bug ? TRDD2 have 5nt (CCTAC) that are not seen
>TRDV1*01 2/ATGG/4 TRDD2*01 0/AGGA/0 TRDD3*01 5/TCCCTTTGT/0 TRDJ1*01 [TRD] TODO
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAAATGGCCTACAGGAACTGGGGGTCCCTTTGTACACCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1*01 0/T/1 TRDD1*01 2/CCTTAGGGATGGTCGAGGA/2 TRDD3*01 6//4 TRDJ1*01 [TRD]
ACAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACTTAAATACCTTAGGGATGGTCGAGGATGGGGCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1*01 1/AATCACCTCTT/1 TRDD2*01 3/CCACGAAAACAT/0 TRDD3*01 2//9 TRDJ1*01 [TRD]
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACAATCACCTCTTCTTCCCCACGAAAACATACTGGGGGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
......@@ -10,38 +10,9 @@ AgcgggtggtgatggcaaagtgccaaggaaagggaaaaaggaagaagagggtttttatactgatgtgtttCattgtgCCT
>TRDV2 0/7/0 TRDD3 [TRD+]
GAAtcGAtAttGCAAAGAACCtGGCtGtACttAAGatACttGCACCAtCAGaGaGaGAtGAAGgGtCttACtACtGtGCCtGtGACACCCCCCtGTACtGGGGGAtACGCACAGtGCtACAAAACCtACAGAGACCtGTACAAAAACtGCAGGGGCAAAAGtGCCATttCCCtGGGAtAtCCtCACCCtGGGTCCCAA
######################################
# VDDJ non detecte
>TRDV1 2/7/2 TRDD2 0/3/0 TRDD3 3/6/0 TRDJ1 [TRD+] TODO
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAATCTCGATTTCCTACCGTACTGGGGGATCTCGGGACACCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1 2/0/0 TRDD2 0/1/2 TRDD3 0/11/4 TRDJ1 [TRD] TODO
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACCTTCCTACGTGGGGGATACGAGGGTtgGGATCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1 1/0/0 TRDD1 4/5/2 TRDD3 1/5/0 TRDJ1 [TRD] TODO
nnnCAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACGAAAAGGAATGGGGGATACTTCCTACACCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAACnn
>TRDV1 0/0/1 TRDD1 3/9/2 TRDD3 1/6/10 TRDJ1 [TRD] TODO
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACTAAATTTCGTGATTTGGGGGATACCTTGGGACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1 10/8/1 TRDD1 3/3/2 TRDD2 1/21/27 TRDJ1 [TRD] TODO
nnCAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCCTCCCGCAAAATTCATTCCTACTGCCGGGATGCACTTTTCCTTGAACCCGTGTGACTGTGGAACANNNNNN
>TRDV1 0/1/1 TRDD1 2/0/0 TRDD2 5/15/2 TRDD3 6/0/4 TRDJ1 [TRD] TODO
ACAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACTTAAATACCTTAGGGATGGTCGAGGATGGGGCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1 4/4/0 TRDD2 2/0/1 TRDD3 1/5/0 TRDJ1 [TRD] TODO
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGCaCCCCTTCCTCTGGGGGATACAGGGTACACCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1 1/0/2 TRDD1 3/8/1 TRDD2 3/12/0 TRDD3 2/0/9 TRDJ1 [TRD] TODO
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAACAATCACCTCTTCTTCCCCACGAAAACATACTGGGGGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
>TRDV1 2/4/4 TRDD2 0/5/0 TRDD3 5/9/0 TRDJ1 [TRD] TODO
CAAAAAGTGGTCGCTATTCTGTCAACTTCAAGAAAGCAGCGAAATCCGTCGCCTTAACCATTTCAGCCTTACAGCTAGAAGATTCAGCAAAGTACTTTTGTGCTCTTGGGGAAATGGCCTACAGGAACTGGGGGTCCCTTTGTACACCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
############################
# double DD non detecte
# DDJ
>TRDD2*01 0/4/3 TRDD3 0/2/3 TRDJ1*01 [TRD+] TODO
AgcgggtggtgatggcaaagtgccaaggaaagggaaaaaggaagaagagggtttttatactgatgTGTTTCATTGTGCCTTCCTACGTGAGGGGGATACGCCCCGATAAACTCATCTTTGGAAAAGGAACCCGTGTGACTGTGGAAC
......
>IGHV3-11*01 1/CCCCCGAGAGCCGAGGGGGGGAGAGATCCCGGGGGG/6 IGHD6-6*01 1/TCCAGCTCGTCCGGGGG/12 IGHJ4*02 [IGH]
GGGCTGGAGTGGGTTTCATACATTAGTAGTAGTGGTAGTACCATATACTACGCAGACTCTGTGAAGGGCCGATTCACCATCTCCAGGGACAACGCCAAGAACTCACTGTATCTGCAAATGAACAGCCTGAGAGCCGAGGACACGGCCGTGTATTACTGTGCGAGAGCCCCCGAGAGCCGAGGGGGGGAGAGATCCCGGGGGGAGCAGCTCGTCTCCAGCTCGTCCGGGGGACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCAGGT
>IGHV3-9*01 11/A/5 IGHD2-2*02 1/GGAATACGATATTTTGACTGGT/8 IGHJ6*03 [IGH]
GGGGGAGGCTTGGTACAGCCTGGCAGGTCCCTGAGACTCTCCTGTGCAGCCTCTGGATTCACCTTTGATGATTATGCCATGCACTGGGTCCGGCAAGCTCCAGGGAAGGGCCTGGAGTGGGTCTCAGGTATTAGTTGGAATAGTGGTAGCATAGGCTATGCGGACTCTGTGAAGGGCCGATTCACCATCTCCAGAGACAACGCCAAGAACTCCCTGTATCTGCAAATGAACAGTCTGAGAGCTGAGGACACGGCCTTGTATTACTGAATTGTAGTAGTACCAGCTGCTATACGGAATACGATATTTTGACTGGTTACTACTACTACATGGACGTCTGGGGCAAAGGGACCCTGGTCACCGTCTCCTCAGGT
>IGHV3-9*01 11/A/5 IGHD2-2*02 1/GGAA/4 IGHD3-9*01 7//10 IGHJ6*03 [IGH]
GGGGGAGGCTTGGTACAGCCTGGCAGGTCCCTGAGACTCTCCTGTGCAGCCTCTGGATTCACCTTTGATGATTATGCCATGCACTGGGTCCGGCAAGCTCCAGGGAAGGGCCTGGAGTGGGTCTCAGGTATTAGTTGGAATAGTGGTAGCATAGGCTATGCGGACTCTGTGAAGGGCCGATTCACCATCTCCAGAGACAACGCCAAGAACTCCCTGTATCTGCAAATGAACAGTCTGAGAGCTGAGGACACGGCCTTGTATTACTG
aATTGTAGTAGTACCAGCTGCTATAC
ggaaTACGATATTTTGACTGGTTA
CTACTACTACATGGACGTCTGGGGCAAAGGGACCCTGGTCACCGTCTCCTCAGGT
>TRGV2*01 1/C/0 TRGJ1*02 [TRG]
GGAAGGCCCCACAGCGTCTTCAGTACTATGACTCCTACAACTCCAAGGTTGTGTTGGAATCAGGAGTCAGTCCAGGGAAGTATTATACTTACGCAAGCACAAGGAACAACTTGAGATTGATACTGCGAAATCTAATTGAAAATGACTCTGGGGTCTATTACTGTGCCACCTGGGACGGCTTATTATAAGAAACTCTTTGGCAGTGGAACAACAC
......
### Copied from data/segment_simul.fa and algo/tests/segment_simul.should_get
# See algo/tests/segment_simul.should_get for a detailed description
# With only one D, this is:
# IGHV5-10-1*01 2/CTTC/3 IGHD1-14*01 1/GTTA/5 IGHJ5*01 [IGH]
>IGHV5-10-1*01 2/CTTC/3 IGHD1-14*01 1/GTTA/5 IGHJ5*01 [IGH]
>IGHV5-10-1*01 2/CTTC/3 IGHD1-14*01 1//23 IGHD3-16*01 8//7 IGHJ5*01 [IGH]
aagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcga
CTTC
ataaccggaacca
GTTA
tggttcgactcctggggccaaggaaccctggtcaccgtctcctcag
TG
gttcgactcctggggccaaggaaccctggtcaccgtctcctcag
......@@ -28,7 +28,7 @@ from collections import defaultdict
import os
import argparse
VIDJIL_FINE = '{directory}/vidjil -X 100 -# "#" -c segment -i -g {directory}/germline %s >> %s'
VIDJIL_FINE = '{directory}/vidjil -X 100 -# "#" -c segment -i -d -g {directory}/germline %s >> %s'
VIDJIL_KMER = '{directory}/vidjil -# "#" -b out -c windows -uU -2 -i -g {directory}/germline %s > /dev/null ; cat out/out.segmented.vdj.fa out/out.unsegmented.vdj.fa >> %s'
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
......
......@@ -60,7 +60,7 @@ void testFineSegment()
TAP_TEST(s.isSegmented(), TEST_SEGMENT_POSITION, "is segmented (VJ)") ;
//segmentation D
s.FineSegmentD(germline);
s.FineSegmentD(germline, false);
TAP_TEST(s.isSegmented(), TEST_SEGMENT_POSITION, "is segmented (VDJ)") ;
......@@ -70,7 +70,7 @@ void testFineSegment()
TAP_TEST(s2.isSegmented(), TEST_SEGMENT_POSITION, "is segmented (VJ)") ;
//segmentation D
s2.FineSegmentD(germline);
s2.FineSegmentD(germline, false);
TAP_TEST(s2.isSegmented(), TEST_SEGMENT_POSITION, "is segmented (VDJ)") ;
TAP_TEST(s.getLeft() == s2.getLeft(), TEST_SEGMENT_REVCOMP, " left segmentation position");
......
......@@ -167,6 +167,7 @@ void usage(char *progname, bool advanced)
<< endl
<< "Locus/recombinations" << endl
<< " -d try to detect several D (experimental)" << endl
<< " -i try to detect incomplete/unusual recombinations (locus with '+', must be used with -g)" << endl
<< " -2 try to detect unexpected recombinations (must be used with -g)" << endl
<< endl ;
......@@ -356,6 +357,8 @@ int main (int argc, char **argv)
bool output_affects = false;
bool keep_unsegmented_as_clone = false;
bool several_D = false;
bool multi_germline = false;
bool multi_germline_incomplete = false;
bool multi_germline_mark = false;
......@@ -384,7 +387,7 @@ int main (int argc, char **argv)
//$$ options: getopt
while ((c = getopt(argc, argv, "A!x:X:hHaiI124g:G:V:D:J:k:r:vw:e:C:f:W:l:Fc:m:N:s:b:Sn:o:L%:y:z:uUK3E:t:#:")) != EOF)
while ((c = getopt(argc, argv, "A!x:X:hHadiI124g:G:V:D:J:k:r:vw:e:C:f:W:l:Fc:m:N:s:b:Sn:o:L%:y:z:uUK3E:t:#:")) != EOF)
switch (c)
{
......@@ -460,6 +463,10 @@ int main (int argc, char **argv)
germline_system = "multi" ;
break;
case 'd':
several_D = true;
break;
case 'i':
multi_germline_incomplete = true;
break;
......@@ -1355,7 +1362,7 @@ int main (int argc, char **argv)
FineSegmenter seg(representative, segmented_germline, segment_cost);
if (segmented_germline->seg_method == SEG_METHOD_543)
seg.FineSegmentD(segmented_germline);
seg.FineSegmentD(segmented_germline, several_D);
if (detect_CDR3)
seg.findCDR3();
......@@ -1571,7 +1578,7 @@ int main (int argc, char **argv)
if (s.isSegmented())
{
if (germline->seg_method == SEG_METHOD_543)
s.FineSegmentD(germline);
s.FineSegmentD(germline, several_D);
if (detect_CDR3)
s.findCDR3();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment