Attention : une mise à jour du service Gitlab va être effectuée le mardi 30 novembre entre 17h30 et 18h00. Cette mise à jour va générer une interruption du service dont nous ne maîtrisons pas complètement la durée, mais qui ne devrait pas excéder quelques minutes. Cette mise à jour intermédiaire en version 14.0.12 nous permettra de mettre rapidement à votre disposition une version plus récente.

Commit 9f488604 authored by Mathieu Giraud
Browse files

Merge branch 'feature-a/3518-long-deletion' into 'dev'

Feature a/3518 long deletion

Closes #3518

See merge request !417
parents 21b1e528 d854f215
Pipeline #65784 failed with stages
in 44 seconds
......@@ -759,7 +759,7 @@ void Segmenter::setSegmentationStatus(int status) {
string check_and_resolve_overlap(string seq, int seq_begin, int seq_end,
AlignBox *box_left, AlignBox *box_right,
Cost segment_cost)
Cost segment_cost, bool reverse_V, bool reverse_J)
{
// Overlap size
int overlap = box_left->end - box_right->start + 1;
......@@ -773,7 +773,7 @@ string check_and_resolve_overlap(string seq, int seq_begin, int seq_end,
int score_l[overlap+1];
//LEFT
DynProg dp_l = DynProg(seq_left, box_left->ref,
DynProg dp_l = DynProg(seq_left, revcomp(box_left->ref, reverse_V),
DynProg::Local, segment_cost);
score_l[0] = dp_l.compute();
......@@ -781,6 +781,7 @@ string check_and_resolve_overlap(string seq, int seq_begin, int seq_end,
//RIGHT
// reverse right sequence
string ref_right=string(box_right->ref.rbegin(), box_right->ref.rend());
ref_right = revcomp(ref_right, reverse_J);
seq_right=string(seq_right.rbegin(), seq_right.rend());
......@@ -801,11 +802,15 @@ string check_and_resolve_overlap(string seq, int seq_begin, int seq_end,
// #define DEBUG_OVERLAP
#ifdef DEBUG_OVERLAP
cout << dp_l ;
cout << dp_r ;
cout << "=== check_and_resolve_overlap" << endl;
cout << seq << endl;
cout << "boxes: " << *box_left << "/" << *box_right << endl ;
// cout << dp_l ;
// cout << dp_r ;
cout << "seq:" << seq_left << "\t\t" << seq_right << endl;
cout << "ref:" << ref_left << "\t\t" << ref_right << endl;
cout << "ref:" << box_left->ref << "\t\t" << ref_right << endl;
for(int i=0; i<=overlap; i++)
cout << i << " left: " << score_l[i] << "/" << trim_l[i] << " right: " << score_r[i] << "/" << trim_r[i] << endl;
#endif
......@@ -839,6 +844,7 @@ string check_and_resolve_overlap(string seq, int seq_begin, int seq_end,
<< " left: " << best_i << "-" << box_left->del_right << " @" << box_left->end
<< " right:" << best_j << "-" << box_right->del_left << " @" << box_right->start
<< endl;
cout << "boxes: " << *box_left << " / " << *box_right << endl ;
#endif
} // end if (overlap > 0)
......@@ -1113,7 +1119,7 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,
//overlap VJ
seg_N = check_and_resolve_overlap(sequence_or_rc, 0, sequence_or_rc.length(),
box_V, box_J, segment_cost);
box_V, box_J, segment_cost, reverse_V, reverse_J);
// Reset extreme positions
box_V->start = 0;
......
......@@ -148,6 +148,7 @@ ostream &operator<<(ostream &out, const AlignBox &box);
* @param seq_begin, seq_end: the positions to consider on 'seq' for the two sequences that may overlap
* @param *box_left, *box_right the two boxes
* @param segment_cost: the cost used by the dynamic programming
* @param reverse_V, reverse_J: whether to reverse-complement the reference sequence of the left (5') box and of the right (3') box, respectively (both default to false)
*
* @post box_left->del_left and box_right->del_right are set to the best number of nucleotides to trim in order to remove the overlap.
* box_left->end and box_right->start are shifted by the appropriate number of nucleotides
......@@ -157,7 +158,8 @@ ostream &operator<<(ostream &out, const AlignBox &box);
string check_and_resolve_overlap(string seq, int seq_begin, int seq_end,
AlignBox *box_left, AlignBox *box_right,
Cost segment_cost);
Cost segment_cost, bool reverse_V = false,
bool reverse_J = false);
class Segmenter {
protected:
......
......@@ -249,7 +249,8 @@ def header_igblast_results(ff_fasta, ff_igblast):
### Vidjil
VIDJIL_FINE = '{directory}/vidjil-algo --header-sep "#" -c segment -3 -d -g {directory}/germline/homo-sapiens.g %s >> %s'
VIDJIL_FINE = '{directory}/vidjil-algo --header-sep "#" -c segment -2 -3 -d -g {directory}/germline/homo-sapiens.g %s >> %s'
VIDJIL_KMER = '{directory}/vidjil-algo -w 20 --header-sep "#" -b out -c windows -uuuU -2 -g {directory}/germline/homo-sapiens.g %s > /dev/null ; cat out/out.segmented.vdj.fa out/out.unsegmented.vdj.fa >> %s'
def should_results_from_vidjil_output(f_log):
......
>IGKV2D-30*01 {6//4, 5//5, 4//6} IGKV3/OR2-268*02 [unexpected]
CTGCAGGTCTAGTCAAAGCCTCGTATACAGTGATGGAAACACCTACTTGAATTGGTTTCAGCAGAGGCCAGGCCAATCTCCAAGGCGCCTAATTTATAAGGTTTCTAACTGGGACTCTGGGGTCCCAGACAGATTCAGCGGCAGTGGGTCAGGCACTGATTTCACACTGAAAATCAGCAGGGTGGAGGCTGAGGATGTTGGGGTTTATTACTGCATGCAAGGTACACACTGGTAAGTTATAATCCTGCTGACAGTAATAAACTGCAAAATCTTCAGGCTGCAGGCTGCTGATGGTGAGAGTGAAGTCTGTCCCAGACCCACTGCCACTGAACCTGGCTGGGATGCCAGTGGCCCTGGTGGATGCACCATAGATGAGGAGCCTGGGAGCCTGCCCAGGTTTCTGCTGGTACCAGGATAAGTAGCTGCTGCTAACACTCTGACTGGCCCTGCAGGAGAGGGTGGCTCTTTCCCCTGGAGACA
>IGKV2-30*01 15//4 IGKV3-20*01 [unexpected]
CAGGTCTAGTCAAAGCCTCGTATACAGTGATGGAAACACCTACTTGAATTGGTTTCAGCAGAGGCCAGGCCAATCTCCAAGGCGCCTAATTTATAAGGTTTCTAACCGGGACTCTGGGGTCCCAGACAGATTCAGCGGCAGTGGGTCAGGCACTGATTTCACACTGAAAATCAGCAGGGTGGAGGCTGAGGATGTTGGGGTTTATTACTGCATGCAAGGGTGAGCTACCATACTGCTGACAGTAATACACTGCAAAATCTTCAGGCTCCAGTCTGCTGATGGTGAGAGTGAAGTCTGTCCCAGACCCACTGCCACTGAACCTGTCTGGGATGCCAGTGGCCCTGCTGGATGCACCATAGATGAGGAGCCTGGGAGCCTGGCCAGGTTTCTGCTGGTACCAGGCTAAGTAGCTGCTGCTAACACTCTGACTGGCC
>IGKV3D-15*01 2//19 IGKV2D-30*01 [unexpected]
TGTCTGTGTCTCCAGGGGAAAGAGCCACCCTCTCCTGCAGGGCCAGTCAGAGTGTTAGCAGCAACTTAGCCTGGTACCAGCAGAAACCTGGCCAGGCTCCCAGGCTCCTCATCTATGGTGCATCCACCAGGGCCACTGGCATCCCAGCCAGGTTCAGTGGCAGTGGGTCTGGGACAGAGTTCACTCTCACCATCAGCAGCCTGCAGTCTGAAGATTTTGCAGTTTATTACTGTCAGCAGTATAATAACTGGCCTGCATGCAGTAATAAACCCCAACATCCTCAGCCTCCACCCTGCTGATTTTCAGTGTGAAATCAGTGCCTGACCCACTGCCGCTGAATCTGTCTGGGACCCCAGAGTCCCAGTTAGAAACCTTATAAATTAGGCGCCTTGGAGATTGGCCTGGCCTCTGCTGAAACCAATTCAAGTAGGTGTTTCCATCACTGTATACGAGGCTTTGACTAGAC
>IGLV1-36*01 0//25 IGLJ1*01
cagtctgtgctgactcagccaccctcggtgtctgaagcccccaggcagagggtcaccatctcctgttctggaagcagctccaacatcggaaataatgctgtaaactggtaccagcagctcccaggaaaggctcccaaactcctcatctattatgatgatctgctgccctcaggggtctctgaccgattctctggctccaagtctggcacctcagcctccctggccatcagtgggctccagtctgaggatgaggctgattattactgtgcagcatgggatgacagcctgaatggtcc
gtcaccgtcctaggagtctgctgtctggggatagcggggagccaggtgtactg
\ No newline at end of file
gtcaccgtcctaggtaagtggctctcaacctttcccagcctgtctcaccctct
\ No newline at end of file
......@@ -334,11 +334,11 @@ if __name__ == '__main__':
print
print "=== Summary, should-vdj tests ===" + (' (only locus)' if args.after_two else '')
print " tested passed bug failed (todo)"
print " tested passed bug failed (todo)"
for locus in sorted(global_stats):
print " %-5s %4d %4d %4d %4d %4s" % (locus, global_stats[locus], global_stats[locus] - global_stats_failed[locus], global_stats_bug[locus], global_stats_failed[locus],
print " %-10s %4d %4d %4d %4d %4s" % (locus, global_stats[locus], global_stats[locus] - global_stats_failed[locus], global_stats_bug[locus], global_stats_failed[locus],
("(%d)" % global_stats_todo[locus] if global_stats_todo[locus] else ''))
print " ===== %4d %4d %4d %4d %4s" % (sum(global_stats.values()), sum(global_stats.values()) - sum(global_stats_failed.values()), sum(global_stats_bug.values()), sum(global_stats_failed.values()),
print " ========== %4d %4d %4d %4d %4s" % (sum(global_stats.values()), sum(global_stats.values()) - sum(global_stats_failed.values()), sum(global_stats_bug.values()), sum(global_stats_failed.values()),
"(%d)" % sum(global_stats_todo.values()))
print
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment