Commit 7582cf96 authored by Mathieu Giraud

Merge branch 'feature-a/fix-min-cover-representative' into 'dev'

Feature a/fix min cover representative

See merge request !448
parents ca97c439 903430f8
Pipeline #71641 failed with stages
in 21 minutes and 49 seconds
# seq1, seq2 and seq3 are identical.
# seq4 and seq5 are that same sequence with one inserted nucleotide each (at a different position in each), shown in uppercase.
>seq1
ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc
gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
cag
>seq2
ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc
gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
cag
>seq3
ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc
gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
cag
>seq4
ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
tccagaaacaatgccaaggactcactgtatctgcaaatgAaacagcctgagagccgaggacacggctgtgtattactgtgc
gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
cag
>seq5
ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagAccgaggacacggctgtgtattactgtgc
gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
cag
......@@ -43,8 +43,8 @@ $ Junction of the first clone appears once, but CDR3 twice (it is also included
1:CTREEQYSSWYFDFW
w2:TREEQYSSWYFDF
$ The first clone has three warnings
1:W51 W69 W69
$ The first clone has one warning
1:TATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTG .* W69
$ No spurious character
0:"
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 5 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/representative-few-reads.fa
$ Consensus sequence should span on the whole sequence
1: clone.*99% of 243
......@@ -3,7 +3,7 @@
$ Detailed clone output (out/seq/clone.fa-2), germline
# IGHV1-8*01 could also be detected
1:>IGHV4-59.09
1:>IGHV1-69
1:>IGHD3-22.01
1:>IGHJ6.03
......
......@@ -16,7 +16,7 @@ $ Most abundant window
1:"id": "CCACCTATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTGGGGCC".*"reads": \[8\]
$ Affect values are over all the sequence
1: "affectValues": .[^}]*"start": 1, "stop": 127
1: "affectValues": .[^}]*"start": 1, "stop": 145
$ No quality information here
0: "quality"
......@@ -40,21 +40,21 @@ $ Segmentation
$ Segmentation details - V
# IGHV3-11*03 may also be detected
1:"5": ."delRight": 6, "name": "IGHV3-23.05", "stop": 54.
1:"5": ."delRight": 6, "name": "IGHV3-23.05", "stop": 72.
$ Segmentation details - D
1:"4": ."delLeft": 9, "delRight": 0, "name": "IGHD6-13.01", "start": 73, "stop": 84.
1:"4": ."delLeft": 9, "delRight": 0, "name": "IGHD6-13.01", "start": 91, "stop": 102.
$ Segmentation details - J
1:"3": ."delLeft": 5, "name": "IGHJ4.02", "start": 85.
1:"3": ."delLeft": 5, "name": "IGHJ4.02", "start": 103.
$ Segmentation details - N1, N2
1:"N1": 18,
1:"N2": 0,
$ Segmentation details - CDR3, JUNCTION
1:"cdr3": ."aa": "TREEQYSSWYFDF", "start": 55, "stop": 93.
1:"junction": ."aa": "CTREEQYSSWYFDFW", .* "start": 52, "stop": 96.
1:"cdr3": ."aa": "TREEQYSSWYFDF", "start": 73, "stop": 111.
1:"junction": ."aa": "CTREEQYSSWYFDFW", .* "start": 70, "stop": 114.
$ Second sequence has a DNA sequence provided
1:"id": "TGTGCGAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACTACTAC".*"sequence": "[ACGT]+",
......@@ -65,5 +65,5 @@ $ Second sequence also has evalues
$ All 'start' fields are 1-based, they never equal to zero
0: "start": 0
$ Warning on low coverage
1: "code": "W51", "level": "warn", "msg": "Low coverage: 0.442"
$ Warning on common genes
1: "code": "W69", "level": "warn", "msg": "Several genes with equal[^"]*"
......@@ -26,7 +26,7 @@ $ First clone -- find the good number of reads
2:clone-001--.*--0000008
$ First clone -- find the good representative
1:clone-001--.*--lcl.FLN1FA001CPAUQ.1.-.106,232.-.2
1:clone-001--.*--lcl.FLN1FA001BQ9J5.1.-.88,232.-.4
$ First clone -- find the good coverage
1:clone-001--.* 127 bp .54. of 232.0 bp.
1:clone-001--.* 145 bp .62. of 232.0 bp.
......@@ -112,6 +112,11 @@ enum { CMD_WINDOWS, CMD_CLONES, CMD_SEGMENT, CMD_GERMLINES } ;
#define DEFAULT_MAX_AUDITIONED 2000
#define DEFAULT_RATIO_REPRESENTATIVE 0.5
#define DEFAULT_MIN_COVER_REPRESENTATIVE 3 // At least 3 reads to support a
// representative (consisting of at
// least
// DEFAULT_RATIO_REPRESENTATIVE of
// the clone's reads)
#define DEFAULT_KMER_THRESHOLD 1
......@@ -684,7 +689,7 @@ int main (int argc, char **argv)
cout << "# using default sequence file: " << f_reads << endl ;
}
size_t min_cover_representative = (size_t) (min_reads_clone < (int) max_auditionned ? min_reads_clone : max_auditionned) ;
size_t min_cover_representative = (size_t) min(min_reads_clone, DEFAULT_MIN_COVER_REPRESENTATIVE);
// Check seed buffer
if (seed.size() >= MAX_SEED_SIZE)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment