Commit d6049b38 authored by Mathieu Giraud

Merge branch 'feature-a/3298-filter-fine-by-default' into 'dev'

Feature a/3298 filter fine by default

Closes #3298, #3311, and #3210

See merge request !237
parents 72c6ab69 ee0bb421
Pipeline #32741 failed with stages
in 6 seconds
......@@ -70,11 +70,11 @@ BioReader FilterWithACAutomaton::filterBioReaderWithACAutomaton(
#ifdef DEBUG_FILTER /* Display the number of k-mers found for each genes. */
int currentAsciiNumber;
string previousLabel = "", currentLabel;
currentAsciiNumber = SPECIFIC_KMERS_NUMBER;
previousLabel = extractGeneName(originalBioReader.label(0));
for(int i = 1;i < originalBioReader.size(); ++i){
for(auto const mx: mapAho){
for(auto const mx: mapAho){
string previousLabel = "", currentLabel;
currentAsciiNumber = SPECIFIC_KMERS_NUMBER;
previousLabel = extractGeneName(originalBioReader.label(0));
for(int i = 1;i < originalBioReader.size(); ++i){
currentLabel = extractGeneName(originalBioReader.label(i));
if(currentLabel != previousLabel){
currentAsciiNumber++;
......
>lclKFLN1FA002R16XT.1
gcctggagtggattgggtacatctattacagtgggagcacctactacaacccgtccctcaagagtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaggggtggggtattgtagtggtggtagctgctaccctgatgcttttgatatctggggccaagggacaatggtcaccgtctcctcag
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c segment -g $VIDJIL_DIR/germline/homo-sapiens.g $VIDJIL_DATA/3344-bad-filtering.fa
$ Check that proper filtering is used
1: IGHV4-31.02
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -Z 10 -A -x 30 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -Z 10 -A -x 30 -v -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Clone 13 is correctly analyzed
1:FLN1FA001EP9M2.* IGHV2-26.* 2/GAT.*GCC/8 IGHJ2
$ Statistics on -Z
# TODO, test on -Z statistics #3259
1:Statistics on clone analysis
rb1: IGH 1579/ 10470 15..%
......@@ -2,7 +2,8 @@
# Testing detailed clone output (-a)
$ Detailed clone output (out/seq/clone.fa-2), germline
1:>IGHV1-8.01
# IGHV1-8*01 could also be detected
1:>IGHV4-59.09
1:>IGHD3-22.01
1:>IGHJ6.03
......
......@@ -22,7 +22,8 @@ $ No quality information here
0: "quality"
$ Segmentation
1:"name": "IGHV3-11.03 6/ACCCGGGAGGAACAATAT/9 IGHD6-13.01 0//5 IGHJ4.02"
# IGHV3-11*03 may also be detected
1:"name": "IGHV3-23.05 6/ACCCGGGAGGAACAATAT/9 IGHD6-13.01 0//5 IGHJ4.02"
# Cys-T-=R=-E-=E=-Q-=Y=-S-=S=-W-=Y=-F-=D=-F-Trp
# 1 2 3 4 5 | *| 6 7 * 8 ** 9 | |10 11 12
......@@ -38,7 +39,8 @@ $ Segmentation
# JUNCTION: 53 -> 97
$ Segmentation details - V
1:"5": ."delRight": 6, "name": "IGHV3-11.03", "stop": 54.
# IGHV3-11*03 may also be detected
1:"5": ."delRight": 6, "name": "IGHV3-23.05", "stop": 54.
$ Segmentation details - D
1:"4": ."delLeft": 9, "delRight": 0, "name": "IGHD6-13.01", "start": 73, "stop": 84.
......
>TRGV1*01 0//0 TRGJ1*01 [TRG]
>TRGV1*01, TRGV5*01 0//0 TRGJ1*01 [TRG]
NNNNNNNNNNNN
tactgtgccacctgggacagg
gaattattataagaaactctt
......
>IGHV1-18*01 0//0 IGHD1-1*01 0//0 IGHJ1*01 [IGH]
>IGHV1-18*01, IGHV7-4-1*02 0//0 IGHD1-1*01 0//0 IGHJ1*01 [IGH]
actgtgcgagaga
ggtacaactggaacgac
gctgaatacttcc
......
......@@ -607,6 +607,25 @@ void testTransferBioReaderSequences(){
delete f;
}
/* If the sequence used in Filter class doesn't match any of the sequences
stored in the original BioReader, we expect to get the original BioReader. */
void testOriginalBioReaderIsReturned(){
BioReader testedBioReader1, result;
FilterWithACAutomaton *f;
seqtype seq[3];
testedBioReader1 = getDebugBioReader1();
f = new FilterWithACAutomaton(testedBioReader1, "####");
seq[0] = "CCCCCCCCCCCCCCCCCCC";
seq[1] = "AGGGAGGGAGGGAGGGAGGGT";
seq[2] = "GCGCGCGCGCGCGCGCGCGCGC";
for(int i = 0; i < 3; ++i){
result = f->filterBioReaderWithACAutomaton(seq[i]);
TAP_TEST_EQUAL(result.size(), testedBioReader1.size(),
TEST_FILTER_BIOREADER_WITH_AC_AUTOMATON, "Returned BioReader should be the orignal one.");
}
delete f;
}
void testFilter(){
testAutomatonBuilderFilteringBioReader();
testFilterBioReaderWithACAutomaton();
......@@ -614,4 +633,5 @@ void testFilter(){
testGetNSignicativeKmers();
testExAequoKmersWhenSignificantParameter();
testTransferBioReaderSequences();
testOriginalBioReaderIsReturned();
}
......@@ -110,7 +110,7 @@ enum { CMD_WINDOWS, CMD_CLONES, CMD_SEGMENT, CMD_GERMLINES } ;
#define DEFAULT_MAX_AUDITIONED 2000
#define DEFAULT_RATIO_REPRESENTATIVE 0.5
#define DEFAULT_KMER_THRESHOLD NO_LIMIT_VALUE
#define DEFAULT_KMER_THRESHOLD 3
#define DEFAULT_EPSILON 0
#define DEFAULT_MINPTS 10
......@@ -470,7 +470,7 @@ int main (int argc, char **argv)
int kmer_threshold = DEFAULT_KMER_THRESHOLD;
app.add_option("-Z", kmer_threshold,
"typical number of V genes, selected by k-mer comparison, to compare to the read ('" NO_LIMIT "': all genes, default)", false)
"typical number of V genes, selected by k-mer comparison, to compare to the read ('" NO_LIMIT "': all genes)", true)
-> group(group) -> transform(string_NO_LIMIT) -> level();
bool detect_CDR3 = false;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment