Commit 05caa0a1 authored by Mikaël Salson's avatar Mikaël Salson
Browse files

Merge branch 'feature-a/4263-fix' into 'dev'

Fix algo with N in window

Closes #4263

See merge request !672
parents ae403fb3 18208826
Pipeline #140648 failed with stages
in 9 minutes and 24 seconds
......@@ -122,6 +122,7 @@ void KmerRepresentativeComputer::compute(VirtualReadScore &readScorer, bool try_
size_t length_longest_run = 0;
size_t seq_index_longest_run = 1;
size_t length_longest_cover = 0;
size_t pos_required_longest = 0;
Sequence sequence_longest_run;
bool *cover_longest_run = NULL;
int sequence_used_for_quality = 0;
......@@ -203,6 +204,7 @@ void KmerRepresentativeComputer::compute(VirtualReadScore &readScorer, bool try_
sequence_longest_run = sequence;
length_longest_cover = length_cover;
seq_index_longest_run = seq;
pos_required_longest = pos_required;
if (cover_longest_run)
delete [] cover_longest_run;
cover_longest_run = new bool[sequence.sequence.size()];
......@@ -239,7 +241,8 @@ void KmerRepresentativeComputer::compute(VirtualReadScore &readScorer, bool try_
}
// Update length_longest_run with its actual value
length_longest_run = last_pos_covered - pos_longest_run + 1;
trimSequence(representative.sequence, pos_longest_run, length_longest_run);
trimSequence(representative.sequence, pos_longest_run, length_longest_run,
pos_required_longest, required.length());
}
delete [] cover_longest_run;
representative.sequence = representative.sequence.substr(pos_longest_run, length_longest_run);
......
......@@ -372,7 +372,8 @@ double nChoosek(unsigned n, unsigned k)
return nChoosek_stored[n][k];
}
void trimSequence(string &sequence, size_t &start_pos, size_t &length) {
void trimSequence(string &sequence, size_t &start_pos, size_t &length,
size_t required_start, size_t required_length) {
float prefix_score = 0;
float suffix_score = 0;
size_t start_bad_suffix = 0;
......@@ -425,8 +426,8 @@ void trimSequence(string &sequence, size_t &start_pos, size_t &length) {
}
}
start_pos = max_start_factor;
length = max_factor_length;
start_pos = min(required_start, max_start_factor);
length = max(required_length, max_factor_length);
}
......
......@@ -259,9 +259,13 @@ double nChoosek(unsigned n, unsigned k);
* More precisely, the purpose of the function is to find the longest
* substring whose prefixes and suffixes all have a ratio of N that
* is less than or equal to RATIO_TOO_MANY_N
*
* The parameters required_start and required_end give the positions
* of the required sequence that must not be cut out.
* @post start_pos <= required_start && length >= required_length
*/
void trimSequence(string &sequence, size_t &start_pos, size_t &length);
void trimSequence(string &sequence, size_t &start_pos, size_t &length,
size_t required_start=string::npos, size_t required_length=0);
const Sequence NULL_SEQUENCE = create_sequence("", "", "NULL", "");
......
@M03254:27:000000000-J5BG3:1:2104:17375:5379 2:N:0:CGCTCATT+NGCTCTGA
TTGATATGGTCCGNNNNGGATTGTCAGGCGTGGGACAAGTCCACTGCGGTGTTCGGCGGAGGGACCAGATNGACCGTCCTAGGTCAGCCCAAGGCTGCCCCCNCNNNNNCTCNGTTNCCNNCCNNNTNGANNNNN
+
@6<C@GGGGF-FG####==CC<FFFAFFGEF@CDFCFGFD6ECFEGGGGCF@7CFDFC@FGDGEGGFFFF#9:D,=F4FFF?8FFFGGA@DFA@=<FGFE+C#:#####::=#:+B#:B##:B###3#:3#####
@M03254:27:000000000-J5BG3:1:2104:18354:18365 2:N:0:CGCTCATT+NGCTCTGA
CACAATTTTCAAGNNNNGGATTGTCAGGCGTGGGACAAGTCCACTGCGGTGTTCGGCGGAGGGACCAGATNGACCGTCCTAGGTCAGCCCAAGGCTGCCCCCNCNNNNNCTCNGTTNCCNNCCNNNCNCGNNNNN
+
CCCCCCFEGGGAF####=:CCCEGFFGGGGGGGGGGGFGFEFFFC<FFECDFDFCFEFE:F>>FFC<FFE#:C<DGGGGGFGGGGGGGGGGGCFGGGGGGGG#:#####:4B#::A#:B##8:###+#::#####
@M03254:27:000000000-J5BG3:1:2105:22752:5154 2:N:0:CGCTCATT+GGCTCTGA
TTGATATGGTTCGTNNNGGATTGTCAGGCGTGGGACAAGTCCACTGCGGTGTTCGGCGGAGGGACCAGATTNACCGTCCTAGGTCAGACCAAGGCTGCCCCCTCGGTCACTCTGTNCCCNCCCNCCTNGGTGATC
+
CCCCCGFGGGGGG@###::CCFFGGGGGGGGEGGGGFFFGAFGGGFFGGGGGGGGGGGCFECFFF6CFFEG#:CFGGGGFE?E?FBEE?FGGGGGGGGFGFFGGGGGGGGGGGGG#8BD#8AB#:A3#38@+8@=
@M03254:27:000000000-J5BG3:1:2105:12704:9337 2:N:0:CGCTCATT+GGCTCTGA
TTGATATGGTTCGTNNNGGATTGTCAGGCGTGGGACAAGTCCACTGCGGTGTTCGGCGGAGGGACCAGATTNACCGTCCTAGGTCAGCCCAAGGCTGCCCCCTCGGTCACTCTGTNCCCNCCCNCCTNTCGAGAG
+
CCCCCGGGGGGGGG###==CDGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEG,:EDFG#:CFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGG#:DF#:=F#:A<#+8DFGFG
@M03254:27:000000000-J5BG3:1:2107:10217:5054 2:N:0:CGCTCATT+NGCTCTGA
TTGTTCTGGTTTGTNNNGGATTGTCAGGCGTGGGACAAGTCCACTGCGGTGTTCGGCGGAGGGACCAGATTGACCGTCCTAGGTCAGCCCANNNNTGCCCNNNNNGNNNNTNTNTNNNNNNNNNNNTNGNNNNNN
+
-ACCCGGGGGGGGG###==DFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFGGGGGGG####4:BFG#####:####:#:#:###########:#:######
@M03254:27:000000000-J5BG3:1:2107:19004:10013 2:N:0:CGCTCATT+NGCTCTGA
ATGTTCTGGTTTGTNNNGGATTGTCAGGCGTGGGACAAGTCCACTGCGGTGTTCGGCGGAGGGACCAGATTGACCGTCCTAGGTCAGCCCANNNNTGCCCNNNNNGNNNNTNTNTNNNNNNNNNNNTNANNNNNN
+
CCCCCGGGGGGGGG###==CFGGGFGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGG####9:9FG#####+####8#8#:###########:#:######
@M03254:27:000000000-J5BG3:1:2107:16334:14197 2:N:0:CGCTCATT+NGCTCTGA
ACACGTTCACGTGTNNNGGATTGTCAGGCGTGGGACAAGTCCACTGCGGTGTTCGGCGGAGGGACCAGATTGACCGTCCTAGGTCAGCCCANNNNTGCCCNNNNNGNNNNTNTNTNNNNNNNNNNNTNGNNNNNN
+
A-@BCFGGFFGE6E###==C-CEFCEGGGGEEEGFFGFGGFCF@EF9CEEGGGGGDGC>C@FGCEECCFEFGGDGGGGGGGDFGGFC@FE@####::449#####:####:#:#:###########+#:######
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 1 -g $VIDJIL_DIR/germline/homo-sapiens.g ./bug4263.fastq
$ All the reads are segmented
1: junction detected in 7 reads
$ We have a single clone
1: Clone #001 .* 7 reads
$ We succeed to fine segment it
1: IGLV3.*IGLJ3
......@@ -3,6 +3,7 @@
#include "tests.h"
#include <stdexcept>
#include <vector>
#include <tuple>
void testOnlineBioReader1() {
OnlineBioReader *fa = OnlineBioReaderFactory::create("data/test1.fa");
......@@ -470,6 +471,28 @@ void testTrimSequence() {
"got " << trimmed << " instead of " << seq_pair.second << " (original sequence: " << seq_pair.first
<< ")");
}
// Test the last parameters
// 0 0 1 2 2
// 0 9 2 3 6
string representative = "TTTTTTTTTNNNNCCCCCCCCCCNNNNAAAAAAAAA";
list <std::tuple<size_t, size_t, string> > required_params =
{ std::make_tuple(13, 10, "CCCCCCCCCC"),
std::make_tuple(13, 11, "CCCCCCCCCCN"),
std::make_tuple(12, 10, "NCCCCCCCCC"),
std::make_tuple(12, 11, "NCCCCCCCCCC"),
std::make_tuple(12, 12, "NCCCCCCCCCCN"),
std::make_tuple(11, 14, "NNCCCCCCCCCCNN")};
for (auto ex: required_params) {
start = 0;
length = representative.length();
trimSequence(representative, start, length, std::get<0>(ex), std::get<1>(ex));
trimmed = representative.substr(start, length);
TAP_TEST_EQUAL(trimmed, std::get<2>(ex), TEST_TRIM_SEQUENCE, " required_start = " << std::get<0>(ex) << ", required_length = " << std::get<1>(ex));
}
}
/*
......
......@@ -46,7 +46,7 @@ if (test_results[id] <= TAP_MAX_FAILED) { \
#define TAP_TEST_APPROX(test, expected, approx, id, msg) { test_nb_executions[id]++; \
if (test_results[id] <= TAP_MAX_FAILED) { \
if ((abs((test) - expected)) > approx) { \
if ((fabs((test) - expected)) > approx) { \
test_results[id]++; \
cerr << "Test " << #test << " failed (" << __FILE__ << ":" << __LINE__ << "): " \
<< " expected " << expected << " +/- " << approx << ", got " << (test) \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment