From 50a06631c2c5bdbd0ccef5e6ee129deb58e69e95 Mon Sep 17 00:00:00 2001 From: Mikael Salson Date: Mon, 20 Jun 2016 16:58:47 +0200 Subject: [PATCH] index: Length of the affectation The length of the affectations must be the seed span and not the seed weight as the dashes are replaced by actual letters. Unit test added. --- algo/core/automaton.hpp | 5 ++- algo/core/germline.cpp | 5 +-- algo/core/kmerstore.h | 9 +++--- algo/core/segment.cpp | 8 ++--- algo/tests/unit-tests/testAffectAnalyser.cpp | 34 ++++++++++++++++++++ algo/tests/unit-tests/tests.h | 2 ++ 6 files changed, 49 insertions(+), 14 deletions(-) diff --git a/algo/core/automaton.hpp b/algo/core/automaton.hpp index 0cee67785..865aaaac4 100644 --- a/algo/core/automaton.hpp +++ b/algo/core/automaton.hpp @@ -211,7 +211,6 @@ void PointerACAutomaton::insert(const seqtype &sequence, const string &lab if (seed.empty()) seed = this->seed; size_t seed_span = seed.length(); - size_t seed_w = seed_weight(seed); for(size_t i = start_indexing ; i + seed_span < end_indexing + 1 ; i++) { seqtype substr = sequence.substr(i, seed_span); @@ -226,11 +225,11 @@ void PointerACAutomaton::insert(const seqtype &sequence, const string &lab } for (seqtype &seq: sequences) { - insert(seq, Info(label, 1, seed_w)); + insert(seq, Info(label, 1, seed_span)); } if (! Info::hasRevcompSymetry()) { for (seqtype &seq: sequences_rev) { - insert(seq, Info(label, -1, seed_w)); + insert(seq, Info(label, -1, seed_span)); } } } diff --git a/algo/core/germline.cpp b/algo/core/germline.cpp index bb973661a..cfc9f1d94 100644 --- a/algo/core/germline.cpp +++ b/algo/core/germline.cpp @@ -214,13 +214,14 @@ ostream &operator<<(ostream &out, const Germline &germline) << setw(3) << germline.delta_min << " "; + size_t seed_span = germline.seed.size(); size_t seed_w = seed_weight(germline.seed); if (germline.index) { out << " 0x" << hex << setw(2) << setfill('0') << germline.index->id << dec << setfill(' ') << " " ; out << fixed << setprecision(3) << setw(8) - << 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, seed_w)) << "%" << " " - << 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_3, 1, seed_w)) << "%"; + << 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, seed_span)) << "%" << " " + << 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_3, 1, seed_span)) << "%"; out << " l" << germline.seed.length() << " k" << seed_w << " " << germline.seed ; } diff --git a/algo/core/kmerstore.h b/algo/core/kmerstore.h index 7eea0358a..f09cbae46 100644 --- a/algo/core/kmerstore.h +++ b/algo/core/kmerstore.h @@ -279,10 +279,10 @@ void IKmerStore::insert(Fasta& input, insert(input.sequence(r), label, true, keep_only, seed); } - labels.push_back(make_pair(T(label, 1, seed_weight(seed)), input)) ; + labels.push_back(make_pair(T(label, 1, seed.size()), input)) ; if (revcomp_indexed && ! T::hasRevcompSymetry()) { - labels.push_back(make_pair(T(label, -1, seed_weight(seed)), input)) ; + labels.push_back(make_pair(T(label, -1, seed.size()), input)) ; } } @@ -304,7 +304,6 @@ void IKmerStore::insert(const seqtype &sequence, seed = this->seed; size_t seed_span = seed.length(); - size_t seed_w = seed_weight(seed); size_t size_indexing = end_indexing - start_indexing; if (size_indexing > max_size_indexing) { max_size_indexing = size_indexing; @@ -329,13 +328,13 @@ void IKmerStore::insert(const seqtype &sequence, if (this_kmer.isNull()) { nb_kmers_inserted++; } - this_kmer += T(label, strand, seed_w); + this_kmer += T(label, strand, seed.size()); if (revcomp_indexed && ! T::hasRevcompSymetry()) { seqtype rc_kmer = spaced(revcomp(substr), seed); T &this_rc_kmer = this->get(rc_kmer); if (this_rc_kmer.isNull()) nb_kmers_inserted++; - this_rc_kmer += T(label, -1, seed_w); + this_rc_kmer += T(label, -1, seed.size()); } } } diff --git a/algo/core/segment.cpp b/algo/core/segment.cpp index caf7ae989..996eaa56e 100644 --- a/algo/core/segment.cpp +++ b/algo/core/segment.cpp @@ -490,12 +490,12 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold, return ; } else if (nb_strand[0] > RATIO_STRAND * nb_strand[1]) { strand = -1; - before = KmerAffect(germline->affect_3, -1, seed_weight(germline->seed)); - after = KmerAffect(germline->affect_5, -1, seed_weight(germline->seed)); + before = KmerAffect(germline->affect_3, -1, germline->seed.size()); + after = KmerAffect(germline->affect_5, -1, germline->seed.size()); } else if (nb_strand[1] > RATIO_STRAND * nb_strand[0]) { strand = 1; - before = KmerAffect(germline->affect_5, 1, seed_weight(germline->seed)); - after = KmerAffect(germline->affect_3, 1, seed_weight(germline->seed)); + before = KmerAffect(germline->affect_5, 1, germline->seed.size()); + after = KmerAffect(germline->affect_3, 1, germline->seed.size()); } else { // Ambiguous information: we have positive and negative strands // and there is not enough difference to put them apart. diff --git a/algo/tests/unit-tests/testAffectAnalyser.cpp b/algo/tests/unit-tests/testAffectAnalyser.cpp index 67d3e5d7f..eac15ccd6 100644 --- a/algo/tests/unit-tests/testAffectAnalyser.cpp +++ b/algo/tests/unit-tests/testAffectAnalyser.cpp @@ -197,6 +197,36 @@ void testAffectAnalyserMaxes() { delete index; } +template