Commit 50a06631 authored by Mikaël Salson's avatar Mikaël Salson

index: Length of the affectation

The length of the affectations must be the seed span and not the seed weight
as the dashes are replaced by actual letters.

Unit test added.
parent cd66c85f
......@@ -211,7 +211,6 @@ void PointerACAutomaton<Info>::insert(const seqtype &sequence, const string &lab
if (seed.empty())
seed = this->seed;
size_t seed_span = seed.length();
size_t seed_w = seed_weight(seed);
for(size_t i = start_indexing ; i + seed_span < end_indexing + 1 ; i++) {
seqtype substr = sequence.substr(i, seed_span);
......@@ -226,11 +225,11 @@ void PointerACAutomaton<Info>::insert(const seqtype &sequence, const string &lab
}
for (seqtype &seq: sequences) {
insert(seq, Info(label, 1, seed_w));
insert(seq, Info(label, 1, seed_span));
}
if (! Info::hasRevcompSymetry()) {
for (seqtype &seq: sequences_rev) {
insert(seq, Info(label, -1, seed_w));
insert(seq, Info(label, -1, seed_span));
}
}
}
......
......@@ -214,13 +214,14 @@ ostream &operator<<(ostream &out, const Germline &germline)
<< setw(3) << germline.delta_min
<< " ";
size_t seed_span = germline.seed.size();
size_t seed_w = seed_weight(germline.seed);
if (germline.index) {
out << " 0x" << hex << setw(2) << setfill('0') << germline.index->id << dec << setfill(' ') << " " ;
out << fixed << setprecision(3) << setw(8)
<< 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, seed_w)) << "%" << " "
<< 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_3, 1, seed_w)) << "%";
<< 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, seed_span)) << "%" << " "
<< 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_3, 1, seed_span)) << "%";
out << " l" << germline.seed.length() << " k" << seed_w << " " << germline.seed ;
}
......
......@@ -279,10 +279,10 @@ void IKmerStore<T>::insert(Fasta& input,
insert(input.sequence(r), label, true, keep_only, seed);
}
labels.push_back(make_pair(T(label, 1, seed_weight(seed)), input)) ;
labels.push_back(make_pair(T(label, 1, seed.size()), input)) ;
if (revcomp_indexed && ! T::hasRevcompSymetry()) {
labels.push_back(make_pair(T(label, -1, seed_weight(seed)), input)) ;
labels.push_back(make_pair(T(label, -1, seed.size()), input)) ;
}
}
......@@ -304,7 +304,6 @@ void IKmerStore<T>::insert(const seqtype &sequence,
seed = this->seed;
size_t seed_span = seed.length();
size_t seed_w = seed_weight(seed);
size_t size_indexing = end_indexing - start_indexing;
if (size_indexing > max_size_indexing) {
max_size_indexing = size_indexing;
......@@ -329,13 +328,13 @@ void IKmerStore<T>::insert(const seqtype &sequence,
if (this_kmer.isNull()) {
nb_kmers_inserted++;
}
this_kmer += T(label, strand, seed_w);
this_kmer += T(label, strand, seed.size());
if (revcomp_indexed && ! T::hasRevcompSymetry()) {
seqtype rc_kmer = spaced(revcomp(substr), seed);
T &this_rc_kmer = this->get(rc_kmer);
if (this_rc_kmer.isNull())
nb_kmers_inserted++;
this_rc_kmer += T(label, -1, seed_w);
this_rc_kmer += T(label, -1, seed.size());
}
}
}
......
......@@ -490,12 +490,12 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold,
return ;
} else if (nb_strand[0] > RATIO_STRAND * nb_strand[1]) {
strand = -1;
before = KmerAffect(germline->affect_3, -1, seed_weight(germline->seed));
after = KmerAffect(germline->affect_5, -1, seed_weight(germline->seed));
before = KmerAffect(germline->affect_3, -1, germline->seed.size());
after = KmerAffect(germline->affect_5, -1, germline->seed.size());
} else if (nb_strand[1] > RATIO_STRAND * nb_strand[0]) {
strand = 1;
before = KmerAffect(germline->affect_5, 1, seed_weight(germline->seed));
after = KmerAffect(germline->affect_3, 1, seed_weight(germline->seed));
before = KmerAffect(germline->affect_5, 1, germline->seed.size());
after = KmerAffect(germline->affect_3, 1, germline->seed.size());
} else {
// Ambiguous information: we have positive and negative strands
// and there is not enough difference to put them apart.
......
......@@ -197,6 +197,36 @@ void testAffectAnalyserMaxes() {
delete index;
}
template<template <class> class T>
void testAffectAnalyserSpaced() {
const string seed = "##-##";
const bool revcomp = false;
T<KmerAffect> *index = createIndex<T<KmerAffect> >(seed, revcomp);
string sequence = "AAAACCCCCGGGGG";
// AA-AC, AA-CC, AA-CC, AC-CC, CC-CC, CC-CG, CC-GG, CG-GG, GG-GG
KmerAffectAnalyser kaa(*index, sequence);
CountKmerAffectAnalyser ckaa(*index, sequence);
KmerAffect affect_AAAC(seq[11], 1, seed.size());
KmerAffect affect_AAAC_bad(seq[11], 1, seed_weight(seed));
KmerAffect affect_CCCC(seq[1], 1, seed.size());
KmerAffect affect_CCCC_bad(seq[1], 1, seed_weight(seed));
KmerAffect affect_GGGG(seq[3], 1, seed.size());
KmerAffect affect_GGGG_bad(seq[3], 1, seed_weight(seed));
TAP_TEST(kaa.toString().find("+V _ _ _+C _ _ _ _+G") != string::npos,
TEST_AA_TO_STRING, "");
TAP_TEST(kaa.count(affect_AAAC) == 1, TEST_AA_COUNT, "Expected 1, got " << kaa.count(affect_AAAC) << " in " << __PRETTY_FUNCTION__);
TAP_TEST(kaa.count(affect_CCCC) == 1, TEST_AA_COUNT, "Expected 1, got " << kaa.count(affect_CCCC) << " in " << __PRETTY_FUNCTION__);
TAP_TEST(kaa.count(affect_GGGG) == 1, TEST_AA_COUNT, "Expected 1, got " << kaa.count(affect_GGGG) << " in " << __PRETTY_FUNCTION__);
TAP_TEST(kaa.count(affect_AAAC_bad) == 0, TEST_AA_COUNT, "Expected 0, got " << kaa.count(affect_AAAC_bad) << " in " << __PRETTY_FUNCTION__);
TAP_TEST(kaa.count(affect_CCCC_bad) == 0, TEST_AA_COUNT, "Expected 0, got " << kaa.count(affect_CCCC_bad) << " in " << __PRETTY_FUNCTION__);
TAP_TEST(kaa.count(affect_GGGG_bad) == 0, TEST_AA_COUNT, "Expected 0, got " << kaa.count(affect_GGGG_bad) << " in " << __PRETTY_FUNCTION__);
}
template<template <class> class T>
void testGetMaximum() {
const int k = 4;
......@@ -371,15 +401,19 @@ void testAffectAnalyser() {
testAffectAnalyser2<MapKmerStore>();
testBugAffectAnalyser<MapKmerStore>();
testGetMaximum<MapKmerStore>();
testAffectAnalyserSpaced<MapKmerStore>();
testAffectAnalyser1<ArrayKmerStore>();
testAffectAnalyser2<ArrayKmerStore>();
testAffectAnalyserMaxes<ArrayKmerStore>();
testBugAffectAnalyser<ArrayKmerStore>();
testGetMaximum<ArrayKmerStore>();
testAffectAnalyserSpaced<ArrayKmerStore>();
testAffectAnalyser1<PointerACAutomaton>();
testAffectAnalyser2<PointerACAutomaton>();
testAffectAnalyserMaxes<PointerACAutomaton>();
testBugAffectAnalyser<PointerACAutomaton>();
testAffectAnalyserSpaced<PointerACAutomaton>();
}
......@@ -86,6 +86,7 @@ enum {
TEST_AA_GET_MAXIMUM_COUNTS,
TEST_AA_GET_MAXIMUM_VALUE,
TEST_AA_GET_SEQUENCE,
TEST_AA_TO_STRING,
TEST_COUNT_AA_GET_OVERLAP,
TEST_COUNT_AA_COUNT,
TEST_COUNT_AA_COUNT_BEFORE,
......@@ -252,6 +253,7 @@ inline void declare_tests() {
RECORD_TAP_TEST(TEST_AA_GET_MAXIMUM_COUNTS, "KmerAffectAnalyser: getMaximum() function, counts of affectations");
RECORD_TAP_TEST(TEST_AA_GET_MAXIMUM_VALUE, "KmerAffectAnalyser: getMaximum() function, maximum value");
RECORD_TAP_TEST(TEST_AA_GET_SEQUENCE, "KmerAffectAnalyser: getSequence() function");
RECORD_TAP_TEST(TEST_AA_TO_STRING, "KmerAffectAnalyser: toString() function");
RECORD_TAP_TEST(TEST_COUNT_AA_GET_OVERLAP, "CountKmerAffectAnalyser::getAllowedOverlap");
RECORD_TAP_TEST(TEST_COUNT_AA_COUNT, "CountKmerAffectAnalyser::count");
RECORD_TAP_TEST(TEST_COUNT_AA_COUNT_BEFORE, "CountKmerAffectAnalyser::countBefore");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment