#ifndef AUTOMATON_HPP #define AUTOMATON_HPP #include "automaton.h" #include //////////////////// IMPLEMENTATIONS //////////////////// template AbstractACAutomaton::AbstractACAutomaton():IKmerStore() {} template void AbstractACAutomaton::finish_building() { if (! IKmerStore::finished_building) { IKmerStore::finish_building(); build_failure_functions(); } } template float AbstractACAutomaton::getIndexLoad(Info kmer) const { float load = 0; if (kmers_inserted.count(kmer) == 0) { for(auto iter: kmers_inserted) { load += getIndexLoad(iter.first); } return (kmer.isUnknown()) ? 1 - load : load; } else { return kmers_inserted.at(kmer) / pow(4.0, kmer.getLength()); } } template bool AbstractACAutomaton::hasDifferentKmerTypes() const { return true; } template bool AbstractACAutomaton::isInitialState(void *state) { return state == initialState; } template void *AbstractACAutomaton::goto_state(const string &seq, void *starting_state) { void *current_state = starting_state; size_t seq_length = seq.length(); if (! current_state) current_state = initialState; for (size_t i = 0; i < seq_length; i++) { current_state = this->next(current_state, seq[i]); } return current_state; } /////////////////////// template PointerACAutomaton::PointerACAutomaton(bool revcomp):AbstractACAutomaton(){ init("##########",revcomp); } template PointerACAutomaton::PointerACAutomaton(string seed, bool revcomp):AbstractACAutomaton() { init(seed, revcomp); } template PointerACAutomaton::PointerACAutomaton(int k, bool revcomp):AbstractACAutomaton() { init(seed_contiguous(k), revcomp); } template void PointerACAutomaton::init(string seed, bool revcomp) { if (revcomp && Info::hasRevcompSymetry()) { cerr << "PointerACAutomaton cannot deal with revcomp symmetry at the moment." << endl; exit(42); } this->initialState = new pointer_state(); this->nb_kmers_inserted = 0; this->seed = seed; this->k = seed_weight(seed); this->s = seed.length(); this->revcomp_indexed = revcomp; this->max_size_indexing = 0; } template PointerACAutomaton::~PointerACAutomaton() { free_automaton(this->getInitialState()); } template void PointerACAutomaton::free_automaton(pointer_state *state) { set deleted_states; stack *> states_stacked; deleted_states.insert(state); states_stacked.push(state); while (! states_stacked.empty()) { pointer_state *current_state = states_stacked.top(); states_stacked.pop(); for (size_t i = 0; i < NB_TRANSITIONS; i++) { if (current_state->transitions[i] != NULL && deleted_states.count(current_state->transitions[i]) == 0 && current_state->transitions[i] != current_state) { deleted_states.insert(current_state->transitions[i]); states_stacked.push(current_state->transitions[i]); } } delete current_state; } } template void PointerACAutomaton::build_failure_functions() { queue*,pointer_state*> > q; pointer_state *current_state = this->getInitialState(); // Algorithm ALP-COMPLET in CHL, 2001 for (size_t i = 0; i < NB_TRANSITIONS; i++) { if (current_state->transitions[i] == NULL) { current_state->transitions[i] = current_state; } else { q.push(pair*,pointer_state*>(current_state->transitions[i], this->getInitialState())); } } while (! q.empty()) { pair*, pointer_state*> couple = q.front(); q.pop(); current_state = couple.first; pointer_state *failed_state = couple.second; if (failed_state->is_final) current_state->is_final = true; for (size_t i = 0; i < NB_TRANSITIONS; i++) { if (current_state->transitions[i] != NULL) { q.push(pair*, pointer_state*>(current_state->transitions[i], failed_state->transitions[i])); } else { current_state->transitions[i] = failed_state->transitions[i]; } } } } template list &PointerACAutomaton::getInfo(void *state) { return ((pointer_state *)state)->informations; } template pointer_state* PointerACAutomaton::getInitialState() { return (pointer_state*) this->initialState; } template pointer_state* PointerACAutomaton::goto_state(const string &seq, void *starting_state) { return (pointer_state*) AbstractACAutomaton::goto_state(seq, starting_state); } template void PointerACAutomaton::insert(const seqtype &seq, Info info) { pointer_state *state = getInitialState(); size_t seq_length = seq.length(); size_t i; for (i = 0; i < seq_length && state->transition(seq[i]) != NULL; i++) { state = state->transition(seq[i]); } if (i < seq_length) { // Need to create more states for (; i < seq_length; i++) { pointer_state *new_state = new pointer_state(); state->transitions[nuc_to_int(seq[i])] = new_state; state = new_state; } } state->is_final = true; assert(info.getLength() <= MAX_KMER_SIZE); if (state->informations.front().isNull()) { this->nb_kmers_inserted++; this->kmers_inserted[info]++; } state->informations.front() += info; } template void PointerACAutomaton::insert(const seqtype &sequence, const string &label, bool ignore_extended_nucleotides, int keep_only, string seed) { size_t start_indexing = 0; size_t end_indexing = sequence.length(); if (keep_only > 0 && sequence.length() > (size_t)keep_only) { start_indexing = sequence.length() - keep_only; } else if (keep_only < 0 && sequence.length() > (size_t) -keep_only) { end_indexing = -keep_only; } size_t size_indexing = end_indexing - start_indexing; if (size_indexing > this->max_size_indexing) { this->max_size_indexing = size_indexing; } if (seed.empty()) seed = this->seed; size_t seed_span = seed.length(); for(size_t i = start_indexing ; i + seed_span < end_indexing + 1 ; i++) { seqtype substr = sequence.substr(i, seed_span); vector sequences = generate_all_seeds(substr, seed); vector sequences_rev; if (ignore_extended_nucleotides && has_extended_nucleotides(substr)) continue; if (this->revcomp_indexed && ! Info::hasRevcompSymetry()) { sequences_rev = generate_all_seeds(revcomp(substr), reverse(seed)); } for (seqtype &seq: sequences) { insert(seq, Info(label, 1, seed_span)); } if (! Info::hasRevcompSymetry()) { for (seqtype &seq: sequences_rev) { insert(seq, Info(label, -1, seed_span)); } } } } template bool PointerACAutomaton::isFinalState(void *state) { return ((pointer_state *)state)->is_final; } template void *PointerACAutomaton::next(void *state, char c) { void *next_state = state; c = toupper(c); void *accessed_state = ((pointer_state *)next_state)->transition(c); assert(accessed_state != NULL); // Did you call finish_building()? return accessed_state; } template vector PointerACAutomaton::getResults(const seqtype &seq, bool no_revcomp, string seed) { UNUSED(no_revcomp); // TODO: what should we do with several info at the same place? // for now they are overwritten pointer_state* current_state = getInitialState(); size_t seq_len = seq.length(); vector result(seq.length()); for (size_t i = 0; i < seq_len; i++) { current_state = (pointer_state *)next(current_state, seq[i]); Info info = current_state->informations.front(); result[i - info.getLength()+1] = info; } return result; } template Info& PointerACAutomaton::get(seqtype &word) { pointer_state *state = (pointer_state *)this->goto_state(word); return state->informations.front(); } template Info &PointerACAutomaton::operator[](seqtype &word) { return get(word); } #endif