Commit 83ff2886 authored by Mikaël Salson, committed by Mathieu Giraud

kmerstore.h: Inserting with a seed.

Allow a seed to be provided when inserting into a
KmerStore. This is particularly useful for an index that uses
different seeds depending on the sequences.
parent daffb14f
......@@ -59,16 +59,18 @@ public:
/**
* @param input: A single FASTA file
* @param label: label that must be associated to the given files
* @param seed: the seed to use for indexing. If empty (the default), the seed of the index is used
* @post All the sequences in the FASTA files have been indexed, and the label is stored in the list of labels
*/
void insert(Fasta& input, const string& label="", int keep_only = 0);
void insert(Fasta& input, const string& label="", int keep_only = 0, string seed = "");
/**
* @param input: A list of FASTA files
* @param label: label that must be associated to the given files
* @param seed: the seed to use for indexing. If empty (the default), the seed of the index is used
* @post All the sequences in the FASTA files have been indexed, and the label is stored in the list of labels
*/
void insert(list<Fasta>& input, const string& label="", int keep_only = 0);
void insert(list<Fasta>& input, const string& label="", int keep_only = 0, string seed = "");
/**
* @param input: A sequence to be cut in k-mers
......@@ -77,11 +79,13 @@ public:
* of the sequence. if < 0 will keep at most the first
* keep_only nucleotides of the sequence. if == 0,
* will keep all the sequence.
* @param seed: the seed to use for indexing. If empty (the default), the seed of the index is used
* @post All the k-mers in the sequence have been indexed.
*/
void insert(const seqtype &sequence,
const string &label,
bool ignore_extended_nucleotides=true, int keep_only = 0);
bool ignore_extended_nucleotides=true, int keep_only = 0,
string seed="");
/**
* @param word: a k-mer
......@@ -211,18 +215,20 @@ IKmerStore<T>::~IKmerStore(){}
template<class T>
void IKmerStore<T>::insert(list<Fasta>& input,
const string &label,
int keep_only){
int keep_only,
string seed){
for(list<Fasta>::iterator it = input.begin() ; it != input.end() ; it++){
insert(*it, label, keep_only);
insert(*it, label, keep_only, seed);
}
}
template<class T>
void IKmerStore<T>::insert(Fasta& input,
const string &label,
int keep_only){
int keep_only,
string seed){
for (int r = 0; r < input.size(); r++) {
insert(input.sequence(r), label, true, keep_only);
insert(input.sequence(r), label, true, keep_only, seed);
}
labels.push_back(make_pair(T(label, 1), input)) ;
......@@ -236,7 +242,8 @@ template<class T>
void IKmerStore<T>::insert(const seqtype &sequence,
const string &label,
bool ignore_extended_nucleotides,
int keep_only){
int keep_only,
string seed){
size_t start_indexing = 0;
size_t end_indexing = sequence.length();
if (keep_only > 0 && sequence.length() > (size_t)keep_only) {
......@@ -245,13 +252,17 @@ void IKmerStore<T>::insert(const seqtype &sequence,
end_indexing = -keep_only;
}
if (seed.empty())
seed = this->seed;
size_t seed_span = seed.length();
size_t size_indexing = end_indexing - start_indexing;
if (size_indexing > max_size_indexing) {
max_size_indexing = size_indexing;
}
for(size_t i = start_indexing ; i + s < end_indexing + 1 ; i++) {
seqtype substr = sequence.substr(i, s);
for(size_t i = start_indexing ; i + seed_span < end_indexing + 1 ; i++) {
seqtype substr = sequence.substr(i, seed_span);
seqtype kmer = spaced(substr, seed);
if (ignore_extended_nucleotides && has_extended_nucleotides(kmer))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment