kmerstore.h 17.6 KB
Newer Older
Mikaël Salson's avatar
Mikaël Salson committed
1 2 3 4 5 6 7 8
#ifndef KMERSTORE_H
#define KMERSTORE_H

#include <map>
#include <string>
#include <list>
#include <stdexcept>
#include <stdint.h>
9
#include <math.h>
10
#include "bioreader.hpp"
Mikaël Salson's avatar
Mikaël Salson committed
11 12 13 14
#include "tools.h"

using namespace std;

15 16 17
typedef
enum { KMER_INDEX, AC_AUTOMATON } IndexTypes;

18
#define MAX_PRECOMPUTED_PROBA 500 /* Precompute 500 probabilities for each index load */
Mikaël Salson's avatar
Mikaël Salson committed
19 20 21 22 23
class Kmer {
public:
  unsigned int count;

  Kmer();
24 25 26 27

  /**
   * This constructor is used via a IKmerStore<Kmer> index (hence the argument list)       
   */
28
  Kmer(const string &label, int strand=1, size_t length=0);
29

Mikaël Salson's avatar
Mikaël Salson committed
30
  Kmer &operator+=(const Kmer &);
31

32 33 34 35 36
  /**
   * @return the length of the kmer (only there for compatibility with KmerAffect)
   */
  size_t getLength() const;

37 38 39 40
  /**
   * When indexing revcomp, should the value be the same or not?
   * In other words, does the information stored by the class is strand-dependent?
   */
Mikaël Salson's avatar
Mikaël Salson committed
41
  static bool hasRevcompSymetry();
42 43 44 45

  /**
   * @return true if the element is the same as when initialised with default constructor.
   */
46
  bool isNull() const;
Cyprien Borée's avatar
Cyprien Borée committed
47 48
	
	string getLabel() const;
49 50 51 52 53 54

  /**
   * @return true iff the kmer is unknown (which doesn't make sense here but
   * it only there for a reason of compatibility with KmerAffect)
   */
  bool isUnknown() const;
55 56 57 58 59
  /**
   * @return true iff the kmer is ambiguous (which doesn't make sense here but
   * it only there for a reason of compatibility with KmerAffect)
   */
  bool isAmbiguous() const;
Mikaël Salson's avatar
Mikaël Salson committed
60 61
} ;
ostream &operator<<(ostream &os, const Kmer &kmer);
62 63 64 65 66 67 68
bool operator==(const Kmer &k1, const Kmer &k2);
bool operator<(const Kmer &k1, const Kmer &k2);
bool operator>(const Kmer &k1, const Kmer &k2);
bool operator<=(const Kmer &k1, const Kmer &k2);
bool operator>=(const Kmer &k1, const Kmer &k2);
bool operator!=(const Kmer &k1, const Kmer &k2);

Mikaël Salson's avatar
Mikaël Salson committed
69 70 71 72 73 74 75 76 77 78 79


/* K-mer indexing */

template <class T> class IKmerStore
{
protected:
  bool revcomp_indexed;
  int k; // weight of the seed
  int s; // span of the seed (s >= k)
  string seed ;
80
  size_t nb_kmers_inserted;
81
  size_t max_size_indexing;
82
  bool finished_building;
83 84
  map<float, vector<double> > precomputed_proba_with_system,
    precomputed_proba_without_system;
Mikaël Salson's avatar
Mikaël Salson committed
85 86 87 88 89

public:

  virtual ~IKmerStore();

90 91
  static int last_id;
  int id; // id of this index
92
  int refs; // number of germlines using this index
93
  bool multiple_in_one;
94

95
  list< pair <T, BioReader> > labels;
96

97 98
  IKmerStore();

Mikaël Salson's avatar
Mikaël Salson committed
99 100 101
  /**
   * @param input: A single FASTA file
   * @param label: label that must be associated to the given files
102
   * @param seed: the seed to use for indexing. By default it will be the seed of the index
103
   * @post All the sequences in the FASTA files have been indexed, and the label is stored in the list of labels
Mikaël Salson's avatar
Mikaël Salson committed
104
   */
105
  void insert(BioReader& input, const string& label="", int keep_only = 0, string seed = "");
Mikaël Salson's avatar
Mikaël Salson committed
106 107 108 109

  /**
   * @param input: A list of FASTA files
   * @param label: label that must be associated to the given files
110
   * @param seed: the seed to use for indexing. By default it will be the seed of the index
111
   * @post All the sequences in the FASTA files have been indexed, and the label is stored in the list of labels
Mikaël Salson's avatar
Mikaël Salson committed
112
   */
113
  void insert(list<BioReader>& input, const string& label="", int keep_only = 0, string seed = "");
Mikaël Salson's avatar
Mikaël Salson committed
114 115 116 117
  
  /**
   * @param input: A sequence to be cut in k-mers
   * @param label: label that must be associated to the given files
118 119 120 121
   * @param keep_only: if > 0 will keep at most the last keep_only nucleotides
   *                   of the sequence. if < 0 will keep at most the first
   *                   keep_only nucleotides of the sequence. if == 0,
   *                   will keep all the sequence.
122
   * @param seed: the seed to use for indexing. By default it will be the seed of the index. Also note that when the index ! hasDifferentKmerTypes(), the seed of the index will always be used (and therefore the parameter will be ignored)
Mikaël Salson's avatar
Mikaël Salson committed
123 124
   * @post All the k-mers in the sequence have been indexed.
   */
125 126 127 128 129 130 131 132 133 134
  virtual void insert(const seqtype &sequence,
                      const string &label,
                      bool ignore_extended_nucleotides=true, int keep_only = 0,
                      string seed="");

  /**
   * Perform extra steps to finish the building of the index. After using
   * that function, we are not supposed to insert any other sequence.
   * No query function should be called before calling that function.
   */
135
  virtual void finish_building();
Mikaël Salson's avatar
Mikaël Salson committed
136 137 138 139 140

  /**
   * @param word: a k-mer
   * @return the value for the kmer as stored in the structure (don't do revcomp)
   */ 
141
  virtual T& get(seqtype &word) = 0;
Mikaël Salson's avatar
Mikaël Salson committed
142

143
  /**
144 145 146 147
   * @return the percentage of kmers that are set in the index.
   *         When ! hasDifferentKmerTypes(), the index load is always the same
   *         (apart for the unknown kmer).
   *         When kmer.isUnknown(), we return the load of all th other kmers.
148
   */
149
  virtual float getIndexLoad(T kmer) const;
150

151 152 153 154 155 156
  /**
   * @return the given integer (size of a read),
   *         but limit this size to max_size_indexing if it was defined
   */
  int atMostMaxSizeIndexing(int n) const;

157 158 159
  /**
   * @return probability that the number of kmers is 'at_least' or more in a sequence of length 'length'
   */
160
  double getProbabilityAtLeastOrAbove(T kmer, int at_least, int length);
161

Mikaël Salson's avatar
Mikaël Salson committed
162 163 164 165 166 167 168 169 170 171
  /**
   * @return the value of k
   */
  int getK() const;

  /**
   * @return the value of s
   */
  int getS() const;

172 173 174 175 176
  /**
   * @return the seed used
   */
  string getSeed() const;

177 178 179 180
  /**
   * @param kmer: a kmer
   * @return one label associated with the kmer
   */
181
  BioReader getLabel(T kmer) const;
182

183 184 185 186 187
  /**
   * @return whether the index differentiate kmer types
   */
  virtual bool hasDifferentKmerTypes() const;

Mikaël Salson's avatar
Mikaël Salson committed
188 189 190 191
  /**
   * @param seq: a sequence
   * @param no_revcomp: force not to revcomp the sequence, even if
   *                    the index was built with revcomp.
192 193
   * @return a vector of length seq.length() - d, where d is the span
   *         of the seed (minus one) or is 0 when it doesn't apply.
194
   * containing for each position the corresponding value in the index.
Mikaël Salson's avatar
Mikaël Salson committed
195
   */
196
  virtual vector<T> getResults(const seqtype &seq, bool no_revcomp=false, string seed="") = 0;
Mikaël Salson's avatar
Mikaël Salson committed
197 198 199 200 201 202 203 204 205 206 207 208

  /**
   * @return true iff the revcomp is indexed
   */
  bool isRevcomp() const;

  /**
   * @param string: a k-mer
   * @return the value in the index associated to the given k-mer.
   *         isRevcomp() ==> the revcomp of the k-mer may be considered
   * @pre word.length() == this->k
   */
209
  virtual T& operator[](seqtype& word) = 0;
210 211 212 213

 protected:

  void precompute_proba(float index_load);
Mikaël Salson's avatar
Mikaël Salson committed
214 215
};

216 217 218
template<class T>
IKmerStore<T>::IKmerStore() {
  id = ++last_id;
Mikaël Salson's avatar
Mikaël Salson committed
219
  refs = 0;
220
  finished_building = false;
221
  multiple_in_one = false;
222 223
}

224
template<class T> int IKmerStore<T>::last_id = 0;
Mikaël Salson's avatar
Mikaël Salson committed
225 226 227 228 229

template <class T> 
class MapKmerStore : public IKmerStore<T>
{
public:	
230
  map<seqtype, T> store;
Mikaël Salson's avatar
Mikaël Salson committed
231 232 233 234 235 236 237

  /**
   * @param bool: if true, will also index revcomp
   * @param seed: the seed
   */
  MapKmerStore(string seed, bool=false);
  MapKmerStore(int k, bool=false);
238

239
  vector<T> getResults(const seqtype &seq, bool no_revcomp=false, string seed="");
240 241
  T& get(seqtype &word);
  T& operator[](seqtype & word);
242 243 244

 private:
  void init();
Mikaël Salson's avatar
Mikaël Salson committed
245 246 247 248 249 250 251
};

template <class T> 
class ArrayKmerStore : public IKmerStore<T>
{
  T* store;
	
252
  int index(const seqtype& word) const;
253 254 255 256 257 258 259 260

  /**
   * Allocates memory for store
   * @throws bad_alloc: when there is not enough memory or when the
   *                    the total number of kmers cannot be stored in an 
   *                    unsigned int
   */
  void init();
Mikaël Salson's avatar
Mikaël Salson committed
261 262 263 264 265 266 267 268
public:
  /**
   * @param bool: if true, will also index revcomp
   * @param seed: the seed
   */
  ArrayKmerStore(string seed, bool=false);
  ArrayKmerStore(int k, bool=false);
  ~ArrayKmerStore();
269

270
  vector<T> getResults(const seqtype &seq, bool no_revcomp=false, const string seed="");
271 272
  T& get(seqtype &word);
  T& operator[](seqtype & word);
273
  T& operator[](int word);
Mikaël Salson's avatar
Mikaël Salson committed
274 275 276 277 278 279 280 281 282
};


// IKmerStore

template<class T>
IKmerStore<T>::~IKmerStore(){}

template<class T> 
283
void IKmerStore<T>::insert(list<BioReader>& input,
284
                           const string &label,
285 286
                           int keep_only,
                           string seed){
287
  for(list<BioReader>::iterator it = input.begin() ; it != input.end() ; it++){
288
    insert(*it, label, keep_only, seed);
Mikaël Salson's avatar
Mikaël Salson committed
289 290 291 292
  }
}

template<class T> 
293
void IKmerStore<T>::insert(BioReader& input,
294
                           const string &label,
295 296
                           int keep_only,
                           string seed){
Mikaël Salson's avatar
Mikaël Salson committed
297
  for (int r = 0; r < input.size(); r++) {
298
    insert(input.sequence(r), label, true, keep_only, seed);
Mikaël Salson's avatar
Mikaël Salson committed
299
  }
300

301
  labels.push_back(make_pair(T(label, 1, seed.size()), input)) ;
302 303

  if (revcomp_indexed  && ! T::hasRevcompSymetry()) {
304
    labels.push_back(make_pair(T(label, -1, seed.size()), input)) ;
305
  }
Mikaël Salson's avatar
Mikaël Salson committed
306 307 308
}

template<class T> 
309
void IKmerStore<T>::insert(const seqtype &sequence,
310
                           const string &label,
311
                           bool ignore_extended_nucleotides,
312 313
                           int keep_only,
                           string seed){
314 315 316 317 318 319 320
  size_t start_indexing = 0;
  size_t end_indexing = sequence.length();
  if (keep_only > 0 && sequence.length() > (size_t)keep_only) {
    start_indexing = sequence.length() - keep_only;
  } else if (keep_only < 0 && sequence.length() > (size_t) -keep_only) {
    end_indexing = -keep_only;
  }
321

322
  if (seed.empty() || ! this->hasDifferentKmerTypes())
323 324 325
    seed = this->seed;

  size_t seed_span = seed.length();
326 327 328 329 330
  size_t size_indexing = end_indexing - start_indexing;
  if (size_indexing > max_size_indexing) {
    max_size_indexing = size_indexing;
  }

331 332
  for(size_t i = start_indexing ; i + seed_span < end_indexing + 1 ; i++) {
    seqtype substr = sequence.substr(i, seed_span);
333
    seqtype kmer = spaced(substr, seed);
334

335
    if (ignore_extended_nucleotides && has_extended_nucleotides(kmer))
336 337
      continue;

Mikaël Salson's avatar
Mikaël Salson committed
338 339
    int strand = 1;
    if (revcomp_indexed && T::hasRevcompSymetry()) {
340
      seqtype rc_kmer = revcomp(kmer);
Mikaël Salson's avatar
Mikaël Salson committed
341 342 343 344 345
      if (rc_kmer.compare(kmer) < 0) {
        strand = -1;
        kmer = rc_kmer;
      }
    }
346 347 348 349
    T &this_kmer = this->get(kmer);
    if (this_kmer.isNull()) {
      nb_kmers_inserted++;
    }
350
    this_kmer += T(label, strand, seed.size());
Mikaël Salson's avatar
Mikaël Salson committed
351
    if (revcomp_indexed && ! T::hasRevcompSymetry()) {
352
      seqtype rc_kmer = spaced(revcomp(substr), seed);
353 354 355
      T &this_rc_kmer = this->get(rc_kmer);
      if (this_rc_kmer.isNull())
        nb_kmers_inserted++;
356
      this_rc_kmer += T(label, -1, seed.size());
Mikaël Salson's avatar
Mikaël Salson committed
357 358 359
    }
  }
}
360

361
template<class T>
362 363 364
void IKmerStore<T>::finish_building() {
  finished_building = true;
}
365

366 367
template<class T>
float IKmerStore<T>::getIndexLoad(const T kmer) const {
368
  float index_load = nb_kmers_inserted / pow(4.0, k);
369
  return (kmer.isUnknown()) ? 1 - index_load : index_load;
370
}
Mikaël Salson's avatar
Mikaël Salson committed
371

372 373 374 375 376 377 378
template<class T>
int IKmerStore<T>::atMostMaxSizeIndexing(int n) const {
  if (!max_size_indexing || n < max_size_indexing)
    return n ;

  return max_size_indexing ;
}
379 380

template<class T>
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
void IKmerStore<T>::precompute_proba(float index_load) {
  precomputed_proba_with_system[index_load] = vector<double>(MAX_PRECOMPUTED_PROBA);
  precomputed_proba_without_system[index_load] = vector<double>(MAX_PRECOMPUTED_PROBA);

  vector<double> &pproba_with = precomputed_proba_with_system[index_load];
  vector<double> &pproba_without = precomputed_proba_without_system[index_load];

  pproba_with[0] = 1;
  pproba_without[0] = 1;
  for (int i = 1; i < MAX_PRECOMPUTED_PROBA; i++) {
    pproba_with[i] = pproba_with[i - 1] * index_load;
    pproba_without[i] = pproba_without[i - 1] * (1 - index_load);
  }
}

template<class T>
double IKmerStore<T>::getProbabilityAtLeastOrAbove(const T kmer, int at_least, int length)  {
398

399 400
  if (at_least == 0) return 1.0; // even if 'length' is very small
  
401 402
  // n: number of kmers in the sequence
  int n = length - getS() + 1;
403
  float index_load = getIndexLoad(kmer) ;
404 405 406
  if (! precomputed_proba_without_system.count(index_load)) {
    precompute_proba(index_load);
  }
407 408

  double proba = 0;
409 410 411 412 413 414 415 416 417 418 419

  double probability_not_having_system;
  double probability_having_system;
  if (precomputed_proba_with_system.at(index_load).size() > (size_t)at_least)
    probability_having_system = precomputed_proba_with_system.at(index_load)[at_least];
  else
    probability_having_system = pow(index_load, at_least);
  if (precomputed_proba_without_system.at(index_load).size() > (size_t)n - at_least)
    probability_not_having_system = precomputed_proba_without_system.at(index_load)[n-at_least];
  else
    probability_not_having_system = pow(1 - index_load, n - at_least);
420 421 422 423 424 425
  for (int i=at_least; i<=n; i++) {
    proba += nChoosek(n, i) * probability_having_system * probability_not_having_system;
    probability_having_system *= index_load;
    probability_not_having_system /= (1 - index_load);
  }

426 427 428 429
#ifdef DEBUG_KMS_EVALUE
  cerr << "e-value:\tindex_load=" << index_load << ",\tat_least=" << at_least << ",\tlength=" << length <<",\tp-value=" << proba << endl;
#endif

430 431 432 433 434
  return proba;
}



Mikaël Salson's avatar
Mikaël Salson committed
435 436 437 438 439 440 441 442 443 444
template<class T>
int IKmerStore<T>::getK() const {
  return k;
}

template<class T>
int IKmerStore<T>::getS() const {
  return s;
}

445 446 447 448 449
template<class T>
string IKmerStore<T>::getSeed() const {
  return seed;
}

450
template<class T>
451 452
BioReader IKmerStore<T>::getLabel(T kmer) const {
  for (typename list< pair<T, BioReader> >::const_iterator it = labels.begin(); it != labels.end(); ++it)
453 454
    if (it->first == kmer)
      return it->second ;
455 456 457 458 459 460
  // Nothing interesting found
  // Try by ignoring length if the index is not able to deal with different lengths
  if (! hasDifferentKmerTypes() && kmer.getLength() != (unsigned char)~0) {
    kmer.setLength(~0);
    return getLabel(kmer);
  }
461
  return BIOREADER_AMBIGUOUS ;
462
}
463

464 465 466 467 468
template<class T>
bool IKmerStore<T>::hasDifferentKmerTypes() const {
  return false;
}

469
// .getResults()
Mikaël Salson's avatar
Mikaël Salson committed
470
template<class T>
471
vector<T> MapKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp, string seed) {
472

Mikaël Salson's avatar
Mikaël Salson committed
473
  string local_seed = seed;
474
  if (! seed.size())
Mikaël Salson's avatar
Mikaël Salson committed
475 476
    local_seed = IKmerStore<T>::seed;
  int s = local_seed.size();
477

Mikaël Salson's avatar
Mikaël Salson committed
478 479 480 481 482
  if ((int)seq.length() < s - 1) {
    return vector<T>(0);
  }
  vector<T> result(seq.length() - s + 1);
  for (size_t i=0; i + s < seq.length() + 1; i++) {
Mikaël Salson's avatar
Mikaël Salson committed
483
    seqtype kmer = spaced(seq.substr(i, s), local_seed);
484
    //    seqtype kmer = seq.substr(i, s);
Mikaël Salson's avatar
Mikaël Salson committed
485
    // cout << kmer << endl << kmer0 << endl << endl ;
486
    if (IKmerStore<T>::revcomp_indexed && no_revcomp) {
Mikaël Salson's avatar
Mikaël Salson committed
487 488 489 490 491 492 493 494
      result[i] = get(kmer);
    } else {
      result[i] = (*this)[kmer];
    }
  }
  return result;
}

495
template<class T>
496
vector<T> ArrayKmerStore<T>::getResults(const seqtype &seq, bool no_revcomp, string seed) {
Mikaël Salson's avatar
Mikaël Salson committed
497 498

  string local_seed = seed;
499
  if (! seed.size())
Mikaël Salson's avatar
Mikaël Salson committed
500 501
    local_seed = IKmerStore<T>::seed;
  int s = local_seed.size();
502 503 504 505 506 507 508 509 510 511 512 513

  int N = (int)seq.length();

  if (N < s - 1) {
    return vector<T>(0);
  }
  vector<T> result(N - s + 1);

  /* Read once the sequence, convert it to int* */
  int* intseq = new int[N];
  for (int i=0; i<N; i++)
    {
514
      intseq[i] = nuc_to_int(seq[i]);
515 516 517 518
    }

  /* Compute results */
  for (size_t i=0; (int) i+s < N+1; i++) {
Mikaël Salson's avatar
Mikaël Salson committed
519
    int kmer = spaced_int(intseq + i, local_seed);
520 521 522 523
    if (IKmerStore<T>::revcomp_indexed && no_revcomp) {
      result[i] = store[kmer]; // getfromint(kmer); // store[kmer];
      // cout << i << "/" << N << "  " << kmer << result[i] << endl ;
    } else {
524
      result[i] = (*this)[kmer]; // Deals with revcomp
525 526 527 528 529 530 531
    }
  }

  delete[] intseq ;
  return result;
}

Mikaël Salson's avatar
Mikaël Salson committed
532 533 534 535 536 537 538 539 540

template<class T>
bool IKmerStore<T>::isRevcomp() const {
  return revcomp_indexed;
}

// MapKmerStore

template <class T>
541
MapKmerStore<T>::MapKmerStore(string seed, bool revcomp):IKmerStore<T>(){
Mikaël Salson's avatar
Mikaël Salson committed
542 543 544 545 546
  this->seed = seed;   
  int k = seed_weight(seed);
  this->k = k;
  this->s = seed.size();
  this->revcomp_indexed = revcomp;
547
  init();
Mikaël Salson's avatar
Mikaël Salson committed
548 549 550
}

template <class T>
551
MapKmerStore<T>::MapKmerStore(int k, bool revcomp):IKmerStore<T>(){
Mikaël Salson's avatar
Mikaël Salson committed
552 553 554 555
  this->seed = seed_contiguous(k);
  this->k = k;
  this->s = k;
  this->revcomp_indexed = revcomp;
556 557 558 559 560 561
  init();
}

template <class T>
void MapKmerStore<T>::init() {
  this->nb_kmers_inserted = 0;
562
  this->max_size_indexing = 0;
Mikaël Salson's avatar
Mikaël Salson committed
563 564 565
}

template <class T> 
566
T& MapKmerStore<T>::get(seqtype& word){
Mikaël Salson's avatar
Mikaël Salson committed
567 568 569 570
  return store[word];
}

template <class T> 
571
T& MapKmerStore<T>::operator[](seqtype& word){
Mikaël Salson's avatar
Mikaël Salson committed
572
  if (this->isRevcomp() && T::hasRevcompSymetry()) {
573
    seqtype rc_kmer = revcomp(word);
Mikaël Salson's avatar
Mikaël Salson committed
574 575 576 577 578 579 580 581 582
    if (rc_kmer.compare(word) < 0)
      word = rc_kmer;
  }
  return store[word];
}

// ArrayKmerStore

template <class T> 
583
ArrayKmerStore<T>::ArrayKmerStore(int k, bool revcomp):IKmerStore<T>() {
Mikaël Salson's avatar
Mikaël Salson committed
584 585 586 587
  this->seed = seed_contiguous(k);
  this->k = k;
  this->s = k;
  this->revcomp_indexed = revcomp;
588
  init();
Mikaël Salson's avatar
Mikaël Salson committed
589 590 591 592
}


template <class T> 
593
ArrayKmerStore<T>::ArrayKmerStore(string seed, bool revcomp):IKmerStore<T>(){
Mikaël Salson's avatar
Mikaël Salson committed
594 595 596 597 598
  this->seed = seed; 
  int k = seed_weight(seed);
  this->k = k;
  this->s = seed.size();
  this->revcomp_indexed = revcomp;
599 600 601 602 603
  init();
}

template <class T>
void ArrayKmerStore<T>::init() {
604
  this->nb_kmers_inserted = 0;
605
  this->max_size_indexing = 0;
606
  if ((size_t)(this->k << 1) >= sizeof(int) * 8)
Mikaël Salson's avatar
Mikaël Salson committed
607
    throw std::bad_alloc();
608
  store = new T[(unsigned int)1 << (this->k << 1)];
Mikaël Salson's avatar
Mikaël Salson committed
609 610 611 612 613 614 615 616 617 618 619 620 621 622
  if (! store)
    throw std::bad_alloc();
}

template <class T> 
ArrayKmerStore<T>::~ArrayKmerStore(){
  delete [] store;
}

/**	Returns index of word in an array of size 4^k
	Considering B(A) = 0, B(C) = 1, B(T) = 2, B(G) = 3
	index_word = \sum_{i=0}^{k-1}B(word[i])*4^(k-i-1)
**/
template <class T> 
623
int ArrayKmerStore<T>::index(const seqtype& word) const{
Mikaël Salson's avatar
Mikaël Salson committed
624
  return dna_to_int(word, this->k);
Mikaël Salson's avatar
Mikaël Salson committed
625 626 627
}

template <class T> 
628
T& ArrayKmerStore<T>::get(seqtype& word){
Mikaël Salson's avatar
Mikaël Salson committed
629 630 631 632
  return store[index(word)];
}

template <class T> 
633
T& ArrayKmerStore<T>::operator[](seqtype& word){
634 635 636 637 638
  return (*this)[index(word)];
}

template <class T> 
T& ArrayKmerStore<T>::operator[](int word){
Mikaël Salson's avatar
Mikaël Salson committed
639
  if (this->isRevcomp() && T::hasRevcompSymetry()) {
640 641
    int rc_kmer = revcomp_int(word, IKmerStore<T>::k);
    if (rc_kmer < word)
Mikaël Salson's avatar
Mikaël Salson committed
642 643
      word = rc_kmer;
  }
644
  return store[word];
Mikaël Salson's avatar
Mikaël Salson committed
645 646 647
}

#endif