// read_storage.h
#ifndef READ_STORAGE_H
#define READ_STORAGE_H

#include "read_score.h"
#include <list>
6
#include <iostream>
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
using namespace std;

/**
 * Abstract interface of a bounded read container: reads are submitted one
 * by one and only the best ones are meant to be kept, up to a configurable
 * maximum number of stored reads.
 */
class VirtualReadStorage {
 public:

  virtual ~VirtualReadStorage() {}

  /**
   * Submit the sequence s to the storage.
   * Whether s actually ends up stored depends on its score and on how
   * many sequences have already been stored.
   */
  virtual void add(Sequence &s) = 0;

  /**
   * Define the maximal number of reads the storage may hold.
   * @param nb: the new upper bound
   */
  void setMaxNbReadsStored(size_t nb);

  /**
   * @return the maximal number of reads the storage may hold
   */
  size_t getMaxNbReadsStored() const;

  /**
   * @return how many elements have been submitted so far
   *         (some of them may not have been stored)
   */
  virtual size_t getNbInserted() const = 0;

  /**
   * @return how many elements are actually stored
   */
  virtual size_t getNbStored() const = 0;

  /**
   * @return every read currently stored
   */
  virtual list<Sequence> getReads() const = 0;

 protected:
  // Upper bound on the number of stored reads -- presumably the value
  // handled by set/getMaxNbReadsStored(); confirm in the implementation.
  // NOTE(review): nothing in this header initialises this member.
  size_t maxNbStored;

  // Scorer made available to the derived classes; how it is used and who
  // owns it cannot be told from this class alone.
  const VirtualReadScore *scorer;
};

55 56 57
/**
 * Store reads in bins as well as their scores (the scores are used for binning the reads).
 */
58 59 60 61
class BinReadStorage: public VirtualReadStorage {
 private:
  size_t nb_bins;
  list<Sequence> *bins;
62 63 64
  double *score_bins;
  size_t *nb_scores;
  size_t total_nb_scores;
65 66 67 68
  size_t max_score;
  size_t nb_inserted;
  size_t nb_stored;
  size_t smallest_bin_not_empty;
69
  string label;
70 71 72 73 74 75 76 77 78
public:
  BinReadStorage();
  
  /**
   * Creates a storage with bins. This function *must* be called before using the object
   * nb_bins are created and the maximal score for the reads that will be added is assumed 
   * to be max_score. If higher score are met, they are put in the nb_bins+1 bin.
   * The class doesn't destruct the VirtualReadScore. It is the responsability of the caller.
   * @pre all scores must be >= 0
79 80 81
   * @param no_list: don't create a list (useful for storing only stats,
   *                 false by default: lists are created). If the option is set to true, the
   *                 function add() must not be called but only the addScore().
82
   */
83
  void init(size_t nb_bins, size_t max_score, const VirtualReadScore *vrs, bool no_list = false);
84 85 86 87 88

  ~BinReadStorage();
  
  void add(Sequence &s);

89 90 91 92 93 94
  /**
   * @return the number of bins requested by the used. Note that an additional
   * bin is created for the values greater than the provided max value.
   */
  size_t getNbBins() const;

95 96 97 98
  size_t getNbInserted() const;

  size_t getNbStored() const;

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
  /**
   * Add score information only (not the sequence itself)
   * depending on the scorer that was given to the init() function.
   */
  void addScore(Sequence &s);

  /**
   * Add score information based on the provided score.
   */
  void addScore(float score);

  /**
   * Add score information in the given bin based on the provided score.
   * This method should not be used, prefer the one with the score only.
   */
  void addScore(size_t bin, float score);

  /**
   * @return the average score stored in the bin corresponding to the score
   * obtained for the provided sequence.
   */
  double getAverageScoreBySeq(Sequence &s);

  /**
   * @return the average score stored in the bin of the corresponding score
   */
  double getAverageScoreByScore(float score);

  /**
   * @return the average score stored in the corresponding bin. If no
   * parameter is provided or if the parameter is outside the range [0,
   * getNbBins()] then the average over all the score is returned.
   */
  double getAverageScore(size_t bin=~0);

  /**
   * @return the sum of all the scores stored in the bin corresponding to the score
   * obtained for the provided sequence.
   */
  double getScoreBySeq(Sequence &s);

  /**
   * @return the sum of all the scores stored in the bin of the corresponding score
   */
  double getScoreByScore(float score);

  /**
   * @return the sum of all the scores stored in the corresponding bin. If no parameter is
   * provided or if the parameter is outside the range [0, getNbBins()] then
   * the sum of all the scores is returned.
   */
  double getScore(size_t bin=~0);

  /**
   * @return the number of score stored in the given bin. If no parameter
   * is given or if the parameter is out of the ranges [0, getNbBins()], then
   * the total number of scores stored is returned.
   * @complexity O(1)
   */
  size_t getNbScores(size_t bin=~0) const;

160 161 162 163
  bool hasLabel() const;

  string getLabel() const;

164 165
  list<Sequence> getReads() const;

166 167 168 169 170
  /**
   * Set the label of the statistics
   */
  void setLabel(string &label);

171 172
  void out_average_scores(ostream &out);

173 174 175 176 177 178 179 180 181 182 183 184 185 186
 private:
  /**
   * @return the bin a sequence of the given score must lie.
   */
  size_t scoreToBin(float score);

  /**
   * Search for a largest value such that the bin is not empty.
   * If none is found ~0 is stored.
   */
  void update_smallest_bin_not_empty();

  friend void testBinReadStorage();
};
187

188
#endif