read_storage.cpp 5.95 KB
Newer Older
1
#include "read_storage.h"
2
#include "tools.h"
3 4 5 6 7 8 9 10 11 12 13 14 15

size_t VirtualReadStorage::getMaxNbReadsStored() const{
  return maxNbStored;
}

void VirtualReadStorage::setMaxNbReadsStored(size_t nb_reads) {
  maxNbStored = nb_reads;
}


//////////////////////////////////////////////////

BinReadStorage::BinReadStorage()
16
  :nb_bins(0), bins(NULL), score_bins(NULL), nb_scores(NULL), total_nb_scores(0), max_score(0),
17
   nb_inserted(0), nb_stored(0), smallest_bin_not_empty(~0),label(),inited(false) {}
18

19
void BinReadStorage::init(size_t nb_bins, size_t max_score, const VirtualReadScore *vrs, bool no_list) {
20 21
  this->max_bins = nb_bins;
  __init(0, max_score, vrs, no_list);
22 23 24
}

void BinReadStorage::__init(size_t nb_bins, size_t max_score, const VirtualReadScore *vrs, bool no_list) {
25
  this->all_read_lengths = 0;
26 27 28
  if (inited)
    return;

29 30
  this->nb_bins = nb_bins;
  this->max_score = max_score;
31 32 33 34
  if (no_list)
    bins = NULL;
  else
    bins = new list<Sequence>[nb_bins+1];
35 36 37 38 39 40
  score_bins = new double[nb_bins+1];
  nb_scores = new size_t[nb_bins+1];
  for (size_t i = 0; i <= nb_bins; i++) {
    score_bins[i] = 0;
    nb_scores[i] = 0;
  }
41
  scorer = vrs;
42
  inited=true;
43 44
}

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
void BinReadStorage::reallocate(){
  list<Sequence> tmpBin;
  if (bins)
    tmpBin = bins[0];

  free_objects();

  all_read_lengths = 0;
  smallest_bin_not_empty = ~0;
  total_nb_scores = 0;
  nb_stored = 0;
  inited = false;
  __init(max_bins, max_score, scorer, tmpBin.size() == 0);
  for(auto s : tmpBin){
    this->add(s);
  }
  nb_inserted -= nb_stored;
}

64
BinReadStorage::~BinReadStorage() {
65 66 67 68
  free_objects();
}

void BinReadStorage::free_objects() {
69 70
  if (bins)
    delete [] bins;
71 72 73 74 75 76 77 78
  if (score_bins) {
    delete [] score_bins;
    delete [] nb_scores;
  }

}

void BinReadStorage::addScore(Sequence &s) {
79
  addScore(scorer->getScore(s));
80 81 82 83 84 85 86 87 88 89
}

void BinReadStorage::addScore(float score) {
  addScore(scoreToBin(score), score);
}

void BinReadStorage::addScore(size_t bin, float score) {
  score_bins[bin] += score;
  nb_scores[bin]++;
  total_nb_scores++;
90 91 92
}

void BinReadStorage::add(Sequence &s) {
93 94 95
  if(nb_stored == getMaxNbReadsStored() && nb_inserted == nb_stored){
    reallocate();
  }
96
  float score = scorer->getScore(s);
97 98
  size_t bin = scoreToBin(score);
  addScore(bin, score);
99
  all_read_lengths += s.sequence.length();
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
  if (nb_stored < getMaxNbReadsStored()) {
    bins[bin].push_back(s);
    nb_stored++;
    if (bin < (size_t)smallest_bin_not_empty)
      smallest_bin_not_empty = bin;
  } else {
    // We don't have space left.
    // Either we don't insert that sequence or it replaces another one
    if (bin > smallest_bin_not_empty) {
      bins[smallest_bin_not_empty].erase(bins[smallest_bin_not_empty].begin());
      if (bins[smallest_bin_not_empty].size() == 0)
        update_smallest_bin_not_empty();
      bins[bin].push_back(s);
    }
  }
  nb_inserted++;
}

118 119 120 121
list<Sequence> BinReadStorage::getBin(size_t bin) const{
  return bins[bin];
}

122 123 124 125
size_t BinReadStorage::getNbBins() const {
  return nb_bins;
}

126 127 128 129
size_t BinReadStorage::getNbInserted() const {
  return nb_inserted;
}

130
double BinReadStorage::getAverageScoreBySeq(Sequence &s) {
131
  return getAverageScoreByScore(scorer->getScore(s));
132 133 134 135 136 137
}

double BinReadStorage::getAverageScoreByScore(float score) {
  return getAverageScore(scoreToBin(score));
}

138 139 140 141
double BinReadStorage::getAverageLength() const{
  return all_read_lengths *1. / getNbScores();
}

142 143 144
double BinReadStorage::getAverageScore(size_t bin) {
  return getScore(bin) / getNbScores(bin);
}
145 146 147
double BinReadStorage::getInvertedAverageScore(size_t bin) {
  return getNbScores(bin) / getScore(bin);
}
148 149

double BinReadStorage::getScoreBySeq(Sequence &s) {
150
  return getScoreByScore(scorer->getScore(s));
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
}

double BinReadStorage::getScoreByScore(float score) {
  return getScore(scoreToBin(score));
}

double BinReadStorage::getScore(size_t bin) {
  if (bin > getNbBins()) {
    double sum = 0;
    for (size_t i = 0; i <= getNbBins(); i++)
      sum += score_bins[i];
    return sum;
  }
  return score_bins[bin];
}

size_t BinReadStorage::getNbScores(size_t bin) const {
  if (bin > getNbBins())
    return total_nb_scores;
  return nb_scores[bin];
}

173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
size_t BinReadStorage::getNbStored() const {
  return nb_stored;
}

list<Sequence> BinReadStorage::getReads() const {
  list<Sequence>results;

  for (size_t i = smallest_bin_not_empty; i <= nb_bins; i++) {
    if (bins[i].size() > 0) {
      for (list<Sequence>::iterator it = bins[i].begin(); it != bins[i].end();
           it++)
        results.push_back(*it);
    }
  }
  return results;
}

190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
list<Sequence> BinReadStorage::getBestReads(size_t max_nb, size_t min_score) const {
  list<Sequence>best_reads;
  size_t smallest_interesting_bin = max(smallest_bin_not_empty, scoreToBin(min_score));

  for (size_t i = nb_bins+1; i > smallest_interesting_bin; i--) {
    size_t j = i-1;
    if (bins[j].size() > 0) {
      if (bins[j].size() <= max_nb) {
        best_reads.insert(best_reads.end(), bins[j].begin(), bins[j].end());
        max_nb -= bins[j].size();
      } else {
        for (list<Sequence>::iterator it = bins[j].begin(); max_nb > 0 && it != bins[j].end();
             it++) {
          best_reads.push_back(*it);
          max_nb--;
        }
      }
    }
  }
  return best_reads;

}

213 214 215 216 217 218 219 220 221 222 223 224
string BinReadStorage::getLabel() const {
  return label;
}

bool BinReadStorage::hasLabel() const {
  return this->label.length() > 0;
}

void BinReadStorage::setLabel(string &label) {
  this->label = label;
}

225 226
void BinReadStorage::out_average_scores(ostream &out, bool inversed) {
  output_label_average(out, getLabel(), getNbScores(), inversed ? getInvertedAverageScore() : getAverageScore(), inversed ? 3 : 1);
227 228
}

229
size_t BinReadStorage::scoreToBin(float score) const{
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
  assert(score >= 0);
  if (score > max_score)
    return nb_bins;
  return (int)((score / (max_score+1)) * nb_bins);
}

void BinReadStorage::update_smallest_bin_not_empty() {
  for (size_t i = smallest_bin_not_empty; i <= nb_bins; i++) {
    if (bins[i].size() > 0) {
      smallest_bin_not_empty = i;
      return;
    }
  }
  smallest_bin_not_empty = ~0;
}