Commit e9a7b7c7 authored by Mikaël Salson

BinReadStorage: Store the score information (in bins).

Scores are also stored in bins for all the sequences (even the ones that are not kept).

Unit tests.
parent 20a1d400
......@@ -12,24 +12,51 @@ void VirtualReadStorage::setMaxNbReadsStored(size_t nb_reads) {
//////////////////////////////////////////////////
BinReadStorage::BinReadStorage()
:nb_bins(0), max_score(0), nb_inserted(0), nb_stored(0), smallest_bin_not_empty(~0) {
bins = NULL;
}
:nb_bins(0), bins(NULL), score_bins(NULL), nb_scores(NULL), total_nb_scores(0), max_score(0),
nb_inserted(0), nb_stored(0), smallest_bin_not_empty(~0) {}
void BinReadStorage::init(size_t nb_bins, size_t max_score, const VirtualReadScore *vrs) {
this->nb_bins = nb_bins;
this->max_score = max_score;
bins = new list<Sequence>[nb_bins+1];
score_bins = new double[nb_bins+1];
nb_scores = new size_t[nb_bins+1];
for (size_t i = 0; i <= nb_bins; i++) {
score_bins[i] = 0;
nb_scores[i] = 0;
}
scorer = vrs;
}
/**
 * Release the arrays allocated by init().
 * The two score arrays are released together, and only when score_bins
 * is set.
 */
BinReadStorage::~BinReadStorage() {
  if (bins != NULL) {
    delete [] bins;
  }
  if (score_bins == NULL) {
    return;
  }
  delete [] score_bins;
  delete [] nb_scores;
}
/**
 * Record the score of a sequence without storing the sequence itself.
 * The score is obtained from the scorer on s.sequence.
 */
void BinReadStorage::addScore(Sequence &s) {
  const float seq_score = scorer->getScore(s.sequence);
  addScore(seq_score);
}
void BinReadStorage::addScore(float score) {
addScore(scoreToBin(score), score);
}
/**
 * Record a score in an explicitly given bin: the score is added to the bin's
 * sum, and both the bin's counter and the global counter are incremented.
 * The bin index is used as is (no range check is done here).
 */
void BinReadStorage::addScore(size_t bin, float score) {
  ++total_nb_scores;
  ++nb_scores[bin];
  score_bins[bin] += score;
}
void BinReadStorage::add(Sequence &s) {
size_t bin = scoreToBin(scorer->getScore(s.sequence));
float score = scorer->getScore(s.sequence);
size_t bin = scoreToBin(score);
addScore(bin, score);
if (nb_stored < getMaxNbReadsStored()) {
bins[bin].push_back(s);
nb_stored++;
......@@ -48,10 +75,50 @@ void BinReadStorage::add(Sequence &s) {
nb_inserted++;
}
size_t BinReadStorage::getNbBins() const {
return nb_bins;
}
size_t BinReadStorage::getNbInserted() const {
return nb_inserted;
}
/**
 * @return the average score of the bin corresponding to the score that the
 * scorer gives to s.sequence.
 */
double BinReadStorage::getAverageScoreBySeq(Sequence &s) {
  const float seq_score = scorer->getScore(s.sequence);
  return getAverageScoreByScore(seq_score);
}
/**
 * @return the average score of the bin that scoreToBin() associates with
 * the given score.
 */
double BinReadStorage::getAverageScoreByScore(float score) {
  const size_t target_bin = scoreToBin(score);
  return getAverageScore(target_bin);
}
/**
 * @param bin index of the bin; a value greater than getNbBins() selects
 *            all the bins together (see getScore() and getNbScores()).
 * @return the sum of the scores divided by the number of scores for the
 *         selected bin(s), or 0 when no score has been recorded there
 *         (instead of the NaN that a 0/0 division would produce).
 */
double BinReadStorage::getAverageScore(size_t bin) {
  const size_t count = getNbScores(bin);
  if (count == 0)
    return 0;
  return getScore(bin) / count;
}
/**
 * @return the sum of the scores of the bin corresponding to the score that
 * the scorer gives to s.sequence.
 */
double BinReadStorage::getScoreBySeq(Sequence &s) {
  const float seq_score = scorer->getScore(s.sequence);
  return getScoreByScore(seq_score);
}
/**
 * @return the sum of the scores of the bin that scoreToBin() associates
 * with the given score.
 */
double BinReadStorage::getScoreByScore(float score) {
  const size_t target_bin = scoreToBin(score);
  return getScore(target_bin);
}
/**
 * @param bin index of the bin
 * @return the sum of the scores stored in that bin when bin is in
 *         [0, getNbBins()]; otherwise the sum over all the bins
 *         (indices 0 to getNbBins() included).
 */
double BinReadStorage::getScore(size_t bin) {
  const size_t last_bin = getNbBins();
  if (bin <= last_bin)
    return score_bins[bin];

  // Out-of-range index: accumulate every bin, from the first to the last.
  double total = 0;
  const double *end = score_bins + last_bin + 1;
  for (const double *current = score_bins; current != end; ++current)
    total += *current;
  return total;
}
/**
 * @param bin index of the bin
 * @return the number of scores recorded in that bin when bin is in
 *         [0, getNbBins()]; otherwise the total number of scores recorded.
 */
size_t BinReadStorage::getNbScores(size_t bin) const {
  return (bin <= getNbBins()) ? nb_scores[bin] : total_nb_scores;
}
size_t BinReadStorage::getNbStored() const {
return nb_stored;
}
......
......@@ -51,10 +51,16 @@ class VirtualReadStorage {
virtual list<Sequence> getReads() const = 0;
};
/**
* Store reads in bins as well as their scores (the scores are used for binning the reads).
*/
class BinReadStorage: public VirtualReadStorage {
private:
size_t nb_bins;
list<Sequence> *bins;
double *score_bins;
size_t *nb_scores;
size_t total_nb_scores;
size_t max_score;
size_t nb_inserted;
size_t nb_stored;
......@@ -76,10 +82,77 @@ public:
void add(Sequence &s);
/**
* @return the number of bins requested by the user. Note that an additional
* bin is created for the values greater than the provided max value.
*/
size_t getNbBins() const;
size_t getNbInserted() const;
size_t getNbStored() const;
/**
* Add score information only (not the sequence itself)
* depending on the scorer that was given to the init() function.
*/
void addScore(Sequence &s);
/**
* Add score information based on the provided score.
*/
void addScore(float score);
/**
* Add score information in the given bin based on the provided score.
* This method should not be used, prefer the one with the score only.
*/
void addScore(size_t bin, float score);
/**
* @return the average score stored in the bin corresponding to the score
* obtained for the provided sequence.
*/
double getAverageScoreBySeq(Sequence &s);
/**
* @return the average score stored in the bin of the corresponding score
*/
double getAverageScoreByScore(float score);
/**
* @return the average score stored in the corresponding bin. If no
* parameter is provided or if the parameter is outside the range [0,
* getNbBins()] then the average over all the scores is returned.
*/
double getAverageScore(size_t bin=~0);
/**
* @return the sum of all the scores stored in the bin corresponding to the score
* obtained for the provided sequence.
*/
double getScoreBySeq(Sequence &s);
/**
* @return the sum of all the scores stored in the bin of the corresponding score
*/
double getScoreByScore(float score);
/**
* @return the sum of all the scores stored in the corresponding bin. If no parameter is
* provided or if the parameter is outside the range [0, getNbBins()] then
* the sum of all the scores is returned.
*/
double getScore(size_t bin=~0);
/**
* @return the number of scores stored in the given bin. If no parameter
* is given or if the parameter is out of the range [0, getNbBins()], then
* the total number of scores stored is returned.
* @complexity O(1)
*/
size_t getNbScores(size_t bin=~0) const;
list<Sequence> getReads() const;
private:
......
......@@ -18,18 +18,30 @@ void testBinReadStorage() {
TAP_TEST(reads.smallest_bin_not_empty == 1, TEST_BRS_SBNE, "");
TAP_TEST(reads.getNbInserted() == 1, TEST_BRS_GET_NB_INSERTED, "");
TAP_TEST(reads.getNbStored() == 1, TEST_BRS_GET_NB_STORED, "");
TAP_TEST(reads.getScoreBySeq(seq1) == 5
&& reads.getScoreByScore(5.) == 5
&& reads.getScore(1) == 5, TEST_BRS_GET_SCORE, "");
Sequence seq2 = {"label2", "l2", "GA", "", NULL};
reads.add(seq2);
TAP_TEST(reads.smallest_bin_not_empty == 0, TEST_BRS_SBNE, "");
TAP_TEST(reads.getNbInserted() == 2, TEST_BRS_GET_NB_INSERTED, "");
TAP_TEST(reads.getNbStored() == 2, TEST_BRS_GET_NB_STORED, "");
TAP_TEST(reads.getScoreBySeq(seq2) == 2
&& reads.getScoreByScore(2.) == 2
&& reads.getScore(0) == 2, TEST_BRS_GET_SCORE, "");
TAP_TEST(reads.getScore() == 7, TEST_BRS_GET_SCORE, "");
TAP_TEST(reads.getAverageScore() == 3.5, TEST_BRS_GET_AVG_SCORE, "");
Sequence seq3 = {"label3", "l3", "GGAGACAGTA", "", NULL};
reads.add(seq3);
TAP_TEST(reads.smallest_bin_not_empty == 0, TEST_BRS_SBNE, "");
TAP_TEST(reads.getNbInserted() == 3, TEST_BRS_GET_NB_INSERTED, "");
TAP_TEST(reads.getNbStored() == 3, TEST_BRS_GET_NB_STORED, "");
TAP_TEST(reads.getScoreBySeq(seq3) == 10
&& reads.getScoreByScore(10.) == 10
&& reads.getScore(2) == 10, TEST_BRS_GET_SCORE, "");
Sequence seq4 = {"label4", "l4", "AGAGACAGTA", "", NULL};
reads.add(seq4);
......@@ -40,6 +52,10 @@ void testBinReadStorage() {
TAP_TEST(reads.bins[1].size() == 1, TEST_BRS_ADD, "");
TAP_TEST(reads.bins[2].size() == 2, TEST_BRS_ADD, "");
TAP_TEST(reads.bins[3].size() == 0, TEST_BRS_ADD, "");
TAP_TEST(reads.getScoreBySeq(seq4) == 20
&& reads.getScoreByScore(10.) == 20
&& reads.getScore(2) == 20, TEST_BRS_GET_SCORE, "");
TAP_TEST(reads.getAverageScoreBySeq(seq4) == 10, TEST_BRS_GET_AVG_SCORE, "");
Sequence seq5 = {"label5", "l5", "AATAAGAGTGAGACAGTA", "", NULL};
reads.add(seq5);
......@@ -50,7 +66,25 @@ void testBinReadStorage() {
TAP_TEST(reads.bins[1].size() == 0, TEST_BRS_ADD, "");
TAP_TEST(reads.bins[2].size() == 2, TEST_BRS_ADD, "");
TAP_TEST(reads.bins[3].size() == 1, TEST_BRS_ADD, "");
TAP_TEST(reads.getScoreBySeq(seq5) == seq5.sequence.length()
&& reads.getScoreByScore(18.) == seq5.sequence.length()
&& reads.getScore(3) == seq5.sequence.length(), TEST_BRS_GET_SCORE, "");
TAP_TEST(reads.getAverageScore() == 9, TEST_BRS_GET_AVG_SCORE, "");
reads.add(seq2);
TAP_TEST(reads.smallest_bin_not_empty == 2, TEST_BRS_SBNE, "");
TAP_TEST(reads.getNbInserted() == 6, TEST_BRS_GET_NB_INSERTED, "");
TAP_TEST(reads.getNbStored() == 3, TEST_BRS_GET_NB_STORED, "");
TAP_TEST(reads.getScoreBySeq(seq2) == 4, TEST_BRS_GET_SCORE, "");
TAP_TEST((int) (reads.getAverageScore()*10) == 78, TEST_BRS_GET_AVG_SCORE, reads.getAverageScore());
TAP_TEST(reads.getNbScores() == 6, TEST_BRS_GET_NB_SCORES, "");
TAP_TEST(reads.getNbScores(0) == 2, TEST_BRS_GET_NB_SCORES, "");
TAP_TEST(reads.getNbScores(1) == 1, TEST_BRS_GET_NB_SCORES, "");
TAP_TEST(reads.getNbScores(2) == 2, TEST_BRS_GET_NB_SCORES, "");
TAP_TEST(reads.getNbScores(3) == 1, TEST_BRS_GET_NB_SCORES, "");
list<Sequence> sequences = reads.getReads();
list<Sequence>::iterator it = sequences.begin();
TAP_TEST(it->sequence == "GGAGACAGTA", TEST_BRS_GET_READS, "");
......
......@@ -142,6 +142,9 @@ enum {
TEST_BRS_GET_NB_STORED,
TEST_BRS_ADD,
TEST_BRS_GET_READS,
TEST_BRS_GET_SCORE,
TEST_BRS_GET_AVG_SCORE,
TEST_BRS_GET_NB_SCORES,
/* Bugs */
TEST_BUG_SEGMENTATION,
......@@ -279,6 +282,9 @@ inline void declare_tests() {
RECORD_TAP_TEST(TEST_BRS_GET_NB_STORED, "Test the getNbStored() method in ReadStorage");
RECORD_TAP_TEST(TEST_BRS_ADD, "Test the add() method in ReadStorage");
RECORD_TAP_TEST(TEST_BRS_GET_READS, "Test the getReads() method in ReadStorage");
RECORD_TAP_TEST(TEST_BRS_GET_SCORE, "Test BinReadStorage::getScore()");
RECORD_TAP_TEST(TEST_BRS_GET_AVG_SCORE, "Test BinReadStorage::getAverageScore()");
RECORD_TAP_TEST(TEST_BRS_GET_NB_SCORES, "Test BinReadStorage::getNbScores()");
RECORD_TAP_TEST(TEST_KMER_REPRESENTATIVE, "Test KmerRepresentativeComputer computations");
RECORD_TAP_TEST(TEST_KMER_REPRESENTATIVE_REQUIRED_SEQ, "Test KmerRepresentativeComputer computations with a required sequence");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment