Attention : une mise à jour du serveur sera effectuée le lundi 17 mai entre 13h et 13h30. Cette mise à jour entraînera une interruption du service de quelques minutes.

Commit 9c18be23 authored by Marc Duez
parents 8d0ec67f 0931bd84
......@@ -50,6 +50,10 @@ void RepresentativeComputer::setRequiredSequence(string sequence) {
required = sequence;
}
/**
 * Records the reference length against which the coverage is measured.
 * @param coverage_reference_length: value stored in the member of the same name
 */
void RepresentativeComputer::setCoverageReferenceLength(float coverage_reference_length) {
  // The parameter shadows the member, hence the qualified name on the left.
  RepresentativeComputer::coverage_reference_length = coverage_reference_length;
}
string KmerRepresentativeComputer::getSeed() const{
return seed;
}
......@@ -94,13 +98,9 @@ void KmerRepresentativeComputer::compute() {
Sequence sequence_longest_run;
size_t k = getSeed().length();
Stats stats_length;
for (size_t seq = 1; seq <= sequences.size() && seq <= seq_index_longest_run + stability_limit ; seq++) {
Sequence sequence = rc.getithBest(seq);
stats_length.insert(sequence.sequence.size());
if (sequence.sequence.size() <= length_longest_run) {
continue;
}
......@@ -159,11 +159,10 @@ void KmerRepresentativeComputer::compute() {
representative.quality = "";
int length = stats_length.getAverage();
coverage = (float) length_longest_run / length;
coverage = (float) length_longest_run / coverage_reference_length;
coverage_info = string_of_int(length_longest_run) + " bp"
+ " (" + string_of_int(100 * coverage) + "% of " + string_of_int(length) + " bp)";
+ " (" + string_of_int(100 * coverage) + "% of " + fixed_string_of_float(coverage_reference_length, 1) + " bp)";
representative.label += " - " + coverage_info ;
}
......
......@@ -22,6 +22,7 @@ protected:
float percent_cover;
bool revcomp;
string required;
float coverage_reference_length;
float coverage;
string coverage_info;
public:
......@@ -83,6 +84,11 @@ public:
* Setting the sequence is not required and it can be empty.
*/
void setRequiredSequence(string sequence);
/**
 * Sets the length against which the coverage of the representative is measured.
 * @param coverage_reference_length: reference length used to compute the coverage
 *        (the length of the representative is divided by this value)
 */
void setCoverageReferenceLength(float coverage_reference_length);
};
/**
......
#include <algorithm>
#include <iostream>
#include <iomanip>
#include "tools.h"
string seed_contiguous(int k)
......@@ -54,6 +56,13 @@ string string_of_int(int number)
return ss.str();
}
/**
 * Formats a floating-point value in fixed-point notation.
 * @param number: value to format
 * @param precision: number of digits written after the decimal point
 * @return the formatted text
 */
string fixed_string_of_float(float number, int precision)
{
  ostringstream out;
  // Same effect as streaming `fixed` and `setprecision(precision)`.
  out.setf(ios_base::fixed, ios_base::floatfield);
  out.precision(precision);
  out << number;
  return out.str();
}
string scientific_string_of_double(double number)
{
stringstream ss;
......
......@@ -72,6 +72,7 @@ bool pair_occurrence_sort(pair<T, int> a, pair<T, int> b);
string string_of_int(int number);
string fixed_string_of_float(float number, int precision);
string scientific_string_of_double(double number);
/**
......
......@@ -70,6 +70,7 @@ KmerRepresentativeComputer WindowsStorage::getRepresentativeComputer(junction wi
repComp.setMinCover(min_cover);
repComp.setPercentCoverage(percent_cover);
repComp.setRequiredSequence(window);
repComp.setCoverageReferenceLength(stats_by_window[window].getAverage());
repComp.compute();
// We should always have a representative, because
......@@ -142,6 +143,7 @@ void WindowsStorage::add(junction window, Sequence sequence, int status, Germlin
seqs_by_window[window].add(sequence);
status_by_window[window][status]++;
stats_by_window[window].insert(sequence.sequence.length());
germline_by_window[window] = germline;
}
......
......@@ -21,6 +21,7 @@
#include "read_storage.h"
#include "read_score.h"
#include "representative.h"
#include "stats.h"
#define NB_BINS 15
#define MAX_VALUE_BINS 500
......@@ -32,6 +33,7 @@ typedef string junction ;
class WindowsStorage {
private:
map<junction, BinReadStorage > seqs_by_window;
map<junction, Stats > stats_by_window;
map<junction, vector<int> > status_by_window;
map<junction, Germline* > germline_by_window;
map<string, string> windows_labels;
......
......@@ -18,3 +18,11 @@ $ Find approximately the good number of sequences for e-value computation
$ Find the good number of windows in Stanford S22
1: found 10732 50-windows in 13138 reads
$ First clone -- find the good number of reads
2:clone-001--.*--0000008
$ First clone -- find the good representative
1:clone-001--.*--lcl.FLN1FA001CPAUQ.1.-.105,232.-.2
$ First clone -- find the good coverage
1:clone-001--.* 128 bp .55. of 232.0 bp.
......@@ -3,7 +3,7 @@
cd $(dirname $0)
wget -O - http://www.imgt.org/download/GENE-DB/IMGTGENEDB-ReferenceSequences.fasta-nt-WithoutGaps-F+ORF+inframeP | python split-from-imgt.py
wget -O - IMGT_RELEASE http://www.imgt.org/download/GENE-DB/RELEASE
wget -O IMGT_RELEASE http://www.imgt.org/download/GENE-DB/RELEASE
wget -N http://rbx.vidjil.org/browser/germline/IGK-INTRON.fa
wget -N http://rbx.vidjil.org/browser/germline/IGK-KDE.fa
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment