Commit 34bbfe5e authored by flothoni's avatar flothoni

Merge branch 'feature-c/4034-genescan-cluster' of...

Merge branch 'feature-c/4034-genescan-cluster' of gitlab.inria.fr:vidjil/vidjil into feature-c/4034-genescan-cluster
parents df2098c2 163f7b53
Pipeline #150413 failed with stages
in 9 seconds
......@@ -23,6 +23,8 @@ profiling_algo:
- vidjil.cpu
- vidjil.mem.*
when: manual
only:
- /^feature-.*a.*\/.*$/
tags:
- cidocker
#ifndef AUTOMATON_H
#define AUTOMATON_H
#include <list>
#include <vector>
#include <cctype>
#include "kmerstore.h"
#include <cstdlib>
......@@ -53,7 +53,7 @@ public:
/**
* @return the information stored for this state
*/
virtual list<Info> &getInfo(void *state) = 0;
virtual vector<Info> &getInfo(void *state) = 0;
/**
* @param starting_state: the starting state for traversing the automate.
......@@ -104,7 +104,7 @@ class pointer_state {
public:
pointer_state<Info> *transitions[NB_TRANSITIONS]; /* Transitions to the 5 nt */
bool is_final;
list<Info> informations; /* != NULL when is_final */
vector<Info> informations; /* != NULL when is_final */
pointer_state():is_final(false),informations() {
for (size_t i = 0; i < NB_TRANSITIONS; i++)
......@@ -164,7 +164,7 @@ public:
/**
* @return the information stored for this state
*/
list<Info> &getInfo(void *state);
vector<Info> &getInfo(void *state);
/**
* @return the automaton initial state
......
......@@ -4,6 +4,7 @@
#include "automaton.h"
#include <stack>
#include <set>
#include <list>
//////////////////// IMPLEMENTATIONS ////////////////////
template <class Info>
......@@ -160,7 +161,7 @@ void PointerACAutomaton<Info>::build_failure_functions() {
}
template <class Info>
list<Info> &PointerACAutomaton<Info>::getInfo(void *state) {
vector<Info> &PointerACAutomaton<Info>::getInfo(void *state) {
return ((pointer_state<Info> *)state)->informations;
}
......
......@@ -178,7 +178,7 @@ namespace std {
template <>
struct hash<KmerAffect> {
size_t operator()(const KmerAffect &affect) const {
return (affect.getLabel()[0] << 8) | affect.getLength();
return (((unsigned char) affect.affect.c << 8) | (affect.getLength()));
}
};
}
......
......@@ -46,6 +46,10 @@ string Kmer::getLabel() const{
return "";
}
int Kmer::getStrand() const{
return 1;
}
size_t Kmer::getLength() const{
return 10;
}
......
......@@ -9,13 +9,13 @@
#include <math.h>
#include "bioreader.hpp"
#include "tools.h"
#include "proba.h"
using namespace std;
typedef
enum { KMER_INDEX, AC_AUTOMATON } IndexTypes;
#define MAX_PRECOMPUTED_PROBA 500 /* Precompute 500 probabilities for each index load */
class Kmer {
public:
unsigned int count;
......@@ -57,6 +57,11 @@ public:
* it only there for a reason of compatibility with KmerAffect)
*/
bool isAmbiguous() const;
/**
* @return 1 (only there for compatibility reasons with KmerAffect)
*/
int getStrand() const;
} ;
ostream &operator<<(ostream &os, const Kmer &kmer);
bool operator==(const Kmer &k1, const Kmer &k2);
......@@ -80,9 +85,8 @@ protected:
size_t nb_kmers_inserted;
size_t max_size_indexing;
bool finished_building;
map<float, vector<double> > precomputed_proba_with_system,
precomputed_proba_without_system;
ProbaPrecomputer proba;
public:
virtual ~IKmerStore();
......@@ -377,61 +381,20 @@ int IKmerStore<T>::atMostMaxSizeIndexing(int n) const {
return max_size_indexing ;
}
template<class T>
void IKmerStore<T>::precompute_proba(float index_load) {
precomputed_proba_with_system[index_load] = vector<double>(MAX_PRECOMPUTED_PROBA);
precomputed_proba_without_system[index_load] = vector<double>(MAX_PRECOMPUTED_PROBA);
vector<double> &pproba_with = precomputed_proba_with_system[index_load];
vector<double> &pproba_without = precomputed_proba_without_system[index_load];
pproba_with[0] = 1;
pproba_without[0] = 1;
for (int i = 1; i < MAX_PRECOMPUTED_PROBA; i++) {
pproba_with[i] = pproba_with[i - 1] * index_load;
pproba_without[i] = pproba_without[i - 1] * (1 - index_load);
}
}
template<class T>
double IKmerStore<T>::getProbabilityAtLeastOrAbove(const T kmer, int at_least, int length) {
if (at_least == 0) return 1.0; // even if 'length' is very small
// n: number of kmers in the sequence
int n = length - getS() + 1;
float index_load = getIndexLoad(kmer) ;
if (! precomputed_proba_without_system.count(index_load)) {
precompute_proba(index_load);
}
double proba = 0;
double probability_not_having_system;
double probability_having_system;
if (precomputed_proba_with_system.at(index_load).size() > (size_t)at_least)
probability_having_system = precomputed_proba_with_system.at(index_load)[at_least];
else
probability_having_system = pow(index_load, at_least);
if (precomputed_proba_without_system.at(index_load).size() > (size_t)n - at_least)
probability_not_having_system = precomputed_proba_without_system.at(index_load)[n-at_least];
else
probability_not_having_system = pow(1 - index_load, n - at_least);
for (int i=at_least; i<=n; i++) {
proba += nChoosek(n, i) * probability_having_system * probability_not_having_system;
probability_having_system *= index_load;
probability_not_having_system /= (1 - index_load);
}
#ifdef DEBUG_KMS_EVALUE
cerr << "e-value:\tindex_load=" << index_load << ",\tat_least=" << at_least << ",\tlength=" << length <<",\tp-value=" << proba << endl;
#endif
return proba;
int kmer_length = kmer.getLength();
if (kmer.isUnknown() || kmer.isAmbiguous() || kmer == AFFECT_NOT_UNKNOWN
|| kmer.getLength() == ~0)
kmer_length = getS();
int n = max(0, length - kmer_length + 1);
at_least = min(at_least, n);
return proba.getProba(index_load, at_least, n);
}
template<class T>
int IKmerStore<T>::getK() const {
return k;
......
#include "proba.h"
#include <math.h>
#include "tools.h"
// ProbaPrecomputer::ProbaPrecomputer() {}
void ProbaPrecomputer::precomputeSystemProba(float index_load) {
precomputed_proba_with_system[index_load] = std::vector<double>(MAX_PRECOMPUTED_PROBA);
precomputed_proba_without_system[index_load] = std::vector<double>(MAX_PRECOMPUTED_PROBA);
std::vector<double> &pproba_with = precomputed_proba_with_system[index_load];
std::vector<double> &pproba_without = precomputed_proba_without_system[index_load];
pproba_with[0] = 1;
pproba_without[0] = 1;
for (int i = 1; i < MAX_PRECOMPUTED_PROBA; i++) {
pproba_with[i] = pproba_with[i - 1] * index_load;
pproba_without[i] = pproba_without[i - 1] * (1 - index_load);
}
precomputed_proba[index_load] = std::vector<std::vector<double> >(MAX_PRECOMPUTED_PROBA);
}
void ProbaPrecomputer::precomputeProba(float index_load, int length) {
if (length < MAX_PRECOMPUTED_PROBA) {
// By definition of MAX_PRECOMPUTED_PROBA they exist
double probability_having_system = precomputed_proba_with_system.at(index_load)[length];
double probability_not_having_system = 1;
precomputed_proba[index_load][length] = std::vector<double>(length+1);
std::vector<double> &precomp_proba = precomputed_proba[index_load][length];
double proba = 0;
precomp_proba[0] = proba;
double Cnk = 1; // nChoosek(length, length);
for (int i=length; i>=1; i--) {
proba += probabilityPreviousIteration(i, length, Cnk, probability_having_system,
probability_not_having_system, index_load);
precomp_proba[i] = proba;
}
}
}
double ProbaPrecomputer::probabilityPreviousIteration(int iteration, int length, double &Cnk, double &proba_with, double &proba_without, float index_load) {
double proba = Cnk * proba_with * proba_without;
proba_with = getProbaWith(index_load, iteration-1); // Otherwise when probability are too low they are stored as 0 and we are f*****
proba_without *= (1 - index_load);
Cnk *= iteration*1. / (length-iteration+1);
return proba;
}
double ProbaPrecomputer::getProbaWith(double p, int nb) {
if (nb < MAX_PRECOMPUTED_PROBA)
return precomputed_proba_with_system.at(p)[nb];
return pow(p, nb);
}
double ProbaPrecomputer::getProba(float index_load, int at_least, int length) {
if (at_least == 0) return 1.0; // even if 'length' is very small
if (! precomputed_proba_without_system.count(index_load)) {
precomputeSystemProba(index_load);
}
if (length < MAX_PRECOMPUTED_PROBA) {
if (! precomputed_proba[index_load][length].size())
precomputeProba(index_load, length);
if (precomputed_proba[index_load][length].size() > 0) {
return precomputed_proba[index_load][length][at_least];
}
}
double proba = 0;
double probability_not_having_system;
double probability_having_system;
probability_having_system = pow(index_load, length);
probability_not_having_system = 1;
double Cnk = 1;
for (int i=length; i >= at_least; i--) {
proba += probabilityPreviousIteration(i, length, Cnk, probability_having_system,
probability_not_having_system, index_load);
}
#ifdef DEBUG_KMS_EVALUE
cerr << "e-value:\tindex_load=" << index_load << ",\tat_least=" << at_least << ",\tlength=" << length <<",\tp-value=" << proba << endl;
#endif
return proba;
}
#ifndef PROBA_H
#define PROBA_H
#include <map>
#include <vector>
#define MAX_PRECOMPUTED_PROBA 500 /* Precompute 500 probabilities for each index load */
/**
* Store probabilities for sequences whose length is < MAX_PRECOMPUTED_PROBA
*/
class ProbaPrecomputer {
private:
// Store for each index load, the probability that have been computed for
// each possiible at_least and length values. To be used as
// precomputed_proba[index_load][length][at_least]
std::map<float, std::vector<std::vector<double> > > precomputed_proba;
std::map<float, std::vector<double> > precomputed_proba_with_system, precomputed_proba_without_system;
/**
* Precompute index_load^n and (1-index_load)^n (for n=0...MAX_PRECOMPUTED_PROBA)
*/
void precomputeSystemProba(float index_load);
/**
* Precompute the results of getProba (if length < MAX_PRECOMPUTED_PROBA)
* for every possible values of at_least between 1 and length
*/
void precomputeProba(float index_load, int length);
/**
* Commpute the previous iteration for the probability (and updates the values Cnk, proba_with,
* proba_without.
*/
inline double probabilityPreviousIteration(int iteration, int length, double &Cnk, double &proba_with, double &proba_without, float index_load);
public:
/**
* @return probability that the number of kmers (with index load 'index_load')
* is 'at_least' or more in a sequence of length 'length'
*/
double getProba(float index_load, int at_least, int length);
/**
* @return p^nb (potentially pre-computed)
*/
double getProbaWith(double p, int nb);
friend void testProba1();
};
#endif
......@@ -895,12 +895,9 @@ void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id
int score = dp.compute(onlyBottomTriangle, BOTTOM_TRIANGLE_SHIFT);
if (local==true){
dp.backtrack();
}
if (score > best_score)
{
{
dp.backtrack();
best_score = score ;
best_best_i = dp.best_i ;
best_best_j = dp.best_j ;
......@@ -908,9 +905,6 @@ void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id
best_first_j = dp.first_j ;
box->ref_nb = r ;
box->ref_label = rep.label(r) ;
if (!local)
dp.backtrack();
box->marked_pos = dp.marked_pos_i ;
}
......
......@@ -302,7 +302,7 @@ def header_igblast_results(ff_fasta, ff_igblast):
VIDJIL_FINE = '{directory}/vidjil-algo --header-sep "#" -c designations -2 -3 -d -g {directory}/germline/homo-sapiens.g %s >> %s'
VIDJIL_KMER = '{directory}/vidjil-algo -w 20 --header-sep "#" -b out -c windows -uuuU -2 -g {directory}/germline/homo-sapiens.g %s > /dev/null ; cat out/out.segmented.vdj.fa out/out.unsegmented.vdj.fa >> %s'
VIDJIL_KMER = '{directory}/vidjil-algo -w 20 --header-sep "#" -b out -c windows -uuuU -2 -g {directory}/germline/homo-sapiens.g %s > /dev/null ; cat out/out.detected.vdj.fa out/out.undetected.vdj.fa >> %s'
def should_results_from_vidjil_output(f_log):
'''
......
......@@ -12,10 +12,10 @@ $ Find the good statistics on TRG
$ Find the good length statistics
1: SEG -> .* 84.0
$ Four segmented on forward
$ Four detected on forward
1:SEG_\+ -> 4
$ Three segmented on reverse (Junc#01 doesn't have its rc)
$ Three detected on reverse (Junc#01 doesn't have its rc)
1:SEG_- -> 3
$ No too short window
......
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -k 4 -w 20 -z 0 -c clones -V $VIDJIL_DATA/toy_V.fa -J $VIDJIL_DATA/toy_J.fa $VIDJIL_DATA/ambiguous_representative.fa
$ Short reads properly segmented
$ Short reads properly detected
1:SEG_+.* -> .* 4
$ Window found
......
!LAUNCH: $VIDJIL_DIR/$EXEC --header-sep FA -k 16 -z 0 -w 60 -r 5 -o out2 -uuu -U -v -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa $VIDJIL_DATA/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --header-sep FA -k 16 -z 0 -w 60 -r 5 -o out2 -uuu -U -v -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa $VIDJIL_DATA/Stanford_S22.fasta ; tail out2/Stanford_S22.detected.vdj.fa ; grep UNSEG out2/Stanford_S22.undetected.vdj.fa
# Testing uncommon and debug options
$ verbose (-v)
2:auditioned sequences
$ segmented.fa (-U)
$ detected.fa (-U)
2:>lcl.FLN1.* VJ .* seed custom SEG_[+]
$ unsegmented.fa (-u)
$ undetected.fa (-u)
3:>lcl.FLN1.* UNSEG
$ fasta header stops before FA
......
......@@ -11,5 +11,5 @@ $ With default seeds the J is not found as it is too short
$ With different seeds we don't have the same seed for the IGHV and IGHJ
1: IGH .* ######-###### .* ########
$ The sequence is segmented
$ The sequence is detected
1: SEG -> 1
......@@ -9,7 +9,7 @@ $ From homo-sapiens.g
$ Number of reads
1:"total": \[13153\]
$ Number of segmented reads
$ Number of detected reads
1:"segmented": \[13153\]
$ Most abundant window
......
......@@ -3,13 +3,13 @@
!LAUNCH: diff out/Stanford_S22{,.rc}.vidjil | grep GGG && diff vidjil_s22.log vidjil_s22_rc.log
!EXIT_CODE: 1
$ Same number segmented
0:==> segmented
$ Same number detected
0:==> detected
$ Same number of windows found
0:==> found
$ Same number unsegmented
$ Same number undetected
0:UNSEG.*->
$ Keep the same number of windows
......
......@@ -6,7 +6,7 @@ $ Just one window found
$ Just one clone found
1:==> 1 clones
$ Two reads segmented on TRA+D
$ Two reads detected on TRA+D
1:TRA.D -> 2
$ One clone for the two reads, Vd1/Ja29
......
......@@ -3,6 +3,6 @@
$ Segment 6 reads, thanks to -i
1:junction detected in 6 reads
$ 4 reads are segmented on TRD+ (with -w 10, otherwise the reads are too short)
$ 4 reads are detected on TRD+ (with -w 10, otherwise the reads are too short)
1: TRD[+] .*->.* 4
!LAUNCH: $VIDJIL_DIR/$EXEC -y 0 --trim 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -x 100 $VIDJIL_DATA/Stanford_S22.fasta
$ No read segmented as we have no germline because of the --trim
$ No read detected as we have no germline because of the --trim
1: UNSEG too few V/J -> 100
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRB,TRD,TRG,IGH,IGK,IGL -uuu $VIDJIL_DATA/segmentation-2.fa ; cat out/segmentation-2.unsegmented.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRB,TRD,TRG,IGH,IGK,IGL -uuu $VIDJIL_DATA/segmentation-2.fa ; cat out/segmentation-2.undetected.vdj.fa
$ Only one sequence is segmented, but it is too small for a window (too short w)
$ Only one sequence is detected, but it is too small for a window (too short w)
1: junction detected in 1 reads
1: found 0 windows in 0 reads
......@@ -13,7 +13,7 @@ $ The proper unsegmentation cause is given
1: UNSEG ambiguous -> .* 1
1: UNSEG too short w -> .* 1
$ The proper unsegmentation cause is given in the .unsegmented.vdj.fa file (-uuu)
$ The proper cause is given in the .undetected.vdj.fa file (-uuu)
1: >too_short .* UNSEG too short
1: >strand .* UNSEG strand
3: >too_few_vj-..* UNSEG too few V/J
......
......@@ -17,7 +17,7 @@ $ Some changed windows are lighlty shortened
63: w95/-10
29: w90/-10
$ Find the good number of segmented sequences (including "too short sequences")
$ Find the good number of detected sequences (including "too short sequences")
1: junction detected in 13153 reads .100%.
$ Find the good number of windows in Stanford S22
......
#include "tests.h"
#include <core/proba.h>
#include <chrono>
void testProba1() {
ProbaPrecomputer p;
// Very simple tests (and test precomputation)
TAP_TEST_APPROX(p.getProba(0.5, 1, 1), 0.5, 0.01, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.3, 2, 3), 0.216, 0.001, TEST_PROBA_PRECOMPUTER, ""); // See https://www.wolframalpha.com/input/?i=prob+x%3E%3D2+for+x+binomial+with+n%3D3+and+p%3D0.3
// Test precomputation
TAP_TEST_EQUAL(p.precomputed_proba[0.5].size(), MAX_PRECOMPUTED_PROBA, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba_with_system[0.5].size(), MAX_PRECOMPUTED_PROBA, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba_without_system[0.5].size(), MAX_PRECOMPUTED_PROBA, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba[0.3].size(), MAX_PRECOMPUTED_PROBA, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba_with_system[0.3].size(), MAX_PRECOMPUTED_PROBA, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba_without_system[0.3].size(), MAX_PRECOMPUTED_PROBA, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba.count(0.4), 0, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba_with_system.count(0.4), 0, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba_without_system.count(0.4), 0, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba[0.5][1].size(), 2, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba[0.5][100].size(), 0, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.5, 1, 100), 1-pow(0.5,100), 1e-15, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba[0.5][100].size(), 101, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba.count(0.4), 0, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 99, 100), 2.43e-38, 1e-38, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_EQUAL(p.precomputed_proba[0.4][100].size(), 101, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 99, 100), 2.43e-38, 1e-38, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 90, 100), 1.730e-25, 1e-28, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 70, 100), 1.25e-9, 1e-11, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 50, 100), 0.027, 1e-3, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 30, 100), 0.985, 1e-3, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 10, 100), 1, 1e-3, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 1, 100), 1, 1e-3, TEST_PROBA_PRECOMPUTER, "");
// Test not pre-computed (too large for MAX_PRECOMPUTED_PROBA)
TAP_TEST(MAX_PRECOMPUTED_PROBA < 1000, TEST_PROBA_PRECOMPUTER, "If I'm failing change me AND the following test");
TAP_TEST_APPROX(p.getProba(0.4, 412, 1000), 0.229, 1e-3, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 912, 1000), 4e-255, 1e-100, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 800, 1000), 1.5e-147, 1e-148, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 600, 1000), 2.8e-37, 1e-38, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 370, 1000), .976, 0.001, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 300, 1000), 1, 1e-3, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 100, 1000), 1, 1e-3, TEST_PROBA_PRECOMPUTER, "");
TAP_TEST_APPROX(p.getProba(0.4, 10, 1000), 1, 1e-3, TEST_PROBA_PRECOMPUTER, "");
}
void testProba() {
testProba1();
}
......@@ -433,12 +433,14 @@ void testProbability(IndexTypes index) {
KmerAffectAnalyser *kaa = kseg.getKmerAffectAnalyser();
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_NOT_UNKNOWN, 3), 0, TEST_PROBABILITY_SEGMENTATION, "");
// We have only 2 k-mers, thus we can't have 3 "not unknowns". Thus, it is the probability of having 2 among 2 (with p=.75), same as the test below
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_NOT_UNKNOWN, 3), .75 * .75 , TEST_PROBABILITY_SEGMENTATION, "");
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_NOT_UNKNOWN, 2), .75 * .75, TEST_PROBABILITY_SEGMENTATION, "");
TAP_TEST(kaa->getProbabilityAtLeastOrAbove(AFFECT_NOT_UNKNOWN, 1) == .75 * 2 * .25 + kaa->getProbabilityAtLeastOrAbove(AFFECT_NOT_UNKNOWN, 2), TEST_PROBABILITY_SEGMENTATION, "");
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_NOT_UNKNOWN, 0), 1, TEST_PROBABILITY_SEGMENTATION, "");
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_UNKNOWN, 3), 0, TEST_PROBABILITY_SEGMENTATION, ".getProbabilityAtLeastOrAbove() with AFFECT_UNKOWN");
// Same: we only have 2
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_UNKNOWN, 2), .25 * .25, TEST_PROBABILITY_SEGMENTATION, ".getProbabilityAtLeastOrAbove() with AFFECT_UNKOWN");
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_UNKNOWN, 2), .25 * .25, TEST_PROBABILITY_SEGMENTATION, ".getProbabilityAtLeastOrAbove() with AFFECT_UNKOWN");
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_UNKNOWN, 0), 1, TEST_PROBABILITY_SEGMENTATION, ".getProbabilityAtLeastOrAbove() with AFFECT_UNKOWN");
}
......
......@@ -18,6 +18,7 @@
#include "testReadStorage.cpp"
#include "testAutomaton.cpp"
#include "testMath.cpp"
#include "testProbability.cpp"
int main(void) {
TAP_START(NB_TESTS);
......@@ -40,6 +41,6 @@ int main(void) {
testReadStorage();
testAutomaton();
testMath();
testProba();
TAP_END_TEST_EXIT
}
......@@ -197,6 +197,9 @@ enum {
/* Bugs */
TEST_BUG2224,
// ProbaPrecomputer
TEST_PROBA_PRECOMPUTER,
NB_TESTS
};
......@@ -367,6 +370,7 @@ inline void declare_tests() {
RECORD_TAP_TEST(TEST_FINE_SEGMENT_OVERLAP, "Test fine segmentation with an overlap");
RECORD_TAP_TEST(TEST_SEGMENT_REVCOMP, "Test segmentation on a sequence and its revcomp");
RECORD_TAP_TEST(TEST_BUG2224, "Test issue #2224 (seed longer than sequence)");
RECORD_TAP_TEST(TEST_PROBA_PRECOMPUTER, "Test ProbaPrecomputer::getProba");
}
TAP_DECLARATIONS
......
......@@ -99,8 +99,8 @@ enum { CMD_WINDOWS, CMD_CLONES, CMD_SEGMENT, CMD_GERMLINES } ;
#define CLONES_FILENAME ".vdj.fa"
#define CLONE_FILENAME "clone.fa-"
#define WINDOWS_FILENAME ".windows.fa"
#define SEGMENTED_FILENAME ".segmented.vdj.fa"
#define UNSEGMENTED_FILENAME ".unsegmented.vdj.fa"
#define SEGMENTED_FILENAME ".detected.vdj.fa"
#define UNSEGMENTED_FILENAME ".undetected.vdj.fa"
#define UNSEGMENTED_DETAIL_FILENAME ".fa"
#define AFFECTS_FILENAME ".affects"
#define EDGES_FILENAME ".edges"
......@@ -485,7 +485,7 @@ int main (int argc, char **argv)
double expected_value = THRESHOLD_NB_EXPECTED;
app.add_option("--e-value,-e", expected_value,
"maximal e-value for determining if a V-J segmentation can be trusted", true)
"maximal e-value for trusting the detection of a V-J recombination", true)
-> group(group) -> transform(string_NO_LIMIT);
Cost segment_cost = DEFAULT_SEGMENT_COST ;
......@@ -499,7 +499,7 @@ int main (int argc, char **argv)
double expected_value_D = THRESHOLD_NB_EXPECTED_D;
app.add_option("--analysis-e-value-D,-E", expected_value_D,
"maximal e-value for determining if a D segment can be trusted", true)
"maximal e-value for trusting the detection of a D segment", true)
-> group(group) -> level();
int kmer_threshold = DEFAULT_KMER_THRESHOLD;
......@@ -547,22 +547,22 @@ int main (int argc, char **argv)
group = "Detailed output per read (generally not recommended, large files, but may be used for filtering, as in -uu -X 1000)";
bool output_segmented = false;
app.add_flag("--out-analyzed,-U", output_segmented,
"output analyzed reads (in " SEGMENTED_FILENAME " file)")
app.add_flag("--out-detected,-U", output_segmented,
"output reads with detected recombinations (in " SEGMENTED_FILENAME " file)")
-> group(group);
bool output_unsegmented = false;
bool output_unsegmented_detail = false;
bool output_unsegmented_detail_full = false;
app.add_flag_function("--out-unanalyzed,-u", [&](size_t n) {
app.add_flag_function("--out-undetected,-u", [&](size_t n) {
output_unsegmented = (n >= 3); // -uuu
output_unsegmented_detail_full = (n >= 2); // -uu
output_unsegmented_detail = (n >= 1); // -u
}, R"Z(
-u output unanalyzed reads, gathered by cause, except for very short and 'too few V/J' reads (in *)Z" UNSEGMENTED_DETAIL_FILENAME R"Z( files)
-uu output unanalyzed reads, gathered by cause, all reads (in *)Z" UNSEGMENTED_DETAIL_FILENAME R"Z( files) (use only for debug)
-uuu output unanalyzed reads, all reads, including a )Z" UNSEGMENTED_FILENAME R"Z( file (use only for debug))Z")
-u output undetected reads, gathered by cause, except for very short and 'too few V/J' reads (in *)Z" UNSEGMENTED_DETAIL_FILENAME R"Z( files)
-uu output undetected reads, gathered by cause, all reads (in *)Z" UNSEGMENTED_DETAIL_FILENAME R"Z( files) (use only for debug)
-uuu output undetected reads, all reads, including a )Z" UNSEGMENTED_FILENAME R"Z( file (use only for debug))Z")
-> group(group);
bool output_sequences_by_cluster = false;
......
......@@ -385,7 +385,7 @@ Graph.prototype = {
elem[0].classList.remove("graph_listElem_selected")