Commit adf42c5d authored by Mathieu Giraud's avatar Mathieu Giraud

Merge branch 'feature-a/3954-different-seeds-v-j' into 'dev'

Different V/D/J/ seeds

Closes #3954

See merge request !634
parents b02d0e36 72fa871c
Pipeline #132693 failed with stages
in 28 minutes and 51 seconds
......@@ -6,7 +6,7 @@
#include <ctype.h>
void Germline::init(string _code, char _shortcut,
string seed,
string seed_5, string seed_4, string seed_3,
int max_indexing, bool build_automaton)
{
seg_method = SEG_METHOD_53 ;
......@@ -15,7 +15,9 @@ void Germline::init(string _code, char _shortcut,
index = 0 ;
this->max_indexing = max_indexing;
this->seed = expand_seed(seed);
this->seed_5 = expand_seed(seed_5);
this->seed_4 = expand_seed(seed_4);
this->seed_3 = expand_seed(seed_3);
affect_5 = "V" ;
affect_4 = "" ;
......@@ -24,20 +26,20 @@ void Germline::init(string _code, char _shortcut,
affect_5 = string(1, toupper(shortcut)) + "-" + code + "V";
affect_4 = string(1, 14 + shortcut) + "-" + code + "D";
affect_3 = string(1, tolower(shortcut)) + "-" + code + "J";
filter_5 = build_automaton ? new FilterWithACAutomaton(rep_5, this->seed) : nullptr;
filter_5 = build_automaton ? new FilterWithACAutomaton(rep_5, this->seed_5) : nullptr;
}
Germline::Germline(string _code, char _shortcut,
string seed, int max_indexing, bool build_automaton)
string seed_5, string seed_4, string seed_3, int max_indexing, bool build_automaton)
{
init(_code, _shortcut, seed, max_indexing, build_automaton);
init(_code, _shortcut, seed_5, seed_4, seed_3, max_indexing, build_automaton);
}
Germline::Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
string seed, int max_indexing, bool build_automaton)
string seed_5, string seed_4, string seed_3, int max_indexing, bool build_automaton)
{
f_reps_5.push_back(f_rep_5);
......@@ -49,7 +51,7 @@ Germline::Germline(string _code, char _shortcut,
rep_4 = BioReader(f_rep_4, 2, "|");
rep_3 = BioReader(f_rep_3, 2, "|");
init(_code, _shortcut, seed, max_indexing, build_automaton);
init(_code, _shortcut, seed_5, seed_4, seed_3, max_indexing, build_automaton);
if (rep_4.size())
seg_method = SEG_METHOD_543 ;
......@@ -58,7 +60,7 @@ Germline::Germline(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
string seed, int max_indexing, bool build_automaton)
string seed_5, string seed_4, string seed_3, int max_indexing, bool build_automaton)
{
f_reps_5 = _f_reps_5 ;
......@@ -74,7 +76,7 @@ Germline::Germline(string _code, char _shortcut,
for (list<string>::const_iterator it = f_reps_5.begin(); it != f_reps_5.end(); ++it)
rep_5.add(*it);
init(_code, _shortcut, seed, max_indexing, build_automaton);
init(_code, _shortcut, seed_5, seed_4, seed_3, max_indexing, build_automaton);
for (list<string>::const_iterator it = f_reps_4.begin(); it != f_reps_4.end(); ++it)
rep_4.add(*it);
......@@ -89,20 +91,20 @@ Germline::Germline(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
BioReader _rep_5, BioReader _rep_4, BioReader _rep_3,
string seed, int max_indexing, bool build_automaton)
string seed_5, string seed_4, string seed_3, int max_indexing, bool build_automaton)
{
rep_5 = _rep_5 ;
rep_4 = _rep_4 ;
rep_3 = _rep_3 ;
init(_code, _shortcut, seed, max_indexing, build_automaton);
init(_code, _shortcut, seed_5, seed_4, seed_3, max_indexing, build_automaton);
if (rep_4.size())
seg_method = SEG_METHOD_543 ;
}
Germline::Germline(string code, char shortcut, string path, json json_recom,
string seed, int max_indexing, bool build_automaton)
string seed_5, string seed_4, string seed_3, int max_indexing, bool build_automaton)
{
bool regular = (code.find("+") == string::npos);
......@@ -119,7 +121,7 @@ Germline::Germline(string code, char shortcut, string path, json json_recom,
rep_5.add(path + filename);
}
init(code, shortcut, seed, max_indexing, build_automaton);
init(code, shortcut, seed_5, seed_4, seed_3, max_indexing, build_automaton);
if (json_recom.find("4") != json_recom.end()) {
for (json::iterator it = json_recom["4"].begin();
......@@ -167,10 +169,12 @@ void Germline::finish() {
void Germline::new_index(IndexTypes type)
{
assert(! seed.empty() && (seed.find(SEED_YES) != std::string::npos));
assert(! seed_5.empty() && (seed_5.find(SEED_YES) != std::string::npos));
assert(! seed_4.empty() && (seed_4.find(SEED_YES) != std::string::npos));
assert(! seed_3.empty() && (seed_3.find(SEED_YES) != std::string::npos));
bool rc = true ;
index = KmerStoreFactory<KmerAffect>::createIndex(type, seed, rc);
index = KmerStoreFactory<KmerAffect>::createIndex(type, seed_5, rc);
index->refs = 1;
update_index();
......@@ -187,19 +191,19 @@ void Germline::update_index(IKmerStore<KmerAffect> *_index)
{
if (!_index) _index = index ;
_index->insert(rep_5, affect_5, max_indexing, seed);
_index->insert(rep_4, affect_4, 0, seed);
_index->insert(rep_3, affect_3, -max_indexing, seed);
_index->insert(rep_5, affect_5, max_indexing, seed_5);
_index->insert(rep_4, affect_4, 0, seed_4);
_index->insert(rep_3, affect_3, -max_indexing, seed_3);
}
void Germline::mark_as_ambiguous(Germline *other)
{
index->insert(other->rep_5, AFFECT_AMBIGUOUS_SYMBOL, max_indexing, seed);
index->insert(other->rep_5, AFFECT_AMBIGUOUS_SYMBOL, max_indexing, seed_5);
if (other->affect_4.size())
index->insert(other->rep_4, AFFECT_AMBIGUOUS_SYMBOL, 0, seed);
index->insert(other->rep_4, AFFECT_AMBIGUOUS_SYMBOL, 0, seed_4);
index->insert(other->rep_3, AFFECT_AMBIGUOUS_SYMBOL, -max_indexing, seed);
index->insert(other->rep_3, AFFECT_AMBIGUOUS_SYMBOL, -max_indexing, seed_3);
}
void Germline::override_rep5_rep3_from_labels(KmerAffect left, KmerAffect right)
......@@ -229,15 +233,18 @@ ostream &operator<<(ostream &out, const Germline &germline)
out << setw(5) << left << germline.code << right << " '" << germline.shortcut << "' "
<< " ";
size_t seed_span = germline.seed.size();
size_t seed_w = seed_weight(germline.seed);
size_t seed_5_span = germline.seed_5.size();
size_t seed_5_w = seed_weight(germline.seed_5);
size_t seed_3_span = germline.seed_3.size();
size_t seed_3_w = seed_weight(germline.seed_3);
if (germline.index) {
out << " 0x" << hex << setw(2) << setfill('0') << germline.index->id << dec << setfill(' ') << " " ;
out << fixed << setprecision(3) << setw(8)
<< 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, seed_span)) << "%" << " "
<< 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_3, 1, seed_span)) << "%";
out << " l" << germline.seed.length() << " k" << seed_w << " " << germline.seed ;
<< 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_5, 1, seed_5_span)) << "%" << " "
<< 100 * germline.index->getIndexLoad(KmerAffect(germline.affect_3, 1, seed_3_span)) << "%";
out << " l" << germline.seed_5.length() << " k" << seed_5_w << " " << germline.seed_5 ;
out << " l" << germline.seed_3.length() << " k" << seed_3_w << " " << germline.seed_3 ;
}
out << endl;
......@@ -324,8 +331,26 @@ void MultiGermline::build_from_json(string path, string json_filename_and_filter
char shortcut = json_value["shortcut"].dump()[1];
string code = it.key();
json json_parameters = json_value["parameters"];
string seed = json_parameters["seed"];
string seed;
string seed_5, seed_4, seed_3;
if (json_parameters.find("seed") != json_parameters.end()) {
seed = json_parameters["seed"];
}
if (default_seed.size() > 0)
seed = default_seed;
if (seed.size() > 0)
seed_5 = seed_4 = seed_3 = seed;
if (json_parameters.find("seed_5") != json_parameters.end()) {
seed_5 = json_parameters["seed_5"];
}
if (json_parameters.find("seed_4") != json_parameters.end()) {
seed_4 = json_parameters["seed_4"];
}
if (json_parameters.find("seed_3") != json_parameters.end()) {
seed_3 = json_parameters["seed_3"];
}
if (default_max_indexing == 0) {
if (json_parameters.count("trim_sequences") > 0) {
max_indexing = json_parameters["trim_sequences"];
......@@ -353,12 +378,10 @@ void MultiGermline::build_from_json(string path, string json_filename_and_filter
break ;
}
seed = (default_seed.size() == 0) ? seed : default_seed;
//for each set of recombination 3/4/5
for (json::iterator it2 = recom.begin(); it2 != recom.end(); ++it2) {
add_germline(new Germline(code, shortcut, path + "/", *it2,
seed, max_indexing, build_automaton));
seed_5, seed_4, seed_3, max_indexing, build_automaton));
}
}
......
......@@ -44,7 +44,7 @@ class Germline {
int max_indexing;
void init(string _code, char _shortcut,
string seed, int max_indexing, bool build_automaton=false);
string seed_5, string seed_4, string seed_3, int max_indexing, bool build_automaton=false);
public:
/*
......@@ -53,21 +53,26 @@ class Germline {
Germline(string _code, char _shortcut,
list <string> f_rep_5, list <string> f_rep_4, list <string> f_rep_3,
string seed="", int max_indexing=0, bool build_automaton=false);
string seed_5="", string seed_4="", string seed_3="", int max_indexing=0,
bool build_automaton=false);
Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
string seed="", int max_indexing=0, bool build_automaton=false);
string seed_5="", string seed_4="", string seed_3="", int max_indexing=0,
bool build_automaton=false);
Germline(string _code, char _shortcut,
BioReader _rep_5, BioReader _rep_4, BioReader _rep_3,
string seed="", int max_indexing=0, bool build_automaton=false);
string seed_5="", string seed_4="", string seed_3="", int max_indexing=0,
bool build_automaton=false);
Germline(string _code, char _shortcut,
string seed="", int max_indexing=0, bool build_automaton=false);
string seed_5="", string seed_4="", string seed_3="", int max_indexing=0,
bool build_automaton=false);
Germline(string _code, char shortcut, string path, json json_recom,
string seed="", int max_indexing=0, bool build_automaton=false);
string seed_5="", string seed_4="", string seed_3="", int max_indexing=0,
bool build_automaton=false);
~Germline();
......@@ -79,7 +84,9 @@ class Germline {
/**
* The string used for indexing the germline.
*/
string seed;
string seed_5;
string seed_4;
string seed_3;
/**
* Finishes the construction of the germline so that it can be used
......
......@@ -484,7 +484,7 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold,
KmerAffectAnalyser kaa(*(germline->index), sequence);
KmerAffect kmer = KmerAffect(germline->affect_4, 1, germline->seed.size());
KmerAffect kmer = KmerAffect(germline->affect_4, 1, germline->seed_4.size());
int c = kaa.count(kmer);
// E-value
......@@ -576,12 +576,12 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold,
return ;
} else if (nb_strand[0] > RATIO_STRAND * nb_strand[1]) {
strand = -1;
before = KmerAffect(germline->affect_3, -1, germline->seed.size());
after = KmerAffect(germline->affect_5, -1, germline->seed.size());
before = KmerAffect(germline->affect_3, -1, germline->seed_3.size());
after = KmerAffect(germline->affect_5, -1, germline->seed_5.size());
} else if (nb_strand[1] > RATIO_STRAND * nb_strand[0]) {
strand = 1;
before = KmerAffect(germline->affect_5, 1, germline->seed.size());
after = KmerAffect(germline->affect_3, 1, germline->seed.size());
before = KmerAffect(germline->affect_5, 1, germline->seed_5.size());
after = KmerAffect(germline->affect_3, 1, germline->seed_3.size());
} else {
// Ambiguous information: we have positive and negative strands
// and there is not enough difference to put them apart.
......
......@@ -21,7 +21,10 @@ int seed_weight(const string &seed)
}
map<string, string> seedMap = {
{"7c", "#######"},
{"8c", "########"},
{"9c", "#########"},
{"8s", "####-####"},
{"10s", "#####-#####"},
{"12s", "######-######"},
{"13s", "#######-######"}
......
{
"ref": "http://www.vidjil.org/germlines/germline-59.tar.gz",
"species": "Homo sapiens",
"species_taxon_id": 9606,
"path": "../../../germline/homo-sapiens",
"systems": {
"IGH": {
"shortcut": "H",
"color" : "#6c71c4",
"description": "Human immunoglobulin, heavy locus (14q32.33)",
"recombinations": [ {
"5": ["IGHV.fa"],
"4": ["IGHD.fa"],
"3": ["IGHJ+down.fa"]
} ],
"parameters": {
"seed": "12s",
"seed_3": "8c"
}
}
}
}
>ighv-ighj
gacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
attactact
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/short-j.fa
$ Both seeds for IGH are identical
1: IGH .* ######-###### .* ######-######
$ With default seeds the J is not found as it is too short
1: UNSEG only V/5' -> 1
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -g $VIDJIL_DATA/seeds.g $VIDJIL_DATA/short-j.fa
$ With different seeds we don't have the same seed for the IGHV and IGHJ
1: IGH .* ######-###### .* ########
$ The sequence is segmented
1: SEG -> 1
......@@ -13,7 +13,8 @@ void testSegmentationBug1(IndexTypes index) {
BioReader seqJ("../../germline/homo-sapiens/TRGJ.fa");
Germline *germline ;
germline = new Germline("custom", 'x', seqV, seqV, seqJ, "#############");
germline = new Germline("custom", 'x', seqV, seqV, seqJ,
"#############", "#############", "#############");
germline->new_index(index);
germline->finish();
......
#include <fstream>
#include <iostream>
#include <string>
#include "core/germline.h"
#include "core/kmerstore.h"
#include "core/bioreader.hpp"
using namespace std;
void testGermline1(Germline *g1)
{
// Test metadata
TAP_TEST_EQUAL(g1->code, "IGH", TEST_GERMLINE, "code") ;
TAP_TEST_EQUAL(g1->shortcut, 'G', TEST_GERMLINE, "shortcut") ;
// Test seeds
TAP_TEST_EQUAL(g1->seed_5, "####-####", TEST_GERMLINE, "seed_5") ;
TAP_TEST_EQUAL(g1->seed_4, "###-###", TEST_GERMLINE, "seed_4") ;
TAP_TEST_EQUAL(g1->seed_3, "#######", TEST_GERMLINE, "seed_3") ;
}
void testIndexLoad(Germline *g1, IndexTypes index, float expected_index_load)
{
g1->new_index(index);
g1->finish();
size_t seed_5_span = g1->seed_5.size();
float index_load_5 = g1->index->getIndexLoad(KmerAffect(g1->affect_5, 1, seed_5_span));
TAP_TEST_APPROX(index_load_5, expected_index_load, 0.02, TEST_GERMLINE, "seed_5, index_load") ;
}
void testGermline() {
BioReader seqV("../../germline/homo-sapiens/IGHV.fa", 2);
BioReader seqD("../../germline/homo-sapiens/IGHD.fa", 2);
BioReader seqJ("../../germline/homo-sapiens/IGHJ.fa", 2);
Germline *g1 ;
g1 = new Germline("IGH", 'G', seqV, seqD, seqJ,
"8s", "###-###", "7c");
testGermline1(g1);
testIndexLoad(g1, KMER_INDEX, 0.24);
delete g1->index;
testIndexLoad(g1, AC_AUTOMATON, 0.12);
delete g1;
}
......@@ -53,7 +53,8 @@ void testFineSegment(IndexTypes index)
data.next();
Germline *germline ;
germline = new Germline("IGH", 'G', seqV, seqD, seqJ, "########");
germline = new Germline("IGH", 'G', seqV, seqD, seqJ,
"########", "########", "########");
germline->new_index(index);
germline->finish();
......@@ -105,11 +106,13 @@ void testSegmentOverlap(IndexTypes index)
BioReader data("data/bug-segment-overlap.fa", 1, " ");
Germline *germline1 ;
germline1 = new Germline("TRG", 'G', seqV, BioReader(), seqJ, "##########");
germline1 = new Germline("TRG", 'G', seqV, BioReader(), seqJ,
"##########", "##########", "##########");
germline1->new_index(index);
Germline *germline2 ;
germline2 = new Germline("TRG2", 'G', seqV, BioReader(), seqJ, "##########");
germline2 = new Germline("TRG2", 'G', seqV, BioReader(), seqJ,
"##########", "##########", "##########");
germline2->new_index(index);
germline1->finish();
......@@ -141,7 +144,8 @@ void testSegmentationCause(IndexTypes index) {
BioReader data("data/segmentation.fasta", 1, " ");
Germline *germline ;
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, "##########");
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ,
"##########", "##########", "##########");
germline->new_index(index);
germline->finish();
......@@ -268,7 +272,8 @@ void testBug2224(IndexTypes index) {
Germline *germline ;
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, "###########");
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ,
"###########", "###########", "###########");
germline->new_index(index);
germline->finish();
......@@ -293,7 +298,8 @@ void testExtractor(IndexTypes index) {
OnlineFasta data("data/segmentation.fasta", 1, " ");
Germline *germline ;
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ, "##########");
germline = new Germline("TRG", 'G', seqV, BioReader(), seqJ,
"##########", "##########", "##########");
germline->new_index(index);
MultiGermline *multi ;
......@@ -409,7 +415,8 @@ void testProbability(IndexTypes index) {
V.add(v);
J.add(j);
}
Germline germline("Test", 'T', V, BioReader(), J, "####");
Germline germline("Test", 'T', V, BioReader(), J,
"####", "####", "####");
germline.new_index(index);
germline.finish();
......@@ -436,6 +443,45 @@ void testProbability(IndexTypes index) {
TAP_TEST_EQUAL(kaa->getProbabilityAtLeastOrAbove(AFFECT_UNKNOWN, 0), 1, TEST_PROBABILITY_SEGMENTATION, ".getProbabilityAtLeastOrAbove() with AFFECT_UNKOWN");
}
void testDifferentSeeds(IndexTypes index) {
string v_seq = "AGAGAGAGAGAGAGAGAGAGAGAGAGAGAG";
string j_seq = "CACACACACACACACACACACACACACACA";
BioReader V, J;
V.add({"V", "V", v_seq, "", 0});
J.add({"J", "J", j_seq, "", 0});
Sequence seq = {"seq", "seq", "AGAGAGAGCACACACA", "", 0};
Germline germline_basic("Test1", 'T', V, BioReader(), J);
germline_basic.new_index(index);
germline_basic.finish();
Germline germline_small_seed_5("Test1", 'T', V, BioReader(), J, "####", "", "");
germline_small_seed_5.new_index(index);
germline_small_seed_5.finish();
Germline germline_small_seed_3("Test1", 'T', V, BioReader(), J, "", "", "####");
germline_small_seed_3.new_index(index);
germline_small_seed_3.finish();
Germline germline_small_seeds("Test1", 'T', V, BioReader(), J, "####", "", "####");
germline_small_seeds.new_index(index);
germline_small_seeds.finish();
KmerSegmenter ks1(seq, &germline_basic);
KmerSegmenter ks2(seq, &germline_small_seed_5);
KmerSegmenter ks3(seq, &germline_small_seed_3);
KmerSegmenter ks4(seq, &germline_small_seeds);
TAP_TEST(! ks1.isSegmented(), TEST_KMER_IS_SEGMENTED, "");
TAP_TEST(! ks2.isSegmented(), TEST_KMER_IS_SEGMENTED, "");
TAP_TEST(! ks3.isSegmented(), TEST_KMER_IS_SEGMENTED, "");
TAP_TEST(ks4.isSegmented(), TEST_KMER_IS_SEGMENTED, "");
TAP_TEST_EQUAL(ks1.getSegmentationStatus(), UNSEG_TOO_FEW_ZERO, TEST_KMER_SEGMENTATION_CAUSE, "");
TAP_TEST_EQUAL(ks2.getSegmentationStatus(), UNSEG_ONLY_V, TEST_KMER_SEGMENTATION_CAUSE, ks2.getInfoLineWithAffects());
TAP_TEST_EQUAL(ks3.getSegmentationStatus(), UNSEG_ONLY_J, TEST_KMER_SEGMENTATION_CAUSE, ks3.getInfoLineWithAffects());
TAP_TEST_EQUAL(ks4.getSegmentationStatus(), SEG_PLUS, TEST_KMER_SEGMENTATION_CAUSE, ks4.getInfoLineWithAffects());
}
void testSegment() {
testSegmentOverlap(KMER_INDEX);
testSegmentOverlap(AC_AUTOMATON);
......@@ -451,4 +497,5 @@ void testSegment() {
testBug2224(KMER_INDEX);
testBug2224(AC_AUTOMATON);
testBestLengthShifts();
testDifferentSeeds(AC_AUTOMATON); // KMER_INDEX can't deal with seeds of different sizes
}
......@@ -8,7 +8,7 @@ void testWSAdd() {
map<string, string> labels;
WindowsStorage ws(labels);
Sequence seq = {"label", "l", "GATACATTAGACAGCT", "", 0};
Germline germline("Test", 't', "data/small_V.fa", "", "data/small_J.fa", "");
Germline germline("Test", 't', "data/small_V.fa", "", "data/small_J.fa", "", "", "");
TAP_TEST_EQUAL(ws.size(), 0, TEST_WS_SIZE_NONE, "");
......@@ -50,7 +50,7 @@ void testWSAdd() {
TAP_TEST_EQUAL(it->label_full, "other", TEST_WS_GET_READS, "");
TAP_TEST_EQUAL(it->sequence, "TAAGATTAGCCACGGACT", TEST_WS_GET_READS, "");
Germline germline2("Other test", 'o', "data/small_V.fa", "", "data/small_J.fa", "");
Germline germline2("Other test", 'o', "data/small_V.fa", "", "data/small_J.fa", "", "", "");
// Insert a sequence from another germline 2 times
for (int i = 0; i < 2; i++) {
ws.add("CATT", seq, SEG_MINUS, &germline2);
......@@ -60,7 +60,7 @@ void testWSAdd() {
TAP_TEST(ws.getGermline("ATTAG") == &germline,TEST_WS_GET_GERMLINE, "");
TAP_TEST(ws.getGermline("CATT") == &germline2,TEST_WS_GET_GERMLINE, "");
Germline germline3("Another test", 'a', "data/small_V.fa", "", "data/small_J.fa", "");
Germline germline3("Another test", 'a', "data/small_V.fa", "", "data/small_J.fa", "", "", "");
// Insert a sequence from another germline 6 times
for (int i = 0; i < 6; i++) {
ws.add("ATAGCAT", seq, SEG_MINUS, &germline3);
......@@ -117,7 +117,7 @@ void testWSAddWithLimit() {
ws.setBinParameters(1, 20);
Sequence seq = {"label", "l", "GATACATTAGACAGCT", "", 0};
Sequence seq_long = {"label", "l", "GATACATTAGACAGCTTATATATATATTTATAT", "", 0};
Germline germline("Test", 't', "data/small_V.fa", "", "data/small_J.fa", "");
Germline germline("Test", 't', "data/small_V.fa", "", "data/small_J.fa", "", "", "");
ws.add("ATTAG", seq, SEG_PLUS, &germline);
ws.add("ATTAG", seq, SEG_PLUS, &germline);
......
......@@ -44,6 +44,18 @@ if (test_results[id] <= TAP_MAX_FAILED) { \
}}}
#define TAP_TEST_APPROX(test, expected, approx, id, msg) { test_nb_executions[id]++; \
if (test_results[id] <= TAP_MAX_FAILED) { \
if ((abs((test) - expected)) > approx) { \
test_results[id]++; \
cerr << "Test " << #test << " failed (" << __FILE__ << ":" << __LINE__ << "): " \
<< " expected " << expected << " +/- " << approx << ", got " << (test) \
<< " " << msg << endl; \
cerr << TAP_ADDITIONAL_INFOS << endl; \
}}}
#define TAP_END_TEST tap_end_test(__FILE__)
inline bool tap_end_test(const char *filename) {
......
......@@ -4,6 +4,7 @@
#include "testTools.cpp"
#include "testFilter.cpp"
#include "testKmerAffect.cpp"
#include "testGermline.cpp"
#include "testStorage.cpp"
#include "testAffectAnalyser.cpp"
#include "testBugs.cpp"
......@@ -23,6 +24,7 @@ int main(void) {
declare_tests();
testTools();
testFilter();
testGermline();
testStorage();
testKmerAffect();
testAffectAnalyser();
......
......@@ -46,6 +46,9 @@ enum {
TEST_TRIM_SEQUENCE,
TEST_GENERATE_ALL_SEEDS,
/* Germline tests */
TEST_GERMLINE,
/* Storage tests */
TEST_ARRAY_KMERSTORE,
TEST_MAP_KMERSTORE,
......
......@@ -871,7 +871,7 @@ int main (int argc, char **argv)
Germline *germline;
germline = new Germline("custom", 'X',
f_reps_V, f_reps_D, f_reps_J,
seed, trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
seed, seed, seed, trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
germline->new_index(indexType);
......@@ -892,14 +892,14 @@ int main (int argc, char **argv)
}
if (multi_germline_unexpected_recombinations_12) {
Germline *pseudo = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
Germline *pseudo = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", "", "", trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
pseudo->seg_method = SEG_METHOD_MAX12 ;
pseudo->set_index(multigermline->index);
multigermline->germlines.push_back(pseudo);
}
if (multi_germline_unexpected_recombinations_1U) {
Germline *pseudo_u = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
Germline *pseudo_u = new Germline(PSEUDO_UNEXPECTED, PSEUDO_UNEXPECTED_CODE, "", "", "", trim_sequences, (kmer_threshold != NO_LIMIT_VALUE));
pseudo_u->seg_method = SEG_METHOD_MAX1U ;
// TODO: there should be more up/downstream regions for the PSEUDO_UNEXPECTED germline. And/or smaller seeds ?
pseudo_u->set_index(multigermline->index);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment