Commit 9fa64ce4 authored by Mikaël Salson

Germline: Make the seed a property of the Germline

The seed should be specific to the Germline, as in the germlines.data
file. We could have several Germlines in an index, but with different
seeds. Designing Germlines this way makes that possible.
parent 85455813
......@@ -4,7 +4,7 @@
#include <ctype.h>
void Germline::init(string _code, char _shortcut,
int _delta_min,
int _delta_min, string seed,
int max_indexing)
{
seg_method = SEG_METHOD_53 ;
......@@ -12,6 +12,7 @@ void Germline::init(string _code, char _shortcut,
shortcut = _shortcut ;
index = 0 ;
this->max_indexing = max_indexing;
this->seed = seed;
affect_5 = "V" ;
affect_4 = "" ;
......@@ -26,19 +27,19 @@ void Germline::init(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
int _delta_min,
int _delta_min, string seed,
int max_indexing)
{
init(_code, _shortcut, _delta_min, max_indexing);
init(_code, _shortcut, _delta_min, seed, max_indexing);
}
Germline::Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
int _delta_min,
int _delta_min, string seed,
int max_indexing)
{
init(_code, _shortcut, _delta_min, max_indexing);
init(_code, _shortcut, _delta_min, seed, max_indexing);
f_reps_5.push_back(f_rep_5);
f_reps_4.push_back(f_rep_4);
......@@ -56,10 +57,10 @@ Germline::Germline(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
int _delta_min,
int _delta_min, string seed,
int max_indexing)
{
init(_code, _shortcut, _delta_min, max_indexing);
init(_code, _shortcut, _delta_min, seed, max_indexing);
f_reps_5 = _f_reps_5 ;
f_reps_4 = _f_reps_4 ;
......@@ -87,10 +88,10 @@ Germline::Germline(string _code, char _shortcut,
Germline::Germline(string _code, char _shortcut,
Fasta _rep_5, Fasta _rep_4, Fasta _rep_3,
int _delta_min,
int _delta_min, string seed,
int max_indexing)
{
init(_code, _shortcut, _delta_min, max_indexing);
init(_code, _shortcut, _delta_min, seed, max_indexing);
rep_5 = _rep_5 ;
rep_4 = _rep_4 ;
......@@ -100,7 +101,8 @@ Germline::Germline(string _code, char _shortcut,
seg_method = SEG_METHOD_543 ;
}
Germline::Germline(string code, char shortcut, string path, json json_recom, int max_indexing)
Germline::Germline(string code, char shortcut, string path, json json_recom,
string seed, int max_indexing)
{
int delta_min = -10;
......@@ -109,7 +111,7 @@ Germline::Germline(string code, char shortcut, string path, json json_recom, int
delta_min = 0;
}
init(code, shortcut, delta_min, max_indexing);
init(code, shortcut, delta_min, seed, max_indexing);
bool regular = (code.find("+") == string::npos);
......@@ -147,8 +149,10 @@ Germline::Germline(string code, char shortcut, string path, json json_recom, int
seg_method = SEG_METHOD_543 ;
}
void Germline::new_index(string seed)
void Germline::new_index()
{
assert(! seed.empty());
bool rc = true ;
index = KmerStoreFactory::createIndex<KmerAffect>(seed, rc);
index->refs = 1;
......@@ -167,19 +171,19 @@ void Germline::update_index(IKmerStore<KmerAffect> *_index)
{
if (!_index) _index = index ;
_index->insert(rep_5, affect_5, max_indexing);
_index->insert(rep_4, affect_4);
_index->insert(rep_3, affect_3, -max_indexing);
_index->insert(rep_5, affect_5, max_indexing, seed);
_index->insert(rep_4, affect_4, 0, seed);
_index->insert(rep_3, affect_3, -max_indexing, seed);
}
void Germline::mark_as_ambiguous(Germline *other)
{
index->insert(other->rep_5, AFFECT_AMBIGUOUS_SYMBOL, max_indexing);
index->insert(other->rep_5, AFFECT_AMBIGUOUS_SYMBOL, max_indexing, seed);
if (other->affect_4.size())
index->insert(other->rep_4, AFFECT_AMBIGUOUS_SYMBOL);
index->insert(other->rep_4, AFFECT_AMBIGUOUS_SYMBOL, 0, seed);
index->insert(other->rep_3, AFFECT_AMBIGUOUS_SYMBOL, -max_indexing);
index->insert(other->rep_3, AFFECT_AMBIGUOUS_SYMBOL, -max_indexing, seed);
}
......@@ -233,10 +237,10 @@ void MultiGermline::insert(Germline *germline)
germlines.push_back(germline);
}
void MultiGermline::add_germline(Germline *germline, string seed)
void MultiGermline::add_germline(Germline *germline)
{
if (one_index_per_germline)
germline->new_index(seed);
germline->new_index();
germlines.push_back(germline);
}
......@@ -278,7 +282,8 @@ void MultiGermline::build_from_json(string path, string json_filename, int filte
//for each set of recombination 3/4/5
for (json::iterator it2 = recom.begin(); it2 != recom.end(); ++it2) {
add_germline(new Germline(code, shortcut, path + "/", *it2 , max_indexing), seedMap[seed]);
add_germline(new Germline(code, shortcut, path + "/", *it2 , seedMap[seed],
max_indexing));
}
}
......
......@@ -31,7 +31,7 @@ class Germline {
int max_indexing;
void init(string _code, char _shortcut,
int _delta_min,
int _delta_min, string seed,
int max_indexing);
public:
......@@ -44,24 +44,25 @@ class Germline {
Germline(string _code, char _shortcut,
list <string> f_rep_5, list <string> f_rep_4, list <string> f_rep_3,
int _delta_min,
int _delta_min, string seed="",
int max_indexing=0);
Germline(string _code, char _shortcut,
string f_rep_5, string f_rep_4, string f_rep_3,
int _delta_min,
int _delta_min, string seed="",
int max_indexing=0);
Germline(string _code, char _shortcut,
Fasta _rep_5, Fasta _rep_4, Fasta _rep_3,
int _delta_min,
int _delta_min, string seed="",
int max_indexing=0);
Germline(string _code, char _shortcut,
int _delta_min,
int _delta_min, string seed="",
int max_indexing=0);
Germline(string _code, char shortcut, string path, json json_recom, int max_indexing=0);
Germline(string _code, char shortcut, string path, json json_recom,
string seed="", int max_indexing=0);
~Germline();
......@@ -69,7 +70,12 @@ class Germline {
string code ;
char shortcut ;
void new_index(string seed);
/**
* The string used for indexing the germline.
*/
string seed;
void new_index();
void set_index(IKmerStore<KmerAffect> *index);
void update_index(IKmerStore<KmerAffect> *_index = NULL);
......@@ -126,7 +132,7 @@ class MultiGermline {
~MultiGermline();
void insert(Germline *germline);
void add_germline(Germline *germline, string seed);
void add_germline(Germline *germline);
void build_from_json(string path, string json_filename, int filter, int max_indexing);
// Creates and update an unique index for all the germlines
......
......@@ -13,8 +13,8 @@ void testSegmentationBug1(int delta_min, int delta_max) {
Fasta seqJ("../../germline/TRGJ.fa");
Germline *germline ;
germline = new Germline("custom", 'x', seqV, seqV, seqJ, delta_min, delta_max);
germline->new_index("##############");
germline = new Germline("custom", 'x', seqV, seqV, seqJ, delta_min, "##############");
germline->new_index();
OnlineFasta input(buggy_sequences);
......
......@@ -52,8 +52,8 @@ void testFineSegment()
data.next();
Germline *germline ;
germline = new Germline("IGH", 'G', seqV, seqD, seqJ, 0);
germline->new_index("########");
germline = new Germline("IGH", 'G', seqV, seqD, seqJ, 0, "########");
germline->new_index();
Sequence seq = data.getSequence();
......@@ -103,12 +103,12 @@ void testSegmentOverlap()
Fasta data("../../data/bug-segment-overlap.fa", 1, " ");
Germline *germline1 ;
germline1 = new Germline("TRG", 'G', seqV, Fasta(), seqJ, -50);
germline1->new_index("##########");
germline1 = new Germline("TRG", 'G', seqV, Fasta(), seqJ, -50, "##########");
germline1->new_index();
Germline *germline2 ;
germline2 = new Germline("TRG2", 'G', seqV, Fasta(), seqJ, -50);
germline2->new_index("##########");
germline2 = new Germline("TRG2", 'G', seqV, Fasta(), seqJ, -50, "##########");
germline2->new_index();
for (int i = 0; i < data.size(); i++) {
KmerSegmenter ks(data.read(i), germline1);
......@@ -136,8 +136,8 @@ void testSegmentationCause() {
Fasta data("../../data/segmentation.fasta", 1, " ");
Germline *germline ;
germline = new Germline("TRG", 'G', seqV, Fasta(), seqJ, 0);
germline->new_index("##########");
germline = new Germline("TRG", 'G', seqV, Fasta(), seqJ, 0, "##########");
germline->new_index();
int nb_checked = 0;
......@@ -248,8 +248,8 @@ void testExtractor() {
OnlineFasta data("../../data/segmentation.fasta", 1, " ");
Germline *germline ;
germline = new Germline("TRG", 'G', seqV, Fasta(), seqJ, 0, 0);
germline->new_index("##########");
germline = new Germline("TRG", 'G', seqV, Fasta(), seqJ, 0, "##########");
germline->new_index();
MultiGermline *multi ;
multi = new MultiGermline();
......@@ -328,8 +328,8 @@ void testProbability() {
V.add(v);
J.add(j);
}
Germline germline("Test", 'T', V, Fasta(), J, 0);
germline.new_index("####");
Germline germline("Test", 'T', V, Fasta(), J, 0, "####");
germline.new_index();
TAP_TEST(germline.index->getIndexLoad() == .75, TEST_GET_INDEX_LOAD, "");
......
......@@ -7,7 +7,7 @@ void testWSAdd() {
map<string, string> labels;
WindowsStorage ws(labels);
Sequence seq = {"label", "l", "GATACATTAGACAGCT", "", NULL, 0};
Germline germline("Test", 't', "../../data/small_V.fa", "", "../../data/small_J.fa", -10);
Germline germline("Test", 't', "../../data/small_V.fa", "", "../../data/small_J.fa", -10, "");
TAP_TEST(ws.size() == 0, TEST_WS_SIZE_NONE, "");
......@@ -49,7 +49,7 @@ void testWSAdd() {
TAP_TEST(it->label_full == "other", TEST_WS_GET_READS, "");
TAP_TEST(it->sequence == "TAAGATTAGCCACGGACT", TEST_WS_GET_READS, "");
Germline germline2("Other test", 'o', "../../data/small_V.fa", "", "../../data/small_J.fa", -20);
Germline germline2("Other test", 'o', "../../data/small_V.fa", "", "../../data/small_J.fa", -20, "");
// Insert a sequence from another germline 2 times
for (int i = 0; i < 2; i++) {
ws.add("CATT", seq, SEG_MINUS, &germline2);
......@@ -59,7 +59,7 @@ void testWSAdd() {
TAP_TEST(ws.getGermline("ATTAG") == &germline,TEST_WS_GET_GERMLINE, "");
TAP_TEST(ws.getGermline("CATT") == &germline2,TEST_WS_GET_GERMLINE, "");
Germline germline3("Another test", 'a', "../../data/small_V.fa", "", "../../data/small_J.fa", -52);
Germline germline3("Another test", 'a', "../../data/small_V.fa", "", "../../data/small_J.fa", -52, "");
// Insert a sequence from another germline 6 times
for (int i = 0; i < 6; i++) {
ws.add("ATAGCAT", seq, SEG_MINUS, &germline3);
......@@ -114,7 +114,7 @@ void testWSAddWithLimit() {
ws.setBinParameters(1, 20);
Sequence seq = {"label", "l", "GATACATTAGACAGCT", "", NULL, 0};
Sequence seq_long = {"label", "l", "GATACATTAGACAGCTTATATATATATTTATAT", "", NULL, 0};
Germline germline("Test", 't', "../../data/small_V.fa", "", "../../data/small_J.fa", -10);
Germline germline("Test", 't', "../../data/small_V.fa", "", "../../data/small_J.fa", -10, "");
ws.add("ATTAG", seq, SEG_PLUS, &germline);
ws.add("ATTAG", seq, SEG_PLUS, &germline);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment