Commit 928a98e6 authored by Marc Duez

germline.cpp : add function to load germline from germlines.data

parent d9350010
#include "germline.h" #include "germline.h"
#include <fstream>
#include <ctype.h> #include <ctype.h>
void Germline::init(string _code, char _shortcut, void Germline::init(string _code, char _shortcut,
...@@ -86,6 +87,49 @@ Germline::Germline(string _code, char _shortcut, ...@@ -86,6 +87,49 @@ Germline::Germline(string _code, char _shortcut,
rep_3 = _rep_3 ; rep_3 = _rep_3 ;
} }
// Builds a germline from one recombination entry of germlines.data.
//
//   code, shortcut  forwarded unchanged to init()
//   path            prefix prepended as-is to every file name of the entry
//   json_recom      object whose "5", "3" and optional "4" members list file names
//   max_indexing    forwarded unchanged to init()
//
// delta_min is -10 when the entry has a "4" member, 0 otherwise.
Germline::Germline(string code, char shortcut, string path, json json_recom, int max_indexing)
{
  // Looked up once: the answer drives both delta_min and the loading of rep_4.
  const bool has_rep_4 = (json_recom.find("4") != json_recom.end());

  init(code, shortcut, has_rep_4 ? -10 : 0, max_indexing);

  rep_5 = Fasta(2, "|") ;
  rep_4 = Fasta(2, "|") ;
  rep_3 = Fasta(2, "|") ;

  // Each listed file is recorded in f_reps_* and loaded into the matching rep_*.
  for (json &entry : json_recom["5"])
    {
      string name = entry;
      const string full_name = path + name;
      f_reps_5.push_back(full_name);
      rep_5.add(full_name);
    }

  if (has_rep_4)
    {
      for (json &entry : json_recom["4"])
        {
          string name = entry;
          const string full_name = path + name;
          f_reps_4.push_back(full_name);
          rep_4.add(full_name);
        }
    }

  for (json &entry : json_recom["3"])
    {
      string name = entry;
      const string full_name = path + name;
      f_reps_3.push_back(full_name);
      rep_3.add(full_name);
    }
}
void Germline::new_index(string seed) void Germline::new_index(string seed)
{ {
bool rc = true ; bool rc = true ;
...@@ -177,6 +221,37 @@ void MultiGermline::add_germline(Germline *germline, string seed) ...@@ -177,6 +221,37 @@ void MultiGermline::add_germline(Germline *germline, string seed)
germlines.push_back(germline); germlines.push_back(germline);
} }
// Loads the germlines described in '<path>/germlines.data'.
//
//   path          directory holding germlines.data and the files it lists,
//                 given without a trailing separator
//   max_indexing  forwarded unchanged to every Germline that is created
//
// Each top-level key of the JSON document is a germline code. One Germline is
// added per element of its "recombinations" member, using its "shortcut" and
// the seed named by "parameters"/"seed".
// NOTE(review): an unreadable file yields an empty 'content'; json::parse is
// presumably what reports the failure -- confirm the error is clear enough.
void MultiGermline::build(string path, int max_indexing)
{
  // Seed names used in germlines.data -> seed patterns.
  // Built once: the table does not depend on the germline being read.
  map<string, string> seedMap;
  seedMap["13s"] = SEED_S13;
  seedMap["12s"] = SEED_S12;
  seedMap["10s"] = SEED_S10;
  seedMap["9s"] = SEED_9;

  //parse germlines.data
  ifstream germline_data(path + "/germlines.data");
  string content( (std::istreambuf_iterator<char>(germline_data) ),
                  (std::istreambuf_iterator<char>() ) );
  json j = json::parse(content);

  // The Germline constructor concatenates its 'path' argument directly with
  // each file name, so the separator has to be supplied here.
  const string directory = path + "/";

  //for each germline
  for (json::iterator it = j.begin(); it != j.end(); ++it) {
    json recom = it.value()["recombinations"];
    string code = it.key();
    string seed = it.value()["parameters"]["seed"];

    // Read the shortcut as a string value rather than from the serialized
    // JSON text, whose second character is wrong for escaped or empty strings.
    string shortcut_text = it.value()["shortcut"];
    char shortcut = shortcut_text.empty() ? '\0' : shortcut_text[0];

    //for each set of recombination 3/4/5
    for (json::iterator it2 = recom.begin(); it2 != recom.end(); ++it2) {
      // A seed name missing from the table maps to an empty string, as before.
      add_germline(new Germline(code, shortcut, directory, *it2, max_indexing), seedMap[seed]);
    }
  }
}
void MultiGermline::build_default_set(string path, int max_indexing) void MultiGermline::build_default_set(string path, int max_indexing)
{ {
// Should parse 'data/germlines.data' // Should parse 'data/germlines.data'
......
...@@ -8,10 +8,12 @@ ...@@ -8,10 +8,12 @@
#include "kmerstore.h" #include "kmerstore.h"
#include "stats.h" #include "stats.h"
#include "tools.h" #include "tools.h"
#include "../lib/json.hpp"
#define PSEUDO_GERMLINE_MAX12 "xxx" #define PSEUDO_GERMLINE_MAX12 "xxx"
using namespace std; using namespace std;
using json = nlohmann::json;
class Germline { class Germline {
private: private:
...@@ -47,6 +49,8 @@ class Germline { ...@@ -47,6 +49,8 @@ class Germline {
Germline(string _code, char _shortcut, Germline(string _code, char _shortcut,
int _delta_min, int _delta_min,
int max_indexing=0); int max_indexing=0);
Germline(string _code, char shortcut, string path, json json_recom, int max_indexing=0);
~Germline(); ~Germline();
...@@ -99,6 +103,7 @@ class MultiGermline { ...@@ -99,6 +103,7 @@ class MultiGermline {
void add_germline(Germline *germline, string seed); void add_germline(Germline *germline, string seed);
void build_default_set(string path, int max_indexing); void build_default_set(string path, int max_indexing);
void build_incomplete_set(string path, int max_indexing); void build_incomplete_set(string path, int max_indexing);
void build(string path, int max_indexing);
// Creates and update an unique index for all the germlines // Creates and update an unique index for all the germlines
// If 'set_index' is set, set this index as the index for all germlines // If 'set_index' is set, set this index as the index for all germlines
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define SEGMENT_H #define SEGMENT_H
#include <string> #include <string>
#include <fstream>
#include <iostream> #include <iostream>
#include "fasta.h" #include "fasta.h"
#include "dynprog.h" #include "dynprog.h"
......
...@@ -773,7 +773,8 @@ int main (int argc, char **argv) ...@@ -773,7 +773,8 @@ int main (int argc, char **argv)
if (multi_germline) if (multi_germline)
{ {
multigermline->build_default_set(multi_germline_file, trim_sequences); multigermline->build_default_set(multi_germline_file, trim_sequences);
//multigermline->build(multi_germline_file, trim_sequences);
} }
else else
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment