germline.cpp 8.67 KB
Newer Older
1 2

#include "germline.h"
3
#include <fstream>
4
#include <ctype.h>
5

6
void Germline::init(string _code, char _shortcut,
7
                    int _delta_min,
8
                    int max_indexing)
9
{
10
  seg_method = SEG_METHOD_53 ;
11 12
  code = _code ;
  shortcut = _shortcut ;
13
  index = 0 ;
14
  this->max_indexing = max_indexing;
15

16 17 18
  affect_5 = "V" ;
  affect_4 = "" ;
  affect_3 = "J" ;
19 20

  affect_5 = string(1, toupper(shortcut)) + "-" + code + "V";
21
  affect_4 = string(1, 14 + shortcut) + "-" + code + "D";
22 23
  affect_3 = string(1, tolower(shortcut)) + "-" + code + "J";
     
24 25 26
  delta_min = _delta_min ;
}

27 28

Germline::Germline(string _code, char _shortcut,
29
		   int _delta_min,
30
                    int max_indexing)
31
{
32
  init(_code, _shortcut, _delta_min, max_indexing);
33 34 35
}


36 37
Germline::Germline(string _code, char _shortcut,
		   string f_rep_5, string f_rep_4, string f_rep_3,
38
		   int _delta_min,
39
                    int max_indexing)
40
{
41
  init(_code, _shortcut, _delta_min, max_indexing);
42

43 44 45 46
  f_reps_5.push_back(f_rep_5);
  f_reps_4.push_back(f_rep_4);
  f_reps_3.push_back(f_rep_3);

47 48 49
  rep_5 = Fasta(f_rep_5, 2, "|");
  rep_4 = Fasta(f_rep_4, 2, "|");
  rep_3 = Fasta(f_rep_3, 2, "|");
50 51 52

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
53 54 55
}


56 57
Germline::Germline(string _code, char _shortcut,
		   list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
58
		   int _delta_min,
59
                    int max_indexing)
60
{
61
  init(_code, _shortcut, _delta_min, max_indexing);
62 63 64 65 66

  f_reps_5 = _f_reps_5 ;
  f_reps_4 = _f_reps_4 ;
  f_reps_3 = _f_reps_3 ;

67 68 69 70
  rep_5 = Fasta(2, "|") ;
  rep_4 = Fasta(2, "|") ;
  rep_3 = Fasta(2, "|") ;

71
  for (list<string>::const_iterator it = f_reps_5.begin(); it != f_reps_5.end(); ++it)
72
    rep_5.add(*it);
73 74
  
  for (list<string>::const_iterator it = f_reps_4.begin(); it != f_reps_4.end(); ++it)
75
    rep_4.add(*it);
76 77

  for (list<string>::const_iterator it = f_reps_3.begin(); it != f_reps_3.end(); ++it)
78
    rep_3.add(*it);
79 80 81

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
82 83 84
}


85 86
Germline::Germline(string _code, char _shortcut, 
           Fasta _rep_5, Fasta _rep_4, Fasta _rep_3,
87
		   int _delta_min,
88
                    int max_indexing)
89
{
90
  init(_code, _shortcut, _delta_min, max_indexing);
91 92 93 94

  rep_5 = _rep_5 ;
  rep_4 = _rep_4 ;
  rep_3 = _rep_3 ;
95 96 97

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
98 99
}

100 101 102
Germline::Germline(string code, char shortcut, string path, json json_recom, int max_indexing)
{
    
103
  int delta_min = -10;
104 105
  
  if (json_recom.find("4") != json_recom.end()) {
106
      delta_min = 0;
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
  }
  
  init(code, shortcut, delta_min, max_indexing);
  
  rep_5 = Fasta(2, "|") ;
  rep_4 = Fasta(2, "|") ;
  rep_3 = Fasta(2, "|") ;

  for (json::iterator it = json_recom["5"].begin();
       it != json_recom["5"].end(); ++it) 
  {
    string filename = *it;
    f_reps_5.push_back(path + filename);
    rep_5.add(path + filename);
  }
  
  if (json_recom.find("4") != json_recom.end()) {
    for (json::iterator it = json_recom["4"].begin();
        it != json_recom["4"].end(); ++it) 
    {
        string filename = *it;
        f_reps_4.push_back(path + filename);
        rep_4.add(path + filename);
    }
  }
  
  for (json::iterator it = json_recom["3"].begin();
       it != json_recom["3"].end(); ++it) 
  {
    string filename = *it;
    f_reps_3.push_back(path + filename);
    rep_3.add(path + filename);
  }
140 141 142

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
143 144
}

145
void Germline::new_index(string seed)
146
{
147 148
  bool rc = true ;
  index = KmerStoreFactory::createIndex<KmerAffect>(seed, rc);
149
  index->refs = 1;
150

151 152 153
  update_index();
}

154
void Germline::set_index(IKmerStore<KmerAffect> *_index)
155 156
{
  index = _index;
157
  index->refs++ ;
158 159 160
}


161
void Germline::update_index(IKmerStore<KmerAffect> *_index)
162
{
163
  if (!_index) _index = index ;
164

165 166
  _index->insert(rep_5, affect_5, max_indexing);
  _index->insert(rep_4, affect_4);
167
  _index->insert(rep_3, affect_3, -max_indexing);
168 169
}

170 171
void Germline::mark_as_ambiguous(Germline *other)
{
172
  index->insert(other->rep_5, AFFECT_AMBIGUOUS_SYMBOL, max_indexing);
173 174 175 176

  if (other->affect_4.size())
    index->insert(other->rep_4, AFFECT_AMBIGUOUS_SYMBOL);

177
  index->insert(other->rep_3, AFFECT_AMBIGUOUS_SYMBOL, -max_indexing);
178 179 180
}


181 182 183 184 185
void Germline::override_rep5_rep3_from_labels(KmerAffect left, KmerAffect right)
{
  rep_5 = index->getLabel(left);
  rep_3 = index->getLabel(right);
}
186

187 188
Germline::~Germline()
{
189
  if (index)
190 191 192 193
    {
      if (--(index->refs) == 0)
        delete index;
    }
194 195 196 197
}

ostream &operator<<(ostream &out, const Germline &germline)
{
198
  out << setw(5) << left << germline.code << right << " '" << germline.shortcut << "' "
199
      << setw(3) << germline.delta_min
200
      << " ";
201

202
  if (germline.index) {
203
    out << " 0x" << hex << setw(2) << setfill('0') << germline.index->id << dec << setfill(' ') << " " ;
204
    out << fixed << setprecision(3) << setw(8) << 100 * germline.index->getIndexLoad() << "%";
205
    out << " l" << germline.index->getS() << " k" << germline.index->getK() << " " << germline.index->getSeed() ; // TODO: there should be a << for index
206
  }
207 208

  out << endl;
209 210 211 212
  return out;
}


213
MultiGermline::MultiGermline(bool _one_index_per_germline)
214
{
215
  index = NULL;
216
  one_index_per_germline = _one_index_per_germline;
217 218
}

Mikaël Salson's avatar
Mikaël Salson committed
219 220 221 222 223 224
MultiGermline::~MultiGermline() {
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      delete *it ;
    }
}
225

226 227 228 229 230
void MultiGermline::insert(Germline *germline)
{
  germlines.push_back(germline);
}

231 232
void MultiGermline::add_germline(Germline *germline, string seed)
{
233 234
  if (one_index_per_germline)
    germline->new_index(seed);
235 236 237
  germlines.push_back(germline);
}

238
void MultiGermline::build_from_json(string path, string json_filename, int filter, int max_indexing)
239 240
{
  //parse germlines.data
241
  ifstream germline_data(path + "/" + json_filename);
242 243 244 245 246 247 248 249 250 251 252 253
  string content( (std::istreambuf_iterator<char>(germline_data) ),
                  (std::istreambuf_iterator<char>()    ) );

  json j = json::parse(content);
  
  //for each germline
  for (json::iterator it = j.begin(); it != j.end(); ++it) {
      
    json recom = it.value()["recombinations"];
    char shortcut = it.value()["shortcut"].dump()[1];
    string code = it.key();
    string seed = it.value()["parameters"]["seed"];
254 255 256 257 258 259 260 261 262 263 264 265 266 267

    switch (filter) {
    case GERMLINES_REGULAR:
      if (code.find("+") != string::npos) continue ;
      break ;

    case GERMLINES_INCOMPLETE:
      if (code.find("+") == string::npos) continue ;
      break ;

    default:
      break ;
    }

268 269 270 271 272 273 274 275
    map<string, string> seedMap;
    seedMap["13s"] = SEED_S13;
    seedMap["12s"] = SEED_S12;
    seedMap["10s"] = SEED_S10;
    seedMap["9s"] = SEED_9;
    
    //for each set of recombination 3/4/5
    for (json::iterator it2 = recom.begin(); it2 != recom.end(); ++it2) {
276
      add_germline(new Germline(code, shortcut, path + "/", *it2 , max_indexing), seedMap[seed]);
277 278 279 280 281
    }
  }
  
}

282

283
/* if 'one_index_per_germline' was not set, this should be called once all germlines have been loaded */
284
void MultiGermline::insert_in_one_index(IKmerStore<KmerAffect> *_index, bool set_index)
285 286 287 288
{
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
289

290 291
      if (germline->rep_4.size())
	germline->affect_4 = string(1, 14 + germline->shortcut) + "-" + germline->code + "D";
292

293 294 295 296
      germline->update_index(_index);

      if (set_index)
        germline->set_index(_index);
297 298 299
    }
}

300
void MultiGermline::build_with_one_index(string seed, bool set_index)
301 302 303
{
  bool rc = true ;
  index = KmerStoreFactory::createIndex<KmerAffect>(seed, rc);
304
  insert_in_one_index(index, set_index);
305 306
}

307 308 309
/* Mark k-mers common to several germlines as ambiguous */
void MultiGermline::mark_cross_germlines_as_ambiguous()
{
310
  string VdJa = "TRA+D";
311
  
312 313 314 315
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
      cout << *germline << ":" ;
316 317 318 319

      // Skip VdJa
      if (!(germline->code.compare(VdJa)))
        continue;
320 321 322 323 324 325 326
      
      for (list<Germline*>::const_iterator it2 = germlines.begin(); it2 != germlines.end(); ++it2)
      {
        Germline *germline2 = *it2 ;
        if (germline2 == germline)
          continue ;

327 328 329
        // Skip germlines on a same system, such as 'D' (TRD) and 'd' (TRD+)
        if (toupper(germline2->shortcut) == toupper(germline->shortcut))
          continue;
330 331 332 333

        // Skip VdJa
        if (!(germline2->code.compare(VdJa)))
          continue;
334
        
335 336 337 338 339 340 341
        germline->mark_as_ambiguous(germline2);
      }

      cout << endl;
    }
}

342 343 344 345 346 347 348 349 350 351 352

ostream &operator<<(ostream &out, const MultiGermline &multigermline)
{
  for (list<Germline*>::const_iterator it = multigermline.germlines.begin(); it != multigermline.germlines.end(); ++it)
    {
      Germline *germline = *it ;
      out << "   " << *germline ;
    }

  return out;
}