germline.cpp 9.2 KB
Newer Older
1 2

#include "germline.h"
3
#include "automaton.hpp"
4
#include <fstream>
5
#include <ctype.h>
6

7
void Germline::init(string _code, char _shortcut,
8
                    int _delta_min, string seed,
9
                    int max_indexing)
10
{
11
  seg_method = SEG_METHOD_53 ;
12 13
  code = _code ;
  shortcut = _shortcut ;
14
  index = 0 ;
15
  this->max_indexing = max_indexing;
16
  this->seed = seed;
17

18 19 20
  affect_5 = "V" ;
  affect_4 = "" ;
  affect_3 = "J" ;
21 22

  affect_5 = string(1, toupper(shortcut)) + "-" + code + "V";
23
  affect_4 = string(1, 14 + shortcut) + "-" + code + "D";
24 25
  affect_3 = string(1, tolower(shortcut)) + "-" + code + "J";
     
26 27 28
  delta_min = _delta_min ;
}

29 30

Germline::Germline(string _code, char _shortcut,
31
		   int _delta_min, string seed,
32
                    int max_indexing)
33
{
34
  init(_code, _shortcut, _delta_min, seed, max_indexing);
35 36 37
}


38 39
Germline::Germline(string _code, char _shortcut,
		   string f_rep_5, string f_rep_4, string f_rep_3,
40
		   int _delta_min, string seed,
41
                    int max_indexing)
42
{
43
  init(_code, _shortcut, _delta_min, seed, max_indexing);
44

45 46 47 48
  f_reps_5.push_back(f_rep_5);
  f_reps_4.push_back(f_rep_4);
  f_reps_3.push_back(f_rep_3);

49
  /// no CYS104_IN_GAPPED_V / PHE118_TRP118_IN_GAPPED_J here ?
50 51 52
  rep_5 = Fasta(f_rep_5, 2, "|");
  rep_4 = Fasta(f_rep_4, 2, "|");
  rep_3 = Fasta(f_rep_3, 2, "|");
53 54 55

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
56 57 58
}


59 60
Germline::Germline(string _code, char _shortcut,
		   list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
61
		   int _delta_min, string seed,
62
                    int max_indexing)
63
{
64
  init(_code, _shortcut, _delta_min, seed, max_indexing);
65 66 67 68 69

  f_reps_5 = _f_reps_5 ;
  f_reps_4 = _f_reps_4 ;
  f_reps_3 = _f_reps_3 ;

70 71 72
  bool regular = (code.find("+") == string::npos);

  rep_5 = Fasta(2, "|", regular ? CYS104_IN_GAPPED_V : 0);
73
  rep_4 = Fasta(2, "|") ;
74
  rep_3 = Fasta(2, "|", regular ? PHE118_TRP118_IN_GAPPED_J : 0);
75

76
  for (list<string>::const_iterator it = f_reps_5.begin(); it != f_reps_5.end(); ++it)
77
    rep_5.add(*it);
78 79
  
  for (list<string>::const_iterator it = f_reps_4.begin(); it != f_reps_4.end(); ++it)
80
    rep_4.add(*it);
81 82

  for (list<string>::const_iterator it = f_reps_3.begin(); it != f_reps_3.end(); ++it)
83
    rep_3.add(*it);
84 85 86

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
87 88 89
}


90 91
Germline::Germline(string _code, char _shortcut, 
           Fasta _rep_5, Fasta _rep_4, Fasta _rep_3,
92
		   int _delta_min, string seed,
93
                    int max_indexing)
94
{
95
  init(_code, _shortcut, _delta_min, seed, max_indexing);
96 97 98 99

  rep_5 = _rep_5 ;
  rep_4 = _rep_4 ;
  rep_3 = _rep_3 ;
100 101 102

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
103 104
}

105 106
Germline::Germline(string code, char shortcut, string path, json json_recom,
                   string seed, int max_indexing)
107 108
{
    
109
  int delta_min = -10;
110 111
  
  if (json_recom.find("4") != json_recom.end()) {
112
      delta_min = 0;
113 114
  }
  
115
  init(code, shortcut, delta_min, seed, max_indexing);
116 117

  bool regular = (code.find("+") == string::npos);
118
  
119
  rep_5 = Fasta(2, "|", regular ? CYS104_IN_GAPPED_V : 0) ;
120
  rep_4 = Fasta(2, "|") ;
121
  rep_3 = Fasta(2, "|", regular ? PHE118_TRP118_IN_GAPPED_J : 0) ;
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147

  for (json::iterator it = json_recom["5"].begin();
       it != json_recom["5"].end(); ++it) 
  {
    string filename = *it;
    f_reps_5.push_back(path + filename);
    rep_5.add(path + filename);
  }
  
  if (json_recom.find("4") != json_recom.end()) {
    for (json::iterator it = json_recom["4"].begin();
        it != json_recom["4"].end(); ++it) 
    {
        string filename = *it;
        f_reps_4.push_back(path + filename);
        rep_4.add(path + filename);
    }
  }
  
  for (json::iterator it = json_recom["3"].begin();
       it != json_recom["3"].end(); ++it) 
  {
    string filename = *it;
    f_reps_3.push_back(path + filename);
    rep_3.add(path + filename);
  }
148 149 150

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
151 152
}

153
void Germline::new_index()
154
{
155 156
  assert(! seed.empty());

157
  bool rc = true ;
158
  index = new PointerACAutomaton<KmerAffect>(seed, rc);
159
  index->refs = 1;
160

161 162 163
  update_index();
}

164
void Germline::set_index(IKmerStore<KmerAffect> *_index)
165 166
{
  index = _index;
167
  index->refs++ ;
168 169 170
}


171
void Germline::update_index(IKmerStore<KmerAffect> *_index)
172
{
173
  if (!_index) _index = index ;
174

175 176 177
  _index->insert(rep_5, affect_5, max_indexing, seed);
  _index->insert(rep_4, affect_4, 0, seed);
  _index->insert(rep_3, affect_3, -max_indexing, seed);
178 179
}

180 181
void Germline::mark_as_ambiguous(Germline *other)
{
182
  index->insert(other->rep_5, AFFECT_AMBIGUOUS_SYMBOL, max_indexing, seed);
183 184

  if (other->affect_4.size())
185
    index->insert(other->rep_4, AFFECT_AMBIGUOUS_SYMBOL, 0, seed);
186

187
  index->insert(other->rep_3, AFFECT_AMBIGUOUS_SYMBOL, -max_indexing, seed);
188 189 190
}


191 192 193 194 195
void Germline::override_rep5_rep3_from_labels(KmerAffect left, KmerAffect right)
{
  rep_5 = index->getLabel(left);
  rep_3 = index->getLabel(right);
}
196

197 198
Germline::~Germline()
{
199
  if (index)
200 201 202 203
    {
      if (--(index->refs) == 0)
        delete index;
    }
204 205 206 207
}

ostream &operator<<(ostream &out, const Germline &germline)
{
208
  out << setw(5) << left << germline.code << right << " '" << germline.shortcut << "' "
209
      << setw(3) << germline.delta_min
210
      << " ";
211

212
  if (germline.index) {
213
    out << " 0x" << hex << setw(2) << setfill('0') << germline.index->id << dec << setfill(' ') << " " ;
214
    out << fixed << setprecision(3) << setw(8) << 100 * germline.index->getIndexLoad() << "%";
215
    out << " l" << germline.index->getS() << " k" << germline.index->getK() << " " << germline.index->getSeed() ; // TODO: there should be a << for index
216
  }
217 218

  out << endl;
219 220 221 222
  return out;
}


223
MultiGermline::MultiGermline(bool _one_index_per_germline)
224
{
225
  index = NULL;
226
  one_index_per_germline = _one_index_per_germline;
227 228
}

Mikaël Salson's avatar
Mikaël Salson committed
229 230 231 232 233 234
MultiGermline::~MultiGermline() {
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      delete *it ;
    }
}
235

236 237 238 239 240
void MultiGermline::insert(Germline *germline)
{
  germlines.push_back(germline);
}

241
void MultiGermline::add_germline(Germline *germline)
242
{
243
  if (one_index_per_germline)
244
    germline->new_index();
245 246 247
  germlines.push_back(germline);
}

248
void MultiGermline::build_from_json(string path, string json_filename, int filter, int max_indexing)
249 250
{
  //parse germlines.data
251
  ifstream germline_data(path + "/" + json_filename);
252 253 254 255 256 257 258 259 260 261 262 263
  string content( (std::istreambuf_iterator<char>(germline_data) ),
                  (std::istreambuf_iterator<char>()    ) );

  json j = json::parse(content);
  
  //for each germline
  for (json::iterator it = j.begin(); it != j.end(); ++it) {
      
    json recom = it.value()["recombinations"];
    char shortcut = it.value()["shortcut"].dump()[1];
    string code = it.key();
    string seed = it.value()["parameters"]["seed"];
264 265 266 267 268 269 270 271 272 273 274 275 276 277

    switch (filter) {
    case GERMLINES_REGULAR:
      if (code.find("+") != string::npos) continue ;
      break ;

    case GERMLINES_INCOMPLETE:
      if (code.find("+") == string::npos) continue ;
      break ;

    default:
      break ;
    }

278 279 280 281 282 283 284 285
    map<string, string> seedMap;
    seedMap["13s"] = SEED_S13;
    seedMap["12s"] = SEED_S12;
    seedMap["10s"] = SEED_S10;
    seedMap["9s"] = SEED_9;
    
    //for each set of recombination 3/4/5
    for (json::iterator it2 = recom.begin(); it2 != recom.end(); ++it2) {
286 287
      add_germline(new Germline(code, shortcut, path + "/", *it2 , seedMap[seed],
                                max_indexing));
288 289 290 291 292
    }
  }
  
}

293

294
/* if 'one_index_per_germline' was not set, this should be called once all germlines have been loaded */
295
void MultiGermline::insert_in_one_index(IKmerStore<KmerAffect> *_index, bool set_index)
296 297 298 299
{
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
300

301 302
      if (germline->rep_4.size())
	germline->affect_4 = string(1, 14 + germline->shortcut) + "-" + germline->code + "D";
303

304 305 306 307
      germline->update_index(_index);

      if (set_index)
        germline->set_index(_index);
308 309 310
    }
}

311
void MultiGermline::build_with_one_index(string seed, bool set_index)
312 313
{
  bool rc = true ;
314
  index = new PointerACAutomaton<KmerAffect>(seed, rc);
315
  insert_in_one_index(index, set_index);
316 317
}

318 319 320
/* Mark k-mers common to several germlines as ambiguous */
void MultiGermline::mark_cross_germlines_as_ambiguous()
{
321
  string VdJa = "TRA+D";
322
  
323 324 325 326
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
      cout << *germline << ":" ;
327 328 329 330

      // Skip VdJa
      if (!(germline->code.compare(VdJa)))
        continue;
331 332 333 334 335 336 337
      
      for (list<Germline*>::const_iterator it2 = germlines.begin(); it2 != germlines.end(); ++it2)
      {
        Germline *germline2 = *it2 ;
        if (germline2 == germline)
          continue ;

338 339 340
        // Skip germlines on a same system, such as 'D' (TRD) and 'd' (TRD+)
        if (toupper(germline2->shortcut) == toupper(germline->shortcut))
          continue;
341 342 343 344

        // Skip VdJa
        if (!(germline2->code.compare(VdJa)))
          continue;
345
        
346 347 348 349 350 351 352
        germline->mark_as_ambiguous(germline2);
      }

      cout << endl;
    }
}

353 354 355 356 357 358 359 360 361 362 363

ostream &operator<<(ostream &out, const MultiGermline &multigermline)
{
  for (list<Germline*>::const_iterator it = multigermline.germlines.begin(); it != multigermline.germlines.end(); ++it)
    {
      Germline *germline = *it ;
      out << "   " << *germline ;
    }

  return out;
}