germline.cpp 9.19 KB
Newer Older
1 2

#include "germline.h"
3
#include <fstream>
4
#include <ctype.h>
5

6
void Germline::init(string _code, char _shortcut,
7
                    int _delta_min, string seed,
8
                    int max_indexing)
9
{
10
  seg_method = SEG_METHOD_53 ;
11 12
  code = _code ;
  shortcut = _shortcut ;
13
  index = 0 ;
14
  this->max_indexing = max_indexing;
15
  this->seed = seed;
16

17 18 19
  affect_5 = "V" ;
  affect_4 = "" ;
  affect_3 = "J" ;
20 21

  affect_5 = string(1, toupper(shortcut)) + "-" + code + "V";
22
  affect_4 = string(1, 14 + shortcut) + "-" + code + "D";
23 24
  affect_3 = string(1, tolower(shortcut)) + "-" + code + "J";
     
25 26 27
  delta_min = _delta_min ;
}

28 29

Germline::Germline(string _code, char _shortcut,
30
		   int _delta_min, string seed,
31
                    int max_indexing)
32
{
33
  init(_code, _shortcut, _delta_min, seed, max_indexing);
34 35 36
}


37 38
Germline::Germline(string _code, char _shortcut,
		   string f_rep_5, string f_rep_4, string f_rep_3,
39
		   int _delta_min, string seed,
40
                    int max_indexing)
41
{
42
  init(_code, _shortcut, _delta_min, seed, max_indexing);
43

44 45 46 47
  f_reps_5.push_back(f_rep_5);
  f_reps_4.push_back(f_rep_4);
  f_reps_3.push_back(f_rep_3);

48
  /// no CYS104_IN_GAPPED_V / PHE118_TRP118_IN_GAPPED_J here ?
49 50 51
  rep_5 = Fasta(f_rep_5, 2, "|");
  rep_4 = Fasta(f_rep_4, 2, "|");
  rep_3 = Fasta(f_rep_3, 2, "|");
52 53 54

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
55 56 57
}


58 59
Germline::Germline(string _code, char _shortcut,
		   list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
60
		   int _delta_min, string seed,
61
                    int max_indexing)
62
{
63
  init(_code, _shortcut, _delta_min, seed, max_indexing);
64 65 66 67 68

  f_reps_5 = _f_reps_5 ;
  f_reps_4 = _f_reps_4 ;
  f_reps_3 = _f_reps_3 ;

69 70 71
  bool regular = (code.find("+") == string::npos);

  rep_5 = Fasta(2, "|", regular ? CYS104_IN_GAPPED_V : 0);
72
  rep_4 = Fasta(2, "|") ;
73
  rep_3 = Fasta(2, "|", regular ? PHE118_TRP118_IN_GAPPED_J : 0);
74

75
  for (list<string>::const_iterator it = f_reps_5.begin(); it != f_reps_5.end(); ++it)
76
    rep_5.add(*it);
77 78
  
  for (list<string>::const_iterator it = f_reps_4.begin(); it != f_reps_4.end(); ++it)
79
    rep_4.add(*it);
80 81

  for (list<string>::const_iterator it = f_reps_3.begin(); it != f_reps_3.end(); ++it)
82
    rep_3.add(*it);
83 84 85

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
86 87 88
}


89 90
Germline::Germline(string _code, char _shortcut, 
           Fasta _rep_5, Fasta _rep_4, Fasta _rep_3,
91
		   int _delta_min, string seed,
92
                    int max_indexing)
93
{
94
  init(_code, _shortcut, _delta_min, seed, max_indexing);
95 96 97 98

  rep_5 = _rep_5 ;
  rep_4 = _rep_4 ;
  rep_3 = _rep_3 ;
99 100 101

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
102 103
}

104 105
Germline::Germline(string code, char shortcut, string path, json json_recom,
                   string seed, int max_indexing)
106 107
{
    
108
  int delta_min = -10;
109 110
  
  if (json_recom.find("4") != json_recom.end()) {
111
      delta_min = 0;
112 113
  }
  
114
  init(code, shortcut, delta_min, seed, max_indexing);
115 116

  bool regular = (code.find("+") == string::npos);
117
  
118
  rep_5 = Fasta(2, "|", regular ? CYS104_IN_GAPPED_V : 0) ;
119
  rep_4 = Fasta(2, "|") ;
120
  rep_3 = Fasta(2, "|", regular ? PHE118_TRP118_IN_GAPPED_J : 0) ;
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146

  for (json::iterator it = json_recom["5"].begin();
       it != json_recom["5"].end(); ++it) 
  {
    string filename = *it;
    f_reps_5.push_back(path + filename);
    rep_5.add(path + filename);
  }
  
  if (json_recom.find("4") != json_recom.end()) {
    for (json::iterator it = json_recom["4"].begin();
        it != json_recom["4"].end(); ++it) 
    {
        string filename = *it;
        f_reps_4.push_back(path + filename);
        rep_4.add(path + filename);
    }
  }
  
  for (json::iterator it = json_recom["3"].begin();
       it != json_recom["3"].end(); ++it) 
  {
    string filename = *it;
    f_reps_3.push_back(path + filename);
    rep_3.add(path + filename);
  }
147 148 149

  if (rep_4.size())
    seg_method = SEG_METHOD_543 ;
150 151
}

152
void Germline::new_index()
153
{
154 155
  assert(! seed.empty());

156 157
  bool rc = true ;
  index = KmerStoreFactory::createIndex<KmerAffect>(seed, rc);
158
  index->refs = 1;
159

160 161 162
  update_index();
}

163
void Germline::set_index(IKmerStore<KmerAffect> *_index)
164 165
{
  index = _index;
166
  index->refs++ ;
167 168 169
}


170
void Germline::update_index(IKmerStore<KmerAffect> *_index)
171
{
172
  if (!_index) _index = index ;
173

174 175 176
  _index->insert(rep_5, affect_5, max_indexing, seed);
  _index->insert(rep_4, affect_4, 0, seed);
  _index->insert(rep_3, affect_3, -max_indexing, seed);
177 178
}

179 180
void Germline::mark_as_ambiguous(Germline *other)
{
181
  index->insert(other->rep_5, AFFECT_AMBIGUOUS_SYMBOL, max_indexing, seed);
182 183

  if (other->affect_4.size())
184
    index->insert(other->rep_4, AFFECT_AMBIGUOUS_SYMBOL, 0, seed);
185

186
  index->insert(other->rep_3, AFFECT_AMBIGUOUS_SYMBOL, -max_indexing, seed);
187 188 189
}


190 191 192 193 194
void Germline::override_rep5_rep3_from_labels(KmerAffect left, KmerAffect right)
{
  rep_5 = index->getLabel(left);
  rep_3 = index->getLabel(right);
}
195

196 197
Germline::~Germline()
{
198
  if (index)
199 200 201 202
    {
      if (--(index->refs) == 0)
        delete index;
    }
203 204 205 206
}

ostream &operator<<(ostream &out, const Germline &germline)
{
207
  out << setw(5) << left << germline.code << right << " '" << germline.shortcut << "' "
208
      << setw(3) << germline.delta_min
209
      << " ";
210

211
  if (germline.index) {
212
    out << " 0x" << hex << setw(2) << setfill('0') << germline.index->id << dec << setfill(' ') << " " ;
213
    out << fixed << setprecision(3) << setw(8) << 100 * germline.index->getIndexLoad() << "%";
214
    out << " l" << germline.index->getS() << " k" << germline.index->getK() << " " << germline.index->getSeed() ; // TODO: there should be a << for index
215
  }
216 217

  out << endl;
218 219 220 221
  return out;
}


222
MultiGermline::MultiGermline(bool _one_index_per_germline)
223
{
224
  index = NULL;
225
  one_index_per_germline = _one_index_per_germline;
226 227
}

Mikaël Salson's avatar
Mikaël Salson committed
228 229 230 231 232 233
MultiGermline::~MultiGermline() {
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      delete *it ;
    }
}
234

235 236 237 238 239
void MultiGermline::insert(Germline *germline)
{
  germlines.push_back(germline);
}

240
void MultiGermline::add_germline(Germline *germline)
241
{
242
  if (one_index_per_germline)
243
    germline->new_index();
244 245 246
  germlines.push_back(germline);
}

247
void MultiGermline::build_from_json(string path, string json_filename, int filter, int max_indexing)
248 249
{
  //parse germlines.data
250
  ifstream germline_data(path + "/" + json_filename);
251 252 253 254 255 256 257 258 259 260 261 262
  string content( (std::istreambuf_iterator<char>(germline_data) ),
                  (std::istreambuf_iterator<char>()    ) );

  json j = json::parse(content);
  
  //for each germline
  for (json::iterator it = j.begin(); it != j.end(); ++it) {
      
    json recom = it.value()["recombinations"];
    char shortcut = it.value()["shortcut"].dump()[1];
    string code = it.key();
    string seed = it.value()["parameters"]["seed"];
263 264 265 266 267 268 269 270 271 272 273 274 275 276

    switch (filter) {
    case GERMLINES_REGULAR:
      if (code.find("+") != string::npos) continue ;
      break ;

    case GERMLINES_INCOMPLETE:
      if (code.find("+") == string::npos) continue ;
      break ;

    default:
      break ;
    }

277 278 279 280 281 282 283 284
    map<string, string> seedMap;
    seedMap["13s"] = SEED_S13;
    seedMap["12s"] = SEED_S12;
    seedMap["10s"] = SEED_S10;
    seedMap["9s"] = SEED_9;
    
    //for each set of recombination 3/4/5
    for (json::iterator it2 = recom.begin(); it2 != recom.end(); ++it2) {
285 286
      add_germline(new Germline(code, shortcut, path + "/", *it2 , seedMap[seed],
                                max_indexing));
287 288 289 290 291
    }
  }
  
}

292

293
/* if 'one_index_per_germline' was not set, this should be called once all germlines have been loaded */
294
void MultiGermline::insert_in_one_index(IKmerStore<KmerAffect> *_index, bool set_index)
295 296 297 298
{
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
299

300 301
      if (germline->rep_4.size())
	germline->affect_4 = string(1, 14 + germline->shortcut) + "-" + germline->code + "D";
302

303 304 305 306
      germline->update_index(_index);

      if (set_index)
        germline->set_index(_index);
307 308 309
    }
}

310
void MultiGermline::build_with_one_index(string seed, bool set_index)
311 312 313
{
  bool rc = true ;
  index = KmerStoreFactory::createIndex<KmerAffect>(seed, rc);
314
  insert_in_one_index(index, set_index);
315 316
}

317 318 319
/* Mark k-mers common to several germlines as ambiguous */
void MultiGermline::mark_cross_germlines_as_ambiguous()
{
320
  string VdJa = "TRA+D";
321
  
322 323 324 325
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
      cout << *germline << ":" ;
326 327 328 329

      // Skip VdJa
      if (!(germline->code.compare(VdJa)))
        continue;
330 331 332 333 334 335 336
      
      for (list<Germline*>::const_iterator it2 = germlines.begin(); it2 != germlines.end(); ++it2)
      {
        Germline *germline2 = *it2 ;
        if (germline2 == germline)
          continue ;

337 338 339
        // Skip germlines on a same system, such as 'D' (TRD) and 'd' (TRD+)
        if (toupper(germline2->shortcut) == toupper(germline->shortcut))
          continue;
340 341 342 343

        // Skip VdJa
        if (!(germline2->code.compare(VdJa)))
          continue;
344
        
345 346 347 348 349 350 351
        germline->mark_as_ambiguous(germline2);
      }

      cout << endl;
    }
}

352 353 354 355 356 357 358 359 360 361 362

ostream &operator<<(ostream &out, const MultiGermline &multigermline)
{
  for (list<Germline*>::const_iterator it = multigermline.germlines.begin(); it != multigermline.germlines.end(); ++it)
    {
      Germline *germline = *it ;
      out << "   " << *germline ;
    }

  return out;
}