germline.cpp 8.92 KB
Newer Older
1 2

#include "germline.h"
3
#include <ctype.h>
4

5 6
void Germline::init(string _code, char _shortcut,
                    int _delta_min, int _delta_max)
7 8 9
{
  code = _code ;
  shortcut = _shortcut ;
10 11
  index = 0 ;

12 13 14
  affect_5 = "V" ;
  affect_4 = "" ;
  affect_3 = "J" ;
15 16 17 18 19

  affect_5 = string(1, toupper(shortcut)) + "-" + code + "V";
  affect_4 = ""; // string(1, 14 + shortcut) + "-" + code + "D";
  affect_3 = string(1, tolower(shortcut)) + "-" + code + "J";
     
20 21 22
  delta_min = _delta_min ;
  delta_max = _delta_max ;

23
  stats_reads.setLabel(code);
24
  stats_clones.setLabel("");
25 26 27 28 29 30 31 32
}

Germline::Germline(string _code, char _shortcut,
		   string f_rep_5, string f_rep_4, string f_rep_3,
		   int _delta_min, int _delta_max)
{
  init(_code, _shortcut, _delta_min, _delta_max);

33 34 35 36
  f_reps_5.push_back(f_rep_5);
  f_reps_4.push_back(f_rep_4);
  f_reps_3.push_back(f_rep_3);

37 38 39
  rep_5 = Fasta(f_rep_5, 2, "|");
  rep_4 = Fasta(f_rep_4, 2, "|");
  rep_3 = Fasta(f_rep_3, 2, "|");
40 41 42
}


43 44 45 46
Germline::Germline(string _code, char _shortcut,
		   list <string> _f_reps_5, list <string> _f_reps_4, list <string> _f_reps_3,
		   int _delta_min, int _delta_max)
{
47
  init(_code, _shortcut, _delta_min, _delta_max);
48 49 50 51 52

  f_reps_5 = _f_reps_5 ;
  f_reps_4 = _f_reps_4 ;
  f_reps_3 = _f_reps_3 ;

53 54 55 56
  rep_5 = Fasta(2, "|") ;
  rep_4 = Fasta(2, "|") ;
  rep_3 = Fasta(2, "|") ;

57
  for (list<string>::const_iterator it = f_reps_5.begin(); it != f_reps_5.end(); ++it)
58
    rep_5.add(*it);
59 60
  
  for (list<string>::const_iterator it = f_reps_4.begin(); it != f_reps_4.end(); ++it)
61
    rep_4.add(*it);
62 63

  for (list<string>::const_iterator it = f_reps_3.begin(); it != f_reps_3.end(); ++it)
64
    rep_3.add(*it);
65 66 67
}


68 69
Germline::Germline(string _code, char _shortcut, 
           Fasta _rep_5, Fasta _rep_4, Fasta _rep_3,
70 71
		   int _delta_min, int _delta_max)
{
72
  init(_code, _shortcut, _delta_min, _delta_max);
73 74 75 76

  rep_5 = _rep_5 ;
  rep_4 = _rep_4 ;
  rep_3 = _rep_3 ;
77 78
}

79
void Germline::new_index(string seed)
80
{
81 82 83
  bool rc = true ;
  index = KmerStoreFactory::createIndex<KmerAffect>(seed, rc);

84 85 86 87 88 89 90 91 92 93 94 95 96 97
  update_index();
}

void Germline::use_index(IKmerStore<KmerAffect> *_index)
{
  index = _index;

  update_index();
}


void Germline::update_index()
{
  index->insert(rep_5, affect_5);
98 99 100 101

  if (affect_4.size())
    index->insert(rep_4, affect_4);

102
  index->insert(rep_3, affect_3);
103 104
}

105 106 107 108 109 110 111 112 113 114 115 116
void Germline::mark_as_ambiguous(Germline *other)
{
  index->insert(other->rep_5, AFFECT_AMBIGUOUS_SYMBOL);

  if (other->affect_4.size())
    index->insert(other->rep_4, AFFECT_AMBIGUOUS_SYMBOL);

  index->insert(other->rep_3, AFFECT_AMBIGUOUS_SYMBOL);
}



117 118
Germline::~Germline()
{
119 120
  if (index)
    delete index;
121 122 123 124
}

ostream &operator<<(ostream &out, const Germline &germline)
{
125 126
  out << setw(4) << left << germline.code << right << " '" << germline.shortcut << "' "
      << setw(3) << germline.delta_min << "/" << setw(3) << germline.delta_max
127 128 129 130 131 132
      << " " << germline.index ;

  if (germline.index)
    out << " s" << germline.index->getS() << " k" << germline.index->getK() << " " << germline.index->getSeed() ; // TODO: there should be a << for index

  out << endl;
133 134 135 136
  return out;
}


137
MultiGermline::MultiGermline()
138 139 140
{
}

Mikaël Salson's avatar
Mikaël Salson committed
141 142 143 144 145 146
MultiGermline::~MultiGermline() {
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      delete *it ;
    }
}
147

148 149 150 151 152
void MultiGermline::insert(Germline *germline)
{
  germlines.push_back(germline);
}

153
void MultiGermline::build_default_set(string path)
154
{
155
  // Should parse 'data/germlines.data'
156 157
  Germline *germline;
  
158
  germline = new Germline("TRG", 'G', path + "/TRGV.fa", "",                path + "/TRGJ.fa",   -10, 30);
159 160 161 162 163 164 165
  germline->new_index("#####-#####");
  germlines.push_back(germline);

  germline = new Germline("IGH", 'H', path + "/IGHV.fa", path + "/IGHD.fa",  path + "/IGHJ.fa",   0, 80);
  germline->new_index("######-######");
  germlines.push_back(germline);

166 167 168 169 170 171 172 173
  germline = new Germline("TRD", 'D', path + "/TRDV.fa", path + "/TRDD.fa",  path + "/TRDJ.fa",   0, 80);
  germline->new_index("#####-#####");
  germlines.push_back(germline);

  germline = new Germline("IGK", 'K', path + "/IGKV.fa", "",  path + "/IGKJ.fa",  -10, 20);
  germline->new_index("#####-#####");
  germlines.push_back(germline);

174 175 176 177 178 179 180 181 182 183 184 185 186

  germline = new Germline("TRA", 'A', path + "/TRAV.fa", "",  path + "/TRAJ.fa",  -10, 20);
  germline->new_index("#######-######");
  germlines.push_back(germline);

  germline = new Germline("TRB", 'B', path + "/TRBV.fa", path + "/TRBD.fa",  path + "/TRBJ.fa",   0, 80);
  germline->new_index("######-######");
  germlines.push_back(germline);

  germline = new Germline("IGL", 'L', path + "/IGLV.fa", "",  path + "/IGLJ.fa",  -10, 20);
  germline->new_index("#####-#####");
  germlines.push_back(germline);

187
}
188

189 190 191 192 193 194 195 196 197 198
void MultiGermline::build_incomplete_set(string path)
{
  // Should parse 'data/germlines.data'
  Germline *germline;

  // DH-JH
  germline = new Germline("IGH+", 'h', path + "/IGHD.fa", "", path + "/IGHJ.fa",   -10, 20);
  germline->new_index("######-######");
  germlines.push_back(germline);

199 200 201 202
  // VD-JA
  germline = new Germline("VdJa", 'a', path + "/TRDV.fa", "",  path + "/TRAJ.fa",   -10, 80);
  germline->new_index("#####-#####");
  germlines.push_back(germline);
203 204

  // DD2-DD3
205
  germline = new Germline("TRD+", 'd', path + "/TRDD2-01.fa", "", path + "/TRDJ.fa",   -10, 60);
206
  germline->new_index("#########");
207
  germlines.push_back(germline);
208
  germline = new Germline("TRD+", 'd', path + "/TRDV.fa", "", path + "/TRDD3-01.fa",   -10, 50);
209
  germline->new_index("#########");
210
  germlines.push_back(germline);
211
  germline = new Germline("TRD+", 'd', path + "/TRDD2-01.fa", "", path + "/TRDD3-01.fa",  -10, 50);
212
  germline->new_index("#########");
213 214 215 216
  germlines.push_back(germline);


  // IGK: KDE, INTRON
217
  germline = new Germline("IGK+", 'k', path + "/IGK-INTRON.fa", "",  path + "/IGK-KDE.fa",  -10, 80);
218 219
  germline->new_index("#####-#####");
  germlines.push_back(germline);
220
  germline = new Germline("IGK+", 'k', path + "/IGKV.fa", "",  path + "/IGK-KDE.fa",  -10, 80);
221 222 223 224 225
  germline->new_index("#####-#####");
  germlines.push_back(germline);

}

226

227 228 229 230
void MultiGermline::load_standard_set(string path)
{
  germlines.push_back(new Germline("TRA", 'A', path + "/TRAV.fa", "",                path + "/TRAJ.fa",   -10, 20));
  germlines.push_back(new Germline("TRB", 'B', path + "/TRBV.fa", path + "/TRBD.fa", path + "/TRBJ.fa",   -10, 20));
231
  germlines.push_back(new Germline("TRG", 'G', path + "/TRGV.fa", "",                path + "/TRGJ.fa",   -10, 30));
232 233 234 235 236 237 238 239 240 241 242 243
  germlines.push_back(new Germline("TRD", 'D', path + "/TRDV.fa", path + "/TRDD.fa", path + "/TRDJ.fa",     0, 80));

  germlines.push_back(new Germline("IGH", 'H', path + "/IGHV.fa", path + "/IGHD.fa", path + "/IGHJ.fa",     0, 80));
  germlines.push_back(new Germline("IGK", 'K', path + "/IGKV.fa", "",                path + "/IGKJ.fa",   -10, 20));
  germlines.push_back(new Germline("IGL", 'L', path + "/IGLV.fa", "",                path + "/IGLJ.fa",   -10, 20));
}

void MultiGermline::insert_in_one_index(IKmerStore<KmerAffect> *_index)
{
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
244

245 246
      if (germline->rep_4.size())
	germline->affect_4 = string(1, 14 + germline->shortcut) + "-" + germline->code + "D";
247

248 249 250 251
      germline->use_index(_index) ;
    }
}

252 253 254 255 256 257 258
void MultiGermline::build_with_one_index(string seed)
{
  bool rc = true ;
  index = KmerStoreFactory::createIndex<KmerAffect>(seed, rc);
  insert_in_one_index(index);
}

259 260
void MultiGermline::out_stats(ostream &out)
{
261 262
  out << "                         " ;
  out << "reads av. len     clones av. rds" ;
263 264
  out << endl ;

265 266 267
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
268
      out << germline->stats_reads ;
269
      out << germline->stats_clones ;
270
      out << endl ;
271 272
    }
}
273

274 275 276
/* Mark k-mers common to several germlines as ambiguous */
void MultiGermline::mark_cross_germlines_as_ambiguous()
{
277 278
  string VdJa = "VdJa";
  
279 280 281 282
  for (list<Germline*>::const_iterator it = germlines.begin(); it != germlines.end(); ++it)
    {
      Germline *germline = *it ;
      cout << *germline << ":" ;
283 284 285 286

      // Skip VdJa
      if (!(germline->code.compare(VdJa)))
        continue;
287 288 289 290 291 292 293
      
      for (list<Germline*>::const_iterator it2 = germlines.begin(); it2 != germlines.end(); ++it2)
      {
        Germline *germline2 = *it2 ;
        if (germline2 == germline)
          continue ;

294 295 296
        // Skip germlines on a same system, such as 'D' (TRD) and 'd' (TRD+)
        if (toupper(germline2->shortcut) == toupper(germline->shortcut))
          continue;
297 298 299 300

        // Skip VdJa
        if (!(germline2->code.compare(VdJa)))
          continue;
301
        
302 303 304 305 306 307 308
        germline->mark_as_ambiguous(germline2);
      }

      cout << endl;
    }
}

309 310 311 312 313 314 315 316 317 318 319

ostream &operator<<(ostream &out, const MultiGermline &multigermline)
{
  for (list<Germline*>::const_iterator it = multigermline.germlines.begin(); it != multigermline.germlines.end(); ++it)
    {
      Germline *germline = *it ;
      out << "   " << *germline ;
    }

  return out;
}