windowExtractor.cpp 4.15 KB
Newer Older
Mathieu Giraud's avatar
Mathieu Giraud committed
1 2 3
#include "windowExtractor.h"
#include "segment.h"

4
// Progress bar
5 6
#define PROGRESS_POINT 25000
#define PROGRESS_LINE 40
7

8
WindowExtractor::WindowExtractor(): out_segmented(NULL), out_unsegmented(NULL), out_affects(NULL), max_reads_per_window(~0){}
Mathieu Giraud's avatar
Mathieu Giraud committed
9
                                    
10 11
WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *multigermline,
					 size_t w,
12
                                         map<string, string> &windows_labels,
13
                                         int stop_after, int only_nth_read, bool keep_unsegmented_as_clone) {
Mathieu Giraud's avatar
Mathieu Giraud committed
14 15 16
  init_stats();

  WindowsStorage *windowsStorage = new WindowsStorage(windows_labels);
17
  windowsStorage->setMaximalNbReadsPerWindow(max_reads_per_window);
18 19 20 21 22
  for (list<Germline*>::const_iterator it = multigermline->germlines.begin(); it != multigermline->germlines.end(); ++it)
    {
      Germline *germline = *it ;
      nb_reads_germline[germline->code] = 0;
    }
23

24
  int nb_reads_all = 0;
25
  int bp_total = 0;
26 27

  while (reads->hasNext() && (int) nb_reads != stop_after) {
Mathieu Giraud's avatar
Mathieu Giraud committed
28
    reads->next();
29 30 31 32 33
    nb_reads_all++;

    if (nb_reads_all % only_nth_read)
      continue ;

Mathieu Giraud's avatar
Mathieu Giraud committed
34
    nb_reads++;
35

36 37
    if (out_affects) {
      *out_affects << reads->getSequence();
38
    }
39
    
40
    KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects);
41
    
42 43
    KmerSegmenter *seg = kmseg.the_kseg ;
    int read_length = seg->getSequence().sequence.length();
Mathieu Giraud's avatar
Mathieu Giraud committed
44

45 46
    stats[seg->getSegmentationStatus()].insert(read_length);
    if (seg->isSegmented()) {
47

48
      seg->segmented_germline->stats_reads.insert(read_length);
49

50
      junction junc = seg->getJunction(w);
Mathieu Giraud's avatar
Mathieu Giraud committed
51 52

      if (junc.size()) {
53
        stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ;
54
        windowsStorage->add(junc, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
Mathieu Giraud's avatar
Mathieu Giraud committed
55
      } else {
56
        stats[TOTAL_SEG_BUT_TOO_SHORT_FOR_THE_WINDOW].insert(read_length) ;
Mathieu Giraud's avatar
Mathieu Giraud committed
57 58
      }

59
      if (out_segmented) {
60
        *out_segmented << *seg ; // KmerSegmenter output (V/N/J)
61 62
      }
      
63
      nb_reads_germline[seg->system]++;
64
      
65 66 67 68 69
    } else {
      if (keep_unsegmented_as_clone && (reads->getSequence().sequence.length() >= w))
        {
          // Keep the unsegmented read, taking the full sequence as the junction
          windowsStorage->add(reads->getSequence().sequence, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
70
          stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ; // TODO: rather count that in a pseudo-germline such as 'TRG!'
71 72 73
        }

      if (out_unsegmented) {
74
        *out_unsegmented << *seg ;
75
      }
76 77 78 79
    }

    // Last line of detailed affects output
    if (out_affects) {
80
      *out_affects << "#" << seg->getInfoLine() << endl;
Mathieu Giraud's avatar
Mathieu Giraud committed
81
    }
82 83 84 85 86 87 88 89 90

    // Progress bar
    bp_total += read_length;

    if (!(nb_reads % PROGRESS_POINT))
      {
	cout << "." ;

	if (!(nb_reads % (PROGRESS_POINT * PROGRESS_LINE)))
91
	  cout << setw(10) << nb_reads / 1000 << "k reads " << fixed << setprecision(2) << setw(14) << bp_total / 1E6 << " Mbp" << endl ;
92 93 94

	cout.flush() ;
      }
Mathieu Giraud's avatar
Mathieu Giraud committed
95
  }
96 97 98

  cout << endl ;

Mathieu Giraud's avatar
Mathieu Giraud committed
99 100 101 102
  return windowsStorage;
}

float WindowExtractor::getAverageSegmentationLength(SEGMENTED seg) {
103
  return stats[seg].getAverage();
Mathieu Giraud's avatar
Mathieu Giraud committed
104 105
}

106 107 108 109
size_t WindowExtractor::getMaximalNbReadsPerWindow() {
  return max_reads_per_window;
}

Mathieu Giraud's avatar
Mathieu Giraud committed
110 111 112 113 114
size_t WindowExtractor::getNbReads() {
  return nb_reads;
}

size_t WindowExtractor::getNbSegmented(SEGMENTED seg) {
115
  return stats[seg].nb;
Mathieu Giraud's avatar
Mathieu Giraud committed
116 117
}

118 119 120 121
size_t WindowExtractor::getNbReadsGermline(string germline) {
  return nb_reads_germline[germline];
}

122 123 124 125
void WindowExtractor::setMaximalNbReadsPerWindow(size_t max_reads) {
  max_reads_per_window = max_reads;
}

Mathieu Giraud's avatar
Mathieu Giraud committed
126 127 128 129 130 131 132 133
void WindowExtractor::setSegmentedOutput(ostream *out) {
  out_segmented = out;
}

void WindowExtractor::setUnsegmentedOutput(ostream *out) {
  out_unsegmented = out;
}

134 135 136 137
void WindowExtractor::setAffectsOutput(ostream *out) {
  out_affects = out;
}

Mathieu Giraud's avatar
Mathieu Giraud committed
138 139
void WindowExtractor::init_stats() {
  for (int i = 0; i < STATS_SIZE; i++) {
140
    stats[i].label = segmented_mesg[i];
Mathieu Giraud's avatar
Mathieu Giraud committed
141 142 143
  }
  nb_reads = 0;
}
144 145 146 147 148 149 150

void WindowExtractor::out_stats(ostream &out)
{
  for (int i=0; i<STATS_SIZE; i++)
    {
      if (i == TOTAL_SEG_AND_WINDOW)
	out << endl;
151
      out << stats[i] << endl ;
152 153
    }
}