Attention une mise à jour du serveur va être effectuée le lundi 17 mai entre 13h et 13h30. Cette mise à jour va générer une interruption du service de quelques minutes.

windowExtractor.cpp 4.29 KB
Newer Older
Mathieu Giraud's avatar
Mathieu Giraud committed
1 2 3
#include "windowExtractor.h"
#include "segment.h"

4
// Progress bar
5 6
#define PROGRESS_POINT 25000
#define PROGRESS_LINE 40
7

8
WindowExtractor::WindowExtractor(): out_segmented(NULL), out_unsegmented(NULL), out_affects(NULL), max_reads_per_window(~0){}
Mathieu Giraud's avatar
Mathieu Giraud committed
9
                                    
10 11
WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *multigermline,
					 size_t w,
12
                                         map<string, string> &windows_labels,
13
                                         int stop_after, int only_nth_read, bool keep_unsegmented_as_clone,
14
                                         double nb_expected, int nb_reads_for_evalue) {
Mathieu Giraud's avatar
Mathieu Giraud committed
15 16 17
  init_stats();

  WindowsStorage *windowsStorage = new WindowsStorage(windows_labels);
18
  windowsStorage->setMaximalNbReadsPerWindow(max_reads_per_window);
19 20 21 22 23
  for (list<Germline*>::const_iterator it = multigermline->germlines.begin(); it != multigermline->germlines.end(); ++it)
    {
      Germline *germline = *it ;
      nb_reads_germline[germline->code] = 0;
    }
24

25
  int nb_reads_all = 0;
26
  unsigned long long int bp_total = 0;
27 28

  while (reads->hasNext() && (int) nb_reads != stop_after) {
Mathieu Giraud's avatar
Mathieu Giraud committed
29
    reads->next();
30 31 32 33 34
    nb_reads_all++;

    if (nb_reads_all % only_nth_read)
      continue ;

Mathieu Giraud's avatar
Mathieu Giraud committed
35
    nb_reads++;
36

37 38
    if (out_affects) {
      *out_affects << reads->getSequence();
39
    }
40
    
41
    KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected, nb_reads_for_evalue);
42
    
43 44
    KmerSegmenter *seg = kmseg.the_kseg ;
    int read_length = seg->getSequence().sequence.length();
Mathieu Giraud's avatar
Mathieu Giraud committed
45

46 47
    stats[seg->getSegmentationStatus()].insert(read_length);
    if (seg->isSegmented()) {
48

49
      seg->segmented_germline->stats_reads.insert(read_length);
50

51
      junction junc = seg->getJunction(w);
Mathieu Giraud's avatar
Mathieu Giraud committed
52 53

      if (junc.size()) {
54
        stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ;
55
        windowsStorage->add(junc, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
Mathieu Giraud's avatar
Mathieu Giraud committed
56
      } else {
57
        stats[TOTAL_SEG_BUT_TOO_SHORT_FOR_THE_WINDOW].insert(read_length) ;
Mathieu Giraud's avatar
Mathieu Giraud committed
58 59
      }

60
      if (out_segmented) {
61
        *out_segmented << *seg ; // KmerSegmenter output (V/N/J)
62 63
      }
      
64
      nb_reads_germline[seg->system]++;
65
      
66 67 68 69 70
    } else {
      if (keep_unsegmented_as_clone && (reads->getSequence().sequence.length() >= w))
        {
          // Keep the unsegmented read, taking the full sequence as the junction
          windowsStorage->add(reads->getSequence().sequence, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
71
          stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ; // TODO: rather count that in a pseudo-germline such as 'TRG!'
72 73 74
        }

      if (out_unsegmented) {
75
        *out_unsegmented << *seg ;
76
      }
77 78 79 80
    }

    // Last line of detailed affects output
    if (out_affects) {
81
      *out_affects << "#" << seg->getInfoLine() << endl;
Mathieu Giraud's avatar
Mathieu Giraud committed
82
    }
83 84 85 86 87 88 89 90 91

    // Progress bar
    bp_total += read_length;

    if (!(nb_reads % PROGRESS_POINT))
      {
	cout << "." ;

	if (!(nb_reads % (PROGRESS_POINT * PROGRESS_LINE)))
92
	  cout << setw(10) << nb_reads / 1000 << "k reads " << fixed << setprecision(2) << setw(14) << bp_total / 1E6 << " Mbp" << endl ;
93 94 95

	cout.flush() ;
      }
Mathieu Giraud's avatar
Mathieu Giraud committed
96
  }
97 98 99

  cout << endl ;

Mathieu Giraud's avatar
Mathieu Giraud committed
100 101 102 103
  return windowsStorage;
}

float WindowExtractor::getAverageSegmentationLength(SEGMENTED seg) {
104
  return stats[seg].getAverage();
Mathieu Giraud's avatar
Mathieu Giraud committed
105 106
}

107 108 109 110
size_t WindowExtractor::getMaximalNbReadsPerWindow() {
  return max_reads_per_window;
}

Mathieu Giraud's avatar
Mathieu Giraud committed
111 112 113 114 115
size_t WindowExtractor::getNbReads() {
  return nb_reads;
}

size_t WindowExtractor::getNbSegmented(SEGMENTED seg) {
116
  return stats[seg].nb;
Mathieu Giraud's avatar
Mathieu Giraud committed
117 118
}

119 120 121 122
size_t WindowExtractor::getNbReadsGermline(string germline) {
  return nb_reads_germline[germline];
}

123 124 125 126
void WindowExtractor::setMaximalNbReadsPerWindow(size_t max_reads) {
  max_reads_per_window = max_reads;
}

Mathieu Giraud's avatar
Mathieu Giraud committed
127 128 129 130 131 132 133 134
void WindowExtractor::setSegmentedOutput(ostream *out) {
  out_segmented = out;
}

void WindowExtractor::setUnsegmentedOutput(ostream *out) {
  out_unsegmented = out;
}

135 136 137 138
void WindowExtractor::setAffectsOutput(ostream *out) {
  out_affects = out;
}

Mathieu Giraud's avatar
Mathieu Giraud committed
139 140
void WindowExtractor::init_stats() {
  for (int i = 0; i < STATS_SIZE; i++) {
141
    stats[i].label = segmented_mesg[i];
Mathieu Giraud's avatar
Mathieu Giraud committed
142 143 144
  }
  nb_reads = 0;
}
145 146 147 148 149 150 151

void WindowExtractor::out_stats(ostream &out)
{
  for (int i=0; i<STATS_SIZE; i++)
    {
      if (i == TOTAL_SEG_AND_WINDOW)
	out << endl;
152
      out << stats[i] << endl ;
153 154
    }
}