Commit 19e8268c authored by Mathieu Giraud

vidjil.cpp: use new MultiGermline, via windowExtractor.{h,cpp}, down to segment.{h,cpp}

For now, the MultiGermline contains only one germline.
The 'should' tests still pass, but some unit tests are now broken.
parent 1900762c
......@@ -161,8 +161,7 @@ ostream &operator<<(ostream &out, const Segmenter &s)
// KmerSegmenter (Cheap)
KmerSegmenter::KmerSegmenter(Sequence seq, IKmerStore<KmerAffect> *index,
int delta_min, int delta_max)
KmerSegmenter::KmerSegmenter(Sequence seq, MultiGermline *multigermline)
{
label = seq.label ;
sequence = seq.sequence ;
......@@ -172,7 +171,9 @@ KmerSegmenter::KmerSegmenter(Sequence seq, IKmerStore<KmerAffect> *index,
reversed = false;
Dend=0;
int s = (size_t)index->getS() ;
// Now we just take one germline
Germline *germline = multigermline->germlines.back();
int s = (size_t)germline->index->getS() ;
int length = sequence.length() ;
if (length < s)
......@@ -182,7 +183,7 @@ KmerSegmenter::KmerSegmenter(Sequence seq, IKmerStore<KmerAffect> *index,
return ;
}
kaa = new KmerAffectAnalyser<KmerAffect>(*index, sequence);
kaa = new KmerAffectAnalyser<KmerAffect>(*(germline->index), sequence);
// Check strand consistency among the affectations.
int strand;
......@@ -210,7 +211,7 @@ KmerSegmenter::KmerSegmenter(Sequence seq, IKmerStore<KmerAffect> *index,
strand = 2;
}
computeSegmentation(strand, delta_min, delta_max, s);
computeSegmentation(strand, germline->delta_min, germline->delta_max, s);
if (segmented)
{
......
......@@ -6,6 +6,7 @@
#include "fasta.h"
#include "dynprog.h"
#include "tools.h"
#include "germline.h"
#include "kmerstore.h"
#include "kmeraffect.h"
#include "affectanalyser.h"
......@@ -135,16 +136,9 @@ class KmerSegmenter : public Segmenter
/**
* Build a segmenter based on KmerSegmentation
* @param seq: An object read from a FASTA/FASTQ file
* @param index: A Kmer index
* @param delta_min: the minimal distance between the right bound and the left bound
* so that the segmentation is accepted
* (left bound: end of V, right bound : start of J)
* @param delta_max: the maximal distance between the right bound and the left bound
* so that the segmentation is accepted
* (left bound: end of V, right bound : start of J)
* @param multigermline: the multigermline
*/
KmerSegmenter(Sequence seq, IKmerStore<KmerAffect> *index,
int delta_min, int delta_max);
KmerSegmenter(Sequence seq, MultiGermline *multigermline);
~KmerSegmenter();
......
......@@ -4,8 +4,8 @@
WindowExtractor::WindowExtractor(): out_segmented(NULL), out_unsegmented(NULL){}
WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, IKmerStore<KmerAffect> *index,
size_t w, int delta_min, int delta_max,
WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, MultiGermline *multigermline,
size_t w,
map<string, string> &windows_labels) {
init_stats();
......@@ -14,8 +14,8 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, IKmerStore<KmerAffe
while (reads->hasNext()) {
reads->next();
nb_reads++;
KmerSegmenter seg(reads->getSequence(), index, delta_min, delta_max);
KmerSegmenter seg(reads->getSequence(), multigermline);
stats_segmented[seg.getSegmentationStatus()]++;
stats_length[seg.getSegmentationStatus()] += seg.getSequence().sequence.length();
......
......@@ -3,6 +3,7 @@
#include <iostream>
#include "segment.h"
#include "germline.h"
#include "kmerstore.h"
#include "kmeraffect.h"
#include "windows.h"
......@@ -31,20 +32,16 @@ class WindowExtractor {
* If (un)segmented sequences must be output, the functions
* set(Un)SegmentedOutput() must be called before.
* @param reads: the collection of input reads
* @param index: the index of the germline
* @param multigermline: the multigermline
* @param w: length of the window
* @param delta_min: The minimal distance between the end of the V
* and the start of the J (can be < 0)
* @param delta_max: The maximal distance between the end of the V
* and the start of the J.
* @param windows_labels: Windows that must be kept and registered as such.
* @return a pointer to a WindowsStorage that will contain all the windows.
* It is a pointer so that the WindowsStorage is not duplicated.
* @post Statistics on segmentation will be provided through the getSegmentationStats() methods
* and getAverageSegmentationLength().
*/
WindowsStorage *extract(OnlineFasta *reads, IKmerStore<KmerAffect> *index,
size_t w, int delta_min, int delta_max,
WindowsStorage *extract(OnlineFasta *reads, MultiGermline *multigermline,
size_t w,
map<string, string> &windows_labels);
/**
......
......@@ -33,6 +33,7 @@
#include "core/tools.h"
#include "core/json.h"
#include "core/germline.h"
#include "core/kmerstore.h"
#include "core/fasta.h"
#include "core/segment.h"
......@@ -780,14 +781,14 @@ int main (int argc, char **argv)
//////////////////////////////////
//$$ Build Kmer indexes
cout << "Build Kmer indexes" << endl ;
bool rc = true ;
IKmerStore<KmerAffect> *index = KmerStoreFactory::createIndex<KmerAffect>(seed, rc);
index->insert(rep_V, "V");
index->insert(rep_J, "J");
Germline *germline;
germline = new Germline(rep_V, rep_D, rep_J, seed,
delta_min, delta_max);
MultiGermline *multigermline;
multigermline = new MultiGermline(germline);
//////////////////////////////////
//$$ Kmer Segmentation
......@@ -813,8 +814,8 @@ int main (int argc, char **argv)
we.setUnsegmentedOutput(out_unsegmented);
}
WindowsStorage *windowsStorage = we.extract(reads, index, w, delta_min,
delta_max, windows_labels);
WindowsStorage *windowsStorage = we.extract(reads, multigermline, w, windows_labels);
windowsStorage->setIdToAll();
size_t nb_total_reads = we.getNbReads();
......@@ -929,9 +930,9 @@ int main (int argc, char **argv)
comp.del();
}
else
{
cout << "No clustering" << endl ;
clones_windows = comp.nocluster() ;
{
cout << "No clustering" << endl ;
clones_windows = comp.nocluster() ; /// XXX SUPPRIMER
}
cout << " ==> " << clones_windows.size() << " clones" << endl ;
......@@ -996,7 +997,7 @@ int main (int argc, char **argv)
list <Sequence> representatives ;
list <string> representatives_labels ;
VirtualReadScore *scorer = new KmerAffectReadScore(*index);
VirtualReadScore *scorer = new KmerAffectReadScore(*(germline->index));
int num_clone = 0 ;
int clones_without_representative = 0 ;
......@@ -1112,7 +1113,7 @@ int main (int argc, char **argv)
} else {
//$$ There is one representative, FineSegmenter
representative.label = string_of_int(it->second) + "--" + representative.label;
FineSegmenter seg(representative, rep_V, rep_J, delta_min, delta_max, segment_cost);
FineSegmenter seg(representative, rep_V, rep_J, germline->delta_min, germline->delta_max, segment_cost);
if (segment_D)
seg.FineSegmentD(rep_V, rep_D, rep_J);
......@@ -1294,7 +1295,7 @@ int main (int argc, char **argv)
//json->add("links", jsonLevenshtein);
out_json << json->toString();
delete index ;
delete germline ;
delete json;
delete windowsStorage;
delete json_samples;
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment