Commit d5bb0679 authored by Marc Duez
Browse files
parents 79ba6f3d 6e265afe
......@@ -12,6 +12,18 @@ BINDIR=..
v: vidjil
###
# k-mer debug builds.
#   kmers   : rebuild with -DDEBUG_KMERS passed through CONFIG
#   nokmers : rebuild without any extra CONFIG flag
# Both depend on cleankmers, which deletes the listed object files so that
# the following build has to regenerate them.
# $(MAKE) is used instead of a literal "make" so the sub-build runs the same
# make program and inherits its command-line flags.
.PHONY: kmers nokmers cleankmers

kmers: cleankmers
	$(MAKE) CONFIG="-DDEBUG_KMERS"

nokmers: cleankmers
	$(MAKE)

cleankmers:
	rm -f vidjil.o core/windowExtractor.o core/segment.o
###
spaced: cleanspaced
make
......@@ -22,11 +34,7 @@ nospaced: cleanspaced
cleanspaced:
rm -f vidjil.o core/tools.o
unsegmented:
rm -f vidjil.o core/segment.o
make CONFIG="-DOUT_UNSEGMENTED"
###
all: $(EXEC)
......
......@@ -207,8 +207,12 @@ int KmerAffectAnalyser<T>::last(const T &affect) const{
/**
 * Builds a textual dump of the affectations, in index order.
 *
 * Each element of `affectations` contributes its own toString() output.
 * When compiled with DEBUG_KMERS, every affectation is additionally followed
 * by ": ", the result of spaced() applied to the substring of `seq` starting
 * at the same index (length kms.getS()) with the store's seed, and a newline.
 *
 * @return the concatenated string (empty when there are no affectations)
 */
template <class T>
string KmerAffectAnalyser<T>::toString() const{
  string kmer;
  // Single pass over the affectations: a second, stacked loop header would
  // nest the loop and emit every affectation affectations.size() times.
  for (size_t i = 0; i < affectations.size(); i++) {
    kmer += affectations[i].toString();
#ifdef DEBUG_KMERS
    kmer += ": "+spaced(seq.substr(i,kms.getS()), kms.getSeed())+"\n";
#endif
  }
  return kmer;
}
......
......@@ -75,6 +75,11 @@ public:
*/
int getS() const;
/**
* @return the seed used
*/
string getSeed() const;
/**
* @param seq: a sequence
* @param no_revcomp: force not to revcomp the sequence, even if
......@@ -188,6 +193,11 @@ int IKmerStore<T>::getS() const {
return s;
}
/**
 * Accessor for the seed stored in this k-mer store.
 * @return the `seed` member, returned by value
 */
template <class T>
string IKmerStore<T>::getSeed() const
{
  return seed;
}
template<class T>
vector<T> IKmerStore<T>::getResults(const string &seq, bool no_revcomp) {
if ((int)seq.length() < s - 1) {
......
......@@ -31,8 +31,13 @@ WindowsStorage *WindowExtractor::extract(OnlineFasta *reads, IKmerStore<KmerAffe
stats_length[TOTAL_SEG_BUT_TOO_SHORT_FOR_THE_WINDOW] += seg.getSequence().sequence.length() ;
}
if (out_segmented)
if (out_segmented) {
*out_segmented << seg ; // KmerSegmenter output (V/N/J)
if (out_unsegmented)
*out_segmented << seg.getKmerAffectAnalyser()->toString() << endl;
}
} else if (out_unsegmented) {
*out_unsegmented << reads->getSequence();
*out_unsegmented << "#" << segmented_mesg[seg.getSegmentationStatus()] << endl;
......
......@@ -89,10 +89,10 @@ enum { CMD_WINDOWS, CMD_ANALYSIS, CMD_SEGMENT } ;
// "tests/data/leukemia.fa"
// Default option values.
// DEFAULT_K (0) and DEFAULT_SEED ("") act as "not chosen yet" markers: main()
// compares k against DEFAULT_K and, when they match, selects a seed (and the
// matching k) according to the germline. Each macro is defined exactly once;
// redefining a macro with a different value is ill-formed.
#define DEFAULT_K 0
#define DEFAULT_W 40
#define DEFAULT_W_D 60
#define DEFAULT_SEED ""

#define DEFAULT_DELTA_MIN -10
#define DEFAULT_DELTA_MAX 20
......@@ -142,9 +142,10 @@ void usage(char *progname)
<< "Window prediction" << endl
#ifndef NO_SPACED_SEEDS
<< " -s <string> spaced seed used for the V/J affectation (default: " << DEFAULT_SEED << ")" << endl
<< " -s <string> spaced seed used for the V/J affectation" << endl
<< " (default: #####-#####, ######-######, #######-#######, depends on germline)" << endl
#endif
<< " -k <int> k-mer size used for the V/J affectation (default: " << DEFAULT_K << ")" << endl
<< " -k <int> k-mer size used for the V/J affectation (default: 10, 12, 13, depends on germline)" << endl
<< " -w <int> w-mer size used for the length of the extracted window (default: " << DEFAULT_W << ")(default with -d: " << DEFAULT_W_D << ")" << endl
<< endl
......@@ -179,13 +180,15 @@ void usage(char *progname)
<< " -f <string> use custom Cost for fine segmenter : format \"match, subst, indels, homo, del_end\" (default "<<VDJ<<" )"<< endl
<< endl
<< "Debug" << endl
<< " -u output unsegmented sequences (default: " << UNSEGMENTED_FILENAME << ")" << endl
<< " and display detailed k-mer affectation both on segmented and on unsegmented sequences" << endl
<< "Output" << endl
<< " -o <dir> output directory (default: " << OUT_DIR << ")" << endl
<< " -p <string> prefix output filenames by the specified string" << endl
<< " -a output all sequences by cluster (" << SEQUENCES_FILENAME << ")" << endl
<< " -x no detailed analysis of each cluster" << endl
<< " -u output unsegmented sequences (default: " << UNSEGMENTED_FILENAME << ")" << endl
<< " -v verbose mode" << endl
<< endl
......@@ -452,6 +455,31 @@ int main (int argc, char **argv)
//$$ options: post-processing+display
// Default seeds
#ifndef NO_SPACED_SEEDS
if (k == DEFAULT_K)
{
if (germline_system.find("TRA") != string::npos)
seed = "#######-######" ;
else if ((germline_system.find("TRB") != string::npos)
|| (germline_system.find("IGH") != string::npos))
seed = "######-######" ;
else // TRD, TRG, IGK, IGL
seed = "#####-#####" ;
k = seed_weight(seed);
}
#else
{
cout << "Vidjil was compiled with NO_SPACED_SEEDS: please provide a -k option." << endl;
exit(1) ;
}
#endif
#ifndef NO_SPACED_SEEDS
// Check seed buffer
if (seed.size() >= MAX_SEED_SIZE)
......
2014-03-26 The Vidjil Team
* Better default seed selection, depending on the germline, segments more reads (vidjil.cpp)
* Better selection of representative read XXX setRequiredSequence (core/representative.cpp)
* New option to output all clones (-A), for testing purposes
* Updated debug option (-u) to display k-mer affectation (core/windowExtractor.cpp)
* Improved documentation
2014-02-20 The Vidjil Team
* Refactored main vidjil.cpp (core/windows.cpp, core/windowExtractor.cpp)
* Removed unused html output
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment