Commit e391ee31 authored by Marc Duez's avatar Marc Duez
parents 18abab51 eb9bce3a
......@@ -6,15 +6,25 @@ VIDJIL_SERVER_SRC = server/
all:
make -C $(VIDJIL_ALGO_SRC)
test: all
# make -C $(VIDJIL_SERVER_SRC) tests
make -C $(VIDJIL_ALGO_SRC)/tests
test:
make unit
make should
# make pytests
test_with_fuse:
make unit
make should
make pytests
unit: all
@echo "*** Launching unit tests..."
make -C $(VIDJIL_ALGO_SRC)/tests
@echo "*** All .should_get tests passed"
pytests:
@echo "*** Launching python tests..."
python server/fuse.py --test x
@echo "*** All python tests passed"
should: all
@echo
......
......@@ -8,7 +8,7 @@ LIBCORE=core/vidjil.a
BINDIR=..
CREATE_VERSION_GIT_H := $(shell ./create-git-version-h.sh)
CREATE_VERSION_GIT_H := $(shell test -x ./create-git-version-h.sh && ./create-git-version-h.sh)
.PHONY: all core clean forcedep
......
......@@ -43,6 +43,8 @@ template<class T>
class AffectAnalyser {
public:
virtual ~AffectAnalyser() {}
/* Queries */
/**
......
......@@ -74,9 +74,9 @@ void comp_matrix::load(string file){
char* tampon=(char*)malloc(windows.size()*sizeof(char));
ifstream in_comp(file.c_str());
for(int i=0; i<windows.size();i++){
for(unsigned int i=0; i<windows.size();i++){
in_comp.read(tampon, windows.size()*sizeof(char));
for(int j=0;j<windows.size(); j++){
for(unsigned int j=0;j<windows.size(); j++){
m[i][j]=tampon[j];
}
}
......@@ -90,7 +90,7 @@ void comp_matrix::save(string file){
ofstream out_comp(file.c_str());
for(int i=0; i<windows.size();i++){
for(unsigned int i=0; i<windows.size();i++){
out_comp.write((char *)m[i],windows.size()*sizeof(char));
}
......@@ -99,7 +99,7 @@ void comp_matrix::save(string file){
void comp_matrix::del(){
for (int i=0;i<windows.size();i++){
for (unsigned int i=0;i<windows.size();i++){
free(m[i]);
}
free(m);
......@@ -112,7 +112,6 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
typedef map<junction,list<Sequence> > mjs ;
typedef list<string> li ;
map <string, map <string, bool> > graph ;
......
......@@ -343,11 +343,11 @@ void DynProg::backtrack()
linkgap = new int[x.size()+1];
gap2 = new int[y.size()+1];
for (int i = 0; i <=x.size(); i++) {
for (unsigned int i = 0; i <=x.size(); i++) {
gap1[i] = 0;
linkgap[i] = 0;
}
for (int i = 0; i <= y.size(); i++) {
for (unsigned int i = 0; i <= y.size(); i++) {
gap2[i] = 0;
}
......
......@@ -197,7 +197,7 @@ void OnlineFasta::next() {
// Compute seq
current.seq = new int[current.sequence.length()];
for (int i=0; i< current.sequence.length(); i++)
for (unsigned int i=0; i< current.sequence.length(); i++)
{
current.seq[i] = nuc_to_int(current.sequence[i]) ;
}
......
......@@ -52,7 +52,7 @@ void LazyMsa::alignOne(string *align, int one){
ostringstream stream;
for (int i=0; i<ref.size(); i++){
for (unsigned int i=0; i<ref.size(); i++){
for (int j=0; j<gapRef[one][i] ; j++){
stream <<"-";
}
......@@ -66,7 +66,7 @@ void LazyMsa::alignOne(string *align, int one){
ostringstream stream2;
for (int i=0; i<sequences[one].size(); i++){
for (unsigned int i=0; i<sequences[one].size(); i++){
for (int j=0; j<gapSeq[one][i] ; j++){
stream2 <<"-";
}
......@@ -84,19 +84,19 @@ void LazyMsa::align(string *align){
int *maxGap= new int [ref.size()+1];
for (int i = 0; i< ref.size()+1; i++){
for (unsigned int i = 0; i< ref.size()+1; i++){
maxGap[i]=0;
}
for (int i=0; i<sizeUsed+1; i++){
for (int j=0; j<ref.size()+1; j++){
for (unsigned int j=0; j<ref.size()+1; j++){
if ( gapRef[i][j] > maxGap[j] ) maxGap[j]=gapRef[i][j];
}
}
ostringstream stream;
for (int i=0; i<ref.size(); i++){
for (unsigned int i=0; i<ref.size(); i++){
for (int j=0; j<maxGap[i] ; j++){
stream <<"-";
}
......@@ -111,11 +111,11 @@ void LazyMsa::align(string *align){
for (int i=0; i<sizeUsed+1; i++){
ostringstream stream2;
for (int j = 0; j< ref.size()+1; j++){
for (unsigned int j = 0; j< ref.size()+1; j++){
gapSeq[i][link[i][j]]+=maxGap[j]-gapRef[i][j];
}
for (int j=0; j<sequences[i].size(); j++){
for (unsigned int j=0; j<sequences[i].size(); j++){
for (int k=0; k<gapSeq[i][j] ; k++){
stream2 <<"-";
}
......
......@@ -60,6 +60,7 @@ void KmerAffectReadScore::setUnknownScore(float score) {
////////////////////////////////////////////////////////////////////////////////
ReadLengthScore::ReadLengthScore(){}
ReadLengthScore::~ReadLengthScore(){}
float ReadLengthScore::getScore(const string &sequence) const {
return sequence.size();
......
......@@ -11,6 +11,8 @@
*/
class VirtualReadScore {
public:
virtual ~VirtualReadScore() {}
/**
* @param sequence: some text
* @return the score associated to the sequence.
......@@ -62,6 +64,7 @@ public:
class ReadLengthScore: public VirtualReadScore {
public:
ReadLengthScore();
~ReadLengthScore();
/**
* @return the sequence length
......
......@@ -175,7 +175,7 @@ KmerSegmenter::KmerSegmenter(Sequence seq, IKmerStore<KmerAffect> *index,
int s = (size_t)index->getS() ;
int length = sequence.length() ;
if (length < (size_t) s)
if (length < s)
{
because = UNSEG_TOO_SHORT;
kaa = NULL;
......@@ -568,7 +568,7 @@ FineSegmenter::FineSegmenter(Sequence seq, Fasta &rep_V, Fasta &rep_J,
// Trim J
Jstart += b_r;
del_J += b_r;
if (Jstart>=sequence.length())
if (Jstart>=(int) sequence.length())
Jstart=sequence.length()-1;
}
......@@ -611,7 +611,6 @@ void FineSegmenter::FineSegmentD(Fasta &rep_V, Fasta &rep_D, Fasta &rep_J){
int tag_D;
int length = 0 ;
int begin = 0;
int score;
// Create a zone where to look for D, adding at most EXTEND_D_ZONE nucleotides at each side
int l = Vend - EXTEND_D_ZONE;
......@@ -629,7 +628,6 @@ void FineSegmenter::FineSegmentD(Fasta &rep_V, Fasta &rep_D, Fasta &rep_J){
end = align_against_collection(str, rep_D, false, true, &tag_D, &del_D_right, &del_D_left, &begin,
&length, &score_D, segment_cost);
score=score_D[0].first;
best_D = tag_D;
Dstart = l + begin;
......
......@@ -29,7 +29,7 @@ vector<int> WindowsStorage::getStatus(junction window) {
JsonList WindowsStorage::statusToJson(junction window) {
JsonList result;
for (int i=0; i<status_by_window[window].size(); i++){
for (unsigned int i=0; i<status_by_window[window].size(); i++){
if (status_by_window[window][i] !=0){
ostringstream oss;
oss << i;
......
......@@ -26,7 +26,7 @@ void testCluster() {
map<string, string> labels;
WindowsStorage windows = WindowsStorage(labels);
Sequence seq = {"", "", "", ""};
Sequence seq = {"", "", "", "", NULL};
windows.add("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAT", seq, 0);
windows.add("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", seq, 0);
......
......@@ -183,8 +183,11 @@ void usage(char *progname)
<< "Limits to report a clone" << endl
<< " -R <nb> minimal number of reads supporting a clone (default: " << MIN_READS_CLONE << ")" << endl
<< " -% <ratio> minimal percentage of reads supporting a clone (default: " << RATIO_READS_CLONE << ")" << endl
<< " -z <nb> maximal number of clones reported (0: no limit) (default: " << MAX_CLONES << ")" << endl
<< " -A reports all clones (-r 0 -R 1 -% 0 -z 0), to be used only on very small datasets" << endl
<< endl
<< "Limits to segment a clone" << endl
<< " -z <nb> maximal number of clones to be segmented (0: no limit, do not use) (default: " << MAX_CLONES << ")" << endl
<< " -A reports and segments all clones (-r 0 -R 1 -% 0 -z 0), to be used only on very small datasets" << endl
<< endl
<< "Fine segmentation options (second pass, see warning in doc/README)" << endl
......@@ -207,6 +210,9 @@ void usage(char *progname)
<< " -v verbose mode" << endl
<< endl
<< "The full help is available in the doc/algo.org file."
<< endl
<< endl
<< "Examples (see doc/README)" << endl
<< " " << progname << " -G germline/IGH -d data/Stanford_S22.fasta" << endl
......@@ -219,7 +225,7 @@ void usage(char *progname)
int main (int argc, char **argv)
{
cout << "# Vidjil -- V(D)J recombinations analysis <http://bioinfo.lifl.fr/vidjil>" << endl
cout << "# Vidjil -- V(D)J recombinations analysis <http://www.vidjil.org/>" << endl
<< "# Copyright (C) 2011, 2012, 2013, 2014 by the Vidjil team" << endl
<< "# Bonsai bioinformatics at LIFL (UMR CNRS 8022, Université Lille) and Inria Lille" << endl
<< endl ;
......@@ -1495,6 +1501,7 @@ int main (int argc, char **argv)
delete index ;
delete json;
delete windowsStorage;
delete json_samples;
if (output_segmented)
delete out_segmented;
......
......@@ -72,7 +72,7 @@ Database.prototype = {
var msg = " Welcome to Vidjil! </br>"
+ "Your browser currently does not recognize our SSL certificate. </br>"
+ "To use the sample database, you need to accept this certificate and/or tag this website as a trusted one. </br>"
+ "<a href='"+DB_ADDRESS+"' target='_blank'>Follow this link<a/>"
+ "<a href='"+DB_ADDRESS+"'>Follow this link<a/>"
popupMsg(msg)
}
},
......
......@@ -7,15 +7,15 @@ var CGI_ADDRESS = "http://127.0.1.1/cgi-bin/";
if (typeof config != 'undefined') {
if (config.cgi_address){
if (config.cgi_address) CGI_ADDRESS = config.cgi_address
if (config.cgi_address == "default") CGI_ADDRESS = "http://"+window.location.hostname+"/cgi/"
}
if (config.use_database != undefined && config.use_database) {
if (config.db_address) { DB_ADDRESS = config.db_address}
if (config.db_address == "default") DB_ADDRESS = "https://"+window.location.hostname+"/vidjil/"
if (config.cgi_address == "default") CGI_ADDRESS = "http://"+window.location.hostname+"/cgi/"
var fileref=document.createElement('script')
fileref.setAttribute("type","text/javascript")
fileref.setAttribute("src", DB_ADDRESS + "static/js/checkSSL.js")
document.getElementsByTagName("head")[0].appendChild(fileref)
}
if (config.demo && config.demo.file.length != 0){
......
This changelog concerns the algorithmic part (C++) of Vidjil.
2014-09-xx The Vidjil Team
2014-09-23 The Vidjil Team
* Export cause of non-segmentation in the .data
* New option to output segmented reads (-U), now by default segmented reads are not output one by one
* Updated .data .json output (the format will change again in a next release)
......
......@@ -78,10 +78,86 @@ browser. They are intended for a command-line use only.
Launching vidjil with =-h= option provides the list of parameters that can be
used.
** Main algorithm parameters
#+BEGIN_EXAMPLE
Window prediction
(use either -s or -k option, but not both)
-s <string> spaced seed used for the V/J affectation
(default: #####-#####, ######-######, #######-#######, depends on germline)
-k <int> k-mer size used for the V/J affectation (default: 10, 12, 13, depends on germline)
(using -k option is equivalent to set with -s a contiguous seed with only '#' characters)
-w <int> w-mer size used for the length of the extracted window (default: 40)(default with -d: 60)
#+END_EXAMPLE
The =-s= and =-k= options are the options of the heuristic. A detailed
explanation can be found in the paper. More help on that will be
available in the following months. The default values should work.
The =-w= option fixes the size of the "window" that is the main
identifier to gather clones. The default values (40 for TRG, 60 for
IGH) were selected to ensure a high-quality clone gathering. The
high-throughput heuristic predicts the center of the "window" that may
be shifted by a few bases from the actual "center" of the CDR3 (for TRG,
less than 15 bases compared to the IMGT/V-QUEST or IgBlast prediction
in >99% of cases). The extracted window should be large enough to
fully contain the CDR3 as well as some part of the end of the V and
the start of the J to uniquely identify a clone.
Setting =-w= to 30 for TRG and 50 for IGH may "segment" (analyze) a
few more reads, but may in some rare cases falsely cluster reads from
different clones. Setting =-w= to lower values is not recommended.
** Threshold on clone output
The following options control how many clones are output and analyzed.
#+BEGIN_EXAMPLE
Limit to keep a window
-r <nb> minimal number of reads containing a window (default: 10)
Limits to report a clone
-R <nb> minimal number of reads supporting a clone (default: 10)
-% <ratio> minimal percentage of reads supporting a clone (default: 0)
Limits to segment a clone
-z <nb> maximal number of clones to be segmented (0: no limit, do not use) (default: 20)
-A reports and segments all clones (-r 0 -R 1 -% 0 -z 0), to be used only on very small datasets
#+END_EXAMPLE
The =-r/-R/-%= options are strong thresholds: if a clone does not have
the requested number of reads, the clone is discarded (except when
using =-l=, see below).
The =-r= option is applied before the additional clusterization, the
=-R/-%= options after it.
The default =-r 10 -R 10= options are meant to only output clones that
have a significant read support. You can safely put =-r 1 -R 1= if you
want to detect all clones starting from the first read (especially for
MRD detection).
The =-z= option limits the number of clones that are fully analyzed,
/with their V(D)J segmentation/, in particular to enable the browser
to display the clones on the grid (otherwise they are displayed on the
'?/?' axis).
If you want to analyze more clones, you should use =-z 50= or
=-z 100=. It is not recommended to use larger values: outputting more
than 100 clones is often not useful since they can't be visualized easily
in the browser, and it takes a long computation time.
Note that even if a clone is not in the top 20 (or 50, or 100) but
still passes the =-R=, =-%= options, it is still reported in the .data
file. If the clone is at some MRD point in the top 20 (or 50, or 100),
it will be fully analyzed/segmented by this other point (and then
collected by the =fuse.py= script, and then, on the browser, correctly
displayed on the grid).
The =-A= option disables all these thresholds. This option should be
used only for test and debug purposes, on very small datasets, as it
produces large files and takes huge computation time.
** Force to follow some sequences
Vidjil allows one to specify a list of windows that must be followed
(even if those windows are 'rare', below the -r/-R/-% thresholds).
(even if those windows are 'rare', below the =-r/-R/-%= thresholds).
The parameter =-l= is made for providing such a list in a file with
the following format: window label (separated by one space)
......
......@@ -26,7 +26,7 @@ if [ $COMPLETE -eq 1 ]; then
zip -r $filename web2py/applications/vidjil/uploads/ web2py/applications/vidjil/databases/
else
filename="${DIR}backup_essentials_"$now
zip -r $filename web2py/applications/vidjil/databases/ web2py/applications/vidjil/uploads/results_file* web2py/applications/vidjil/uploads/fused_file*
zip -r $filename web2py/applications/vidjil/databases/ web2py/applications/vidjil/uploads/results_file* web2py/applications/vidjil/uploads/fused_file* web2py/applications/vidjil/uploads/analysis_file*
fi
echo $filename
......@@ -17,7 +17,7 @@
<body onload="init()">
<h4>Welcome to Vidjil !</h4>
The certificate is now validated.
<!-- The certificate is now validated. -->
<a id="browser" href="#">Go to Vidjil</a>
<!--
......
......@@ -4,7 +4,12 @@
<table>
<tbody>
<tr>
<td ><label for="patient_first_name" id="patient_first_name__label"> First Name: </label></td>
<td><label for="patient_id_label" id="patient_id_label__label">Patient ID: </label></td>
<td><input class="date" id="patient_id_label" name="id_label" type="text" value=""><span></span></td>
<td></td>
</tr>
<tr>
<td ><label for="patient_first_name" id="patient_first_name__label">First Name: </label></td>
<td ><input class="string" id="patient_first_name" name="first_name" type="text" value=""><span>*</span></td>
<td ></td>
</tr>
......@@ -18,11 +23,6 @@
<td><input class="date" id="patient_birth" name="birth" type="text" value=""><span>* (format: yyyy-mm-dd)</span></td>
<td></td>
</tr>
<tr>
<td><label for="patient_id_label" id="patient_id_label__label">ID label </label></td>
<td><input class="date" id="patient_id_label" name="id_label" type="text" value=""><span>id used by hospital administration system</span></td>
<td></td>
</tr>
<tr>
<td><label for="patient_info" id="patient_info__label">Info: </label></td>
<td><textarea class="text" cols="40" id="patient_info" name="info" rows="10"></textarea></td>
......
......@@ -8,6 +8,11 @@ info = db.patient[request.vars["id"]]
<input type="HIDDEN" id="patient_id" name="id" value="{{=request.vars["id"]}}">
<table>
<tbody>
<tr>
<td><label for="patient_id_label" id="patient_id_label__label">Patient ID:</label></td>
<td><input class="date" id="patient_id_label" name="id_label" type="text" value="{{=info.id_label}}"><span></span></td>
<td></td>
</tr>
<tr>
<td > <label for="patient_first_name" id="patient_first_name__label"> First Name: </label> </td>
<td > <input class="string" id="patient_first_name" name="first_name" type="text" value="{{=info.first_name}}"><span>*</span></td>
......@@ -23,11 +28,6 @@ info = db.patient[request.vars["id"]]
<td> <input class="date" id="patient_birth" name="birth" type="text" value="{{=info.birth}}"><span>* (format: yyyy-mm-dd)</span></td>
<td> </td>
</tr>
<tr>
<td><label for="patient_id_label" id="patient_id_label__label">ID label </label></td>
<td><input class="date" id="patient_id_label" name="id_label" type="text" value="{{=info.id_label}}"><span>id used by hospital administration system</span></td>
<td></td>
</tr>
<tr>
<td> <label for="patient_info" id="patient_info__label">Info: </label> </td>
<td> <textarea class="text" cols="40" id="patient_info" name="info" rows="10"> {{=info.info}} </textarea> </td>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment