Commit cdadc9ec authored by Vidjil Team's avatar Vidjil Team
parents 08207142 efd423b3
!LAUNCH: ../../vidjil -G ../../germline/TRG -c clones ../../data/segment_lec.fa
$ Extract 40bp windows (TRG)
1:found . 40-windows
$ Extract 50bp windows (TRG)
1:found . 50-windows
$ Find the good number of windows
1: found 2 .* in 4 segments .* inside 7 sequences
......
......@@ -7,5 +7,5 @@ $ Same number of 'V' affectations in both reads
2:143
$ Only one window
1:==> found 1 60-window
1:==> found 1 ..-window
......@@ -4,4 +4,4 @@ $ Skip the good number of reads
1:Processing every 131th read
$ Analyze the good number of reads
1: found 100 60-windows in 100 segments .100.. inside 100 sequences
1: found 100 ..-windows in 100 segments .100.. inside 100 sequences
!LAUNCH: ../../vidjil -G ../../germline/IGH -r 5 -b data ../../data/Stanford_S22.fasta ; cat out/data.vidjil | python ../../tools/format_json.py -1
!LAUNCH: ../../vidjil -G ../../germline/IGH -w 60 -r 5 -b data ../../data/Stanford_S22.fasta ; cat out/data.vidjil | python ../../tools/format_json.py -1
$ Number of reads
e1:"total": [13153]
......
!LAUNCH: ../../vidjil -r 5 -o out2 -u -U -v -G ../../germline/IGH ../../data/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
!LAUNCH: ../../vidjil -w 60 -r 5 -o out2 -u -U -v -G ../../germline/IGH ../../data/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
# Testing uncommon and debug options
$ verbose (-v)
......
!LAUNCH: ../../vidjil -G ../../germline/IGH ../../data/Stanford_S22.fasta ; python ../../tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python ../../tools/format_json.py -1
!LAUNCH: ../../vidjil -w 60 -G ../../germline/IGH ../../data/Stanford_S22.fasta ; python ../../tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python ../../tools/format_json.py -1
$ Points list
e1:"original_names": ["../../data/Stanford_S22.fasta", "../../data/Stanford_S22.fasta"]
......
......@@ -11,5 +11,4 @@ $ Parses germline/IGHJ.fa
1: 701 bp in 13 sequences
$ Find the good number of windows in Stanford S22 (contiguous seed 14)
1: found 10921 60-windows in 13114 segments
1: found 10743 50-windows in 13114 segments
!LAUNCH: ../../vidjil -G ../../germline/IGH -r 5 -l ../../data/Stanford_S22.label ../../data/Stanford_S22.fasta
!LAUNCH: ../../vidjil -w 60 -G ../../germline/IGH -r 5 -l ../../data/Stanford_S22.label ../../data/Stanford_S22.fasta
$ Keep the good number of windows, including one window labeled in Stanford_S22.label
1: keep 3 windows
......
!LAUNCH: ../../vidjil -x 100 -G ../../germline/IGH ../../data/Stanford_S22.fasta
$ Analyze the good number of sequences in Stanford S22
1: found 98 60-windows in 99 segments .99.. inside 100 sequences
1: found 98 ..-windows in 99 segments .99.. inside 100 sequences
......@@ -16,5 +16,5 @@ $ Find approximately the good number of sequences for e-value computation
1: approx. 131.. sequences
$ Find the good number of windows in Stanford S22
1: found 10935 60-windows in 13138 segments
1: found 10732 50-windows in 13138 segments
......@@ -13,5 +13,8 @@ $ Keep the same number of windows
0:==> keep
$ Same segmentation for the main clone
2:clone.001.*0 114 118 131 137 180
2:clone.001.*0 31 38 57 66 111
$ Same representative for the main clone
2:clone.001.* 112 bp .45. of 247 bp.
......@@ -98,8 +98,7 @@ enum { CMD_WINDOWS, CMD_CLONES, CMD_SEGMENT, CMD_GERMLINES } ;
// "tests/data/leukemia.fa"
#define DEFAULT_K 0
#define DEFAULT_W 40
#define DEFAULT_W_D 60
#define DEFAULT_W 50
#define DEFAULT_SEED ""
#define DEFAULT_DELTA_MIN -10
......@@ -179,7 +178,7 @@ void usage(char *progname, bool advanced)
#endif
<< " -m <int> minimal admissible delta between last V and first J k-mer (default: " << DEFAULT_DELTA_MIN << ") (default with -D: " << DEFAULT_DELTA_MIN_D << ")" << endl
<< " -M <int> maximal admissible delta between last V and first J k-mer (default: " << DEFAULT_DELTA_MAX << ") (default with -D: " << DEFAULT_DELTA_MAX_D << ")" << endl
<< " -w <int> w-mer size used for the length of the extracted window (default: " << DEFAULT_W << ") (default with -D: " << DEFAULT_W_D << ")" << endl
<< " -w <int> w-mer size used for the length of the extracted window (default: " << DEFAULT_W << ")" << endl
<< " -e <float> maximal e-value for determining if a segmentation can be trusted (default: " << THRESHOLD_NB_EXPECTED << ")" << endl
<< endl
......@@ -286,8 +285,7 @@ int main (int argc, char **argv)
string comp_filename = COMP_FILENAME;
int k = DEFAULT_K ;
int w = 0 ;
int default_w = DEFAULT_W ;
int w = DEFAULT_W ;
int epsilon = DEFAULT_EPSILON ;
int minPts = DEFAULT_MINPTS ;
......@@ -383,7 +381,6 @@ int main (int argc, char **argv)
f_reps_D.push_back(optarg);
delta_min = DEFAULT_DELTA_MIN_D ;
delta_max = DEFAULT_DELTA_MAX_D ;
default_w = DEFAULT_W_D ;
break;
case 'J':
......@@ -425,7 +422,6 @@ int main (int argc, char **argv)
f_reps_D.push_back(putative_f_rep_D.c_str()) ;
delta_min = DEFAULT_DELTA_MIN_D ;
delta_max = DEFAULT_DELTA_MAX_D ;
default_w = DEFAULT_W_D ;
}
}
f_reps_J.push_back((germline_system + "J.fa").c_str()) ;
......@@ -582,11 +578,6 @@ int main (int argc, char **argv)
exit(1);
}
// If there was no -w option, then w is either DEFAULT_W or DEFAULT_W_D
if (w == 0)
w = default_w ;
if (options_s_k > 1)
{
cerr << ERROR_STRING << "Use at most one -s or -k option." << endl ;
......
......@@ -41,6 +41,7 @@ VIDJIL_JSON_VERSION = '2014.09';
* @constructor
* */
function Model() {
var self=this;
console.log("creation Model")
for (f in Model_loader.prototype) {
......@@ -51,7 +52,7 @@ function Model() {
this.checkBrowser();
this.germlineList = new GermlineList()
this.build();
window.onresize = function () { m.resize(); };
window.onresize = function () { self.resize(); };
this.start()
......
var console = new Com(console)
test("Axis : ", function() {
......
var console = new Com(console)
var json_clone1 = {
"sequence" : "abcdefghijklmnopqrstuvwxyz",
"name" : "hello",
......
var console = new Com(console)
test("graph: svg path builder", function() {
var m = new Model(m);
m.parseJsonData(json_data,100)
......
var console = new Com(console)
test("model : load", function() {
var m = new Model();
m.parseJsonData(json_data)
......
var console = new Com(console)
test("scatterplot : grid", function() {
var m = new Model(m);
m.parseJsonData(json_data,100)
......
......@@ -158,7 +158,7 @@ Window prediction
(using -k option is equivalent to set with -s a contiguous seed with only '#' characters)
-m <int> minimal admissible delta between last V and first J k-mer (default: -10) (default with -D: 0)
-M <int> maximal admissible delta between last V and first J k-mer (default: 20) (default with -D: 80)
-w <int> w-mer size used for the length of the extracted window (default: 40)(default with -D: 60)
-w <int> w-mer size used for the length of the extracted window (default: 50)
-e <float> maximal e-value for determining if a segmentation can be trusted (default: 'all', no limit)
#+END_EXAMPLE
......@@ -166,18 +166,19 @@ The =-s=, =-k=, =-m= and =-M= options are the options of the seed-based heuristi
explanation can be found in the paper. These options are for advanced usage; the default values should work.
The =-w= option fixes the size of the "window" that is the main
identifier to gather clones. The defaults values (40 for VJ, 60 for
VDJ recombinations) were selected to ensure a high-quality clone gathering. The
identifier to gather clones. The default value (=-w 50=) was selected
to ensure a high-quality clone gathering. The
high-throughput heuristic predicts the center of the "window" that may
be shifted by a few bases from the actual "center" of the CDR3 (for TRG,
less than 15 bases compared to the IMGT/V-QUEST or IgBlast prediction
in >99% of cases). The extracted window should be large enough to
fully contain the CDR3 as well as some part of the end of the V and
the start of the J to uniquely identify a clone.
the start of the J, or at least some specific N region, to uniquely identify a clone.
Setting =-w= to 30 for VJ and 50 for VDJ recombinations may "segment" (analyze) a
few more reads, but may in some rare cases falsely cluster reads from
different clones. Setting =-w= to lower values is not recommended.
Setting =-w= to lower values may "segment" (analyze) a few more reads, depending
on the read length of your data, but may in some rare cases falsely cluster reads from
different clones. The =-w 40= option is usually safe, and =-w 30= can also be tested.
Setting =-w= to even lower values is not recommended.
The =-e= option sets the maximal e-value accepted for segmenting a sequence.
It is an upper bound on the number of expected windows found by chance by the seed-based heuristic.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment