...
 
Commits (150)
......@@ -299,7 +299,6 @@ test_server_functional:
- sed -i '/\/etc\/nginx\/ssl\:\/etc\/nginx\/ssl/d' ./docker/docker-compose.yml
- sed -i 's/\:latest/\:test/g' ./docker/docker-compose.yml
- cd docker/vidjil-server/conf/ && mv defs.py defs_https.py && mv defs_http.py defs.py && cd ../../..
- cd docker/vidjil-client/conf/ && mv conf.js conf_https.js && mv conf_http.js conf.js && cd ../../..
- make germline && cp browser/js/germline.js docker/vidjil-client/conf
- cd docker && docker-compose up -d && cd ..
- sed -i "s/^python\ \.\.\/\.\.\/\.\./docker\ exec\ docker_uwsgi_1\ python\ \/usr\/share\/vidjil\/server\/web2py/" server/web2py/applications/vidjil/tests/init_func_test_db.sh
......
#include "affectanalyser.h"
#include <algorithm>
#include <unordered_map>
bool operator==(const affect_infos &ai1, const affect_infos &ai2) {
return ai1.first_pos_max == ai2.first_pos_max
......@@ -312,6 +314,33 @@ int KmerAffectAnalyser::last(const KmerAffect &affect) const{
}
/*
 * Tallies every affectation of the sequence that is not in 'forbidden', then
 * picks the two affectations with the highest tallies.
 * Both slots start as the unknown affectation with a tally of -1, so that is
 * what is returned for a slot no affectation could fill.
 * Comparisons are strict: among equal tallies, the entry met first while
 * iterating over the hash map keeps its place.
 * @return (most frequent affectation, second most frequent affectation)
 */
pair <KmerAffect, KmerAffect> KmerAffectAnalyser::max12(const set<KmerAffect> forbidden) const {
  // Number of occurrences of each allowed affectation
  std::unordered_map<KmerAffect, int> occurrences;
  for (const KmerAffect &current: affectations) {
    if (forbidden.count(current) != 0)
      continue;
    ++occurrences[current];
  }

  KmerAffect best = KmerAffect::getUnknown();
  KmerAffect runner_up = KmerAffect::getUnknown();
  int best_count = -1;
  int runner_up_count = -1;

  for (const auto &entry: occurrences) {
    if (entry.second > best_count) {
      // New leader: the previous leader becomes the runner-up
      runner_up = best;
      runner_up_count = best_count;
      best = entry.first;
      best_count = entry.second;
    } else if (entry.second > runner_up_count) {
      runner_up = entry.first;
      runner_up_count = entry.second;
    }
  }

  return make_pair(best, runner_up);
}
string KmerAffectAnalyser::toString() const{
string kmer;
for (size_t i = 0; i < affectations.size(); i++) {
......@@ -349,12 +378,8 @@ CountKmerAffectAnalyser::CountKmerAffectAnalyser(IKmerStore<KmerAffect> &kms, co
CountKmerAffectAnalyser::~CountKmerAffectAnalyser() {
set<KmerAffect> affects = this->getDistinctAffectations();
/* Initialize each key with a 0-integer array */
for (set<KmerAffect>::iterator it = affects.begin();
it != affects.end(); it++) {
delete [] counts[*it];
for (auto it : counts) {
delete [] it.second;
}
}
......@@ -390,38 +415,6 @@ KmerAffect CountKmerAffectAnalyser::max(const set<KmerAffect> forbidden) const {
return max_affect;
}
/*
 * Scans every affectation stored as a key of 'counts', ignoring the ones in
 * 'forbidden', and keeps the two for which count() is the highest.
 * Both slots start as the unknown affectation with a count of -1.
 * Comparisons are strict, so on equal counts the affectation met first
 * (in the order of the map) keeps its place.
 * @return (most frequent affectation, second most frequent affectation)
 */
pair <KmerAffect, KmerAffect> CountKmerAffectAnalyser::max12(const set<KmerAffect> forbidden) const {
  pair<KmerAffect, int> best = make_pair(KmerAffect::getUnknown(), -1);
  pair<KmerAffect, int> second = best;

  for (const auto &entry: counts) {
    const KmerAffect &candidate = entry.first;
    if (forbidden.count(candidate) != 0)
      continue;

    const int occurrences = count(candidate);
    if (occurrences > best.second) {
      // The previous best is demoted to second place
      second = best;
      best = make_pair(candidate, occurrences);
    } else if (occurrences > second.second) {
      second = make_pair(candidate, occurrences);
    }
  }

  return make_pair(best.first, second.first);
}
int CountKmerAffectAnalyser::countBefore(const KmerAffect&affect, int pos) const {
if (pos == 0 || counts.count(affect) == 0)
return 0;
......
......@@ -121,6 +121,14 @@ class AffectAnalyser {
*/
virtual int last(const KmerAffect &affect) const = 0;
/*
* @return the two affectations that are seen the most frequently in the sequence,
* excluding the forbidden ones.
* @complexity n + m log m where n is the input sequence length and m the number
* of affectations
*/
virtual pair <KmerAffect, KmerAffect> max12(const set<KmerAffect> forbidden) const = 0;
/**
* @return a string representation of the object
*/
......@@ -222,6 +230,8 @@ class KmerAffectAnalyser: public AffectAnalyser {
int last(const KmerAffect &affect) const ;
pair <KmerAffect, KmerAffect> max12(const set<KmerAffect> forbidden) const;
string toString() const;
string toStringValues() const;
......@@ -302,12 +312,6 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser {
*/
KmerAffect max(const set<KmerAffect> forbidden = set<KmerAffect>()) const;
/*
* @return the two affectations that are seen the most frequently in the sequence,
* excluding the forbidden ones.
*/
pair <KmerAffect, KmerAffect> max12(const set<KmerAffect> forbidden) const;
/**
* Set the overlap allowed between two k-mers with two different affectations,
* when looking for the maximum.
......
......@@ -24,6 +24,7 @@ class AbstractACAutomaton: public IKmerStore<Info> {
protected:
void *initialState;
float all_index_load;
map<Info, size_t> kmers_inserted;
public:
AbstractACAutomaton();
......
......@@ -15,16 +15,16 @@ void AbstractACAutomaton<Info>::finish_building() {
IKmerStore<Info>::finish_building();
build_failure_functions();
}
all_index_load = 0;
for(auto iter: kmers_inserted) {
all_index_load += getIndexLoad(iter.first);
}
}
template<class Info>
float AbstractACAutomaton<Info>::getIndexLoad(Info kmer) const {
float load = 0;
if (kmers_inserted.count(kmer) == 0) {
for(auto iter: kmers_inserted) {
load += getIndexLoad(iter.first);
}
return (kmer.isUnknown()) ? 1 - load : load;
return (kmer.isUnknown()) ? 1 - all_index_load : all_index_load;
} else {
return kmers_inserted.at(kmer) / pow(4.0, kmer.getLength());
}
......
......@@ -126,6 +126,7 @@ BioReader::BioReader(bool virtualfasta, string name)
init(0, "");
this -> name = name;
basename = extract_basename(name);
filenames.push_back(this->name);
}
BioReader::BioReader(int extract_field, string extract_separator, int mark_pos)
......@@ -157,6 +158,7 @@ void BioReader::add(const string &filename, bool verbose) {
name += filename;
basename += extract_basename(filename);
filenames.push_back(name);
if (verbose)
cout << " <== " << filename ;
......
......@@ -189,6 +189,7 @@ public:
string name;
string basename;
list<string> filenames;
int size() const;
size_t totalSize() const;
......
......@@ -174,6 +174,17 @@ bool operator>=(const KmerAffect &a1, const KmerAffect &a2);
bool operator!=(const KmerAffect &a1, const KmerAffect &a2);
ostream &operator<<(ostream &os, const KmerAffect &kmer);
namespace std {
  /**
   * Hash functor allowing KmerAffect to be used as a key in unordered
   * containers.
   * The hash is built from the first character of the label (shifted left
   * by 8 bits) OR-ed with the length, so two affectations sharing both
   * values get the same hash.
   * NOTE(review): assumes getLabel() is non-empty or safely indexable at 0
   * -- to be confirmed against KmerAffect.
   */
  template <>
  struct hash<KmerAffect> {
    size_t operator()(const KmerAffect &affect) const {
      const auto first_label_char = affect.getLabel()[0];
      const auto length = affect.getLength();
      return (first_label_char << 8) | length;
    }
  };
}
#define AFFECT_NOT_UNKNOWN_SYMBOL "*"
#define AFFECT_AMBIGUOUS_SYMBOL "\0"
#define AFFECT_UNKNOWN_SYMBOL "\1"
......
......@@ -15,6 +15,7 @@ using namespace std;
typedef
enum { KMER_INDEX, AC_AUTOMATON } IndexTypes;
#define MAX_PRECOMPUTED_PROBA 500 /* Precompute 500 probabilities for each index load */
class Kmer {
public:
unsigned int count;
......@@ -74,6 +75,8 @@ protected:
size_t nb_kmers_inserted;
size_t max_size_indexing;
bool finished_building;
map<float, vector<double> > precomputed_proba_with_system,
precomputed_proba_without_system;
public:
......@@ -149,7 +152,7 @@ public:
/**
* @return probability that the number of kmers is 'at_least' or more in a sequence of length 'length'
*/
double getProbabilityAtLeastOrAbove(T kmer, int at_least, int length) const;
double getProbabilityAtLeastOrAbove(T kmer, int at_least, int length);
/**
* @return the value of k
......@@ -199,6 +202,10 @@ public:
* @pre word.length() == this->k
*/
virtual T& operator[](seqtype& word) = 0;
protected:
void precompute_proba(float index_load);
};
template<class T>
......@@ -366,17 +373,45 @@ int IKmerStore<T>::atMostMaxSizeIndexing(int n) const {
}
template<class T>
double IKmerStore<T>::getProbabilityAtLeastOrAbove(const T kmer, int at_least, int length) const {
void IKmerStore<T>::precompute_proba(float index_load) {
  // Tables of successive powers: entry i holds index_load^i (with system)
  // and (1 - index_load)^i (without system), for i < MAX_PRECOMPUTED_PROBA.
  vector<double> powers_with(MAX_PRECOMPUTED_PROBA);
  vector<double> powers_without(MAX_PRECOMPUTED_PROBA);

  powers_with[0] = 1;
  powers_without[0] = 1;
  for (int exponent = 1; exponent < MAX_PRECOMPUTED_PROBA; exponent++) {
    // Each power is derived from the previous one rather than calling pow()
    powers_with[exponent] = powers_with[exponent - 1] * index_load;
    powers_without[exponent] = powers_without[exponent - 1] * (1 - index_load);
  }

  // Store (or overwrite) the tables under this index load
  precomputed_proba_with_system[index_load] = powers_with;
  precomputed_proba_without_system[index_load] = powers_without;
}
template<class T>
double IKmerStore<T>::getProbabilityAtLeastOrAbove(const T kmer, int at_least, int length) {
if (at_least == 0) return 1.0; // even if 'length' is very small
// n: number of kmers in the sequence
int n = length - getS() + 1;
float index_load = getIndexLoad(kmer) ;
if (! precomputed_proba_without_system.count(index_load)) {
precompute_proba(index_load);
}
double proba = 0;
double probability_having_system = pow(index_load, at_least);
double probability_not_having_system = pow(1 - index_load, n - at_least);
double probability_not_having_system;
double probability_having_system;
if (precomputed_proba_with_system.at(index_load).size() > (size_t)at_least)
probability_having_system = precomputed_proba_with_system.at(index_load)[at_least];
else
probability_having_system = pow(index_load, at_least);
if (precomputed_proba_without_system.at(index_load).size() > (size_t)n - at_least)
probability_not_having_system = precomputed_proba_without_system.at(index_load)[n-at_least];
else
probability_not_having_system = pow(1 - index_load, n - at_least);
for (int i=at_least; i<=n; i++) {
proba += nChoosek(n, i) * probability_having_system * probability_not_having_system;
probability_having_system *= index_load;
......
......@@ -14,7 +14,7 @@ using namespace std;
#define THRESHOLD_BAD_COVERAGE .5 /* Threshold below which the representative
coverage is considered bad */
static ReadQualityScore DEFAULT_READ_SCORE;
static RandomScore DEFAULT_READ_SCORE;
/**
* Compute a representative sequence from a list of sequences.
......
......@@ -442,7 +442,6 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold,
info_extra = "seed";
segmented = false;
segmented_germline = germline ;
system = germline->code; // useful ?
reversed = false;
because = NOT_PROCESSED ; // Cause of unsegmentation
score = 0 ;
......@@ -524,7 +523,7 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold,
|| (germline->seg_method == SEG_METHOD_MAX1U))
{ // Pseudo-germline, MAX12 and MAX1U
pair <KmerAffect, KmerAffect> max12 ;
CountKmerAffectAnalyser ckaa(*(germline->index), sequence);
KmerAffectAnalyser ckaa = *kaa;
set<KmerAffect> forbidden;
......
......@@ -82,7 +82,7 @@ WindowsStorage *WindowExtractor::extract(OnlineBioReader *reads,
stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ;
if (seg->isJunctionChanged())
stats[SEG_CHANGED_WINDOW].insert(read_length);
stats_reads[seg->system].addScore(read_length);
stats_reads[seg->segmented_germline->code].addScore(read_length);
if (out_segmented) {
*out_segmented << *seg ; // KmerSegmenter output (V/N/J)
......
......@@ -11,8 +11,8 @@ EXEC=$(SRC:.cpp=)
OBJ=$(SRC:.cpp=.o)
OTHER_SRC=$(wildcard unit-tests/*.cpp)
LIB=../core/vidjil.a ../lib/lib.a
SHOULD=$(wildcard should-get-tests/*.should-get) $(wildcard bugs/*.should-get)
SHOULD_LOG=$(SHOULD:.should-get=.tap)
SHOULD=$(wildcard should-get-tests/*.should) $(wildcard bugs/*.should)
SHOULD_LOG=$(SHOULD:.should=.tap)
SHOULD_VDJ=$(wildcard should-vdj-tests/*.should-vdj.fa)
SHOULD_VDJ_VDJ=$(SHOULD_VDJ:.should-vdj.fa=.1.vdj)
SHOULD_LOCUS=$(wildcard should-vdj-tests/*.should-locus.fa)
......
......@@ -3,7 +3,7 @@
"samples" : {
"number" : 1,
"original_names" : [ "/some/file" ] ,
"original_names" : [ "/some/file_1" ] ,
"run_timestamp" : [ "2015-02-19 16:37:06" ] ,
"producer" : [ "vidjil dev 0cf35de (2015-02-17)" ] ,
"log" : [ "Some log" ],
......
......@@ -3,7 +3,7 @@
"samples" : {
"number" : 1,
"original_names" : [ "/some/file" ] ,
"original_names" : [ "/some/file_2" ] ,
"run_timestamp" : [ "2015-02-19 16:37:06" ] ,
"producer" : [ "vidjil dev 0cf35de (2015-02-17)" ] ,
"log" : [ "Some log" ],
......@@ -27,6 +27,7 @@
"sequence" : "seq-1",
"reads" : [ 300 ] ,
"normalized_reads" : [ 500 ] ,
"top" : 1,
"germline" : "IGH"
},
......
This diff is collapsed.
......@@ -2,12 +2,13 @@
Parses output of various RepSeq programs.
Takes either:
- a .fa file, a _Summary.txt file as produced by IMGT/V-QUEST
- or a results file produced by MiXCR
- or a results file produced by MiXCR or IgReC
and creates a .vdj file to be checked by should-vdj-to-tap.py
python repseq_vdj.py data-curated/curated_IG.fa data-curated/curated_ig_Summary.txt > data-curated/imgt-IG.vdj
python repseq_vdj.py data-curated/curated_TR.fa data-curated/curated_tr_Summary.txt > data-curated/imgt-TR.vdj
python repseq_vdj.py data-curated/mixcr.results > data-curated/mixcr.vdj
python repseq_vdj.py bla.igrec.results
python repseq_vdj.py data-curated/curated_IG.fa data-curated/igblast/IG/*.aln > data-curated/igblast-IG.vdj
python repseq_vdj.py data-curated/curated_TR.fa data-curated/igblast/TR/*.aln > data-curated/igblast-TR.vdj
'''
......@@ -91,6 +92,9 @@ class Result(VDJ_Formatter):
self.populate()
def __contains__ (self, key):
return key in self.d
def __getitem__(self, key):
return self.d[key]
......@@ -98,6 +102,49 @@ class Result(VDJ_Formatter):
return str(self.d)
### IgReC
IGREC_LABELS = [
'Read id', 'locus',
'V id', 'V start', 'V end', 'V score',
'J id', 'J start', 'J end', 'J score',
]
class IgReC_Result(Result):
    r'''
    One tab-separated IgReC result line, with fields named by IGREC_LABELS.

    >>> lig = '\t'.join(['blabli4577', 'TRB', 'TRBV13*02', '1', '164', '0.58156', 'TRBJ1-5*01', '319', '367', '0.94'])
    >>> r = IgReC_Result(lig)
    >>> r['Read id']
    'blabli4577'
    >>> r.vdj[V]
    ['TRBV13*02']
    >>> r.vdj[J]
    ['TRBJ1-5*01']
    '''

    def parse(self, l):
        # Field names are fixed for IgReC: there is no header line to read them from.
        self.labels = IGREC_LABELS

        # Only lines that still contain a tab once stripped are kept.
        return l if '\t' in l.strip() else None

    def populate(self):
        # Each segment is stored as a one-element list holding the gene id.
        for segment, label in ((V, 'V id'), (J, 'J id')):
            self.vdj[segment] = [self[label]]
def header_igrec_results(ff_igrec):
    '''
    Generator over the lines of the IgReC results file `ff_igrec`.

    Each line is wrapped in an IgReC_Result. Yields pairs
    (read id with '_' replaced by ' ', result converted with to_vdj()).
    '''

    # The 'with' block closes the file once the generator is exhausted or
    # discarded. A plain 'for' loop ends the generator cleanly at end of file,
    # instead of letting StopIteration escape from an explicit next() call.
    with open(ff_igrec) as f:
        for l in f:
            result = IgReC_Result(l)
            yield result['Read id'].replace('_', ' '), result.to_vdj()
### MiXCR
......@@ -111,16 +158,20 @@ class MiXCR_Result(Result):
return None
def populate(self):
self.vdj[V] = [self['Best V hit']]
if self['Best D hit']:
self.vdj[D] = [self['Best D hit']]
self.vdj[J] = [self['Best J hit']]
self.vdj[V] = [self['bestVHit']]
if self['bestDHit']:
self.vdj[D] = [self['bestDHit']]
self.vdj[J] = [self['bestJHit']]
self.vdj[N1] = self['N. Seq. VDJunction']
self.vdj[N2] = self['N. Seq. DJJunction']
self.vdj[N] = self['N. Seq. VJJunction']
if 'nSeqVDJunction' in self:
self.vdj[N1] = self['nSeqVDJunction']
if 'nSeqDJJunction' in self:
self.vdj[N2] = self['nSeqDJJunction']
if 'nSeqVJJunction' in self:
self.vdj[N] = self['nSeqVJJunction']
self.vdj[JUNCTION] = self['AA. Seq. CDR3']
if 'aaSeqCDR3' in self:
self.vdj[JUNCTION] = self['aaSeqCDR3']
def header_mixcr_results(ff_mixcr):
......@@ -128,12 +179,12 @@ def header_mixcr_results(ff_mixcr):
f = open(ff_mixcr).__iter__()
mixcr_first_line = f.next()
globals()['mixcr_labels'] = mixcr_first_line.split('\t')
globals()['mixcr_labels'] = mixcr_first_line.rstrip().split('\t')
while True:
l = f.next()
l = f.next().rstrip()
result = MiXCR_Result(l)
yield result['Description R1'], result.to_vdj()
yield result['descrsR1'], result.to_vdj()
......@@ -354,6 +405,8 @@ if __name__ == '__main__':
if 'mixcr' in sys.argv[1]:
vdj.parse_from_gen(header_mixcr_results(sys.argv[1]))
elif 'igrec' in sys.argv[1]:
vdj.parse_from_gen(header_igrec_results(sys.argv[1]))
elif 'igblast' in sys.argv[2]:
vdj.parse_from_gen(header_igblast_results(sys.argv[1], sys.argv[2:]))
else:
......
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -g $VIDJIL_DIR/germline/homo-sapiens.g $VIDJIL_DATA/3344-bad-filtering.fa
$ Check that proper filtering is used
1: IGHV4-31.02
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c designations -g $VIDJIL_DIR/germline/homo-sapiens.g $VIDJIL_DATA/3344-bad-filtering.fa
$ Check that proper filtering is used
1: IGHV4-31.02
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c clones -z 2 -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.tsv
!LAUNCH: $VIDJIL_DIR/$EXEC -c clones -z 2 -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.tsv
$ There are four lines, all with tabs
4:
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c clones -z 2 -2 -3 -r 1 -g $VIDJIL_DIR/germline/homo-sapiens.g ../should-vdj-tests/Demo-X5.should-vdj.fa > /dev/null ; cat out/Demo-X5.should-vdj.tsv
!LAUNCH: $VIDJIL_DIR/$EXEC -c clones -z 2 -2 -3 -r 1 -g $VIDJIL_DIR/germline/homo-sapiens.g ../should-vdj-tests/Demo-X5.should-vdj.fa > /dev/null ; cat out/Demo-X5.should-vdj.tsv
$ There are 15 = 1 + 14 lines, all with tabs
15:
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 1 -z 0 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -z 0 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/fuse.py out/Stanford_S22.vidjil out/Stanford_S22.vidjil -o out/fused.data ; cat out/fused.data | python $VIDJIL_DIR/tools/format_json.py -1
$ Points list
1:"original_names": \[".*data//Stanford_S22.fasta", ".*data//Stanford_S22.fasta"\]
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -x 100 -r 1 -z 5 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/vidjil-to-fasta.py -o out/S22.fasta out/Stanford_S22.vidjil ;
!LAUNCH: $VIDJIL_DIR/$EXEC -x 100 -r 1 -z 5 -w 60 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; python $VIDJIL_DIR/tools/vidjil-to-fasta.py -o out/S22.fasta out/Stanford_S22.vidjil ;
!OUTPUT_FILE: out/S22.fasta
$ 5 representative sequences in the FASTA output file
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --alternative-genes 3 -c designations -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC --alternative-genes 3 -c designations -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ Presence of alternative:
1: "3alt"
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 10 -z 0 -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -s "######-######" $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -e 10 -z 0 -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -s "######-######" $VIDJIL_DATA/Stanford_S22.fasta
$ Germlines are custom
1: custom germlines
......@@ -26,7 +26,7 @@ $ First clone -- find the good number of reads
2:clone-001--.*--0000008
$ First clone -- find the good representative
1:clone-001--.*--lcl.FLN1FA001BQ9J5.1.-.88,232.-.4
1:GACAATTCCAAGAACACGCTGTACCTGCAAATGAACAGCCTGCGAGCCGAGGACACGGCCACCTATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTGGGGCCAGGGGATCCTGGTCACCGTCTCCTCAG
$ First clone -- find the good coverage
1:clone-001--.* 145 bp .62. of 232.0 bp.
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --analysis-cost '1, 2, 3, 4, 5' $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC --analysis-cost '1, 2, 3, 4, 5' $VIDJIL_DATA/Stanford_S22.fasta
!EXIT_CODE: 1
$Check that correct custom cost is used
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens-isoforms.g -r 1 -e 0.1 $VIDJIL_DATA/erg_debre.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens-isoforms.g -r 1 -e 0.1 $VIDJIL_DATA/erg_debre.fa
$ All sequences have been seen as different ERG recombinations
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens-isoforms.g -r 1 -e 0.1 $VIDJIL_DATA/ikaros_debre.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens-isoforms.g -r 1 -e 0.1 $VIDJIL_DATA/ikaros_debre.fa
$ All sequences have been seen as unique clone
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g -c clones --all -3 $VIDJIL_DATA/segment_lec.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g -c clones --all -3 $VIDJIL_DATA/segment_lec.fa
$ Extract up to 50bp windows (TRG)
1:windows up to 50bp
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 1e-2 -y 0 -g $VIDJIL_DIR/germline/homo-sapiens-cd.g $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -e 1e-2 -y 0 -g $VIDJIL_DIR/germline/homo-sapiens-cd.g $VIDJIL_DATA/Stanford_S22.fasta
$ Do not segment any read with SEG_METHOD_ONE on homo-sapiens-cd.g
1: CD .* -> .* 0 .* 0
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c designations -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/cdr3-stopcodon.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/cdr3-stopcodon.fa
!OUTPUT_FILE: out/cdr3-stopcodon.vidjil
$ Two identical junctions in JSON
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c designations -3 -E 1.0 -g $VIDJIL_DIR/germline ../should-vdj-tests/Demo-X5.should-vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -3 -E 1.0 -g $VIDJIL_DIR/germline ../should-vdj-tests/Demo-X5.should-vdj.fa
$ Detects a CDR3 on regular V(D)J recombinations
1: IGH SEG.* [{].*[}]
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline --all -2 $VIDJIL_DATA/2549.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline --all -2 $VIDJIL_DATA/2549.fa
$ The KmerSegmenter segments the chimera on xxx germline (-2)
1:unexpected .* -> .* 1
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake.fa
$ The KmerSegmenter segments the three chimera reads on PSEUDO_MAX12 germline (-2)
1:unexpected .* -> .* 3
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake-D.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake-D.fa
$ The KmerSegmenter segments the chimera reads on PSEUDO_MAX12 germline (-2)
f1:unexpected .* -> .* 2
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DATA/chimera-fake-VJ-trim.g $VIDJIL_DATA/chimera-fake-VJ.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DATA/chimera-fake-VJ-trim.g $VIDJIL_DATA/chimera-fake-VJ.fa
# Testing a custom (fake) .g with special parameters for the algorithm
$ The KmerSegmenter segments no read in Y because of the parameter
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DATA/chimera-fake-VJ.g $VIDJIL_DATA/chimera-fake-VJ.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DATA/chimera-fake-VJ.g $VIDJIL_DATA/chimera-fake-VJ.fa
# Testing a custom (fake) germlines.data
$ Report the species
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake-VJ.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DIR/germline -2 $VIDJIL_DATA/chimera-fake-VJ.fa
$ The KmerSegmenter segments the five chimera reads on PSEUDO_MAX12 germline (-2)
1:unexpected .* -> .* 5
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 100 --all -g $VIDJIL_DIR/germline -4 $VIDJIL_DATA/chimera-fake-half.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -e 100 --all -g $VIDJIL_DIR/germline -4 $VIDJIL_DATA/chimera-fake-half.fa
# TODO: a more precise modeling should give a e-value computation that could make this work even with -e 1
$ The KmerSegmenter segments the six chimera reads on PSEUDO_MAX1U germline (-4)
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 1e-2 -g $VIDJIL_DIR/germline $VIDJIL_DATA/chimera-fake.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -e 1e-2 -g $VIDJIL_DIR/germline $VIDJIL_DATA/chimera-fake.fa
$ Do not segment on any germline, even incomplete
1:junction detected in 0 reads
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -uU -g $VIDJIL_DIR/germline $VIDJIL_DATA/chimera-trg.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -uU -g $VIDJIL_DIR/germline $VIDJIL_DATA/chimera-trg.fa
$ Do not segment on IG/TR by chance
12:(IG|TR).* -> .* 0
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -k 14 -w 50 -c clones -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -y 3 -z 1 -r 1 $VIDJIL_DATA/clones_simul.fa
$ Junction extractions
1:found 25 windows in 66 reads
$ No clustering
1:==> 25 clones
$ Clone 1 output
1:Clone #001 .* 29 reads
$ Clone 2 output
1:Clone #002 .* 14 reads
$ Clone 3 output (sequencing error)
1:Clone #003 .* 1 reads
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -k 14 -w 50 -c clones -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -y 3 -z 0 -r 1 --cluster-epsilon 5 $VIDJIL_DATA/clones_simul.fa ; cat out/clones_simul.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -k 14 -w 50 -c clones -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -y 3 -z 1 -r 1 $VIDJIL_DATA/clones_simul.fa
$ Junction extractions
1:found 25 windows in 66 reads
$ No clustering
1:==> 25 clones
$ Clone 1 output
1:Clone #001 .* 29 reads
$ Clone 2 output
1:Clone #002 .* 14 reads
$ Clone 3 output (sequencing error)
1:Clone #003 .* 1 reads
!LAUNCH: $VIDJIL_DIR/$EXEC -k 14 -w 50 -c clones -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -y 3 -z 0 -r 1 --cluster-epsilon 5 $VIDJIL_DATA/clones_simul.fa ; cat out/clones_simul.vidjil
$ Window extractions
1:windows up to 50bp
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -K --all -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH+ -r 4 -b co $VIDJIL_DATA/D7-27--J1.fa ; cat out/co.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -K --all -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH+ -r 4 -b co $VIDJIL_DATA/D7-27--J1.fa ; cat out/co.vidjil
# Test D7-27 0/92/0 J1 non-recombination
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -x 2000 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --grep-reads GAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACT $VIDJIL_DATA/Stanford_S22.fasta ; cat out/seq/clone.fa-1
!LAUNCH: $VIDJIL_DIR/$EXEC -x 2000 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --grep-reads GAGAGGTTACTATGATAGTAGTGGTTATTACGGGGTAGGGCAGTACTACT $VIDJIL_DATA/Stanford_S22.fasta ; cat out/seq/clone.fa-1
# See also label-grep-reads.should-get
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -K --all -z 0 -s 10s -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa $VIDJIL_DATA/common-V-D.fa ; cat out/common-V-D.affects
!LAUNCH: $VIDJIL_DIR/$EXEC -K --all -z 0 -s 10s -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa -D $VIDJIL_DIR/germline/homo-sapiens/IGHD.fa $VIDJIL_DATA/common-V-D.fa ; cat out/common-V-D.affects
$ Segments the sequence
1: SEG .* -> .* 1
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --config ../data/config.vidjil $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --config ../data/config.vidjil $VIDJIL_DATA/Stanford_S22.fasta
$ Analyze 10 reads (--first-reads)
: in 10 reads
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 1 -x 10 -y 5 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -x 10 -y 5 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
# Testing -x/-y/-z options
......@@ -13,7 +13,7 @@ $ 1 clone is fully analyzed, and not more (-z 1)
2:clone-.* IGHV
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --config out/Stanford_S22.vidjil $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --config out/Stanford_S22.vidjil $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
$ Same tests than before, options taken from 'out/StanfordS22.vidjil' that was just generated
1: ==> 10 clones
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG ../should-vdj-tests/ext-nucleotides-N.should-vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG ../should-vdj-tests/ext-nucleotides-N.should-vdj.fa
$ Segments on TRG
1: TRG .* -> .* 1
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --all -c clones -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/test_representatives.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --all -c clones -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/test_representatives.fa
$ Three clones should be found
1:3 clones
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 1 -k 4 -w 20 -z 0 -c clones -V $VIDJIL_DATA/toy_V.fa -J $VIDJIL_DATA/toy_J.fa $VIDJIL_DATA/ambiguous_representative.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -r 1 -k 4 -w 20 -z 0 -c clones -V $VIDJIL_DATA/toy_V.fa -J $VIDJIL_DATA/toy_J.fa $VIDJIL_DATA/ambiguous_representative.fa
$ Short reads properly segmented
1:SEG_+.* -> .* 4
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -r 5 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/representative-few-reads.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -r 5 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/representative-few-reads.fa
$ Consensus sequence should span on the whole sequence
1: clone.*99% of 243
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-default.log
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG --consensus-on-random-sample $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-random.log
!LAUNCH: $VIDJIL_DIR/$EXEC -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG --consensus-on-longest-sequences $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-longest.log
!LAUNCH: $VIDJIL_DIR/$EXEC -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-random.log
!NO_LAUNCHER:
!LAUNCH: diff consensus-default.log consensus-random.log
!LAUNCH: diff consensus-longest.log consensus-random.log
!EXIT_CODE: 1
$ Output should differ: default has a consensus of 52bp (with the spurious insertion)
$ Output should differ: ReadQualityScore gives a consensus of 52bp (with the spurious insertion)
# Appears twice in the header of the consensus sequence and in the similarity matrix
2:^< .* 52 bp
1:^< CTTTT
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --header-sep FA -k 16 -z 0 -w 60 -r 5 -o out2 -uuu -U -v -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa $VIDJIL_DATA/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC --header-sep FA -k 16 -z 0 -w 60 -r 5 -o out2 -uuu -U -v -V $VIDJIL_DIR/germline/homo-sapiens/IGHV.fa -J $VIDJIL_DIR/germline/homo-sapiens/IGHJ.fa $VIDJIL_DATA/Stanford_S22.fasta ; tail out2/Stanford_S22.segmented.vdj.fa ; grep UNSEG out2/Stanford_S22.unsegmented.vdj.fa
# Testing uncommon and debug options
$ verbose (-v)
......
......@@ -7,9 +7,9 @@
# other reads. This is what is tested, we first put 10 sequences, then 5 and
# finally just the sequence of interest alone.
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -x 6 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -d -r 1 -w 60 -z 100 -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -d -r 1 -w 60 -z 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC -d -r 1 -w 60 -z 100 -x 6 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa \
; $VIDJIL_DIR/$EXEC -d -r 1 -w 60 -z 100 -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/buggy-D.fa
$ Three times the same window
3: TGTGCGGGATCTTCGTCCTCTTATCATAATAATGGTTTTTTGGCGGGGGAGTCATGGGGC
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -v -g $VIDJIL_DIR/germline -c designations $VIDJIL_DATA/segment_simul.fa | grep '^[>#]'
!LAUNCH: $VIDJIL_DIR/$EXEC -v -g $VIDJIL_DIR/germline -c designations $VIDJIL_DATA/segment_simul.fa | grep '^[>#]'
$ First sequence, easy segmentation (no error, few deletions at the windows, small N)
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -c designations $VIDJIL_DATA/segment_S22.fa | grep '^>' ; cat out/segment_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -c designations $VIDJIL_DATA/segment_S22.fa | grep '^>' ; cat out/segment_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ First sequence Stanford
# 164 175 195 203
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c segment -aAtl reads 2>&1
!LAUNCH: $VIDJIL_DIR/$EXEC -c segment -aAtl reads 2>&1
!EXIT_CODE: 1
$ Deprecated options
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --hello reads 2>&1
!LAUNCH: $VIDJIL_DIR/$EXEC --hello reads 2>&1
!EXIT_CODE: 109
$ Unknown option
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --analysis-filter 10 --all -x 30 -v -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC --analysis-filter 10 --all -x 30 -v -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Clone 13 is correctly analyzed
1:FLN1FA001EP9M2.* IGHV2-26.* 2/GAT.*GCC/8 IGHJ2
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c designations -x 2 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations -x 2 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Segments the good number of sequences in Stanford S22
2: >lcl
......
!LAUNCH: $VIDJIL_DIR/$EXEC -y 0 -x 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Analyze the good number of sequences in Stanford S22
1: found .* of 100 reads
!LAUNCH: $VIDJIL_DIR/$EXEC -y 0 -X 100 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Skip the good number of reads
1:Processing every 131th read
$ Analyze the good number of reads
1: found .* of 100 reads
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS $VIDJIL_DATA/Stanford_S22.fasta 2>&1
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DATA/Stanford_S22.fasta 2>&1
!EXIT_CODE: 1
$ Error, no germlines
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/Makefile $VIDJIL_DATA/Stanford_S22.fasta 2>&1
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/Makefile $VIDJIL_DATA/Stanford_S22.fasta 2>&1
!EXIT_CODE: 1
$ Error, incorrect *.g
......
!LAUNCH: ($LAUNCHER $VIDJIL_DIR/$EXEC $EXTRA $VIDJIL_DEFAULT_OPTIONS -c germlines -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRB,TRD,TRG,IGH,IGK,IGL --trim 100 -s '######-######' $VIDJIL_DATA/Stanford_S22.fasta)
!LAUNCH: ($LAUNCHER $VIDJIL_DIR/$EXEC $EXTRA -c germlines -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRB,TRD,TRG,IGH,IGK,IGL --trim 100 -s '######-######' $VIDJIL_DATA/Stanford_S22.fasta)
$ number of reads and kmers
1:13153 reads, 3020179 kmers
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline -g $VIDJIL_DIR/germline/homo-sapiens-isotypes.g $VIDJIL_DATA/isotypes.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline -g $VIDJIL_DIR/germline/homo-sapiens-isotypes.g $VIDJIL_DATA/isotypes.fa
$ Report the correct species
1: Homo sapiens .9606.
......
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/clones_simul.fa > out-fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -b clones_simul $VIDJIL_DATA/clones_simul.fa.gz > out-fa-gz
!NO_EXTRA:
diff -s -I '\#' -I 'index' -I 'Command line' out-fa out-fa-gz ; echo 'Diff: '$?; wc -l out-fa-gz
$ Identical output
1:Diff: 0
$ Testing out-fa-gz has approximately 90 lines
1: [89]. out-fa-gz
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/clones_simul.fa > out-fa ; $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -b clones_simul $VIDJIL_DATA/clones_simul.fa.gz > out-fa-gz ; diff -s -I '\#' -I 'index' -I 'Command line' out-fa out-fa-gz ; echo 'Diff: '$?; wc -l out-fa-gz
$ Identical output
1:Diff: 0
$ Testing out-fa-gz has approximately 90 lines
1: [89]. out-fa-gz
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -h
!LAUNCH: $VIDJIL_DIR/$EXEC -h
$ License
1:vidjil-algo is free software
......@@ -16,3 +16,25 @@ $ Do not display advanced options
$ Correct number of regular options
24:^..-
!NO_LAUNCHER:
!LAUNCH: $VIDJIL_DIR/$EXEC -H
$ License
1:vidjil-algo is free software
$ Check default costs
1:analysis.* "4, -6, -10, -1, -10"
1:clustering .* "1, -4, -4, 0, 0"
$ Show seeds
1: 9c.#########
1: 13s.#######-######
$ Display advanced options
: , experimental options
: custom Cost
$ Correct number of options
54:^..-
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -3 --max-clones 10 -y 5 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 4 -b limits $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/limits.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -3 --max-clones 10 -y 5 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 4 -b limits $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/limits.vidjil
# Test limits in the .vidjil json output
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c clones --all -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/segment_lec.fq > /dev/null ; cat out/segment_lec.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -c clones --all -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/segment_lec.fq > /dev/null ; cat out/segment_lec.vidjil
$ Window
1:"id": "GGGGTCTATTACTGTGCCACCTGGGCCTTATTATAAGAAACTCTTTGGCA"
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -3 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -w 60 -r 5 -e 10 -b data $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/data.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
!LAUNCH: $VIDJIL_DIR/$EXEC -3 -z 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -w 60 -r 5 -e 10 -b data $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/data.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ From homo-sapiens.g
1:"ref": "http://www.vidjil.org/germlines/germline-.*.tar.gz"
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -y 0 -k 14 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
!LAUNCH: $VIDJIL_DIR/$EXEC -y 0 -k 14 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta
$ Find the good number of windows in Stanford S22 (contiguous seed 14)
1: found 10796 windows in 13152 reads
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -z 0 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -x 100 -r 5 --label-file $VIDJIL_DATA/Stanford_S22.label $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -z 0 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -x 100 -r 5 --label-file $VIDJIL_DATA/Stanford_S22.label $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vidjil
$ Some clone has only one read, bypassing the -r 5 option, and the good label
1: clone-00..*0001-.* my-clone
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -e 10 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --label ACCGGTATTACT --label CAGCTGCTCCCC --label TGGGCCACTC --label ATCAACGCTGGCAATGGTAACACTAAATATTCACAGAAGTTCCAGGGCAGAGTCACCATTACCAGGGACACATACGCGAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCTCTGTATTACTGTGCGAGAGTGCGCAGCAGCTGGTCTGATGCTTTTGATTATCTGG $VIDJIL_DATA/clones_simul.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -e 10 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --label ACCGGTATTACT --label CAGCTGCTCCCC --label TGGGCCACTC --label ATCAACGCTGGCAATGGTAACACTAAATATTCACAGAAGTTCCAGGGCAGAGTCACCATTACCAGGGACACATACGCGAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCTCTGTATTACTGTGCGAGAGTGCGCAGCAGCTGGTCTGATGCTTTTGATTATCTGG $VIDJIL_DATA/clones_simul.fa
$ ACCGGTATTACT is found (in window and representative and in the command line)
3:ACCGGTATTACT
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -x 100 -z 0 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 5 --label ACTGTGCGAGAGTTGGAATTAGTAGTGGCTGGCCTGATTCCTGGGGCCAG $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vidjil
!LAUNCH: $VIDJIL_DIR/$EXEC -x 100 -z 0 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -r 5 --label ACTGTGCGAGAGTTGGAATTAGTAGTGGCTGGCCTGATTCCTGGGGCCAG $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vidjil
$ Some clone has only one read, bypassing the -r 5 option, and the good label
1: clone-00..*0001-.* --label
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.