Commit 5c3c6f6f authored by Mathieu Giraud's avatar Mathieu Giraud
Browse files

Merge branch 'dev' into feature-a/2732-cli

parents 75d83be3 cee6249a
Pipeline #28898 failed with stages
in 11 minutes and 36 seconds
......@@ -20,6 +20,7 @@ stages:
- release
- valgrind_unit
- valgrind_functional
- prepare_deploy
- deploy_prod
......@@ -36,9 +37,9 @@ test_germlines:
stage: test_germlines
script:
- make -C germline get-all-data
- make -C algo/tests should_germline
- make -C germline/tests
only:
- /^feature-g\/.*$/
- /^feature-.*g.*\/.*$/
# Algorithm
......@@ -47,31 +48,31 @@ test_algo_unit:
stage: test_unit
script: make -C algo unit
only:
- /^feature-a\/.*$/
- /^feature-.*a.*\/.*$/
test_tools_unit:
stage: test_unit
script: make -C tools/tests
only:
- /^feature-a\/.*$/
- /^feature-.*a.*\/.*$/
test_algo_should:
stage: test_functional
script: make -C algo should
only:
- /^feature-a\/.*$/
- /^feature-.*a.*\/.*$/
test_algo_shouldvdj:
stage: test_shouldvdj
script: make -C algo shouldvdj
only:
- /^feature-a\/.*$/
- /^feature-.*a.*\/.*$/
algo_valgrind_unit:
stage: valgrind_unit
script: make -C algo valgrind_unit
only:
- /^feature-a\/.*$/
- /^feature-.*a.*\/.*$/
tags:
- valgrind
......@@ -80,7 +81,7 @@ algo_valgrind_functional:
script: make -C algo valgrind_should
when: manual
only:
- /^feature-a\/.*$/
- /^feature-.*a.*\/.*$/
tags:
- valgrind
......@@ -89,21 +90,48 @@ prepare_release:
script: make -C algo release RELEASE_TAG='rc'
when: manual
only:
- /^feature-a\/.*$/
- /^feature-.*a.*\/.*$/
release:
stage: release
script: make -C algo release RELEASE_TAG=`cat algo/release`
when: manual
only:
- /^feature-a\/release$/
- /^feature-.*a.*\/release$/
artifacts:
paths:
- algo/releases/vidjil-algo-`cat algo/release`.tar.gz
- algo/releases/vidjil-algo-`cat algo/release`/vidjil-algo-`cat algo/release`_`uname -m`
allow_failure: false
tags:
- ubuntu-16.04-amd64
# Publishes the release artifacts: copies the source tarball and the prebuilt
# binary to every destination listed in $ALGO_REPOSITORIES, refreshes the
# "latest" symlinks on $VIDJIL_WWW, then has $VIDJIL_BONSAI forward the
# release files to $VIDJIL_BONSAI_PROD.
copy_release:
  stage: prepare_deploy
  script:
    - |
      for repo in $ALGO_REPOSITORIES; do
        echo "Copying release to $repo"
        scp algo/releases/vidjil-algo*.tar.gz algo/releases/vidjil-algo*/vidjil-algo-*_* $repo
      done
      release_name=$(cat algo/release)
      ssh $VIDJIL_WWW "cd /var/www/html/releases; ln -sf vidjil-algo-${release_name}.tar.gz vidjil-latest.tgz; ln -sf vidjil-algo-${release_name}_x86_64 vidjil-latest_x86_64"
      # The destination host is a CI variable, expanded by the runner before
      # the command is sent (the original lacked the leading `$`, so scp was
      # given the literal host name "VIDJIL_BONSAI_PROD").
      ssh $VIDJIL_BONSAI "cd /bio1/www/html/vidjil; scp vidjil-algo-${release_name}* $VIDJIL_BONSAI_PROD:/bio1/www/html/vidjil"
  only:
    - /^feature-.*a.*\/release$/
# Installs the release on $VIDJIL_SERVER: unpacks the tarball found in
# releases/vidjil, builds it, and points the `next` symlink at the new tree.
deploy_release_prod:
  stage: deploy_prod
  only:
    - /^feature-.*a.*\/release$/
  script:
    - |
      release_name=$(cat algo/release)
      # Steps are chained with `&&` so that a failed extraction or build stops
      # the sequence: `next` is only repointed at a tree that built, and the
      # ssh exit status (hence the job status) reflects the failure.
      # `ln -n` replaces `next` itself when it already is a symlink to a
      # directory, instead of creating the new link inside the old release.
      ssh $VIDJIL_SERVER "cd releases/vidjil \
        && tar xvzf vidjil-algo-${release_name}.tar.gz \
        && make -C vidjil-algo-${release_name} \
        && ln -sfn vidjil-algo-${release_name} next"
# Client
test_browser_unit:
......@@ -113,8 +141,8 @@ test_browser_unit:
paths:
- browser/
only:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
- /^feature-.*c.*\/.*$/
- /^hotfix-.*c.*\/.*$/
- prod-client
- dev
tags:
......@@ -131,8 +159,8 @@ test_browser-functional:
paths:
- browser/
only:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
- /^feature-.*c.*\/.*$/
- /^hotfix-.*c.*\/.*$/
- prod-client
tags:
- web
......@@ -149,8 +177,8 @@ test_browser-functional-external:
paths:
- browser/
only:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
- /^feature-.*c.*\/.*$/
- /^hotfix-.*c.*\/.*$/
- prod-client
tags:
- web
......@@ -159,13 +187,36 @@ code_quality:
stage: test_quality
script: make -C browser quality
only:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
- /^feature-.*c.*\/.*$/
- /^hotfix-.*c.*\/.*$/
- prod-client
- dev
tags:
- web
# Server
# Server unit tests, run in the `test_unit` stage inside a virtualenv whose
# directory name is taken from $CI_BUILD_REF_SLUG.
test_server_unit:
stage: test_unit
script:
# Create and activate the virtualenv, then install the Python requirements.
- virtualenv $CI_BUILD_REF_SLUG
- source $CI_BUILD_REF_SLUG/bin/activate
- pip install -r requirements.txt
# Prepare a standalone web2py and start the fuse server before the tests.
- make -C server install_web2py_standalone
- make -C server launch_fuse_server
# If the tests fail, the fuse server is still stopped, and the trailing
# `false` makes this step (and therefore the job) fail.
- make unit_server || (make -C server kill_fuse_server; deactivate; false)
# Normal path: stop the fuse server and leave the virtualenv.
- make -C server kill_fuse_server
- deactivate
only:
# Branches named feature-…/… or hotfix-…/… with an `s` somewhere between the
# `feature-`/`hotfix-` prefix and a `/`, plus the prod-server and dev branches.
- /^feature-.*s.*\/.*$/
- /^hotfix-.*s.*\/.*$/
- prod-server
- dev
tags:
- web
# Deployment
deploy_review:
stage: deploy_review
script:
......@@ -176,19 +227,24 @@ deploy_review:
url: http://$CI_BUILD_REF_SLUG.ci.vidjil.org/?data=analysis-example.vidjil
on_stop: stop_deploy_review
only:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
- /^feature-.*c.*\/.*$/
- /^hotfix-.*c.*\/.*$/
tags:
- web
deploy_prod:
stage: deploy_prod
script:
- rsync -av --delete browser $PROD_CLIENT_SERVER:$PROD_CLIENT_PATH
- ssh $PROD_CLIENT_SERVER "cp -pr $PROD_CLIENT_PATH_TEMPLATE/browser $PROD_CLIENT_PATH"
- ssh $PROD_CLIENT_SERVER "
cd $PROD_CLIENT_PATH;
git fetch
&& git reset --hard origin/prod-client
&& make -C browser
&& make -C browser sha1
&& cp doc/analysis-example.vidjil browser/"
environment:
name: production
url: http://app.vidjil.org/?data=analysis-example.vidjil
url: http://$PROD_CLIENT_SERVER/?data=analysis-example.vidjil
only:
- prod-client
tags:
......@@ -205,7 +261,7 @@ stop_deploy_review:
name: review/$CI_BUILD_REF_NAME
action: stop
only:
- /^feature-[cw]\/.*$/
- /^hotfix-[cw]\/.*$/
- /^feature-.*c.*\/.*$/
- /^hotfix-.*c.*\/.*$/
tags:
- web
\ No newline at end of file
- web
......@@ -262,7 +262,7 @@ RELEASE_ALGO = src/
RELEASE_TOOLS = $(wildcard ../tools/*.py)
RELEASE_MAKE = ../tools/tests/Makefile ../doc/Makefile
RELEASE_TESTS = ../doc/format-analysis.org $(wildcard ../doc/analysis-example.vidjil) $(wildcard ../tools/tests/*.should_get) ../tools/tests/should-to-tap.sh ../tools/diff_json.sh ../demo/get-sequences ../demo/Demo-X5.fa ../demo/Makefile
RELEASE_GERMLINES = ../germline/Makefile ../germline/germline_id ../germline/get-saved-germline ../germline/get-germline ../germline/split-from-imgt.py $(wildcard ../germline/*.g) ../germline/revcomp-fasta.py ../germline/fasta.py
RELEASE_GERMLINES = ../germline/Makefile ../germline/germline_id ../germline/get-saved-germline ../germline/get-germline ../germline/split-from-imgt.py $(wildcard ../germline/*.g) ../germline/revcomp-fasta.py ../germline/fasta.py ../germline/homo-sapiens/IKZF*fa ../germline/homo-sapiens/ERG*fa
RELEASE_HELP = ../doc/algo.org ../doc/locus.org ../doc/dev.org ../doc/should-vdj.org ../doc/credits.org ../doc/CHANGELOG ../doc/LICENSE ../README.org ../INSTALL.org
RELEASE_FILES = $(RELEASE_TOOLS) $(RELEASE_TESTS) $(RELEASE_MAKE) $(RELEASE_GERMLINES) $(RELEASE_HELP)
......
......@@ -289,7 +289,7 @@ string KmerAffectAnalyser::toString() const{
}
string KmerAffectAnalyser::toStringValues() const{
string kmer;
string kmer;
for (size_t i = 0; i < affectations.size(); i++) {
kmer += affectations[i].toStringValues();
}
......
......@@ -8,7 +8,7 @@
#include <queue>
#include <utility>
#include "tools.h"
#include <set>
#include <map>
using namespace std;
......@@ -80,6 +80,18 @@ public:
*/
virtual void *next(void *state, char c) = 0;
/**
* This function returns the number of times every Info appears in the
* given sequence.
* @param seq: The sequence to be queried. It is passed through
* the automaton to identify matching k-mers and extract
* the corresponding Info.
* @param no_revcomp: unused (ignored by the PointerACAutomaton
* implementation).
* @param seed: unused (ignored by the PointerACAutomaton implementation).
* @return a map containing the number of occurrences per Info.
*/
virtual map<Info,int> getMultiResults
(const seqtype &seq, bool no_revcomp=false, string seed = "") = 0;
};
#define DNA_ALPHABET_SIZE 4
......@@ -111,32 +123,38 @@ public:
template <class Info>
class PointerACAutomaton: public AbstractACAutomaton<Info> {
private:
bool multiple_info;
void free_automaton(pointer_state<Info> *);
void init(string seed, bool revcomp);
void init(string seed, bool revcomp, bool multiple_info);
public:
using IKmerStore<Info>::insert;
/**
* @param revcomp: should the revcomp of the sequences also be indexed
* @param multiple_info: should all the Info be stored in the automaton or
* only a single value summarizing them all.
*
* The default seed will be a contiguous seed of 10 letters. But the seed
* can be specified when inserting sequences. This should be the preferred
* choice as one may want to have different seeds depending on the
* sequences.
*/
PointerACAutomaton(bool revcomp=false);
PointerACAutomaton(bool revcomp=false, bool multiple_info=false);
/**
* @param seed: the seed to be used for indexing
* @param revcomp: indexing revcomp too ?
* @param multiple_info: storing all info?
*/
PointerACAutomaton(string seed, bool revcomp=false);
PointerACAutomaton(string seed, bool revcomp=false, bool multiple_info=false);
/**
* @param k: the size of the contiguous seed
* @param revcomp: indexing revcomp too ?
* @param multiple_info: storing all info?
*/
PointerACAutomaton(int k, bool revcomp=false);
PointerACAutomaton(int k, bool revcomp=false, bool multiple_info=false);
~PointerACAutomaton();
......@@ -183,6 +201,8 @@ public:
// From IKmerStore
vector<Info> getResults(const seqtype &seq, bool no_revcomp=false, string seed = "");
map<Info,int> getMultiResults(const seqtype &seq, bool no_revcomp=false, string seed = "");
Info& get(seqtype &word) ;
Info& operator[](seqtype& word);
......
......@@ -3,7 +3,7 @@
#include "automaton.h"
#include <stack>
#include <set>
//////////////////// IMPLEMENTATIONS ////////////////////
template <class Info>
......@@ -58,22 +58,22 @@ void *AbstractACAutomaton<Info>::goto_state(const string &seq, void *starting_st
///////////////////////
template <class Info>
PointerACAutomaton<Info>::PointerACAutomaton(bool revcomp):AbstractACAutomaton<Info>(){
init("##########",revcomp);
PointerACAutomaton<Info>::PointerACAutomaton(bool revcomp, bool multiple_info):AbstractACAutomaton<Info>(){
init("##########",revcomp, multiple_info);
}
template <class Info>
PointerACAutomaton<Info>::PointerACAutomaton(string seed, bool revcomp):AbstractACAutomaton<Info>() {
init(seed, revcomp);
PointerACAutomaton<Info>::PointerACAutomaton(string seed, bool revcomp, bool multiple_info):AbstractACAutomaton<Info>() {
init(seed, revcomp, multiple_info);
}
template <class Info>
PointerACAutomaton<Info>::PointerACAutomaton(int k, bool revcomp):AbstractACAutomaton<Info>() {
init(seed_contiguous(k), revcomp);
PointerACAutomaton<Info>::PointerACAutomaton(int k, bool revcomp, bool multiple_info):AbstractACAutomaton<Info>() {
init(seed_contiguous(k), revcomp, multiple_info);
}
template <class Info>
void PointerACAutomaton<Info>::init(string seed, bool revcomp) {
void PointerACAutomaton<Info>::init(string seed, bool revcomp, bool multiple_info) {
if (revcomp && Info::hasRevcompSymetry()) {
cerr << "PointerACAutomaton cannot deal with revcomp symmetry at the moment."
<< endl;
......@@ -86,6 +86,7 @@ void PointerACAutomaton<Info>::init(string seed, bool revcomp) {
this->s = seed.length();
this->revcomp_indexed = revcomp;
this->max_size_indexing = 0;
this->multiple_info = multiple_info;
}
template <class Info>
......@@ -182,12 +183,16 @@ void PointerACAutomaton<Info>::insert(const seqtype &seq, Info info) {
}
}
state->is_final = true;
assert(info.getLength() <= MAX_KMER_SIZE);
if (state->informations.front().isNull()) {
this->nb_kmers_inserted++;
this->kmers_inserted[info]++;
state->informations.front() += info;
} else {
if (this->multiple_info)
state->informations.push_back(info);
else
state->informations.front() += info;
}
state->informations.front() += info;
}
template <class Info>
......@@ -270,6 +275,32 @@ vector<Info> PointerACAutomaton<Info>::getResults(const seqtype &seq, bool no_re
return result;
}
/**
 * Runs seq through the automaton, one letter at a time, and counts for each
 * Info the number of positions at which it is attached to the state reached.
 *
 * At every position the Info values of the reached state are de-duplicated
 * (through a set) before being counted, so a given Info contributes at most
 * one to its count per position.
 * NOTE(review): every value stored in the state's `informations` is counted
 * as is; if states hold a null placeholder Info, it ends up in the map too --
 * confirm that callers expect this.
 *
 * @param seq: the sequence to be queried.
 * @param no_revcomp: unused.
 * @param seed: unused.
 * @return a map giving, for each Info met, its number of occurrences.
 */
template <class Info>
map<Info, int> PointerACAutomaton<Info>::getMultiResults(const seqtype &seq, bool no_revcomp, string seed) {
  UNUSED(no_revcomp);
  UNUSED(seed);

  pointer_state<Info>* current_state = getInitialState();
  const size_t seq_len = seq.length();
  map<Info, int> results;

  for (size_t i = 0; i < seq_len; ++i) {
    current_state = (pointer_state<Info> *)next(current_state, seq[i]);

    // Distinct Info values carried by the state reached after seq[i].
    set<Info> informations(current_state->informations.begin(),
                           current_state->informations.end());

    for (auto const& info : informations) {
      // operator[] value-initialises a missing counter to 0, so a single
      // lookup covers both the "first time seen" and "already seen" cases.
      ++results[info];
    }
  }
  return results;
}
template <class Info>
Info& PointerACAutomaton<Info>::get(seqtype &word) {
pointer_state<Info> *state = (pointer_state<Info> *)this->goto_state(word);
......
......@@ -65,7 +65,7 @@ void comp_matrix::compare(ostream &out, Cost cluster_cost)
j2=it1->first;
DynProg dp = DynProg(j1, j2, DynProg::Local, compareCost);
int score=dp.compute();
int distance = max(j1.size(), j2.size())-score;
int distance = max(j1.size(), j2.size()) * compareCost.match - score;
m[c2][c1]=distance;
m[c1][c2]=distance;
c1++;
......@@ -88,8 +88,8 @@ void comp_matrix::load(string file){
for(unsigned int i=0; i<sort_clones.size();i++){
in_comp.read(tampon, sort_clones.size()*sizeof(char));
for(unsigned int j=0;j<sort_clones.size(); j++){
m[i][j]=tampon[j];
}
m[i][j]=tampon[j];
}
}
free(tampon);
......@@ -157,25 +157,25 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
size_t j = 0;
for (list <pair<junction,size_t> >::const_iterator it1 = sort_clones.begin();
(it1 != sort_clones.end()) & (j<matrix_size); ++it1)
{
j++;
j2=it1->first;
int distance = (int)m[c2][c1];
(it1 != sort_clones.end()) & (j<matrix_size); ++it1)
{
j++;
j2=it1->first;
int distance = (int)m[c2][c1];
if (distance <= epsilon){
if (distance <= epsilon){
neighbor[j1].push_back(j2);
}
c1++;
c++;
}
c1++;
c++;
}//fin it1
c2++;
c1=0;
}//fin it0
c2++;
c1=0;
}//fin it0
/////////////////////////
//Forced - edges
if (forced_edges.size())
/////////////////////////
//Forced - edges
if (forced_edges.size())
{
ifstream fe(forced_edges.c_str());
......@@ -243,7 +243,7 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
nVoisins=0;
for (list<string>::iterator it1 = voisins1.begin();
it1 != voisins1.end(); ++it1 ) nVoisins+=count[*it1];
it1 != voisins1.end(); ++it1 ) nVoisins+=count[*it1];
if (nVoisins<minPts){
//noise
......@@ -268,7 +268,7 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
nVoisins=0;
for (list<string>::iterator it1 = voisins2.begin();it1 != voisins2.end();
++it1 )nVoisins+=count[*it1];
++it1 )nVoisins+=count[*it1];
//si la junction possede assez de voisins
if (nVoisins>=minPts){
//on ajoute ses voisins a la liste des voisins du cluster
......@@ -308,9 +308,9 @@ list<list<junction> > comp_matrix::cluster(string forced_edges, int w, ostream
list< string > c2;
for (list< pair<int,string> >::iterator c_it = c.begin(); c_it != c.end();
++c_it ){
++c_it ){
pair<int,string> pair=*c_it;
c2.push_back(pair.second);
c2.push_back(pair.second);
}
c2.reverse();
cluster.push_back(c2);
......
......@@ -38,7 +38,7 @@ SimilarityMatrix compare_all(list <Sequence> sequences,
for (list <Sequence>::const_iterator it1 = sequences.begin();
it1 != sequences.end(); ++it1 )
num = 0 ;
num = 0 ;
list<string>::const_iterator itLabel = sequence_names.begin();
for (list <Sequence>::const_iterator it1 = sequences.begin();
......
#include "filter.h"
/*
  Indexes every sequence of `origin` in a newly allocated PointerACAutomaton
  (no revcomp indexing, multiple Info per state) and returns it together with
  a vector of group boundaries.

  Consecutive sequences whose label yields the same extractGeneName() value
  form one group. Group number g (starting at 1) is inserted with the
  one-character label char(g). The returned vector holds the index of the
  first sequence of each group, followed by origin.size().

  Returns nullptr when `origin` is empty or when more than 127 groups would
  be needed. Otherwise the pair, the vector and the automaton are allocated
  with `new` and are not released here: the caller is responsible for them.
*/
pair<vector<int>*, AbstractACAutomaton<KmerAffect>*>* buildACAutomatonToFilterBioReader
  (BioReader &origin, string seed){
  typedef pair<vector<int>*, AbstractACAutomaton<KmerAffect>*> IndexedAutomaton;

  if(origin.size() < 1){
    return nullptr;
  }

  IndexedAutomaton* built = new IndexedAutomaton();
  PointerACAutomaton<KmerAffect>* automaton =
    new PointerACAutomaton<KmerAffect>(seed, false, true);
  vector<int>* groupStarts = new vector<int>();

  // The first sequence always opens group 1.
  int groupCode = 1;
  automaton->insert(origin.sequence(0), std::string(1, char(groupCode)), true, 0, seed);
  groupStarts->push_back(0);
  string lastGene = extractGeneName(origin.label(0));

  for(int idx = 1; idx < origin.size(); ++idx){
    string gene = extractGeneName(origin.label(idx));

    if(gene != lastGene){
      // A different gene name opens a new group at this index.
      groupStarts->push_back(idx);
      ++groupCode;

      // Group codes must fit in a single char no greater than 127.
      if(groupCode > 127){
        delete built;
        delete automaton;
        delete groupStarts;
        return nullptr;
      }
    }

    automaton->insert(origin.sequence(idx), std::string(1, char(groupCode)), true, 0, seed);
    lastGene = gene;
  }

  // Closing boundary, so that group g spans [groupStarts[g-1], groupStarts[g]).
  groupStarts->push_back(origin.size());
  automaton->build_failure_functions();

  built->first = groupStarts;
  built->second = automaton;
  return built;
}
/*
Takes a built automaton and a vector of indexes and builds a BioReader
based on them.
*/
BioReader filterBioReaderWithACAutomaton(
pair<vector<int>*, AbstractACAutomaton<KmerAffect>*>* idxAho,
BioReader &origin, seqtype &seq,
int kmer_threshold){
BioReader result;
AbstractACAutomaton<KmerAffect>* aho;
vector<int>* indexes;
map<KmerAffect, int> mapAho;
KmerAffect tmpKmer;
unsigned int asciiNum;
char asciiChar;
if(!idxAho || kmer_threshold < 0){
return origin;
}
indexes = idxAho->first;
aho = idxAho->second;
mapAho = aho->getMultiResults(seq);
//All k-mers selected : iterate over all map
if(kmer_threshold == ALL_KMERS_VALUE || kmer_threshold > (int)mapAho.size()){
for(auto const mx: mapAho){
tmpKmer = mx.first;
asciiChar = tmpKmer.getLabel().at(0);
asciiNum = int(asciiChar);
if(asciiNum > indexes->size() - 1){
break;
}
for(int i = indexes->at(asciiNum - 1); i < indexes->at(asciiNum); ++i){
result.add(origin.read(i));
}
}
/* The most significant k-mers selected : iterate over a portion of the
sorted map */
}else{