Commit de494035 authored by Mathieu Giraud's avatar Mathieu Giraud

Merge branch 'feature-a/1409-export-the-best-v-d-j' into 'dev'

From feature-a/1409-export-the-best-v-d-j into dev

Closes #1409

See merge request !255
parents 9b403ac1 b724e0f0
Pipeline #34217 passed with stages
in 44 minutes and 21 seconds
......@@ -70,7 +70,7 @@ string AlignBox::getSequence(string sequence) {
return sequence.substr(start, end-start+1);
}
void AlignBox::addToJson(json &seg) {
void AlignBox::addToJson(json &seg, int alternative_genes) {
json j;
......@@ -89,6 +89,14 @@ void AlignBox::addToJson(json &seg) {
}
seg[key] = j ;
/*Export the N best genes if threshold parameter is specified*/
if(rep && !this->score.empty() && rep->size() <= (int)this->score.size() && alternative_genes > 0 && alternative_genes <= (int)this->score.size()){
seg[key + "alt"] = json::array();
for(int i = 0; i < alternative_genes;++i){
int r = this->score[i].second;
seg[key + "alt"].push_back(json::object({{"name",rep->label(r)}}));
}
}
}
......@@ -926,7 +934,7 @@ void align_against_collection(string &read, BioReader &rep, int forbidden_rep_id
box->del_right = reverse_both ? best_best_j : box->ref.size() - best_best_j - 1;
box->del_left = best_first_j;
box->start = best_first_i;
box->rep = &rep;
box->score = score_r;
#ifdef DEBUG_SEGMENT
......@@ -947,12 +955,13 @@ string format_del(int deletions)
return deletions ? *"(" + string_of_int(deletions) + " del)" : "" ;
}
FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c, double threshold, double multiplier, int kmer_threshold)
FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,
double threshold, double multiplier, int kmer_threshold, int alternative_genes)
{
box_V = new AlignBox("5");
box_D = new AlignBox("4");
box_J = new AlignBox("3");
this->alternative_genes = alternative_genes;
segmented = false;
dSegmented = false;
because = NOT_PROCESSED ;
......@@ -1049,9 +1058,9 @@ FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,
/* Regular 53 Segmentation */
if(kmer_threshold != NO_LIMIT_VALUE){
FilterWithACAutomaton* f = germline->getFilter_5();
BioReader filtered = f->filterBioReaderWithACAutomaton(sequence_or_rc, kmer_threshold);
align_against_collection(sequence_or_rc, filtered, NO_FORBIDDEN_ID, reverse_V, reverse_V, false,
FilterWithACAutomaton* f = germline->getFilter_5();
this->filtered_rep_5 = f->filterBioReaderWithACAutomaton(sequence_or_rc, kmer_threshold);
align_against_collection(sequence_or_rc, this->filtered_rep_5, NO_FORBIDDEN_ID, reverse_V, reverse_V, false,
box_V, segment_cost, false, standardised_threshold_evalue);
}else{
align_against_collection(sequence_or_rc, germline->rep_5, NO_FORBIDDEN_ID, reverse_V, reverse_V, false,
......@@ -1333,7 +1342,7 @@ json FineSegmenter::toJson(){
for (AlignBox *box: boxes)
{
box->addToJson(seg);
box->addToJson(seg, this->alternative_genes);
}
if (isSegmented()) {
......
......@@ -91,6 +91,7 @@ const char* const segmented_mesg[] = { "?",
class AlignBox
{
public:
BioReader *rep;
string key;
string color;
......@@ -101,7 +102,7 @@ class AlignBox
AlignBox(string key = "", string color="");
string getSequence(string sequence);
void addToJson(json &seg);
void addToJson(json &seg, int alternative_genes=NO_LIMIT_VALUE);
/**
* Returns 'V', 'D', 'J', or possibly '5', '4', '3', '?', depending on the ref_label and on the key
......@@ -355,6 +356,9 @@ class KmerMultiSegmenter
class FineSegmenter : public Segmenter
{
private:
BioReader filtered_rep_5;
int alternative_genes;
public:
vector<pair<int, int> > score_V;
vector<pair<int, int> > score_D;
......@@ -375,7 +379,7 @@ class FineSegmenter : public Segmenter
*/
FineSegmenter(Sequence seq, Germline *germline, Cost segment_cost,
double threshold = THRESHOLD_NB_EXPECTED, double multiplier=1.0,
int kmer_threshold=NO_LIMIT_VALUE);
int kmer_threshold=NO_LIMIT_VALUE, int alternative_genes=NO_LIMIT_VALUE);
~FineSegmenter();
......
!REQUIRES: python $VIDJIL_DIR/tools/check_python_version.py
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --alternative-genes 3 -c segment -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.vidjil | python $VIDJIL_DIR/tools/format_json.py -1
$ Presence of alternative:
1: "3alt"
1: "name": "IGHJ4.02"
1: "name": "IGHJ4.01"
1: "name": "IGHJ5.01"
1: "4alt"
1: "name": "IGHD3-22.01"
1: "name": "IGHD3-10.02"
1: "name": "IGHD3-10.01"
1: "5alt"
1: "name": "IGHV3-48.01"
1: "name": "IGHV3-48.02"
1: "name": "IGHV3-48.04"
$ Correct number of "name" fields (1 clone + 3*1 'best' genes + 3*3 'alternative' genes)
w13:"name"
......@@ -17,4 +17,4 @@ $ Display advanced options
: custom Cost
$ Correct number of options
B48:^ -
B49:^ -
......@@ -477,7 +477,9 @@ int main (int argc, char **argv)
app.add_flag("-3,--cdr3", detect_CDR3, "CDR3/JUNCTION detection (requires gapped V/J germlines)")
-> group(group);
int alternative_genes = 0;
app.add_option("--alternative-genes", alternative_genes, "number of alternative V(D)J genes to show beyond the most similar one", true)
-> group(group) -> level();
// ----------------------------------------------------------------------------------------------------------------------
group = "Additional clustering (third pass, experimental)" ;
......@@ -1366,7 +1368,7 @@ int main (int argc, char **argv)
// FineSegmenter
size_t nb_fine_segmented = (size_t) max_clones; // When -1, it will become the max value.
nb_fine_segmented = MIN(nb_fine_segmented, sort_clones.size());
FineSegmenter seg(representative, segmented_germline, segment_cost, expected_value, nb_fine_segmented, kmer_threshold);
FineSegmenter seg(representative, segmented_germline, segment_cost, expected_value, nb_fine_segmented, kmer_threshold, alternative_genes);
if (segmented_germline->seg_method == SEG_METHOD_543)
seg.FineSegmentD(segmented_germline, several_D, expected_value_D, nb_fine_segmented);
......@@ -1587,7 +1589,7 @@ int main (int argc, char **argv)
KmerSegmenter *seg = kmseg.the_kseg ;
Germline *germline = seg->segmented_germline ;
FineSegmenter s(seq, germline, segment_cost, expected_value, nb_reads_for_evalue, kmer_threshold);
FineSegmenter s(seq, germline, segment_cost, expected_value, nb_reads_for_evalue, kmer_threshold, alternative_genes);
json json_clone;
json_clone["id"] = seq.label;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment