Attention une mise à jour du serveur va être effectuée le lundi 17 mai entre 13h et 13h30. Cette mise à jour va générer une interruption du service de quelques minutes.

Commit fe306e43 authored by Mikaël Salson's avatar Mikaël Salson

Merge branch 'feature-a/4643-show-alignments' into 'dev'

new option --show-junction

Closes #4643

See merge request !886
parents 35efa87f c5685935
Pipeline #210984 passed with stages
in 9 minutes and 34 seconds
......@@ -124,10 +124,7 @@ void AlignBox::addToOutput(CloneOutput *clone, int alternative_genes) {
}
}
#define NO_COLOR "\033[0m"
int AlignBox::posInRef(int i) {
int AlignBox::posInRef(int i) const {
// Works now only for V/J boxes
if (del_left >= 0) // J
......@@ -139,15 +136,15 @@ int AlignBox::posInRef(int i) {
return -99;
}
string AlignBox::refToString(int from, int to) {
string AlignBox::refToString(int from, int to) const {
stringstream s;
s << ref_label << " \t" ;
s << left << setw(SHOW_NAME_WIDTH) << ref_label << " " ;
int j = posInRef(from);
s << j << "\t" ;
s << right << setw(4) << j << " " ;
if (from > start)
s << color;
......@@ -172,11 +169,37 @@ string AlignBox::refToString(int from, int to) {
if (to < end)
s << NO_COLOR;
s << "\t" << j ;
s << right << setw(4) << j ;
return s.str();
}
void show_colored_read(ostream &out, Sequence seq, const AlignBox *box_V, const AlignBox *box_J, int start_5, int end_3)
{
out << left << setw(SHOW_NAME_WIDTH) << seq.label.substr(0,SHOW_NAME_WIDTH) << " "
<< right << setw(4) << start_5 << " " ;
out << V_COLOR << seq.sequence.substr(start_5, box_V->end - start_5 + 1)
<< NO_COLOR
<< seq.sequence.substr(box_V->end+1, (box_J->start - 1) - (box_V->end + 1) +1)
<< J_COLOR
<< seq.sequence.substr(box_J->start, end_3 - box_J->start + 1)
<< NO_COLOR ;
out << right << setw(4) << end_3 << endl ;
}
void show_colored_read_germlines(ostream &out, Sequence seq, const AlignBox *box_V, const AlignBox *box_J, int max_gene_align)
{
int align_V_length = min(max_gene_align, box_V->end - box_V->start + 1);
int align_J_length = min(max_gene_align, (int)seq.sequence.size() - box_J->start + 1);
int start_V = box_V->end - align_V_length + 1;
int end_J = box_J->start + align_J_length - 1;
show_colored_read(out, seq, box_V, box_J, start_V, end_J);
out << box_V->refToString(start_V, end_J) << " " << *box_V << endl ;
out << box_J->refToString(start_V, end_J) << " " << *box_J << endl ;
}
ostream &operator<<(ostream &out, const AlignBox &box)
......@@ -446,9 +469,9 @@ KmerSegmenter::KmerSegmenter() { kaa = 0 ; }
KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold, double multiplier)
{
box_V = new AlignBox();
box_V = new AlignBox("5", V_COLOR);
box_D = new AlignBox();
box_J = new AlignBox();
box_J = new AlignBox("3", J_COLOR);
CDR3start = -1;
CDR3end = -1;
......@@ -1390,6 +1413,10 @@ void FineSegmenter::checkWarnings(CloneOutput *clone, bool phony)
}
}
void FineSegmenter::showAlignments(ostream &out){
show_colored_read_germlines(out, getSequence(), box_V, box_J, SHOW_MAX_GENE_ALIGNMENT);
}
void FineSegmenter::toOutput(CloneOutput *clone){
json seg;
......
......@@ -57,6 +57,12 @@
*/
#define FRACTION_ALIGNED_AT_WORST .5 /* Fraction of the sequence that should be aligned before deactivating the heuristics */
#define SHOW_NAME_WIDTH 15
#define SHOW_MAX_GENE_ALIGNMENT 20
#define V_COLOR "\033[1;42m"
#define J_COLOR "\033[1;43m"
#define NO_COLOR "\033[0m"
using namespace std;
using json = nlohmann::json;
......@@ -145,13 +151,13 @@ class AlignBox
* Returns the position in the reference string corresponding to the position in the read
* Preliminary implementation, only works for the start of V and J boxes
*/
int posInRef(int i);
int posInRef(int i) const;
/**
* Format the reference string in a given range position, possibly adding the ANSI colors
* where there is the alignment
*/
string refToString(int from, int to);
string refToString(int from, int to) const;
/* Identifier, label and sequence of the reference sequence (the best one) */
int ref_nb;
......@@ -162,6 +168,12 @@ class AlignBox
vector<pair<int, int> > score;
};
/**
* Show sequence, and possibly alignments against the germline genes, possibly adding the ANSI colors
*/
void show_colored_read(ostream &out, Sequence seq, const AlignBox *boxV, const AlignBox *boxJ, int start_5, int end_3);
void show_colored_read_germlines(ostream &out, Sequence seq, const AlignBox *box_V, const AlignBox *box_J, int max_gene_align);
ostream &operator<<(ostream &out, const AlignBox &box);
/**
......@@ -430,8 +442,13 @@ class FineSegmenter : public Segmenter
void findCDR3();
void checkWarnings(CloneOutput *clone, bool phony=true);
void toOutput(CloneOutput *clone);
/**
* Show sequence, with aligments
*/
void showAlignments(ostream &out);
void toOutput(CloneOutput *clone);
};
......
......@@ -37,4 +37,4 @@ $ Display advanced options
: custom Cost
$ Correct number of options, including advanced options
60:^..-
61:^..-
# Testing --show-junction, after removing ANSI codes
!LAUNCH: $VIDJIL_DIR/$EXEC -c designations --show-junction -x 1 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta | sed 's/\x1b\[[0-9;]*m//g'
$ Read has correct positions
1:lcl.FLN1FA002RW 145 .* 222
$ Read is correctly cut
1:[^ACGT]TGTGT.*GGAAC[^ACGT]
$ V gene has a correct position and is correctly cut
1:IGHV3-48.01 276.*[^ACGT]TGTGTATTACTGTGCGAGAGA[.]{20}
$ J gene has a correct position and is correctly cut
1:IGHJ4.02 .*[.]{20}ACTACTTTGACTACTGGGGCCAGGGAAC[^ACGT] 29
\ No newline at end of file
......@@ -12,9 +12,6 @@ using namespace std;
#define GENE_ALIGN 20
#define V_COLOR "\033[1;42m"
#define J_COLOR "\033[1;43m"
#define NO_COLOR "\033[0m"
void usage(int argc, const char **argv) {
if (argc != 7) {
......@@ -86,26 +83,12 @@ int main(int argc, const char** argv)
// Read on stdin
read = read_sequence(cin);
}
align_against_collection(read, interestingV, -1, false, false, false, &box_V, VDJ);
align_against_collection(read, interestingJ, -1, false, true, false, &box_J, VDJ);
int align_V_length = min(GENE_ALIGN, box_V.end - box_V.start + 1);
int align_J_length = min(GENE_ALIGN, (int)read.size() - box_J.start + 1);
int start_V = box_V.end - align_V_length + 1;
int end_J = box_J.start + align_J_length - 1;
cout << "read \t" << start_V << "\t" ;
cout << V_COLOR << read.substr(start_V, align_V_length)
<< NO_COLOR
<< read.substr(box_V.end+1, (box_J.start - 1) - (box_V.end + 1) +1)
<< J_COLOR
<< read.substr(box_J.start, align_J_length)
<< NO_COLOR
<< "\t" << end_J << endl ;
cout << box_V.refToString(start_V, end_J) << "\t" << box_V << endl ;
cout << box_J.refToString(start_V, end_J) << "\t" << box_J << endl ;
Sequence seq = create_sequence("read", "read", read, "");
show_colored_read_germlines(cout, seq, &box_V, &box_J, GENE_ALIGN);
return 0;
}
......@@ -587,6 +587,11 @@ int main (int argc, char **argv)
bool out_gz = false;
app.add_flag("--gz", out_gz, "output compressed .tsv.gz, .vdj.fa.gz, and .vidjil.gz files") -> group(group) -> level();
bool show_alignments = false;
app.add_flag("--show-junction", show_alignments,
"show germline genes around the junction (experimental, not showing the exact alignment)")
-> group(group) -> level();
bool output_vdjfa = false;
app.add_flag("--out-vdjfa", output_vdjfa,
"output clones in a " CLONES_FILENAME " file (only for clone sequence data)")
......@@ -1782,7 +1787,12 @@ int main (int argc, char **argv)
clone->set("germline", g->code);
nb_segmented_by_germline[g->code]++ ;
cout << s << endl;
cout << s ;
if (show_alignments)
s.showAlignments(cout);
cout << endl ;
}
// Finish output preparation
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment