Commit feb4281d authored by Mathieu Giraud's avatar Mathieu Giraud

Merge branch 'feature-a/2828-output-airr' into 'dev'

Feature a/2828 output airr

See merge request !330
parents e91de594 8e36164d
Pipeline #44948 canceled with stages
#include "output.h"
#define NULL_VAL ""
// Renders a json value as a plain string:
//  - a null value gives NULL_VAL,
//  - a number is passed to string_of_int(),
//  - a string value is converted to a string,
//  - any other value is serialized with dump().
string getout(json v)
{
  if (!v.is_null())
  {
    if (v.is_number())
      return string_of_int(v) ;

    if (v.is_string())
      return v;

    // Neither null, number nor string: fall back to the json serialization
    return v.dump();
  }

  return NULL_VAL ;
}
// Returns j[key], formatted as a string by getout().
string Output::get(string key)
{
  const json &value = j[key];
  return getout(value);
}
// Returns j[key][subkey], formatted as a string by getout().
string Output::get(string key, string subkey)
{
  auto &section = j[key];
  return getout(section[subkey]);
}
// Returns j[key][subkey][subsubkey], formatted as a string by getout().
string Output::get(string key, string subkey, string subsubkey)
{
  auto &section = j[key];
  auto &subsection = section[subkey];
  return getout(subsection[subsubkey]);
}
void Output::set(string key, json val)
{
......@@ -27,6 +50,17 @@ void Output::add_warning(string code, string msg, string level)
json_add_warning(j, code, msg, level);
}
// Returns element 0 of j["reads"], converted to an int.
int CloneOutput::reads()
{
  auto &counts = j["reads"];
  const int first_count = counts[0];
  return first_count;
}
// Nothing to release here: the destructor has no work of its own.
CloneOutput::~CloneOutput() = default;
json CloneOutput::toJson()
{
return j;
......@@ -69,7 +103,7 @@ CloneOutput* SampleOutput::getClone(junction junction)
}
}
// .vidjil json output
void SampleOutputVidjil::out(ostream &s)
{
......@@ -82,3 +116,51 @@ void SampleOutputVidjil::out(ostream &s)
s << j.dump(2);
}
// AIRR .tsv output
// Builds the table (AIRR column name -> value) describing this clone.
// Values are read through get() and reads(); both 'sequence_id' and
// 'clone_id' are filled from the "id" entry.
map <string, string> CloneOutputAIRR::fields()
{
  // The initializers are evaluated in the order they are written.
  map <string, string> row = {
    { "locus",           get("germline") },
    { "consensus_count", string_of_int(reads()) },
    { "sequence_id",     get("id") },
    { "clone_id",        get("id") },
    { "sequence",        get("sequence") },
    { "v_call",          get(KEY_SEG, "5", "name") },
    { "d_call",          get(KEY_SEG, "4", "name") },
    { "j_call",          get(KEY_SEG, "3", "name") }
  };

  return row;
}
// Writes the clones on 's' as a tab-separated table:
// one header line with the column names, then one line per clone.
// Every cell, including the last one of a line, is followed by a tab.
// A column for which the clone's fields() gives no value is written empty.
void SampleOutputAIRR::out(ostream &s)
{
  const vector <string> columns = {
    "locus",
    "consensus_count",
    "v_call", "d_call", "j_call",
    "sequence_id",
    "sequence",
    "sequence_alignment",
    "germline_alignment",
    "v_cigar", "d_cigar", "j_cigar",
    "clone_id"
  };

  // Header line
  for (const string &column : columns)
    s << column << "\t" ;
  s << endl ;

  // One line per clone
  for (const auto &entry : clones)
  {
    CloneOutputAIRR *clone = static_cast<CloneOutputAIRR *>(entry.second);
    const map <string, string> values = clone -> fields();

    for (const string &column : columns)
    {
      const auto found = values.find(column);

      if (found != values.end())
        s << found->second << "\t" ;
      else
        s << string() << "\t" ;
    }

    s << endl;
  }
}
......@@ -18,16 +18,26 @@ protected:
json j;
public:
string get(string key);
string get(string key, string subkey);
string get(string key, string subkey, string subsubkey);
void set(string key, json val);
void set(string key, string subkey, json val);
void set(string key, string subkey, string subsubkey, json val);
void add_warning(string code, string msg, string level);
};
class CloneOutput : public Output
{
public:
virtual ~CloneOutput();
int reads();
void setSeg(string subkey, json val);
json toJson();
......@@ -53,27 +63,24 @@ public:
};
/*
class CloneOutputFormatter
{
}
class CloneOutputFormatterCSV(CloneOutputFormatter)
// Native Json .vidjil format
// See vidjil-format.md
class SampleOutputVidjil : public SampleOutput
{
public:
void out(ostream &s);
};
}
class CloneOutputFormatterJson(CloneOutputFormatter)
// AIRR
// See http://docs.airr-community.org
class CloneOutputAIRR : public CloneOutput
{
public:
void out(ostream &s);
map <string, string> fields();
};
}
*/
// Native Json .vidjil format
// See vidjil-format.md
class SampleOutputVidjil : public SampleOutput
class SampleOutputAIRR : public SampleOutput
{
public:
void out(ostream &s);
......
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c clones -z 2 -3 -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH $VIDJIL_DATA/Stanford_S22.fasta > /dev/null ; cat out/Stanford_S22.tsv
$ There are four lines, all with tabs
4:
4:\t
$ The required AIRR fields are present
:consensus_count
:v_call d_call j_call
:sequence_id
:sequence
:sequence_alignment
:germline_alignment
:v_cigar d_cigar j_cigar
$ Three clones on IGH
3:IGH
$ One clone has 8 reads, two clones have 5 reads
1:IGH 8
2:IGH 5
$ v_call for 2 clones (-z 2)
2:IGHV
$ First clone window appears three times (clone_id, sequence_id, sequence)
w3:TATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTG
$ No spurious character
0:"
0:@
0:\\
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c clones -z 2 -2 -3 -r 1 -g $VIDJIL_DIR/germline/homo-sapiens.g ../should-vdj-tests/Demo-X5.should-vdj.fa > /dev/null ; cat out/Demo-X5.should-vdj.tsv
$ There are 15 = 1 + 14 lines, all with tabs
15:
15:\t
$ All clones have 1 read
14: \t1\t
$ 8 clones with TR recombinations, some of them having special recombinations
2:TRA
1:TRA[+]D
2:TRB
1:TRB[+]
1:TRG
3:TRD
2:TRD[+]
$ One recombination with KDE
1:KDE
$ No spurious character
0:"
0:@
0:\\
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c segment -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -A $VIDJIL_DATA/overlap-d-j.fa | grep -v web | tail -4 | tr -d '\n' | wc -c
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -c segment -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH -A $VIDJIL_DATA/overlap-d-j.fa | grep -v out | tail -4 | tr -d '\n' | wc -c
$ Exported sequence has all the bases
1:116
......
......@@ -103,6 +103,7 @@ enum { CMD_WINDOWS, CMD_CLONES, CMD_SEGMENT, CMD_GERMLINES } ;
#define AFFECTS_FILENAME ".affects"
#define EDGES_FILENAME ".edges"
#define COMP_FILENAME "comp.vidjil"
#define AIRR_SUFFIX ".tsv"
#define JSON_SUFFIX ".vidjil"
#define DEFAULT_K 0
......@@ -753,6 +754,7 @@ int main (int argc, char **argv)
// JSON OUTPUT //
/////////////////////////////////////////
string f_airr = out_dir + f_basename + AIRR_SUFFIX ;
string f_json = out_dir + f_basename + JSON_SUFFIX ;
ostringstream stream_cmdline;
......@@ -1638,13 +1640,19 @@ int main (int argc, char **argv)
cout << endl;
}
//$ Output AIRR .tsv
cout << " ==> " << f_airr << " \t(AIRR output)" << endl;
ofstream out_airr(f_airr.c_str());
static_cast<SampleOutputAIRR *>(output) -> out(out_airr);
//$ Output .vidjil json
cout << " ==> " << f_json << "\t(data file for the web application)" << endl ;
cout << " ==> " << f_json << "\t(data file for the Vidjil web application)" << endl ;
ofstream out_json(f_json.c_str()) ;
SampleOutputVidjil *outputVidjil = static_cast<SampleOutputVidjil *>(output);
outputVidjil->out(out_json);
//$$ Clean
delete multigermline ;
delete reads;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment