Commit 13fa3b9a authored by Mikaël Salson's avatar Mikaël Salson

Merge branch 'feature-a/4269-tsv-gz' into 'dev'

--gz also outputs the AIRR file as a compressed .tsv.gz

Closes #4249

See merge request !675
parents 05caa0a1 9b70ef59
Pipeline #140726 failed with stages
in 7 minutes and 44 seconds
......@@ -473,3 +473,19 @@ string extractGeneName(string label){
}
return result;
}
/*
Creates an output stream writing to the file 'f':
an ogzstream (gz-compressed) when 'gz' is set, a plain ofstream otherwise.
The stream is allocated with 'new': the caller owns it and must delete it.
*/
std::ostream* new_ofgzstream(const char *f, bool gz)
{
  // Plain, uncompressed output
  if (!gz)
    return new ofstream(f);

  // Compressed output
  return new ogzstream(f);
}
\ No newline at end of file
......@@ -43,6 +43,7 @@ typedef string junction ;
#include <cassert>
#include <vector>
#include "bioreader.hpp"
#include "../lib/gzstream.h"
#include "kmeraffect.h"
#include "../lib/json_fwd.hpp"
using json = nlohmann::json;
......@@ -281,6 +282,11 @@ void output_label_average(ostream &out, string label, long long int nb, double a
void json_add_warning(json &clone, string code, string msg, string level=LEVEL_WARN);
/*
Opens an ostream on the file 'f', gz-compressed when 'gz' is true
(plain ofstream otherwise).
The returned stream is allocated with 'new': the caller must delete it.
*/
std::ostream* new_ofgzstream(const char *f, bool gz);
//////////////////////////////////////////////////
// Template code
......
......@@ -2,15 +2,20 @@
$ Output
1: out-a.vidjil
1: out-a.tsv
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --gz -b out-b $VIDJIL_DATA/clones_simul.fa
$ Output
$ Compressed output
1: out-b.vidjil.gz
1: out-b.tsv.gz
!NO_LAUNCHER:
!NO_EXTRA:
gunzip -f out/out-b.vidjil.gz && diff -s -I '"....-..-..' -I "gz" -I "out-" out/out-a.vidjil out/out-b.vidjil ; echo "Diff: $?"
gunzip -f out/out-b.vidjil.gz && diff -s -I '"....-..-..' -I "gz" -I "out-" out/out-a.vidjil out/out-b.vidjil ; echo "Diff .vidjil.gz: $?"
gunzip -f out/out-b.tsv.gz && diff -s out/out-a.tsv out/out-b.tsv ; echo "Diff .tsv.gz: $?"
$ Same output
1:Diff: 0
1:Diff .vidjil.gz: 0
1:Diff .tsv.gz: 0
......@@ -58,7 +58,6 @@
#include "lib/CLI11.hpp"
#include "lib/json.hpp"
#include "lib/CLI11_json.hpp"
#include "lib/gzstream.h"
#include "vidjil.h"
......@@ -585,7 +584,7 @@ int main (int argc, char **argv)
app.add_option("--base,-b", f_basename, "output basename (by default basename of the input file)") -> group(group) -> type_name("STRING");
bool out_gz = false;
app.add_flag("--gz", out_gz, "output compressed .vidjil.gz file") -> group(group) -> level();
app.add_flag("--gz", out_gz, "output compressed .tsv.gz and .vidjil.gz files") -> group(group) -> level();
bool no_airr = false;
bool no_vidjil = false;
......@@ -826,7 +825,10 @@ int main (int argc, char **argv)
string f_json = out_dir + f_basename + JSON_SUFFIX ;
if (out_gz)
{
f_airr += GZ_SUFFIX;
f_json += GZ_SUFFIX;
}
ostringstream stream_cmdline;
for (int i=0; i < argc; i++) stream_cmdline << argv[i] << " ";
......@@ -1299,7 +1301,7 @@ int main (int argc, char **argv)
<< endl ;
string f_clones = out_dir + f_basename + CLONES_FILENAME ;
cout << " ==> " << f_clones << " \t(main result file)" << endl ;
cout << " ==> " << f_clones << " \t(for post-processing with other software)" << endl ;
ofstream out_clones(f_clones.c_str()) ;
cout << " ==> " << out_seqdir + CLONE_FILENAME + "*" << "\t(detail, by clone)" << endl ;
......@@ -1745,36 +1747,27 @@ int main (int argc, char **argv)
cout << endl;
}
//$ Output AIRR .tsv
//$ Output AIRR .tsv(.gz)
if (!no_airr)
{
cout << " ==> " << f_airr << " \t(AIRR output)" << endl;
ofstream out_airr(f_airr.c_str());
static_cast<SampleOutputAIRR *>(&output) -> out(out_airr);
std::ostream *out_airr = new_ofgzstream(f_airr.c_str(), out_gz);
static_cast<SampleOutputAIRR *>(&output) -> out(*out_airr);
delete out_airr;
}
//$ Output .vidjil(.gz) json
cout << " ==> " << f_json ;
if (!no_vidjil)
{
cout << "\t(data file for the Vidjil web application)" << endl;
cout << "\t(main output file, may be opened by the Vidjil web application)" << endl;
}
else
{
cout << "\t(only metadata, no clone output)" << endl;
}
std::ostream *out_json;
if (out_gz)
{
out_json = new ogzstream(f_json.c_str());
}
else
{
out_json = new ofstream(f_json.c_str());
}
std::ostream *out_json = new_ofgzstream(f_json.c_str(), out_gz);
SampleOutputVidjil *outputVidjil = static_cast<SampleOutputVidjil *>(&output);
outputVidjil -> out(*out_json, !no_vidjil);
......
......@@ -549,6 +549,8 @@ The main output of Vidjil-algo (with the default `-c clones` command) are the th
- The `.tsv` file is the AIRR output, for compatibility with other software
using the same format. See [below](#airr-tsv-output) for details.
With the `--gz` option, this file is output as a compressed `.tsv.gz` file.
- The `.vdj.fa` file is *a FASTA file for further processing by other bioinformatics tools*.
Even if it is advised to rather use the full information in the `.vidjil` file,
the `.vdj.fa` is a convenient way to have sequences of clones for further processing.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment