Commit 13fa3b9a authored by Mikaël Salson's avatar Mikaël Salson

Merge branch 'feature-a/4269-tsv-gz' into 'dev'

--gz makes also output AIRR .tsv.gz

Closes #4249

See merge request !675
parents 05caa0a1 9b70ef59
Pipeline #140726 failed with stages
in 7 minutes and 44 seconds
...@@ -473,3 +473,19 @@ string extractGeneName(string label){ ...@@ -473,3 +473,19 @@ string extractGeneName(string label){
} }
return result; return result;
} }
/*
  Creates the output stream for the file 'f': a gz-compressed stream (ogzstream)
  when 'gz' is set, a plain file stream (ofstream) otherwise.
  The stream is allocated with new; the caller is responsible for deleting it.
*/
std::ostream* new_ofgzstream(const char *f, bool gz)
{
  // Plain output is handled first, compression is the remaining case
  if (!gz)
    return new ofstream(f);

  return new ogzstream(f);
}
\ No newline at end of file
...@@ -43,6 +43,7 @@ typedef string junction ; ...@@ -43,6 +43,7 @@ typedef string junction ;
#include <cassert> #include <cassert>
#include <vector> #include <vector>
#include "bioreader.hpp" #include "bioreader.hpp"
#include "../lib/gzstream.h"
#include "kmeraffect.h" #include "kmeraffect.h"
#include "../lib/json_fwd.hpp" #include "../lib/json_fwd.hpp"
using json = nlohmann::json; using json = nlohmann::json;
...@@ -281,6 +282,11 @@ void output_label_average(ostream &out, string label, long long int nb, double a ...@@ -281,6 +282,11 @@ void output_label_average(ostream &out, string label, long long int nb, double a
void json_add_warning(json &clone, string code, string msg, string level=LEVEL_WARN); void json_add_warning(json &clone, string code, string msg, string level=LEVEL_WARN);
/*
  Opens an ostream on the file 'f', gz-compressed when 'gz' is true
  and uncompressed otherwise.
  The returned stream is allocated with new: the caller owns it and must delete it.
*/
std::ostream* new_ofgzstream(const char *f, bool gz);
////////////////////////////////////////////////// //////////////////////////////////////////////////
// Template code // Template code
......
...@@ -2,15 +2,20 @@ ...@@ -2,15 +2,20 @@
$ Output $ Output
1: out-a.vidjil 1: out-a.vidjil
1: out-a.tsv
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --gz -b out-b $VIDJIL_DATA/clones_simul.fa !LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --gz -b out-b $VIDJIL_DATA/clones_simul.fa
$ Output $ Compressed output
1: out-b.vidjil.gz 1: out-b.vidjil.gz
1: out-b.tsv.gz
!NO_LAUNCHER: !NO_LAUNCHER:
!NO_EXTRA: !NO_EXTRA:
gunzip -f out/out-b.vidjil.gz && diff -s -I '"....-..-..' -I "gz" -I "out-" out/out-a.vidjil out/out-b.vidjil ; echo "Diff: $?" gunzip -f out/out-b.vidjil.gz && diff -s -I '"....-..-..' -I "gz" -I "out-" out/out-a.vidjil out/out-b.vidjil ; echo "Diff .vidjil.gz: $?"
gunzip -f out/out-b.tsv.gz && diff -s out/out-a.tsv out/out-b.tsv ; echo "Diff .tsv.gz: $?"
$ Same output $ Same output
1:Diff: 0 1:Diff .vidjil.gz: 0
1:Diff .tsv.gz: 0
...@@ -58,7 +58,6 @@ ...@@ -58,7 +58,6 @@
#include "lib/CLI11.hpp" #include "lib/CLI11.hpp"
#include "lib/json.hpp" #include "lib/json.hpp"
#include "lib/CLI11_json.hpp" #include "lib/CLI11_json.hpp"
#include "lib/gzstream.h"
#include "vidjil.h" #include "vidjil.h"
...@@ -585,7 +584,7 @@ int main (int argc, char **argv) ...@@ -585,7 +584,7 @@ int main (int argc, char **argv)
app.add_option("--base,-b", f_basename, "output basename (by default basename of the input file)") -> group(group) -> type_name("STRING"); app.add_option("--base,-b", f_basename, "output basename (by default basename of the input file)") -> group(group) -> type_name("STRING");
bool out_gz = false; bool out_gz = false;
app.add_flag("--gz", out_gz, "output compressed .vidjil.gz file") -> group(group) -> level(); app.add_flag("--gz", out_gz, "output compressed .tsv.gz and .vidjil.gz files") -> group(group) -> level();
bool no_airr = false; bool no_airr = false;
bool no_vidjil = false; bool no_vidjil = false;
...@@ -826,7 +825,10 @@ int main (int argc, char **argv) ...@@ -826,7 +825,10 @@ int main (int argc, char **argv)
string f_json = out_dir + f_basename + JSON_SUFFIX ; string f_json = out_dir + f_basename + JSON_SUFFIX ;
if (out_gz) if (out_gz)
{
f_airr += GZ_SUFFIX;
f_json += GZ_SUFFIX; f_json += GZ_SUFFIX;
}
ostringstream stream_cmdline; ostringstream stream_cmdline;
for (int i=0; i < argc; i++) stream_cmdline << argv[i] << " "; for (int i=0; i < argc; i++) stream_cmdline << argv[i] << " ";
...@@ -1299,7 +1301,7 @@ int main (int argc, char **argv) ...@@ -1299,7 +1301,7 @@ int main (int argc, char **argv)
<< endl ; << endl ;
string f_clones = out_dir + f_basename + CLONES_FILENAME ; string f_clones = out_dir + f_basename + CLONES_FILENAME ;
cout << " ==> " << f_clones << " \t(main result file)" << endl ; cout << " ==> " << f_clones << " \t(for post-processing with other software)" << endl ;
ofstream out_clones(f_clones.c_str()) ; ofstream out_clones(f_clones.c_str()) ;
cout << " ==> " << out_seqdir + CLONE_FILENAME + "*" << "\t(detail, by clone)" << endl ; cout << " ==> " << out_seqdir + CLONE_FILENAME + "*" << "\t(detail, by clone)" << endl ;
...@@ -1745,36 +1747,27 @@ int main (int argc, char **argv) ...@@ -1745,36 +1747,27 @@ int main (int argc, char **argv)
cout << endl; cout << endl;
} }
//$ Output AIRR .tsv //$ Output AIRR .tsv(.gz)
if (!no_airr) if (!no_airr)
{ {
cout << " ==> " << f_airr << " \t(AIRR output)" << endl; cout << " ==> " << f_airr << " \t(AIRR output)" << endl;
ofstream out_airr(f_airr.c_str()); std::ostream *out_airr = new_ofgzstream(f_airr.c_str(), out_gz);
static_cast<SampleOutputAIRR *>(&output) -> out(out_airr); static_cast<SampleOutputAIRR *>(&output) -> out(*out_airr);
delete out_airr;
} }
//$ Output .vidjil(.gz) json //$ Output .vidjil(.gz) json
cout << " ==> " << f_json ; cout << " ==> " << f_json ;
if (!no_vidjil) if (!no_vidjil)
{ {
cout << "\t(data file for the Vidjil web application)" << endl; cout << "\t(main output file, may be opened by the Vidjil web application)" << endl;
} }
else else
{ {
cout << "\t(only metadata, no clone output)" << endl; cout << "\t(only metadata, no clone output)" << endl;
} }
std::ostream *out_json; std::ostream *out_json = new_ofgzstream(f_json.c_str(), out_gz);
if (out_gz)
{
out_json = new ogzstream(f_json.c_str());
}
else
{
out_json = new ofstream(f_json.c_str());
}
SampleOutputVidjil *outputVidjil = static_cast<SampleOutputVidjil *>(&output); SampleOutputVidjil *outputVidjil = static_cast<SampleOutputVidjil *>(&output);
outputVidjil -> out(*out_json, !no_vidjil); outputVidjil -> out(*out_json, !no_vidjil);
......
...@@ -549,6 +549,8 @@ The main output of Vidjil-algo (with the default `-c clones` command) are the th ...@@ -549,6 +549,8 @@ The main output of Vidjil-algo (with the default `-c clones` command) are the th
- The `.tsv` file is the AIRR output, for compatibility with other software - The `.tsv` file is the AIRR output, for compatibility with other software
using the same format. See [below](#airr-tsv-output) for details. using the same format. See [below](#airr-tsv-output) for details.
With the `--gz` option, this file is output as a compressed `.tsv.gz` file.
- The `.vdj.fa` file is *a FASTA file for further processing by other bioinformatics tools*. - The `.vdj.fa` file is *a FASTA file for further processing by other bioinformatics tools*.
Even if it is advised to rather use the full information in the `.vidjil` file, Even if it is advised to rather use the full information in the `.vidjil` file,
the `.vdj.fa` is a convenient way to have sequences of clones for further processing. the `.vdj.fa` is a convenient way to have sequences of clones for further processing.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment