Commit 357b51fc authored by Mikaël Salson's avatar Mikaël Salson

Merge branch 'feature-a/4270-vdj-fa-gz' into 'dev'

--gz makes also output .vdj.fa.gz

Closes #4270

See merge request !676
parents 13fa3b9a bd285b6f
Pipeline #140879 failed with stages
in 10 minutes and 12 seconds
......@@ -3,19 +3,23 @@
$ Output
1: out-a.vidjil
1: out-a.tsv
1: out-a.vdj.fa
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --gz -b out-b $VIDJIL_DATA/clones_simul.fa
$ Compressed output
1: out-b.vidjil.gz
1: out-b.tsv.gz
1: out-b.vdj.fa.gz
!NO_LAUNCHER:
!NO_EXTRA:
gunzip -f out/out-b.vidjil.gz && diff -s -I '"....-..-..' -I "gz" -I "out-" out/out-a.vidjil out/out-b.vidjil ; echo "Diff .vidjil.gz: $?"
gunzip -f out/out-b.tsv.gz && diff -s out/out-a.tsv out/out-b.tsv ; echo "Diff .tsv.gz: $?"
gunzip -f out/out-b.vdj.fa.gz && diff -s out/out-a.vdj.fa out/out-b.vdj.fa ; echo "Diff .vdj.fa.gz: $?"
$ Same output
1:Diff .vidjil.gz: 0
1:Diff .tsv.gz: 0
1:Diff .vdj.fa.gz: 0
......@@ -584,7 +584,7 @@ int main (int argc, char **argv)
app.add_option("--base,-b", f_basename, "output basename (by default basename of the input file)") -> group(group) -> type_name("STRING");
bool out_gz = false;
app.add_flag("--gz", out_gz, "output compressed .tsv.gz and .vidjil.gz files") -> group(group) -> level();
app.add_flag("--gz", out_gz, "output compressed .tsv.gz, .vdj.fa.gz, and .vidjil.gz files") -> group(group) -> level();
bool no_airr = false;
bool no_vidjil = false;
......@@ -821,11 +821,13 @@ int main (int argc, char **argv)
// JSON OUTPUT //
/////////////////////////////////////////
string f_clones = out_dir + f_basename + CLONES_FILENAME ;
string f_airr = out_dir + f_basename + AIRR_SUFFIX ;
string f_json = out_dir + f_basename + JSON_SUFFIX ;
if (out_gz)
{
f_clones += GZ_SUFFIX;
f_airr += GZ_SUFFIX;
f_json += GZ_SUFFIX;
}
......@@ -1300,9 +1302,8 @@ int main (int argc, char **argv)
cout << " ==> suggested edges in " << out_dir+ f_basename + EDGES_FILENAME
<< endl ;
string f_clones = out_dir + f_basename + CLONES_FILENAME ;
cout << " ==> " << f_clones << " \t(for post-processing with other software)" << endl ;
ofstream out_clones(f_clones.c_str()) ;
ostream* out_clones = new_ofgzstream(f_clones.c_str(), out_gz) ;
cout << " ==> " << out_seqdir + CLONE_FILENAME + "*" << "\t(detail, by clone)" << endl ;
cout << endl ;
......@@ -1357,7 +1358,7 @@ int main (int argc, char **argv)
// If max_representatives is reached, we stop here but still output the window
if ((max_representatives >= 0) && (num_clone >= max_representatives + 1))
{
out_clones << window_str << endl ;
*out_clones << window_str << endl ;
continue;
}
}
......@@ -1454,7 +1455,7 @@ int main (int argc, char **argv)
{
if (clone_on_stdout)
cout << representative << endl ;
out_clones << representative << endl ;
*out_clones << representative << endl ;
continue;
}
......@@ -1481,7 +1482,7 @@ int main (int argc, char **argv)
if (clone_on_stdout)
cout << seg << endl ;
out_clone << seg << endl ;
out_clones << seg << endl ;
*out_clones << seg << endl ;
seg.toOutput(clone);
......@@ -1538,7 +1539,7 @@ int main (int argc, char **argv)
} // end for clones
out_edges.close() ;
out_clones.close();
delete out_clones;
if (num_clone > last_num_clone_on_stdout)
{
......
......@@ -544,13 +544,9 @@ The main output of Vidjil-algo (with the default `-c clones` command) are the th
setup or in an immunological study).
Please see the [web application user manual](http://www.vidjil.org/doc/user) for more information.
With the `--gz` option, this file is output as a compressed `.vidjil.gz` file.
- The `.tsv` file is the AIRR output, for compatibility with other software
using the same format. See [below](#airr-tsv-output) for details.
With the `--gz` option, this file is output as a compressed `.tsv.gz` file.
- The `.vdj.fa` file is *a FASTA file for further processing by other bioinformatics tools*.
Even if it is advised to rather use the full information in the `.vidjil` file,
the `.vdj.fa` is a convenient way to have sequences of clones for further processing.
......@@ -570,6 +566,8 @@ By default, the three output files are named
- `out` is the directory where all the outputs are stored (can be changed with the `--dir` option).
- `basename` is the basename of the input `.fasta/.fastq` file (can be overridden with the `--base` option)
With the `--gz` option, the three files are output
as compressed `.vidjil.gz`, `.tsv.gz`, and `.vdj.fa.gz` files.
Vidjil-algo also outputs the first 50 clones on the standard output.
More data can be printed on the standard output with the `-v` option.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment