Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
vidjil
vidjil
Commits
13fa3b9a
Commit
13fa3b9a
authored
Apr 28, 2020
by
Mikaël Salson
Browse files
Merge branch 'feature-a/4269-tsv-gz' into 'dev'
--gz makes also output AIRR .tsv.gz Closes
#4249
See merge request
!675
parents
05caa0a1
9b70ef59
Pipeline
#140726
failed with stages
in 7 minutes and 44 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
algo/core/tools.cpp
View file @
13fa3b9a
...
...
@@ -473,3 +473,19 @@ string extractGeneName(string label){
}
return
result
;
}
/*
  Creates an output stream writing to the file 'f'.
  When 'gz' is true the stream is an ogzstream (gz-compressed output),
  otherwise it is a plain ofstream.
  The stream is allocated with new: the caller owns it and must delete it.
*/
std::ostream *new_ofgzstream(const char *f, bool gz)
{
  // Plain, uncompressed output
  if (!gz)
    return new ofstream(f);

  // Compressed output
  return new ogzstream(f);
}
\ No newline at end of file
algo/core/tools.h
View file @
13fa3b9a
...
...
@@ -43,6 +43,7 @@ typedef string junction ;
#include
<cassert>
#include
<vector>
#include
"bioreader.hpp"
#include
"../lib/gzstream.h"
#include
"kmeraffect.h"
#include
"../lib/json_fwd.hpp"
using
json
=
nlohmann
::
json
;
...
...
@@ -281,6 +282,11 @@ void output_label_average(ostream &out, string label, long long int nb, double a
void
json_add_warning
(
json
&
clone
,
string
code
,
string
msg
,
string
level
=
LEVEL_WARN
);
/*
Opens an output stream on the file 'f', possibly gz-compressed.
If 'gz' is true, the returned stream is an ogzstream, otherwise an ofstream.
The stream is allocated with new: the caller must delete it.
*/
std
::
ostream
*
new_ofgzstream
(
const
char
*
f
,
bool
gz
);
//////////////////////////////////////////////////
// Template code
...
...
algo/tests/should-get-tests/gzip-out.should
View file @
13fa3b9a
...
...
@@ -2,15 +2,20 @@
$ Output
1: out-a.vidjil
1: out-a.tsv
!LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --gz -b out-b $VIDJIL_DATA/clones_simul.fa
$
O
utput
$
Compressed o
utput
1: out-b.vidjil.gz
1: out-b.tsv.gz
!NO_LAUNCHER:
!NO_EXTRA:
gunzip -f out/out-b.vidjil.gz && diff -s -I '"....-..-..' -I "gz" -I "out-" out/out-a.vidjil out/out-b.vidjil ; echo "Diff: $?"
gunzip -f out/out-b.vidjil.gz && diff -s -I '"....-..-..' -I "gz" -I "out-" out/out-a.vidjil out/out-b.vidjil ; echo "Diff .vidjil.gz: $?"
gunzip -f out/out-b.tsv.gz && diff -s out/out-a.tsv out/out-b.tsv ; echo "Diff .tsv.gz: $?"
$ Same output
1:Diff: 0
1:Diff .vidjil.gz: 0
1:Diff .tsv.gz: 0
algo/vidjil.cpp
View file @
13fa3b9a
...
...
@@ -58,7 +58,6 @@
#include
"lib/CLI11.hpp"
#include
"lib/json.hpp"
#include
"lib/CLI11_json.hpp"
#include
"lib/gzstream.h"
#include
"vidjil.h"
...
...
@@ -585,7 +584,7 @@ int main (int argc, char **argv)
app
.
add_option
(
"--base,-b"
,
f_basename
,
"output basename (by default basename of the input file)"
)
->
group
(
group
)
->
type_name
(
"STRING"
);
bool
out_gz
=
false
;
app
.
add_flag
(
"--gz"
,
out_gz
,
"output compressed .vidjil.gz file"
)
->
group
(
group
)
->
level
();
app
.
add_flag
(
"--gz"
,
out_gz
,
"output compressed
.tsv.gz and
.vidjil.gz file
s
"
)
->
group
(
group
)
->
level
();
bool
no_airr
=
false
;
bool
no_vidjil
=
false
;
...
...
@@ -826,7 +825,10 @@ int main (int argc, char **argv)
string
f_json
=
out_dir
+
f_basename
+
JSON_SUFFIX
;
if
(
out_gz
)
{
f_airr
+=
GZ_SUFFIX
;
f_json
+=
GZ_SUFFIX
;
}
ostringstream
stream_cmdline
;
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
stream_cmdline
<<
argv
[
i
]
<<
" "
;
...
...
@@ -1299,7 +1301,7 @@ int main (int argc, char **argv)
<<
endl
;
string
f_clones
=
out_dir
+
f_basename
+
CLONES_FILENAME
;
cout
<<
" ==> "
<<
f_clones
<<
"
\t
(
main result fil
e)"
<<
endl
;
cout
<<
" ==> "
<<
f_clones
<<
"
\t
(
for post-processing with other softwar
e)"
<<
endl
;
ofstream
out_clones
(
f_clones
.
c_str
())
;
cout
<<
" ==> "
<<
out_seqdir
+
CLONE_FILENAME
+
"*"
<<
"
\t
(detail, by clone)"
<<
endl
;
...
...
@@ -1745,36 +1747,27 @@ int main (int argc, char **argv)
cout
<<
endl
;
}
//$ Output AIRR .tsv
//$ Output AIRR .tsv
(.gz)
if
(
!
no_airr
)
{
cout
<<
" ==> "
<<
f_airr
<<
"
\t
(AIRR output)"
<<
endl
;
ofstream
out_airr
(
f_airr
.
c_str
());
static_cast
<
SampleOutputAIRR
*>
(
&
output
)
->
out
(
out_airr
);
std
::
ostream
*
out_airr
=
new_ofgzstream
(
f_airr
.
c_str
(),
out_gz
);
static_cast
<
SampleOutputAIRR
*>
(
&
output
)
->
out
(
*
out_airr
);
delete
out_airr
;
}
//$ Output .vidjil(.gz) json
cout
<<
" ==> "
<<
f_json
;
if
(
!
no_vidjil
)
{
cout
<<
"
\t
(
data file for
the Vidjil web application)"
<<
endl
;
cout
<<
"
\t
(
main output file, may be opened by
the Vidjil web application)"
<<
endl
;
}
else
{
cout
<<
"
\t
(only metadata, no clone output)"
<<
endl
;
}
std
::
ostream
*
out_json
;
if
(
out_gz
)
{
out_json
=
new
ogzstream
(
f_json
.
c_str
());
}
else
{
out_json
=
new
ofstream
(
f_json
.
c_str
());
}
std
::
ostream
*
out_json
=
new_ofgzstream
(
f_json
.
c_str
(),
out_gz
);
SampleOutputVidjil
*
outputVidjil
=
static_cast
<
SampleOutputVidjil
*>
(
&
output
);
outputVidjil
->
out
(
*
out_json
,
!
no_vidjil
);
...
...
doc/vidjil-algo.md
View file @
13fa3b9a
...
...
@@ -549,6 +549,8 @@ The main output of Vidjil-algo (with the default `-c clones` command) are the th
-
The
`.tsv`
file is the AIRR output, for compatibility with other software
using the same format. See
[
below
](
#airr-tsv-output
)
for details.
With the `--gz` option, this file is output as a compressed `.tsv.gz` file.
-
The
`.vdj.fa`
file is
*a FASTA file for further processing by other bioinformatics tools*
.
Even if it is advised to rather use the full information in the
`.vidjil`
file,
the
`.vdj.fa`
is a convenient way to have sequences of clones for further processing.
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment