Commit 3be38c03 authored by Mikaël Salson's avatar Mikaël Salson

Merge branch 'feature-a/3839-config-labels' into 'dev'

--config, --labels-json

Closes #3839 and #3837

See merge request !456
parents a2455f80 ca86677b
Pipeline #72170 passed with stages
in 183 minutes and 9 seconds
......@@ -4,6 +4,35 @@
#include <cstdlib>
#include <stdexcept>
#include "tools.h"
#include "lib/json.hpp"
using nlohmann::json;
/**
 * Load labels from a JSON file into a map.
 *
 * The file is expected to hold, under "config" / "labels", a list of entries
 * each having a "name" and a "sequence" string. Every entry is stored in
 * the_map as the_map[sequence] = name.
 *
 * @param the_map    map updated with one (sequence -> name) entry per label
 * @param json_file  path of the JSON file; when empty, nothing is loaded
 * @return the "config"/"labels" json value read from the file (entries keep
 *         all their fields), or a null json when json_file is empty
 * @throws std::runtime_error when the file cannot be opened
 */
json load_into_map_from_json(map <string, string> &the_map, string json_file)
{
  // No file given: nothing to load
  if (json_file.empty())
    return {};

  cout << " <== " << json_file << endl ;

  std::ifstream json_file_stream(json_file);

  // Fail early with an explicit message instead of an obscure parse error on an unreadable stream
  if (!json_file_stream.is_open())
    throw std::runtime_error("Unable to open label file: " + json_file);

  json j;
  json_file_stream >> j;

  json labels = j["config"]["labels"] ;

  int n = 0;
  for (json &label : labels) {
    const string name = label["name"].get<std::string>();
    const string sequence = label["sequence"].get<std::string>();

    the_map[sequence] = name;
    n++ ;
  }

  cout << " ==> " << n << " labels" << endl;

  return labels;
}
void load_into_map(map <string, string> &the_map, string map_file, string default_value)
{
......
......@@ -7,4 +7,4 @@
#include "bioreader.hpp"
void load_into_map(map <string, string> &the_map, string map_file, string default_value);
// Load the labels found under "config" / "labels" in a JSON file into the_map (sequence -> name).
// Returns the labels json value, or an empty (null) json when json_file is an empty string.
json load_into_map_from_json(map <string, string> &the_map, string json_file);
// From CLI11 examples
#include "CLI11.hpp"
#include "json.hpp"
// This example is only built on GCC 7 on Travis due to mismatch in stdlib
// for clang (CLI11 is forgiving about mismatches, json.hpp is not)
using nlohmann::json;
class ConfigJSON : public CLI::Config {
public:
std::string to_config(const CLI::App *app, bool default_also, bool, std::string) const override {
json j;
for(const CLI::Option *opt : app->get_options({})) {
// Only process option with a long-name and configurable
if(!opt->get_lnames().empty() && opt->get_configurable()) {
std::string name = opt->get_lnames()[0];
// Non-flags
if(opt->get_type_size() != 0) {
// If the option was found on command line
if(opt->count() == 1)
j[name] = opt->results().at(0);
else if(opt->count() > 1)
j[name] = opt->results();
// If the option has a default and is requested by optional argument
else if(default_also && !opt->get_defaultval().empty())
j[name] = opt->get_defaultval();
// Flag, one passed
} else if(opt->count() == 1) {
j[name] = true;
// Flag, multiple passed
} else if(opt->count() > 1) {
j[name] = opt->count();
// Flag, not present
} else if(opt->count() == 0 && default_also) {
j[name] = false;
}
}
}
for(const CLI::App *subcom : app->get_subcommands({}))
j[subcom->get_name()] = json(to_config(subcom, default_also, false, ""));
return j.dump(4);
}
std::vector<CLI::ConfigItem> from_config(std::istream &input) const override {
json j;
input >> j;
return _from_config(j["config"]);
}
std::vector<CLI::ConfigItem>
_from_config(json j, std::string name = "", std::vector<std::string> prefix = {}) const {
std::vector<CLI::ConfigItem> results;
if(j.is_object()) {
for(json::iterator item = j.begin(); item != j.end(); ++item) {
auto copy_prefix = prefix;
if(!name.empty())
copy_prefix.push_back(name);
auto sub_results = _from_config(*item, item.key(), copy_prefix);
results.insert(results.end(), sub_results.begin(), sub_results.end());
}
} else if(!name.empty()) {
results.emplace_back();
CLI::ConfigItem &res = results.back();
res.name = name;
res.parents = prefix;
if(j.is_boolean()) {
res.inputs = {j.get<bool>() ? "true" : "false"};
} else if(j.is_number()) {
std::stringstream ss;
ss << j.get<double>();
res.inputs = {ss.str()};
} else if(j.is_string()) {
res.inputs = {j.get<std::string>()};
} else if(j.is_array()) {
for(std::string ival : j)
res.inputs.push_back(ival);
} else {
throw CLI::ConversionError("Failed to convert " + name);
}
} else {
throw CLI::ConversionError("You must make all top level values objects in json!");
}
return results;
}
};
/**
 * Small driver exercising the JSON configuration formatter.
 *
 * Builds an app with a "--simple" flag, an "--item" integer option and a
 * "--config" configuration option, parses the command line, then prints the
 * string returned by app.config_to_str(true, true).
 *
 * @return 0 once the configuration has been printed
 */
int testCLI11_json(int argc, char **argv) {
    CLI::App app;
    app.config_formatter(std::make_shared<ConfigJSON>());

    // Value-initialized so that it never holds an indeterminate value when --item is not given
    int item{0};

    app.add_flag("--simple");
    app.add_option("--item", item);
    app.set_config("--config");

    CLI11_PARSE(app, argc, argv);

    std::cout << app.config_to_str(true, true) << std::endl;

    return 0;
}
class ConfigJSON : public CLI::Config {
std::string to_config(const CLI::App *app, bool default_also, bool, std::string) const override ;
std::vector<CLI::ConfigItem> from_config(std::istream &input) const override ;
} ;
{
"config": {
"first-reads": 10,
"min-reads": "1",
"max-consensus": "1",
"cdr3": true
}
}
{
"config": {
"labels": [
{ "name": "lab1", "sequence": "CGAGAGTGGGCAGCAGCTGG", "foo": 42 },
{ "name": "lab2", "sequence": "GAAGGGCTACTATGGTTCGGG", "bar": 17}
]
}
}
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --max-consensus 0 --first-reads 10 --label-json ../data/labels-json.vidjil $VIDJIL_DATA/Stanford_S22.fasta
$ Labels are taken into account
: 2 labels
: Considering labeled windows
$ Report two clones, even with --max-consensus 0
: ==> 2 clones
!LAUNCH: cat out/Stanford_S22.vidjil
!NO_LAUNCHER:
$ Labels are in the .json output
1: "label": "lab1"
1: "label": "lab2"
$ Other values from labels-json.vidjil are also in the .json output
1: "foo": 42
1: "bar": 17
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH --config ../data/config.vidjil $VIDJIL_DATA/Stanford_S22.fasta
$ Analyze 10 reads (--first-reads)
: in 10 reads
$ Report a clone with one read (--min-reads)
1: Clone #001 .* 1 read
$ Compute a CDR3 (--cdr3)
1:CARVGSSWS#MLLIIW
$ Report only one clone (--max-consensus)
1: Clone #
......@@ -13,3 +13,15 @@ $ Segment two reads on IgVC/IgJC (second -g option)
1: IgVC .* -> .* 1
1: IgJC .* -> .* 1
!LAUNCH: cat out/isotypes.vidjil
!NO_LAUNCHER:
$ Both germlines are in .vidjil
1: /germline"
1: /homo-sapiens-isotypes.g"
$ Check that some options are correctly reported
1: "max-consensus": "100"
1: "plain-index": false
1: "cdr3": false
......@@ -4,7 +4,7 @@
# Test limits in the .vidjil json output
$ There is exactly one clone with a detailed analysis (-z 1)
w1: "cdr3"
w1: "cdr3": {
$ There are exactly five clones with representative sequences (-y 5)
w5: "sequence": "
......
......@@ -13,5 +13,10 @@ $ 1 clone is fully analyzed, and not more (-z 1)
2:clone-.* IGHV
!LAUNCH: $VIDJIL_DIR/$EXEC $VIDJIL_DEFAULT_OPTIONS --config out/Stanford_S22.vidjil $VIDJIL_DATA/Stanford_S22.fasta ; cat out/Stanford_S22.vdj.fa
$ Same tests as before, options taken from 'out/Stanford_S22.vidjil' that was just generated
1: ==> 10 clones
2:clone-005--.* bp
1:clone-006--.*--window
2:clone-.* IGHV
......@@ -17,4 +17,4 @@ $ Display advanced options
: custom Cost
$ Correct number of options
52:^..-
54:^..-
......@@ -57,6 +57,7 @@
#include "lib/CLI11.hpp"
#include "lib/json.hpp"
#include "lib/CLI11_json.hpp"
#include "vidjil.h"
......@@ -229,6 +230,7 @@ int main (int argc, char **argv)
#endif
CLI::App app{"# vidjil-algo -- V(D)J recombinations analysis", argv[0]};
app.config_formatter(std::make_shared<ConfigJSON>());
app.get_formatter()->label("REQUIRED", "");
app.get_formatter()->label("Positionnals", "");
app.failure_message(failure_message_doc);
......@@ -266,6 +268,9 @@ int main (int argc, char **argv)
// ----------------------------------------------------------------------------------------------------------------------
group = "Input" ;
app.set_config("--config", "", "read a (.json) config.vidjil file with options") -> type_name("FILE")
-> group(group) -> level();
string read_header_separator = DEFAULT_READ_HEADER_SEPARATOR ;
app.add_option("--header-sep", read_header_separator, "separator for headers in the reads file", false)
-> group(group) -> level() -> type_name("CHAR='" DEFAULT_READ_HEADER_SEPARATOR "'");
......@@ -414,9 +419,12 @@ int main (int argc, char **argv)
vector <string> windows_labels_explicit ;
string windows_labels_file = "" ;
string windows_labels_json = "" ;
app.add_option("--label", windows_labels_explicit, "label the given sequence(s)") -> group(group) -> level() -> type_name("SEQUENCE");
app.add_option("--label-file", windows_labels_file, "label a set of sequences given in <file>") -> group(group) -> level() -> type_name("FILE");
app.add_option("--label-json", windows_labels_json, "read a (.json) label.vidjil (experimental)") -> type_name("FILE")
-> group(group) -> level();
bool only_labeled_windows = false ;
app.add_flag("--label-filter", only_labeled_windows, "filter -- keep only the windows related to the labeled sequences") -> group(group) -> level();
......@@ -725,6 +733,7 @@ int main (int argc, char **argv)
/// Load labels ;
load_into_map(windows_labels, windows_labels_file, "-l");
json j_labels = load_into_map_from_json(windows_labels, windows_labels_json);
switch(command) {
case CMD_WINDOWS: cout << "Extracting windows" << endl;
......@@ -743,6 +752,11 @@ int main (int argc, char **argv)
}
cout << endl;
// Dump configuration
json j_config = json::parse(app.config_to_str(true, true));
if (!j_labels.empty())
j_config["labels"] = j_labels;
//////////////////////////////////
// Display time and date
time_t rawtime;
......@@ -1556,6 +1570,7 @@ int main (int argc, char **argv)
// Complete main output
output.set("config", j_config);
output.set("diversity", jsonDiversity);
output.set("samples", "log", { stream_segmentation_info.str() }) ;
output.set("reads", {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment