Commit 656ddd25 authored by Mikaël Salson's avatar Mikaël Salson

Merge branch 'feature-a/3358-output' into 'dev'

Feature a/3358 output

See merge request !320
parents 07961c01 932b88af
Pipeline #44834 passed with stages
in 7 minutes and 14 seconds
#include "output.h"
/**
 * Store a value under a top-level key of the underlying JSON document.
 * A value already stored under that key is overwritten.
 * @param key: top-level key
 * @param val: value stored under that key
 */
void Output::set(string key, json val)
{
  json &slot = j[key];
  slot = val;
}
/**
 * Store a value one level below a top-level key, i.e. at j[key][subkey].
 * @param key: top-level key
 * @param subkey: key inside the object stored under key
 * @param val: value stored at that position
 */
void Output::set(string key, string subkey, json val)
{
  json &section = j[key];
  section[subkey] = val;
}
/**
 * Store a value two levels below a top-level key, i.e. at j[key][subkey][subsubkey].
 * @param key: top-level key
 * @param subkey: key inside the object stored under key
 * @param subsubkey: key inside the object stored under subkey
 * @param val: value stored at that position
 */
void Output::set(string key, string subkey, string subsubkey, json val)
{
  json &section = j[key];
  json &subsection = section[subkey];
  subsection[subsubkey] = val;
}
/**
 * Store a value inside the KEY_SEG section of the clone.
 * @param subkey: key inside the KEY_SEG section
 * @param val: value stored under that key
 */
void CloneOutput::setSeg(string subkey, json val)
{
  const string seg_section = KEY_SEG;
  Output::set(seg_section, subkey, val);
}
/**
 * Record a warning on this output by forwarding to json_add_warning()
 * with the underlying JSON document.
 * @param code: warning code
 * @param msg: warning message
 * @param level: warning level
 */
void Output::add_warning(string code, string msg, string level)
{
  json_add_warning(this->j, code, msg, level);
}
/**
 * @return a copy of the JSON document built so far for this clone
 */
json CloneOutput::toJson()
{
  return this->j;
}
/**
 * Build a sample output whose JSON document starts as a copy of init.
 * @param init: initial content of the JSON document
 */
SampleOutput::SampleOutput(json init)
{
  this->j = init;
}
/**
 * Delete every CloneOutput stored in the clones map.
 */
SampleOutput::~SampleOutput()
{
  for (auto entry = clones.begin(); entry != clones.end(); ++entry)
  {
    delete entry->second;
  }
}
/**
 * Base implementation: writes nothing to the stream.
 * @param s: output stream (unused here)
 */
void SampleOutput::out(ostream &s)
{
  (void) s; // intentionally unused
}
/**
 * Register a clone under a junction.
 * The stored pointer is deleted by the SampleOutput destructor. If another
 * clone is already registered under the same junction, that previous clone is
 * deleted here instead of being silently dropped (it would otherwise leak).
 * @param junction: junction identifying the clone
 * @param clone: clone to store under that junction
 */
void SampleOutput::addClone(junction junction, CloneOutput *clone)
{
  auto existing = clones.find(junction);

  if (existing == clones.end())
  {
    clones[junction] = clone;
    return;
  }

  // Re-adding the very same pointer must not free it.
  if (existing->second != clone)
    delete existing->second;

  existing->second = clone;
}
/**
 * Get the clone registered under a junction, creating and registering
 * a new one when there is none.
 * @param junction: junction identifying the clone
 * @return the clone stored under that junction
 */
CloneOutput* SampleOutput::getClone(junction junction)
{
  auto found = clones.find(junction);
  if (found != clones.end())
    return found->second;

  CloneOutput *created = new CloneOutput;
  addClone(junction, created);
  created->set("sequence", 0); // TODO need to compute representative sequence for this case
  return created;
}
/**
 * Write the sample to a stream as JSON, indented by 2 spaces.
 * The JSON of every stored clone is collected, in map order, under the
 * "clones" key of the sample document before dumping it.
 * @param s: stream receiving the JSON text
 */
void SampleOutputVidjil::out(ostream &s)
{
  json clone_list;

  for (auto entry = clones.begin(); entry != clones.end(); ++entry)
  {
    CloneOutput *clone = entry->second;
    clone_list.push_back(clone->toJson());
  }

  j["clones"] = clone_list;

  const string dumped = j.dump(2);
  s << dumped;
}
#ifndef OUTPUT_H
#define OUTPUT_H
#include <string>
#include <fstream>
#include <iostream>
#include "tools.h"
#include "../lib/json.hpp"
#define KEY_SEG "seg"
using namespace std;
using json = nlohmann::json;
/**
 * Common base of the output objects: wraps a JSON document
 * and offers setters writing into it.
 */
class Output
{
public:
  /**
   * Setters storing val at j[key], j[key][subkey]
   * or j[key][subkey][subsubkey].
   */
  void set(string key, json val);
  void set(string key, string subkey, json val);
  void set(string key, string subkey, string subsubkey, json val);

  /**
   * Record a warning (code, message, level) on the JSON document.
   */
  void add_warning(string code, string msg, string level);

protected:
  // The underlying JSON document
  json j;
};
/**
 * Output of a single clone.
 */
class CloneOutput : public Output
{
public:
  /**
   * @return a copy of the JSON document of this clone
   */
  json toJson();

  /**
   * Store val under subkey inside the KEY_SEG section of the clone.
   */
  void setSeg(string subkey, json val);
};
/**
 * Output of a whole sample: a JSON document plus the clones of the sample,
 * indexed by junction.
 *
 * NOTE(review): the destructor deletes every stored clone and no copy
 * operations are declared, so copying a SampleOutput would lead to the same
 * clones being deleted twice. Handle instances by pointer or reference.
 */
class SampleOutput : public Output
{
protected:
  // Clones by junction; the pointers stored here are deleted by the destructor
  map <junction, CloneOutput*> clones;

public:
  /**
   * @param init: initial content of the JSON document
   */
  SampleOutput(json init);
  virtual ~SampleOutput();

  /**
   * Store a clone under a junction.
   */
  void addClone(junction junction, CloneOutput *clone);

  // get a clone, or create a new one if needed
  CloneOutput* getClone(junction junction);

  json toJson();

  /**
   * Write the sample to a stream.
   * Virtual so that calling out() through a SampleOutput pointer or reference
   * reaches the implementation of the derived format class instead of the
   * base implementation.
   */
  virtual void out(ostream &s);
};
/*
class CloneOutputFormatter
{
}
class CloneOutputFormatterCSV(CloneOutputFormatter)
{
}
class CloneOutputFormatterJson(CloneOutputFormatter)
{
}
*/
// Native Json .vidjil format
// See vidjil-format.md
class SampleOutputVidjil : public SampleOutput
{
public:
void out(ostream &s);
};
#endif
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <cassert> #include <cassert>
#include "segment.h" #include "segment.h"
#include "tools.h" #include "tools.h"
#include "output.h"
#include "../lib/json.hpp" #include "../lib/json.hpp"
#include "affectanalyser.h" #include "affectanalyser.h"
#include <sstream> #include <sstream>
...@@ -70,10 +71,9 @@ string AlignBox::getSequence(string sequence) { ...@@ -70,10 +71,9 @@ string AlignBox::getSequence(string sequence) {
return sequence.substr(start, end-start+1); return sequence.substr(start, end-start+1);
} }
void AlignBox::addToJson(json &seg, int alternative_genes) { void AlignBox::addToOutput(CloneOutput *clone, int alternative_genes) {
json j; json j;
j["name"] = ref_label; j["name"] = ref_label;
if (key != "3") // no end information for J if (key != "3") // no end information for J
...@@ -88,14 +88,16 @@ void AlignBox::addToJson(json &seg, int alternative_genes) { ...@@ -88,14 +88,16 @@ void AlignBox::addToJson(json &seg, int alternative_genes) {
j["delLeft"] = del_left; j["delLeft"] = del_left;
} }
seg[key] = j ; clone->setSeg(key, j) ;
/*Export the N best genes if threshold parameter is specified*/ /*Export the N best genes if threshold parameter is specified*/
if(rep && !this->score.empty() && rep->size() <= (int)this->score.size() && alternative_genes > 0 && alternative_genes <= (int)this->score.size()){ if(rep && !this->score.empty() && rep->size() <= (int)this->score.size() && alternative_genes > 0 && alternative_genes <= (int)this->score.size()){
seg[key + "alt"] = json::array(); json jalt = json::array();
for(int i = 0; i < alternative_genes;++i){ for(int i = 0; i < alternative_genes;++i){
int r = this->score[i].second; int r = this->score[i].second;
seg[key + "alt"].push_back(json::object({{"name",rep->label(r)}})); jalt.push_back(json::object({{"name",rep->label(r)}}));
} }
clone->setSeg(key + "alt", jalt);
} }
} }
...@@ -1330,7 +1332,7 @@ void FineSegmenter::findCDR3(){ ...@@ -1330,7 +1332,7 @@ void FineSegmenter::findCDR3(){
// Reminder: JUNCTIONstart is 1-based // Reminder: JUNCTIONstart is 1-based
} }
void FineSegmenter::checkWarnings(json &json_clone) void FineSegmenter::checkWarnings(CloneOutput *clone)
{ {
if (isSegmented()) if (isSegmented())
{ {
...@@ -1339,79 +1341,82 @@ void FineSegmenter::checkWarnings(json &json_clone) ...@@ -1339,79 +1341,82 @@ void FineSegmenter::checkWarnings(json &json_clone)
&& (box_J->ref_label.find("IGHJ1") != string::npos) && (box_J->ref_label.find("IGHJ1") != string::npos)
&& ((getMidLength() >= 90) || (getMidLength() <= 94))) && ((getMidLength() >= 90) || (getMidLength() <= 94)))
{ {
json_add_warning(json_clone, "W61", "Non-recombined D7-27/J1 sequence", LEVEL_ERROR); clone->add_warning("W61", "Non-recombined D7-27/J1 sequence", LEVEL_ERROR);
} }
} }
} }
json FineSegmenter::toJson(){ void FineSegmenter::toOutput(CloneOutput *clone){
json seg; json seg;
for (AlignBox *box: boxes) for (AlignBox *box: boxes)
{ {
box->addToJson(seg, this->alternative_genes); box->addToOutput(clone, this->alternative_genes);
} }
if (isSegmented()) { if (isSegmented()) {
clone->set("name", code);
if (isDSegmented()) { if (isDSegmented()) {
seg["N1"] = seg_N1.size(); clone->setSeg("N1", seg_N1.size());
seg["N2"] = seg_N2.size(); clone->setSeg("N2", seg_N2.size());
} }
else { else {
seg["N"] = seg_N.size(); clone->setSeg("N", seg_N.size());
} }
if (CDR3start >= 0) { if (CDR3start >= 0) {
seg["cdr3"] = { clone->setSeg("cdr3", {
{"start", CDR3start}, {"start", CDR3start},
{"stop", CDR3end}, {"stop", CDR3end},
{"aa", CDR3aa} {"aa", CDR3aa}
}; });
} }
if (JUNCTIONstart >= 0) { if (JUNCTIONstart >= 0) {
seg["junction"] = { clone->setSeg("junction", {
{"start", JUNCTIONstart}, {"start", JUNCTIONstart},
{"stop", JUNCTIONend}, {"stop", JUNCTIONend},
{"aa", JUNCTIONaa}, {"aa", JUNCTIONaa},
{"productive", JUNCTIONproductive} {"productive", JUNCTIONproductive}
}; });
} }
} }
else // not segmented
return seg; {
clone->set("name", label);
}
} }
json toJsonSegVal(string s) { json toJsonSegVal(string s) {
return {{"val", s}}; return {{"val", s}};
} }
json KmerSegmenter::toJson() { void KmerSegmenter::toOutput(CloneOutput *clone) {
json seg; json seg;
int sequenceSize = sequence.size(); int sequenceSize = sequence.size();
if (evalue > NO_LIMIT_VALUE) if (evalue > NO_LIMIT_VALUE)
seg["evalue"] = toJsonSegVal(scientific_string_of_double(evalue)); clone->setSeg("evalue", toJsonSegVal(scientific_string_of_double(evalue)));
if (evalue_left > NO_LIMIT_VALUE) if (evalue_left > NO_LIMIT_VALUE)
seg["evalue_left"] = toJsonSegVal(scientific_string_of_double(evalue_left)); clone->setSeg("evalue_left", toJsonSegVal(scientific_string_of_double(evalue_left)));
if (evalue_right > NO_LIMIT_VALUE) if (evalue_right > NO_LIMIT_VALUE)
seg["evalue_right"] = toJsonSegVal(scientific_string_of_double(evalue_right)); clone->setSeg("evalue_right", toJsonSegVal(scientific_string_of_double(evalue_right)));
if (getKmerAffectAnalyser() != NULL) { if (getKmerAffectAnalyser() != NULL) {
seg["affectValues"] = { clone->setSeg("affectValues", {
{"start", 1}, {"start", 1},
{"stop", sequenceSize}, {"stop", sequenceSize},
{"seq", getKmerAffectAnalyser()->toStringValues()} {"seq", getKmerAffectAnalyser()->toStringValues()}
}; });
seg["affectSigns"] = { clone->setSeg("affectSigns", {
{"start", 1}, {"start", 1},
{"stop", sequenceSize}, {"stop", sequenceSize},
{"seq", getKmerAffectAnalyser()->toStringSigns()} {"seq", getKmerAffectAnalyser()->toStringSigns()}
}; });
} }
return seg;
} }
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "bioreader.hpp" #include "bioreader.hpp"
#include "dynprog.h" #include "dynprog.h"
#include "tools.h" #include "tools.h"
#include "output.h"
#include "germline.h" #include "germline.h"
#include "kmerstore.h" #include "kmerstore.h"
#include "kmeraffect.h" #include "kmeraffect.h"
...@@ -102,7 +103,7 @@ class AlignBox ...@@ -102,7 +103,7 @@ class AlignBox
AlignBox(string key = "", string color=""); AlignBox(string key = "", string color="");
string getSequence(string sequence); string getSequence(string sequence);
void addToJson(json &seg, int alternative_genes=NO_LIMIT_VALUE); void addToOutput(CloneOutput *clone, int alternative_genes);
/** /**
* Returns 'V', 'D', 'J', or possibly '5', '4', '3', '?', depending on the ref_label and on the key * Returns 'V', 'D', 'J', or possibly '5', '4', '3', '?', depending on the ref_label and on the key
...@@ -326,7 +327,7 @@ class KmerSegmenter : public Segmenter ...@@ -326,7 +327,7 @@ class KmerSegmenter : public Segmenter
KmerAffectAnalyser *getKmerAffectAnalyser() const; KmerAffectAnalyser *getKmerAffectAnalyser() const;
string getInfoLineWithAffects() const; string getInfoLineWithAffects() const;
json toJson(); void toOutput(CloneOutput *clone);
private: private:
void computeSegmentation(int strand, KmerAffect left, KmerAffect right, void computeSegmentation(int strand, KmerAffect left, KmerAffect right,
...@@ -402,8 +403,8 @@ class FineSegmenter : public Segmenter ...@@ -402,8 +403,8 @@ class FineSegmenter : public Segmenter
*/ */
void findCDR3(); void findCDR3();
void checkWarnings(json &json_clone); void checkWarnings(CloneOutput *clone);
json toJson(); void toOutput(CloneOutput *clone);
}; };
......
...@@ -71,10 +71,10 @@ string spaced(const string &input, const string &seed) { ...@@ -71,10 +71,10 @@ string spaced(const string &input, const string &seed) {
} }
string string_of_int(int number) string string_of_int(int number, int w)
{ {
stringstream ss; stringstream ss;
ss << number ; ss << setfill('0') << setw(w) << number ;
return ss.str(); return ss.str();
} }
......
#ifndef TOOLS_H #ifndef TOOLS_H
#define TOOLS_H #define TOOLS_H
using namespace std ;
typedef string junction ;
// error // error
#define ERROR_STRING "[error] " #define ERROR_STRING "[error] "
...@@ -107,7 +109,7 @@ template <class T> ...@@ -107,7 +109,7 @@ template <class T>
bool pair_occurrence_sort(pair<T, int> a, pair<T, int> b); bool pair_occurrence_sort(pair<T, int> a, pair<T, int> b);
string string_of_int(int number); string string_of_int(int number, int pad_to_width=0);
string fixed_string_of_float(float number, int precision); string fixed_string_of_float(float number, int precision);
string scientific_string_of_double(double number); string scientific_string_of_double(double number);
string string_of_map(map <string, string> m, const string &before); string string_of_map(map <string, string> m, const string &before);
......
...@@ -266,35 +266,25 @@ void WindowsStorage::clearSequences(){ ...@@ -266,35 +266,25 @@ void WindowsStorage::clearSequences(){
seqs_by_window.clear(); seqs_by_window.clear();
} }
json WindowsStorage::sortedWindowsToJson(map <junction, json> json_data_segment, int max_json_output, bool delete_all) { void WindowsStorage::sortedWindowsToOutput(SampleOutput *output, int max_output, bool delete_all) {
json windowsArray;
int top = 1; int top = 1;
for (list<pair <junction, size_t> >::iterator it = sort_all_windows.begin(); for (list<pair <junction, size_t> >::iterator it = sort_all_windows.begin();
it != sort_all_windows.end(); ) it != sort_all_windows.end(); )
{ {
json windowsList; CloneOutput *clone = output->getClone(it->first);
if (json_data_segment.find(it->first) != json_data_segment.end()){
windowsList = json_data_segment[it->first];
}else{
windowsList["sequence"] = 0; //TODO need to compute representative sequence for this case
}
json reads = {it->second};
windowsList["id"] = it->first;
if (status_by_window[it->first][SEG_CHANGED_WINDOW]) if (status_by_window[it->first][SEG_CHANGED_WINDOW])
json_add_warning(windowsList, "W50", "Short or shifted window"); clone->add_warning("W50", "Short or shifted window", LEVEL_WARN);
clone->set("id", it->first);
windowsList["reads"] = reads; clone->set("reads", {it->second});
windowsList["top"] = top++; clone->set("top", top++);
windowsList["germline"] = germline_by_window[it->first]->code; clone->set("germline", germline_by_window[it->first]->code);
windowsList["seg_stat"] = this->statusToJson(it->first); clone->set("seg_stat", this->statusToJson(it->first));
windowsArray.push_back(windowsList);
if (delete_all) { if (delete_all) {
germline_by_window.erase(it->first); germline_by_window.erase(it->first);
status_by_window.erase(it->first); status_by_window.erase(it->first);
...@@ -302,11 +292,9 @@ json WindowsStorage::sortedWindowsToJson(map <junction, json> json_data_segment, ...@@ -302,11 +292,9 @@ json WindowsStorage::sortedWindowsToJson(map <junction, json> json_data_segment,
} else { } else {
it++; it++;
} }
if (top == max_json_output + 1) if (top == max_output + 1)
break ; break ;
} }
return windowsArray;
} }
ostream &WindowsStorage::windowToStream(ostream &os, junction window, int num_seq, ostream &WindowsStorage::windowToStream(ostream &os, junction window, int num_seq,
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "read_score.h" #include "read_score.h"
#include "representative.h" #include "representative.h"
#include "stats.h" #include "stats.h"
#include "output.h"
#include "../lib/json_fwd.hpp" #include "../lib/json_fwd.hpp"
#define NB_BINS 30 #define NB_BINS 30
...@@ -27,8 +28,6 @@ ...@@ -27,8 +28,6 @@
using namespace std; using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
typedef string junction ;
class WindowsStorage { class WindowsStorage {
private: private:
map<junction, BinReadStorage > seqs_by_window; map<junction, BinReadStorage > seqs_by_window;
...@@ -222,9 +221,9 @@ class WindowsStorage { ...@@ -222,9 +221,9 @@ class WindowsStorage {
/** /**
* @param delete_all: Delete the objects while they are inserted into the JSON. This prevents the memory * @param delete_all: Delete the objects while they are inserted into the JSON. This prevents the memory
* from continuously increasing (see #2120, #3387) * from continuously increasing (see #2120, #3387)
* @return a JSON object containing all the information * @post insert the information into the SampleOutput clones
*/ */
json sortedWindowsToJson(map<junction, json> json_data_segment, int max_json_output, bool delete_all=false); void sortedWindowsToOutput(SampleOutput *output, int max_output, bool delete_all=false);
/** /**
* Clear the seqs_by_window map. * Clear the seqs_by_window map.
......
...@@ -9,10 +9,10 @@ $ Do not segment the TR recombinations ...@@ -9,10 +9,10 @@ $ Do not segment the TR recombinations
4:TR.* UNSEG 4:TR.* UNSEG
$ Report the unsegmented sequences in the json output $ Report the unsegmented sequences in the json output
1: "germline": "not analyzed", "id": "TRA 1: "germline": "not analyzed", "id": "000001", "name": "TRA
1: "germline": "not analyzed", "id": "TRB 1: "germline": "not analyzed", "id": "000002", "name": "TRB
1: "germline": "not analyzed", "id": "TRG 1: "germline": "not analyzed", "id": "000003", "name": "TRG
1: "germline": "not analyzed", "id": "TRD 1: "germline": "not analyzed", "id": "000004", "name": "TRD
$ Count the unsegmented sequences in the json output $ Count the unsegmented sequences in the json output
1: "not analyzed": .4. 1: "not analyzed": .4.
......
...@@ -7,13 +7,12 @@ ...@@ -7,13 +7,12 @@
#include "core/dynprog.h" #include "core/dynprog.h"
#include "core/bioreader.hpp" #include "core/bioreader.hpp"
#include "core/segment.h" #include "core/segment.h"
#include "core/output.h"
#include "core/windowExtractor.h" #include "core/windowExtractor.h"