Commit 340d4ef2 authored by DAUXAIS Yann

Add tid list output for extracted chronicles with --tid (experimental)

parent 1e718022
......@@ -19,6 +19,7 @@ CDA::testSub(
std::vector<std::pair<int*, int*>> values = r.getValues();
unsigned int vf = 0;
std::vector<unsigned int> tid_list_pos;
for (unsigned int i = 0; i < data.size(); i++) {
bool match = false;
......@@ -45,12 +46,16 @@ CDA::testSub(
}
}
if (match) vf++;
if (match) {
vf++;
if (this->extract_with_tid) tid_list_pos.push_back(i);
}
}
if (vf >= (unsigned) f) {
unsigned int vfn = 0;
for (unsigned int i = 0; i < data_b.size(); i++) {
std::vector<unsigned int> tid_list_neg;
for (unsigned int i = 0; i < data_b.size(); i++) {
bool match = false;
for (unsigned int j = 0; j < data_b[i].size() && !match; j++) {
......@@ -66,7 +71,10 @@ CDA::testSub(
match = true;
}
if (match) vfn++;
if (match) {
vfn++;
if (this->extract_with_tid) tid_list_neg.push_back(i);
}
}
if (vfn == 0 || double(vf)/(double(vfn)*ratio) >= g) {
......@@ -83,10 +91,19 @@ CDA::testSub(
tcis.push_back(TCIterator(std::shared_ptr<TC>(tc), true));
}
if (txt)
std::cout << Chronicle(elements, tcis, vf, vfn).txt(inv_events, &disc) << std::endl;
if (txt) {
if (!this->extract_with_tid)
std::cout << Chronicle(elements, tcis, vf, vfn).txt(inv_events, &disc) << std::endl;
else
std::cout << Chronicle(elements, tcis, vf, vfn, tid_list_pos, tid_list_neg).txt(inv_events, &disc) << std::endl;
}
else
std::cout << Chronicle(elements, tcis, vf, vfn).json(inv_events, &disc) << std::endl;
{
if (!this->extract_with_tid)
std::cout << Chronicle(elements, tcis, vf, vfn).json(inv_events, &disc) << std::endl;
else
std::cout << Chronicle(elements, tcis, vf, vfn, tid_list_pos, tid_list_neg).json(inv_events, &disc) << std::endl;
}
}
}
}
......
......@@ -35,7 +35,8 @@ class CDA {
bool stop; //!< Initialized to false, switch stop to true will break the main run(...) loop.
bool next;
int nb_tc;
bool extract_with_tid;
std::vector<int> stats;
/*
......@@ -209,7 +210,7 @@ class CDA {
) const;
public :
CDA():stop(false),next(false),nb_tc(0){}
CDA():stop(false),next(false),nb_tc(0),extract_with_tid(false){}
/**
* Run the CDA algorithm (frequent chronicles).
......
......@@ -84,6 +84,17 @@ Chronicle::txt (
else
os << "f: " << frequency << "/" << ofrequency << std::endl;
if (tid_list_pos.size() > 0) {
os << "positives: [" << tid_list_pos[0];
for (unsigned i = 1; i < tid_list_pos.size(); i++) os << ", " << tid_list_pos[i];
os << "]" << std::endl;
}
if (tid_list_neg.size() > 0) {
os << "negatives: [" << tid_list_neg[0];
for (unsigned i = 1; i < tid_list_neg.size(); i++) os << ", " << tid_list_neg[i];
os << "]" << std::endl;
}
return os.str();
}
......@@ -119,6 +130,16 @@ Chronicle::json (
}
}
os << "],";
if (tid_list_pos.size() > 0) {
os << "\"positives\": [\"" << tid_list_pos[0] << "\"";
for (unsigned i = 1; i < tid_list_pos.size(); i++) os << ", \"" << tid_list_pos[i] << "\"";
os << "]," << std::endl;
}
if (tid_list_neg.size() > 0) {
os << "\"negatives\": [\"" << tid_list_neg[0] << "\"";
for (unsigned i = 1; i < tid_list_neg.size(); i++) os << ", \"" << tid_list_neg[i] << "\"";
os << "]," << std::endl;
}
if (ofrequency == -1)
os << "\"frequency\": \"" << frequency << "\"}" << std::endl;
else
......@@ -156,4 +177,5 @@ Chronicle::indice (
const int& j
) {
return j*(j-1)/2 +i;
}
\ No newline at end of file
}
......@@ -9,7 +9,7 @@
class Chronicle {
private:
static int nextId;
static int nextId;
protected :
......@@ -25,10 +25,30 @@ class Chronicle {
int id;
int frequency;
int ofrequency;
std::vector<unsigned int> tid_list_pos;
std::vector<unsigned int> tid_list_neg;
Chronicle(){}
/**
 * Build a chronicle that also carries its two tid lists.
 *
 * The new object takes its id from the static nextId counter (which is
 * post-incremented here) and starts with _flag set to 0.
 *
 * @param e            copied into _elements.
 * @param c            copied into _constraints.
 * @param f            stored as frequency.
 * @param of           stored as ofrequency.
 * @param tid_list_pos copied into the member of the same name; Chronicle::txt
 *                     and Chronicle::json print it under the "positives" label
 *                     when it is not empty.
 * @param tid_list_neg copied into the member of the same name; Chronicle::txt
 *                     and Chronicle::json print it under the "negatives" label
 *                     when it is not empty.
 */
Chronicle(
const std::vector<int>& e,
const std::vector <TCIterator>& c,
const int f,
const int of,
const std::vector<unsigned int>& tid_list_pos,
const std::vector<unsigned int>& tid_list_neg
):
_elements(e),
_constraints(c),
_flag(0),
id(nextId++), // consumes the next value of the class-wide id counter
frequency(f),
ofrequency(of),
// In a mem-initializer the name outside the parentheses is the member and
// the name inside is looked up in constructor scope, i.e. the parameter.
tid_list_pos(tid_list_pos),
tid_list_neg(tid_list_neg)
{}
Chronicle(
const std::vector<int>& e,
const std::vector<TCIterator>& c,
......
......@@ -45,6 +45,7 @@ public:
("IBM,u", "Use IBM format for files instead of sequence per line")
("mincs", po::value<unsigned> (&mincs), "Minimum size of extracted chronicles")
("maxcs", po::value<unsigned> (&maxcs), "Maximum size of extracted chronicles")
("tid", "Output the tid list in addition of extracted chronicles (first line is 0) /!\\ has to be proven to really correspond to the right line numbers")
("close,c", "Extract frequent closed chronicles or discriminant chronicles from closed multisets if --disc is used")
("json,j", "Output format is json instead of plain text")
("verbose,v", "The program will speak")
......@@ -233,6 +234,7 @@ private:
if (vm.count("not_calc_freq")) calc_freq = false;
if (vm.count("json")) txt = false;
if (vm.count("verbose")) verbose = true;
if(vm.count("tid")) tid = true;
positives.open(fname);
if (!positives) {
......@@ -367,6 +369,7 @@ public:
bool dif;
bool txt;
bool verbose;
bool tid;
std::map<std::string, int> events;
std::vector<std::string> inv_events;
......@@ -377,7 +380,7 @@ public:
Options() : gname(""), ibm(false), cwm(0), mincs(0), maxcs(0), gmin(2),
calc_freq(true), close(false), episode(false), disc(false), dif(false), txt(true), verbose(false),
b(NULL), b2(NULL) {}
tid(false), b(NULL), b2(NULL) {}
};
int main(const int argc, char* argv[])
......@@ -405,6 +408,7 @@ int main(const int argc, char* argv[])
if (options.disc)
algo.inv_events = options.inv_events;
algo.txt = options.txt;
algo.extract_with_tid = options.tid;
std::vector<Chronicle> res;
bool disc_exec = true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment