Commit 64a04297 authored by DAUXAIS Yann

Public deposit

parents
# Build script for the Abstraction static library.
# The project (and therefore target) name comes from Abstraction_API_LIB, which must be
# defined by the scope that includes this directory.
Project(${Abstraction_API_LIB})
CMAKE_MINIMUM_REQUIRED( VERSION 2.8 )

# Every src/*.cpp file (paths relative to this directory) is compiled into the library.
FILE(GLOB CPPFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} src/*.cpp)
MESSAGE(STATUS ">> Files: ${CPPFILES}")

# set(... PARENT_SCOPE) writes the variable in the parent scope only; the local HEADERS
# keeps its previous value. Update the local copy first so that include_directories()
# below really sees this module's src directory, then export the same list to the parent.
list(APPEND HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src)
set(HEADERS "${HEADERS}" PARENT_SCOPE)
include_directories(${HEADERS})

ADD_LIBRARY( ${PROJECT_NAME} STATIC ${CPPFILES} )

# NOTE(review): EXECUTABLE_OUTPUT_PATH only relocates executables; it is kept as-is here,
# confirm whether the static library was also meant to be redirected to OUTPUT_DIR.
IF(OUTPUT_DIR_FOUND)
SET(EXECUTABLE_OUTPUT_PATH ${OUTPUT_DIR})
ENDIF(OUTPUT_DIR_FOUND)
set( CMAKE_VERBOSE_MAKEFILE off )
#include "Abstraction.hh"
/**
 * Overload selected when the destination vector holds plain `int` values.
 * Appends `start` to `vec_s`; the unnamed second argument is ignored.
 *
 * @param vec_s Vector receiving the new value.
 * @param start Value appended at the back of `vec_s`.
 */
void
Abstraction::addPunct (
    std::vector<int>& vec_s,
    const int&,
    const int& start
) {
    vec_s.emplace_back(start);
}
void
Abstraction::addPunct (
std::vector<Event>& vec_s,
const int& e1,
const int& start
) {
vec_s.push_back(Event(e1,start,start));
}
\ No newline at end of file
#ifndef __ABSTRACTION_HH_
#define __ABSTRACTION_HH_
#include <string>
#include <vector>
#include <map>
#include <algorithm>
#include "Event.hh"
/**
 * Static-only helper class (the default constructor is deleted).
 *
 * Legacy note from the original author: this class is not meant to exist in this version
 * of the code; its only remaining role is to generate the pathways for DCM.
 */
class Abstraction {
private :
    /**
     * Appends `start` to a vector of plain `int` values; the second argument is unused.
     */
    static void addPunct (std::vector<int>& vec_s, const int&, const int& start);

    /**
     * Appends to `vec_s` an `Event` built from `e1` with `start` as both of its times.
     */
    static void addPunct (std::vector<Event>& vec_s, const int& e1, const int& start);

public :
    Abstraction() = delete;

    /**
     * Rebuilds `pathway` from the values stored in `pathway_events`.
     *
     * @tparam T Element type stored in `pathway`; one of the `addPunct` overloads must
     *           accept a `std::vector<T>&`.
     * @param event_size Number of entries allocated in the outer dimension of `pathway`.
     * @param pathway Output container, overwritten by the call.
     * @param pathway_events Input values; passed by non-const reference because its
     *                       innermost vectors are sorted in place.
     */
    template <typename T>
    static void generatePathways(
        unsigned int event_size,
        std::vector<std::vector<std::vector<T>>>& pathway,
        std::vector<std::vector<std::vector<int>>>& pathway_events
    );
};
/**
 * Rebuilds `pathway` from the per-cell value lists stored in `pathway_events`.
 *
 * `pathway` is overwritten with `event_size` rows of `pathway_events[0].size()` empty
 * vectors (zero columns when `pathway_events` is empty). Then, for every non-empty cell
 * `pathway_events[i][j]`, the cell is sorted in ascending order in place and each of its
 * values is forwarded, in that order, to `addPunct(pathway[i][j], i, value)`.
 *
 * Preconditions that are NOT checked: `pathway_events.size() <= event_size`, and no row
 * of `pathway_events` is longer than its first row. Violating either makes
 * `pathway[i][j]` an out-of-range access.
 *
 * @tparam T Element type stored in `pathway`; an `addPunct` overload must accept a
 *           `std::vector<T>&`.
 * @param event_size Number of rows allocated in `pathway`.
 * @param pathway Output grid, replaced entirely by the call.
 * @param pathway_events Input grid; every non-empty cell is sorted in place.
 */
template <typename T>
void
Abstraction::generatePathways(
    unsigned int event_size,
    std::vector<std::vector<std::vector<T>>>& pathway,
    std::vector<std::vector<std::vector<int>>>& pathway_events
) {
    // Reading pathway_events[0] on an empty container is undefined behaviour, so an
    // empty input yields rows without any column instead.
    const std::size_t nb_columns = pathway_events.empty() ? 0 : pathway_events[0].size();
    pathway.assign(event_size, std::vector<std::vector<T>>(nb_columns));

    for (std::size_t i = 0; i < pathway_events.size(); ++i) {
        for (std::size_t j = 0; j < pathway_events[i].size(); ++j) {
            std::vector<int>& cell = pathway_events[i][j];
            if (cell.empty())
                continue;
            std::sort(cell.begin(), cell.end());
            // The former single-segment `sep` bookkeeping always covered the whole
            // cell, so a plain traversal visits exactly the same values in the same order.
            for (const int start : cell)
                addPunct(pathway[i][j], static_cast<int>(i), start);
        }
    }
}
#endif
# Build script for the Base static library.
# The project (and therefore target) name comes from Base_API_LIB, which must be defined
# by the scope that includes this directory.
Project(${Base_API_LIB})
CMAKE_MINIMUM_REQUIRED( VERSION 2.8 )

# Every src/*.cpp file (paths relative to this directory) is compiled into the library.
FILE(GLOB CPPFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} src/*.cpp)
MESSAGE(STATUS ">> Files: ${CPPFILES}")

# set(... PARENT_SCOPE) writes the variable in the parent scope only; the local HEADERS
# keeps its previous value. Update the local copy first so that include_directories()
# below really sees this module's src directory, then export the same list to the parent.
list(APPEND HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src)
set(HEADERS "${HEADERS}" PARENT_SCOPE)
include_directories(${HEADERS})

ADD_LIBRARY( ${PROJECT_NAME} STATIC ${CPPFILES} )
TARGET_LINK_LIBRARIES( ${PROJECT_NAME} ${Ripper_API_LIB} ${Chronicle_API_LIB} )

# NOTE(review): EXECUTABLE_OUTPUT_PATH only relocates executables; it is kept as-is here,
# confirm whether the static library was also meant to be redirected to OUTPUT_DIR.
IF(OUTPUT_DIR_FOUND)
SET(EXECUTABLE_OUTPUT_PATH ${OUTPUT_DIR})
ENDIF(OUTPUT_DIR_FOUND)
set( CMAKE_VERBOSE_MAKEFILE off )
#include "Base.hh"
/**
 * Tells whether chronicle `c` occurs in at least `fmin` sequences of this base.
 *
 * Sequences `0 .. size()-1` are tested one by one with `occur(c, seq, cwm)`.
 * When `freq` is false the scan stops as soon as the count reaches `fmin`.
 * When `freq` is true all sequences are scanned and, if the threshold is met,
 * the final count is stored in `c.frequency`.
 *
 * @param c Chronicle to test; its `frequency` field is written only when `freq` is true
 *          and the chronicle is frequent.
 * @param fmin Minimal number of sequences in which `c` must occur.
 * @param freq True to force the complete count.
 * @param cwm Forwarded unchanged to `occur`.
 * @return True when the number of matching sequences is at least `fmin`.
 *
 * The third and fourth parameters are unnamed and unused by this implementation.
 */
bool
Base::frequent (
    Chronicle& c,
    const unsigned int& fmin,
    std::list<Chronicle>&,
    bool&,
    const bool& freq,
    const int& cwm
) const {
    const int nb_sequences = size();
    unsigned int support = 0;

    for (int seq = 0; seq < nb_sequences; ++seq) {
        if (!occur(c, seq, cwm))
            continue;
        ++support;
        // Without an exact count requested, reaching the threshold is enough.
        if (!freq && support == fmin)
            return true;
    }

    if (support < fmin)
        return false;
    if (freq)
        c.frequency = support;
    return true;
}
void
Base::getWindow (
const Chronicle& c,
const int& e,
const std::vector<int>& match,
const int& cwm,
int& start,
int& end,
int&,
std::vector<int>& stop
) const {
int time;
for (int x = 0; x < e; x++) {
time = match[x];
if (cwm > -1) {
if (time - cwm > start)
start = time - cwm;
if (time + cwm < end)
end = time + cwm;
}
if (c.getElement(x) == c.getElement(e))
stop.push_back(time);
const TCIterator p_constraint = c.getConstraint(c.indice(x,e));
if (!p_constraint.isNull()) {
int constraint_s = start;
int constraint_e = end;
if (p_constraint->isStart()) constraint_s = p_constraint->getStart() + time;
if (p_constraint->isEnd()) constraint_e = p_constraint->getEnd() + time;
if (constraint_s > start) start = constraint_s;
if (constraint_e < end) end = constraint_e;
}
}
std::sort(stop.begin(),stop.end());
}
void
Base::getWindow (
const Chronicle& c,
const int& e,
const std::vector<Event>& match,
const int& cwm,
int& start,
int& end,
int& pos,
std::vector<int>& stop
) const {
int time;
for (int x = 0; x < e; x++) {
Event el_x = match[x];
time = el_x.getTime();
if (cwm > -1) {
if (time - cwm > start)
start = time - cwm;
if (time + cwm < end)
end = time + cwm;
}
if (el_x.getElement() == c.getElement(e))
stop.push_back(time);
const TCIterator p_constraint = c.getConstraint(c.indice(x,e));
if (!p_constraint.isNull()) {
int constraint_s = start;
int constraint_e = end;
if (p_constraint->isStart()) constraint_s = p_constraint->getStart() + time;
if (p_constraint->isEnd()) constraint_e = p_constraint->getEnd() + time;
if (constraint_s > start) start = constraint_s;
if (constraint_e < end) end = constraint_e;
if (!p_constraint.isPunct())
pos = x;
else if (el_x.getElement()+1 == c.getElement(e) && el_x.getOtherTime() > el_x.getTime())
stop.push_back(el_x.getOtherTime());
}
}
std::sort(stop.begin(),stop.end());
}
\ No newline at end of file
#ifndef __BASE_HH_
#define __BASE_HH_
#include <vector>
#include <list>
#include <algorithm>
#include <fstream>
#include <cstdio>
#include <cstdlib>
#include "EMSet.hh"
#include "Chronicle.hh"
#include "Event.hh"
#include "MSetOcc.hh"
/**
 * Abstract interface of a sequence base.
 *
 * Concrete bases must implement `occur`, `getOccurences`, `getIntOccurences` and `size`.
 * The remaining virtual functions come with neutral default implementations (no-op or
 * an empty/zero/-1 return value) that derived classes may override.
 */
class Base {
public :
    int classe = 0;

protected :
    /**
     * Narrows the window `[start, end]` for element `e` of `c` from matches stored as
     * plain `int` values; fills and sorts `stop`.
     */
    void getWindow (
        const Chronicle& c,
        const int& e,
        const std::vector<int>& match,
        const int& cwm,
        int& start,
        int& end,
        int& pos,
        std::vector<int>& stop
    ) const;

    /**
     * Same as the `int` overload for matches stored as `Event` objects; may also
     * update `pos`.
     */
    void getWindow (
        const Chronicle& c,
        const int& e,
        const std::vector<Event>& match,
        const int& cwm,
        int& start,
        int& end,
        int& pos,
        std::vector<int>& stop
    ) const;

    /**
     * Pure virtual predicate called by `frequent` once per sequence index `seq`.
     */
    virtual bool occur (const Chronicle& c, const int& seq, const int& cwm) const = 0;

public :
    /** Pure virtual; must be provided by every concrete base. */
    virtual std::vector<std::pair<int,int>> getOccurences(const int& i, const int& j) = 0;

    /** Pure virtual; must be provided by every concrete base. */
    virtual std::vector<std::pair<int,int>> getIntOccurences(const int& i) = 0;

    /**
     * Tells whether `c` occurs (according to `occur`) in at least `fmin` of the
     * `size()` sequences; with `freq` set, the exact count is stored in `c.frequency`
     * when the threshold is met.
     */
    bool frequent (
        Chronicle& c,
        const unsigned int& fmin,
        std::list<Chronicle>& frequents,
        bool& add,
        const bool& freq,
        const int& cwm
    ) const;

    /** Number of sequences scanned by `frequent`. */
    virtual unsigned int size() const = 0;

    /** Default implementation: does nothing. */
    virtual void initOccurences (
        const std::vector<bool>&,
        const int&,
        const int&,
        const bool&
    ) {}

    /** Default implementation: does nothing. */
    virtual void clearOccurences () {}

    /** Default implementation: returns 0 and leaves its arguments untouched. */
    virtual unsigned int getSubBase (std::vector<bool>&, const int&, const int&) const {
        return 0;
    }

    /** Default implementation: returns `MSetOcc({},{})`. */
    virtual MSetOcc compileOccurrences(
        const std::vector<bool>&,
        const std::vector<int>&,
        const std::vector<int>&,
        const unsigned int&,
        std::vector<std::vector<std::vector<int>>>*,
        const bool&
    ) const {
        return MSetOcc({},{});
    }

    /** Default implementation: returns `MSetOcc({},{})`. */
    virtual MSetOcc compileOccurrences(
        const std::vector<bool>&,
        const std::vector<int>&,
        const std::vector<int>&,
        const unsigned int&
    ) const {
        return MSetOcc({},{});
    }

    /** Default implementation: returns -1. */
    virtual int getFreq(const EMSet&) const { return -1; }

    /** Default implementation: returns 0. */
    virtual int getMaxOcc() const { return 0; }

    /** Default implementation: returns an empty string. */
    virtual std::string generateLCMInputFile(int) const { return std::string(); }

    /** Default implementation: returns an empty vector. */
    virtual std::vector<EMSet> extractFrequentMSet (
        unsigned int,
        unsigned int,
        unsigned int,
        bool
    ) {
        return {};
    }

    virtual ~Base() = default;
};
#endif
#ifndef __EVENT_HH_
#define __EVENT_HH_
#include <iostream>
#include <cmath>
/**
 * Value type holding an element identifier together with two times.
 *
 * Events are ordered by their first time only (`operator<`); the element identifier
 * and the second time do not take part in the comparison.
 */
class Event {
protected :
    int _element;
    int _time;
    int _end_time;

public :
    /** Builds an event whose three fields are 0. */
    Event() : Event(0, 0, 0) {}

    /** Builds an event for element `e` whose two times are both `t`. */
    Event(const int& e, const int& t) : Event(e, t, t) {}

    /** Builds an event for element `e` with time `t` and end time `et`. */
    Event(const int& e, const int& t, const int& et)
        : _element(e), _time(t), _end_time(et) {}

    /** @return The element identifier. */
    const int& getElement() const { return _element; }

    /** @return The (first) time. */
    const int& getTime() const { return _time; }

    /** @return The end time. */
    const int& getOtherTime() const { return _end_time; }

    /** Strict ordering on the first time only. */
    bool operator< (const Event& e) const {
        return e._time > _time;
    }
};
#endif
This diff is collapsed.
# Build script for the CDA static library.
# The project (and therefore target) name comes from CDA_API_LIB, which must be defined
# by the scope that includes this directory.
Project(${CDA_API_LIB})
CMAKE_MINIMUM_REQUIRED( VERSION 2.8 )

# Every src/*.cpp file (paths relative to this directory) is compiled into the library.
FILE(GLOB CPPFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} src/*.cpp)
MESSAGE(STATUS ">> Files: ${CPPFILES}")

# set(... PARENT_SCOPE) writes the variable in the parent scope only; the local HEADERS
# keeps its previous value. Update the local copy first so that include_directories()
# below really sees this module's src directory, then export the same list to the parent.
list(APPEND HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src)
set(HEADERS "${HEADERS}" PARENT_SCOPE)
include_directories(${HEADERS})

ADD_LIBRARY( ${PROJECT_NAME} STATIC ${CPPFILES} )
TARGET_LINK_LIBRARIES( ${PROJECT_NAME} ${Ripper_Wrapper_API_LIB} ${Chronicle_API_LIB} ${Base_API_LIB})
TARGET_LINK_LIBRARIES( ${PROJECT_NAME} ${Boost_LIBRARIES} )

# NOTE(review): EXECUTABLE_OUTPUT_PATH only relocates executables; it is kept as-is here,
# confirm whether the static library was also meant to be redirected to OUTPUT_DIR.
IF(OUTPUT_DIR_FOUND)
SET(EXECUTABLE_OUTPUT_PATH ${OUTPUT_DIR})
ENDIF(OUTPUT_DIR_FOUND)
set( CMAKE_VERBOSE_MAKEFILE off )
This diff is collapsed.
#ifndef __CDA_HH_
#define __CDA_HH_
#include <iostream>
#include <cstdlib>
#include <algorithm>
#include <list>
#include <set>
#include <map>
#include <vector>
#include <cmath>
#include "Chronicle.hh"
#include "TCIterator.hh"
#include "Event.hh"
#include "Base.hh"
#include "MSetOcc.hh"
#include "EMSet.hh"
#include "ripper.hh"
#include <unistd.h>
#include <sys/time.h>
#include <sys/wait.h>
/**
* Chronicle Discovery Algorithm class
*/
class CDA {
public :
bool stop; //!< Initialized to false, switch stop to true will break the main run(...) loop.
bool next;
int nb_tc;
std::vector<int> stats;
/*
Is used to generated json chronicle during extraction if not empty.
*/
std::vector<std::string> inv_events;
bool txt= false;
protected :
/**
* Run the CDA algorithm on a specific events' set. /!\ Old function !
* @param chron The chronicle to specify.
* @param trace The sequences set.
* @param fmin The minimal frequency threshold.
* @param frequents The set of frequents chronicles to return.
* @param p True if chronicle added to the frequents set are printed during the execution.
* @param close True if frequents is a frequent closed chronicles set, false if it's a frequent minimal chronicles' set.
* @param freq True to force the frequency count of each chronicle when close is false.
*/
void
run (
const Chronicle& chron,
const Base& trace,
unsigned int fmin,
std::list<Chronicle>& frequents,
bool p,
int cwm,
bool close,
bool freq
);
/**
* Bind each discriminant multiset with his occurrences and print it if necessary.
* If the occurrences are not printed or the multiset not used later, the computation of all occurrences can be
* removed disable to improve the computation speed.
* @param set The discriminant multisets.
* @param trace The base from which we should extract the occurrences.
* @param nb_se The number of events in the base.
* @param res The vector in which the multisets are put if not printed.
*/
void
emergeMSetOcc (
const std::vector<EMSet>& set,
const Base& trace,
unsigned int nb_se,
std::vector<MSetOcc>& res
);
/**
* Return the subset of discriminant multisets in a given set of multisets.
* @param set1 The given set of multisets.
* @param trace The negative base to compute the negative support.
* @param bset A vector to store the non discriminant multiset.
* @param gmin The minimal growth rate.
* @param mincs The minimal size threshold.
* @param maxcs The maximal size threshold (0 for infinity).
* @return std::vector<EMSet> the vector containing the discriminant multisets.
*/
std::vector<EMSet>
emergeMSet (
std::vector<EMSet>& set1,
const Base& trace,
std::vector<EMSet>* bset,
const double& gmin,
unsigned int mincs,
unsigned int maxcs
);
/**
* Add a chronicle in a minimal (according to Cram's definition) chronicles' set.
* @param s The set.
* @param c The chronicle to add.
* @param close True if the set is a frequent closed chronicles set, False for minimal chronicle.
*/
void
minAdd (
std::list<Chronicle>& s,
const Chronicle& c,
bool
);
/**
* Run the CDA algorithm on the specialized children (same elements) of a chronicle (to avoid the use of a children' set).
* @param c The chronicle.
* @param trace The sequences' base.
* @param fmin The minimum frequency threshold.
* @param frequents A set of frequents chronicles.
* @param p True if chronicle added to the frequents set are printed during the execution.
* @param close True if frequents is a frequent closed chronicles' set, false if it's a frequent minimal chronicles' set.
* @param freq True to force the frequency count of each chronicle when close is false.
*/
void
runSpecChild(
const Chronicle& c,
const Base& trace,
unsigned int fmin,
std::list<Chronicle>& frequents,
bool p,
int cwm,
bool close,
bool freq
);
/**
* Generate extension of a chronicle with element's adding.
* @param c The chronicle to extend.
* @param base The temporal constraints' base used to add the element.
* @param nb_e The number of element represented in the base.
* @return The set of extended chronicles.
*/
std::vector<Chronicle>
generateExtendChild(
const Chronicle& c,
const std::vector<std::shared_ptr<TC>>& base_pct,
const std::vector<std::shared_ptr<TC>>& base_itv,
unsigned int nb_e,
unsigned int nb_se,
bool dif,
bool itv_only
);
bool
OccFreq (
const std::vector<std::pair<int,int>>& Ak,
unsigned int fmin,
unsigned int nb_seq,
int l,
int k
);
std::shared_ptr<TC>
G(
const std::vector<std::pair<int,int>>& Ak,
unsigned int fmin,
unsigned int nb_seq
);
std::shared_ptr<TC>
G(
const std::shared_ptr<TC>& father,
bool left,
const std::vector<std::pair<int,int>>& Ak,
unsigned int fmin,
unsigned int nb_seq,
int l,
int k
);
/**
* Test if a rule constraining a multiset is really discriminant for g and, if true, print the corresponding
* discriminant chronicle.
* @param s The object corresponding to the non discriminant multiset.
* @param data The positive dataset.
* @param data_b The negative dataset.
* @param r The rule.
* @param f The minimal support threshold.
* @param g The minimal growth rate threshold.
* @param ratio The ratio to use if the two datasets have different sizes.
*/
void
testSub(
MSetOcc& s,
const std::vector<std::vector<std::vector<int>>>& data,
const std::vector<std::vector<std::vector<int>>>& data_b,
const Rule& r,
int f,
double g,
double ratio
) const;
public :
CDA():stop(