Commit a367c1d9 authored by Florent Jacquemard's avatar Florent Jacquemard
Browse files

class Run separated into inner runs and terminal runs

parent 7f3299ed
......@@ -1079,6 +1079,10 @@
3448BD311F0A484500B49FB8 /* Options.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Options.h; sourceTree = "<group>"; };
43004671260496E6000E9898 /* squant2.2 */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squant2.2; sourceTree = BUILT_PRODUCTS_DIR; };
430046CC26049AEC000E9898 /* squant2.2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squant2.2.cpp; path = V2.2/targets/quant/squant2.2.cpp; sourceTree = "<group>"; };
430046E42604CF3F000E9898 /* RunTerm.tpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; path = RunTerm.tpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
430046E52604CF3F000E9898 /* RunTerm.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = RunTerm.hpp; sourceTree = "<group>"; };
430046F12604CF97000E9898 /* RunInner.tpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; path = RunInner.tpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
430046F22604CF97000E9898 /* RunInner.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = RunInner.hpp; sourceTree = "<group>"; };
4301F9362418EE25004D8650 /* InputSequence.tpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; path = InputSequence.tpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
4301F9372418EE25004D8650 /* InputSequence.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = InputSequence.hpp; sourceTree = "<group>"; };
4301F93C24195101004D8650 /* Sequencm.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Sequencm.cpp; sourceTree = "<group>"; };
......@@ -1272,7 +1276,7 @@
4354C4E222FEB70A001E78DD /* TableA.tpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = TableA.tpp; path = src/parsing/TableA.tpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
43552EA922A5B6F500C557BA /* SchemaFileIn.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = SchemaFileIn.cpp; sourceTree = "<group>"; };
43552EAA22A5B6F500C557BA /* SchemaFileIn.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = SchemaFileIn.hpp; sourceTree = "<group>"; };
43552ED322A6ADA800C557BA /* Ranked.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; path = Ranked.cpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
43552ED322A6ADA800C557BA /* Ranked.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; name = Ranked.cpp; path = ../V2.2/parsing/Ranked.cpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
43552ED422A6ADA800C557BA /* Ranked.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Ranked.hpp; sourceTree = "<group>"; };
43552EDD22A6CFDE00C557BA /* KeySI.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KeySI.cpp; sourceTree = "<group>"; };
43552EDE22A6CFDE00C557BA /* KeySI.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = KeySI.hpp; sourceTree = "<group>"; };
......@@ -2222,6 +2226,10 @@
439EA0BA224CCF91006E742E /* RunFilter.cpp */,
43EEB0A5224ACC56002A91E7 /* Runey.hpp */,
43C573C8224D2FD3003FA990 /* Runey.tpp */,
430046E52604CF3F000E9898 /* RunTerm.hpp */,
430046E42604CF3F000E9898 /* RunTerm.tpp */,
430046F22604CF97000E9898 /* RunInner.hpp */,
430046F12604CF97000E9898 /* RunInner.tpp */,
434F406422FB198300C3940E /* RunRanked.hpp */,
434F406322FB198300C3940E /* RunRanked.tpp */,
431864B123099B5900940ECB /* RunCompare.hpp */,
......
......@@ -102,7 +102,7 @@ void RunFilter::_init_state_unit()
// continuation by dot
else if (SymbLabel::dot(a))
{
WARN("RunFilter.initunit: unexpected dot symbol");
ERROR("RunFilter.initunit: unexpected dot symbol");
_state_unit = 4; // error
}
// event (including rest)
......@@ -126,7 +126,7 @@ void RunFilter::_init_state_unit()
else if (SymbLabel::fail(a))
{
TRACE("RunFilter.initunit: symbol FAIL({})", SymbLabel::nbEvents(a));
_state_unit = 2; // unit, filtered.
_state_unit = 3; // non-unit, filtered.
}
else if (SymbLabel::dummy(a))
{
......@@ -391,29 +391,29 @@ void RunFilter::update(const RunFilter& fil) // arg. mult
// first child
case 0:
if (fil.unit()) // state 1 or 2
_state_unit = 2;
_state_unit = 2; // unit and filtered
else
_state_unit = 3;
_state_unit = 3; // not-unit and filtered
break;
// next childs
case 1:
case 2:
case 1: // unit and unfiltered
case 2: // unit and filtered
if (fil.continuation())
_state_unit = 1;
_state_unit = 1; // unit and unfiltered
else
_state_unit = 3;
_state_unit = 3; // not-unit and filtered
break;
// next childs
case 3:
_state_unit = 3; // trash
case 3: // not-unit and filtered
_state_unit = 3; // not-unit and filtered
break;
default:
{
ERROR("filter update: unextected state {}", _state_unit);
assert(false);
_state_unit = 4; // error
}
}
}
......@@ -424,30 +424,30 @@ void RunFilter::update(const RunFilter& fil) // arg. mult
switch (_state_cont)
{
// first child
case 0:
case 0: // empty - initial state
if (fil.continuation())
_state_cont = 2;
_state_cont = 2; // cont and filtered
else
_state_cont = 3;
_state_cont = 3; // not-cont and filtered
break;
case 1:
case 2:
case 1: // cont and unfiltered
case 2: // cont and filtered
if (fil.continuation())
_state_cont = 1;
_state_cont = 1; // cont and unfiltered
else
_state_cont = 3;
_state_cont = 3; // not-cont and filtered
break;
// next childs
case 3:
_state_cont = 3;
case 3: // not-cont and filtered
_state_cont = 3; // not-cont and filtered
break;
default:
{
ERROR("filter update: unextected state {}", _state_unit);
assert(false);
_state_cont = 4; // error
}
}
}
......
//
// RunInner.hpp
// GTests
//
// Created by Florent Jacquemard on 19/03/2021.
// Copyright © 2021 Florent Jacquemard. All rights reserved.
//
/// @addtogroup parsing
/// @{
#ifndef RunInner_hpp
#define RunInner_hpp
#include <stdio.h>
#include <assert.h>
#include <vector>
#include "trace.hpp"
#include "SymbLabel.hpp" // symbolic labels
#include "Weight.hpp"
#include "Transition.hpp"
#include "Runey.hpp"
namespace Parsing{
/// An inner run is a particular case of run with at least one subrun.
/// An inner run is complete when the length of the list of children is the arity.
/// It is partial otherwise.
/// param K = class of Key in parse table
template<class K>
class RunInner : public Run<K>
{
public:

    /// @brief inner run initialized with a transition of the base wta.
    /// @param tr origin transition used to build the run. must be inner.
    /// tr gives label and initial weight. It cannot be changed afterwards.
    /// The constructed run is partial (empty list of subruns).
    RunInner(const Transition& tr);

    /// @brief inner run initialized with a transition of the base wta
    /// and some label and weight values which may differ from this transition.
    /// @param tr origin transition used to build the run. must be inner.
    /// @param lab given label. cannot be changed afterwards.
    /// @param w weight must not be unknown. can be updated afterwards.
    /// The constructed run is partial (empty list of subruns).
    RunInner(const Transition& tr, label_t lab, const Weight& w);

    /// copy constructor.
    RunInner(const RunInner<K>& r);

    /// @brief copy/update constructor with push back of a child (record).
    /// @param r run must be partial.
    /// @param rec record, must have a complete key.
    /// copy r, and in the copy:
    /// - add rec at the end of children list.
    /// - multiply current weight by the weight of the 1-best run for rec.
    /// - update current filter with the filter of the 1-best run for rec.
    /// @warning if rec has no 1-best run, the weight of the copy
    /// is set to unknown.
    /// NOTE(review): init_best() asserts that the best run is non-NULL,
    /// so the unknown-weight case above is presumably not implemented yet
    /// - to be confirmed.
    RunInner(const RunInner<K>& r, Record<K>* rec);

    /// @brief copy/update constructor with push back of a new best subrun.
    /// @param r run must be partial.
    /// @param rec record, must have a complete key.
    /// @param best is a pointer to the best run in rec. must be non-NULL.
    /// the pointed run must be complete.
    /// copy r, and in the copy:
    /// - add rec at the end of children list.
    /// - multiply current weight by the weight of best.
    /// - update current filter with the filter of best.
    RunInner(const RunInner<K>& r, Record<K>* rec, const Run<K>* best);

    /// destructor.
    virtual ~RunInner();

    /// copy assignment (base part, children list and subruns backup).
    RunInner<K>& operator= (const RunInner<K>& rhs);

    /// equality of the base parts and of the children lists.
    /// @warning children are compared as pointers (to records).
    bool operator==(const RunInner<K>& rhs) const;

    /// @return current number of children for this run.
    /// it is the arity - multiplicity if this run is complete.
    /// it is < arity - multiplicity if this run is partial.
    virtual size_t size() const;

    /// @return whether this run has currently no children.
    bool empty() const;

    /// an inner run is complete when its list of children has the expected size.
    virtual bool complete() const;

    /// @return the multiplicity of the ith children.
    /// = number of edges between the top of this run into its ith subrun.
    size_t multiplicity(size_t i) const;

    /// @param i the index of the subrun, between 0 and size()-1.
    /// @return the ith subrun or NULL if there is none.
    /// @warning this run must be complete.
    virtual const Run<K>* subrun(size_t i) const;

    /// @return the ith key of this run.
    /// @param i index of subrun, must be between 0 and run size - 1.
    const K* key(size_t i) const;

    /// stream output of an inner run.
    /// Declared as a template friend: the definition in RunInner.tpp is
    /// a function template, and a non-template friend declaration would
    /// introduce a different (never defined) function, leaving the template
    /// without access to the protected members.
    template<class U>
    friend std::ostream& operator<<(std::ostream& o, const RunInner<U>& r);

protected:

    /// pointers (keys) to children sub-runs.
    std::vector<Record<K>*> _children;

    /// [opt] data for optimization.
    /// backup of the best runs used for construction.
    /// @todo TBR. not used.
    std::vector<const Run<K>*> _subruns;

    /// @param i the index of the subrun, between 0 and arity-1.
    /// @param n the rank for n-best.
    /// must be equal to 1 for unranked runs.
    /// must be larger or equal to 1 for ranked runs.
    /// @return the ith subrun or NULL if there is none.
    /// @warning this run must be complete.
    const Run<K>* subrun(size_t i, size_t n) const;

    // return the ith record of this run.
    // @param i index of subrun, between 0 and arity-1.
    // Record<K>* operator[](size_t i) const;

private:

    /// add the given record as new subrun and update the weight of this run
    /// with the weight of the given best subrun.
    /// @param rec record added as new subrun. must not be NULL and have a key.
    /// @param best subrun to add. must not be NULL.
    void init_best(Record<K>* rec, const Run<K>* best);
};
} // end namespace Parsing
// separated definition of template class
#include "RunInner.tpp"
#endif /* RunInner_hpp */
/// @}
//
// RunInner.tpp
// GTests
//
// Created by Florent Jacquemard on 19/03/2021.
// Copyright © 2021 Florent Jacquemard. All rights reserved.
//
namespace Parsing{
template<class K>
RunInner<K>::RunInner(const Transition& tr):
Run<K>(tr),
_children(), // no children yet
_subruns() // [opt]
{
assert(tr.inner());
assert(tr.arity() > 0);
}
template<class K>
RunInner<K>::RunInner(const Transition& tr, label_t a, const Weight& w):
Run<K>::Run(tr, a, w),
_children(), // no children yet
_subruns() // [opt]
{
assert(tr.inner());
assert(SymbLabel::arity(a) == tr.arity());
assert(SymbLabel::arity(a) > 0);
assert(! w.unknown());
}
// copy
template<class K>
RunInner<K>::RunInner(const RunInner<K>& r):
Run<K>(r),
_children(r._children), // copy
_subruns(r._subruns) // [opt]
{
assert(size() <= this->arity());
}
// copy/update
/// Copy/update constructor: copy r, then append the record rec as a new
/// child, using the 1-best run of rec to update the weight and the filter.
/// @param r run to copy. must be partial (asserted in init_best).
/// @param rec record to append. must not be NULL.
/// NOTE(review): init_best asserts that the 1-best run is non-NULL;
/// a record without 1-best run is therefore not supported here - confirm.
template<class K>
RunInner<K>::RunInner(const RunInner<K>& r, Record<K>* rec):
    RunInner<K>(r) // copy
{
    assert(rec);
    const Run<K>* best = rec->best(1);
    // init_best takes (record, best run): it applies to this copy,
    // the copied run r is not an argument.
    init_best(rec, best);
}
// copy/update
/// Copy/update constructor: copy r, then append the record rec as a new
/// child, using the given best run to update the weight and the filter.
/// @param r run to copy. must be partial (asserted in init_best).
/// @param rec record to append. must not be NULL.
/// @param best best run for rec. must be non-NULL and complete
/// (asserted in init_best).
template<class K>
RunInner<K>::RunInner(const RunInner<K>& r, Record<K>* rec, const Run<K>* best):
    RunInner<K>(r) // copy
{
    assert(rec);
    // init_best takes (record, best run): it applies to this copy,
    // the copied run r is not an argument.
    init_best(rec, best);
}
/// Append rec to the children of this (partial, inner) run, then update
/// the filter and the weight of this run with those of the run best.
/// The weight of a FAIL subrun is ignored.
template<class K>
void RunInner<K>::init_best(Record<K>* rec, const Run<K>* best)
{
    // preconditions on this run and on the new record
    assert(this->partial());
    assert(this->inner());
    assert(rec);
    assert(rec->key());
    assert(rec->key()->complete());

    // register the new child
    _children.push_back(rec);

    // preconditions on the best subrun
    assert(best != nullptr);
    assert(best->complete());

    // the filter update also handles the case of a FAIL subrun
    this->_filter.update(best->_filter);

    // a FAIL subrun does not contribute to the weight
    if (! best->fail())
    {
        const auto& w = best->weight();
        assert(! w.unknown());
        if (w.zero())
            WARN("RunUpdate {}: zero-weight subrun {}", *(this), *best);
        this->_weight *= w;
    }

    // [opt] keep track of the best run used
    _subruns.push_back(best);
}
/// Destructor: traces the deleted run and empties both lists.
/// The pointed records and runs are not deleted here.
template<class K>
RunInner<K>::~RunInner()
{
    TRACE("delete inner Run {}", *this);
    // only the pointers are dropped
    _subruns.clear();
    _children.clear();
}
/// Copy assignment: assigns the base part, then copies the list of children
/// and the [opt] backup of best subruns. Self-assignment is a no-op.
/// @param rhs run to copy.
/// @return a reference to this run.
template<class K>
RunInner<K>& RunInner<K>::operator= (const RunInner<K>& rhs)
{
    if (this != &rhs)
    {
        Run<K>::operator=(rhs);
        // vector assignment already adjusts the size: no prior resize needed
        _children = rhs._children;
        _subruns = rhs._subruns;
    }
    return *this;
}
/// Two inner runs are equal when their base parts are equal and
/// their lists of children contain the same record pointers, in the same order.
template<class K>
bool RunInner<K>::operator==(const RunInner<K>& rhs) const
{
    if (! Run<K>::operator==(rhs))
        return false;

    /// @warning comparison of pointers (to Records):
    /// the vectors are equal iff they have the same size
    /// and pairwise identical pointers.
    if (_children != rhs._children)
        return false;

    // if the runs are identical, their weights must be the same
    assert(this->_weight == rhs._weight);
    return true;
}
// actual number of children
/// @return the number of children currently stored in this run.
template<class K>
size_t RunInner<K>::size() const
{
    const size_t nb = _children.size();
    // never more children than the arity
    assert(nb <= this->arity());
    return nb;
}
/// @return whether the list of children of this run is empty.
template<class K>
bool RunInner<K>::empty() const
{
    const bool nochild = _children.empty();
    return nochild;
}
/// @param i index of a child, must be smaller than the size
/// of the origin transition.
/// @return the multiplicity of the ith child, as given by the origin transition.
template<class K>
size_t RunInner<K>::multiplicity(size_t i) const
{
    const auto& origin = this->_origin;
    assert(i < origin.size());
    return origin.multiplicity(i);
}
/// @return whether the current number of children is the number
/// expected by the origin transition.
template<class K>
bool RunInner<K>::complete() const
{
    assert(size() <= this->arity());
    // number of children expected by the origin transition
    const auto expected = this->_origin.size();
    return (size() == expected);
}
/// @return the ith subrun for rank 1 (forwarded to the two-argument subrun).
template<class K>
const Run<K>* RunInner<K>::subrun(size_t i) const
{
    const Run<K>* first = this->subrun(i, 1);
    return first;
}
/// @param i index of a child, must be smaller than size().
/// @param n rank, must be strictly positive.
/// @return the n-th best run of the ith child record.
/// @warning this run must be complete.
template<class K>
const Run<K>* RunInner<K>::subrun(size_t i, size_t n) const
{
    assert(this->complete());
    assert(i < this->size());
    assert(0 < n);

    Record<K>* child = _children[i];
    assert(child);
    const Run<K>* nth = child->best(n);
    return nth;
}
/// @param i index of a child, must be smaller than the number of children.
/// @return the key of the ith child record. it is non-NULL and complete.
template<class K>
const K* RunInner<K>::key(size_t i) const
{
    assert (i < _children.size());
    Record<K>* child = _children[i];
    assert(child);

    const K* found = child->key();
    assert(found);
    assert(found->complete());
    return found;
}
//template<class K>
//Record<K>* RunInner<K>::operator[](size_t i) const
//{
// assert (i < _children.size());
// Record<K>* rec = _children[i];
// assert(rec);
// return rec;
//}
template<class K>
std::ostream& operator<<(std::ostream& o, const RunInner<K>& r)
{
SymbLabel::print(r._label, o);
if (r._children.empty())
{
o << "()";
}
else
{
o << "(";
typename std::vector<Record<K>*>::const_iterator
i = r._children.cbegin();
assert(i != r._children.cend());
o << *((*i)->key());
++i;
for(; i != r._children.cend(); ++i)
{
o << " " << *((*i)->key());
}
o << ")";
}
o << " : " << r._weight;
return o;
}
} // end namespace Parsing
......@@ -18,15 +18,17 @@
#include "trace.hpp"
#include "Runey.hpp"
#include "RunInner.hpp"
namespace Parsing{
//template<class K> class Run;
/// extension of class Run with a rank value for each children.
/// for k-best computation with the algo of Huang & Chiang 2005
/// extension of the inner Run with a rank value for each children.
/// for k-best computation with the algo of Huang & Chiang 2005.
template<class K>
class RunRanked : public Run<K>
class RunRanked : public RunInner<K>
{
public:
......@@ -47,12 +49,12 @@ public:
/// @brief copy and upgrade.
/// @param r a run. must be unranked (base class).
/// @warning. r must be complete.
RunRanked(const Run<K>& r);
RunRanked(const RunInner<K>& r);
/// @brief copy and increment.
/// @param r ranked run to copy.
/// @param i index of the children whose rank will be upgraded in the copy.
/// @param i index of the children whose rank will be incremented in the copy.
/// @warning the weight of the copy is deleted and set as unknown.
/// @warning the filter of the copy is reset.
RunRanked(const RunRanked<K>& r, size_t i);
......@@ -71,49 +73,19 @@ public:
virtual bool ranked() const { return true; }
/// @param i the index of the subrun, between 0 and arity()-1.
/// @param i the index of the subrun, between 0 and size()-1.
/// @return the ith subrun or NULL if there is none.
/// @warning this run must be complete.
virtual const Run<K>* subrun(size_t i) const;
friend std::ostream& operator<<(std::ostream& o, const RunRanked<K>& r);
protected:
/// ranks of children. mutable.
/// ranks of children. mutable. must have the same size as _children.
typename std::vector<size_t> _ranks;
public:
friend std::ostream& operator<<(std::ostream& o, const RunRanked<K>& r)
{
SymbLabel::print(r._label, o);
if (r._children.empty())
{
o << "()";