-
PAPERMAN Charles authoredPAPERMAN Charles authored
automata_machines.py 7.43 KiB
from compAut.metas import allIn, allExcept, metaLetter, metaObject
from compAut.models import Automaton
from compAut.logics.regularexpression import ClassicalRegularExpression
from .line_machines import SimpleLineMachine as LineMachine
import pickle
import re
import base64
cre = ClassicalRegularExpression()
def metaletters_to_conditions(variable, letter, threshold=5):
"""
Transform a MetaLetter condition into a string representing a C-condition.
We decompose a meta letters disjunction of equality if they are less than threshold,
otherwise, we check for ordered partition. This could probably be improved a lot.
"""
assert(isinstance(letter, metaLetter))
if isinstance(letter, allExcept):
return f"!({metaletters_to_conditions(variable, letter.complement())})"
l = len(letter._fset)
if l == 0:
return "0"
if l < threshold:
return f'{" || ".join(f"{variable}=={l}" for l in letter)}'
m = min(letter)
M = max(letter)
covers = set(range(m, M+1))
errors = covers.difference(letter)
if len(errors) > len(covers)/threshold:
merror = min(errors)
return f"(({metaletters_to_conditions(variable, letter.intersection(range(m, merror)), threshold=threshold)}) || ({metaletters_to_conditions(variable, letter.intersection(range(merror, M+1)), threshold=threshold)}))"
res = f"{variable} >= {m} && {variable} <= {M}"
if errors:
res += f" && !({metaletters_to_conditions(variable, errors, threshold=threshold)})"
return res
class automaton_transitions_template:
"""
Implement the transition function of a (register) automaton in C.
TODO: precise the C-standard compatibility.
The generated code, suppose existence of the following macros:
* fetch_next_symbol: fetch a new symbol to process and quit the current process if no more is found.
* symbol_type: a macro for the symbol type
The current symbol is stored in undefined `current_symbol` variable which should be of symbol_type.
The implementation of a register automaton require to have access to a pointer to `buffer` and
to `offset`.
`buffer` and `offset` are requires for registers manipulation.
The code encode states within goto label.
"""
def __init__(self, automaton, encoder=lambda e:e.map(lambda e:e.encode()[0]), width=1, condition_generator=lambda m:metaletters_to_conditions("current_symbol", m)):
"""
* automaton: should be a CompAut Automaton
* encoder: should map Python values to bytes, default value is the string to bytes encoder.
* width: designate the stride of the automaton.
* condition_generator: produces conditions from meta letter.
"""
if width not in (1, 2, 4, 8):
raise NotImplementedError(f"width is used to determined type of variable")
if not automaton.is_deterministic():
raise NotImplementedError("automaton should be deterministic")
self.width = width
self.encoder = encoder
if width == 1:
alphabet = list(range(255))
else:
alphabet = None
automaton = automaton.map_alphabet(encoder, alphabet=alphabet)
self.automaton = automaton # automaton encoded and if width=1, an explicite alphabet provided
self.condition_generator = condition_generator
self.states_name_map = {}
self.states_names = set()
self._state_regexp = re.compile("^[\w_]+$", re.DOTALL | re.MULTILINE)
self._nstate_count = 0
def clean_state_name(self, name):
"""
Construct a valid state name from the one provided argument.
"""
name = repr(name)
if name in self.states_name_map:
return self.states_name_map[name]
old_name = name
if not name[0].isalpha():
name = "_" + name
if not self._state_regexp.match(name):
count = self._nstate_count
while (f"_state_{count}" in self.states_names):
count += 1
name = f"_state_{count}"
self.states_name_map[old_name] = name
self.states_names.add(name)
return name
def __call__(self):
code = ""
automaton = self.automaton
for state in automaton.states:
code += f"{self.clean_state_name(state)}:\n"
code += f"\tfetch_next_symbol;\n"
for i, (letter, nstate) in enumerate(automaton._transitions.get(state, {}).items()):
nstate, action = next(iter(nstate.items()))
if i == 0:
code += "\tif "
else:
code += "\telse if "
code += f"({self.condition_generator(letter[0])})\n"
code += f"\t\tgoto {self.clean_state_name(nstate)};\n"
l = list(automaton._transitions.get(state,{}))
if l:
if l[0].union(*l[1:]).complement():
# remaining cases possible: goto exit.
code += "\tgoto __exit;\n"
else:
code += "\tgoto __exit;\n"
code += "__exit:;\n"
return code
class GrepLikeAutomaton:
"""
Match the expression line by line.
"""
descr = ""
pyreload = lambda self: f"""
To restore the python Automaton used in this program, execute in Python:
with open('path/to/obj.dump') as f:
M = {self.__module__}.{self.__class__.__name__}.loads(f.read())
The automaton (or the underlying expression if provided) can be access through:
M.expression # Exists only if the machine was build through a regexp.
M.automaton # Always exists
To recompile, executes:
M.compile(name='{self.name}')
"""
def __init__(self, to_execute, minimize=True, name=None):
"""
to_execute can be either a deterministic Automaton or a string that will be compiled
to regular expression.
The default behavior is to minimize and normalize the automaton but you can prevent that by
setting minimize to False.
Provides dumping and loading facilities using pickle and dump64.
"""
self.data_state = {"to_execute" : to_execute, "minimize" : minimize, "name":name}
self.expression = None
if isinstance(to_execute, str):
self.expression = to_execute
to_execute = cre.compile(to_execute)
if name is not None:
self.name = name
automaton = to_execute
if minimize:
automaton = automaton.concatenate(cre.compile("\n?")).expanded_utf8_chars().minimize()
self.automaton = automaton
def dumps(self):
x = base64.encodebytes(pickle.dumps(self.data_state))
return x.decode().replace("\n","")
@classmethod
def loads(cls, s):
d = pickle.loads(base64.decodebytes(s.encode()))
return cls(**d)
def __repr__(self):
if self.expression is not None:
expr = self.expression.replace("\\","\\\\")
infos = f"The automaton was build from the expression `{expr}`"
else:
infos = "The automaton was directly provided."
return f"a Grep Like automaton.\n{infos}\n{self.descr}"
class SwitchLineCompilableMachine(LineMachine):
def main_loop(self):
return f"""
switch (state){{
{self.switch()}
}}
"""
class SwitchGrepLikeAutomaton(GrepLikeAutomaton, SwitchLineCompilableMachine):
"""
Will implement the transition table using a switch statement and with
block of if then ... else if ...
"""
name = "swGrepLike"
descr = "The transitions table is implemented using a switch over states of conditional statements."
def reinit(self):
return f"state={next(iter(self.automaton.initial_states))};"
def accepting (self):
return " || ".join(f"state=={i}" for i in self.automaton.final_states)
def switch(self):
tr = self.automaton._transitions
switches = []
for s in tr:
letters = list(tr[s].keys())
if len(letters) == 1 and s in tr[s][letters[0]]:
continue # if s is a sinkstate we don't do anything actually
switches.append(f"case {s}:")
for i, l in enumerate(letters):
target = next(iter(tr[s][l]))
if not i:
switches.append(f"\tif ({self.metaletters_to_conditions(l)}) state = {target};")
continue
switches.append(f"\telse if ({self.metaletters_to_conditions(l)}) state = {target};")
switches.append("break;")
return "\n\t\t\t\t".join(switches)