from compAut.metas import allIn, allExcept, metaLetter, metaObject from compAut.models import Automaton from compAut.logics.regularexpression import ClassicalRegularExpression from .line_machines import SimpleLineMachine as LineMachine import pickle import re import base64 cre = ClassicalRegularExpression() def metaletters_to_conditions(variable, letter, threshold=5): """ Transform a MetaLetter condition into a string representing a C-condition. We decompose a meta letters disjunction of equality if they are less than threshold, otherwise, we check for ordered partition. This could probably be improved a lot. """ assert(isinstance(letter, metaLetter)) if isinstance(letter, allExcept): return f"!({metaletters_to_conditions(variable, letter.complement())})" l = len(letter._fset) if l == 0: return "0" if l < threshold: return f'{" || ".join(f"{variable}=={l}" for l in letter)}' m = min(letter) M = max(letter) covers = set(range(m, M+1)) errors = covers.difference(letter) if len(errors) > len(covers)/threshold: merror = min(errors) return f"(({metaletters_to_conditions(variable, letter.intersection(range(m, merror)), threshold=threshold)}) || ({metaletters_to_conditions(variable, letter.intersection(range(merror, M+1)), threshold=threshold)}))" res = f"{variable} >= {m} && {variable} <= {M}" if errors: res += f" && !({metaletters_to_conditions(variable, errors, threshold=threshold)})" return res class automaton_transitions_template: """ Implement the transition function of a (register) automaton in C. TODO: precise the C-standard compatibility. The generated code, suppose existence of the following macros: * fetch_next_symbol: fetch a new symbol to process and quit the current process if no more is found. * symbol_type: a macro for the symbol type The current symbol is stored in undefined `current_symbol` variable which should be of symbol_type. The implementation of a register automaton require to have access to a pointer to `buffer` and to `offset`. `buffer` and `offset` are requires for registers manipulation. The code encode states within goto label. """ def __init__(self, automaton, encoder=lambda e:e.map(lambda e:e.encode()[0]), width=1, condition_generator=lambda m:metaletters_to_conditions("current_symbol", m)): """ * automaton: should be a CompAut Automaton * encoder: should map Python values to bytes, default value is the string to bytes encoder. * width: designate the stride of the automaton. * condition_generator: produces conditions from meta letter. """ if width not in (1, 2, 4, 8): raise NotImplementedError(f"width is used to determined type of variable") if not automaton.is_deterministic(): raise NotImplementedError("automaton should be deterministic") self.width = width self.encoder = encoder if width == 1: alphabet = list(range(255)) else: alphabet = None automaton = automaton.map_alphabet(encoder, alphabet=alphabet) self.automaton = automaton # automaton encoded and if width=1, an explicite alphabet provided self.condition_generator = condition_generator self.states_name_map = {} self.states_names = set() self._state_regexp = re.compile("^[\w_]+$", re.DOTALL | re.MULTILINE) self._nstate_count = 0 def clean_state_name(self, name): """ Construct a valid state name from the one provided argument. """ name = repr(name) if name in self.states_name_map: return self.states_name_map[name] old_name = name if not name[0].isalpha(): name = "_" + name if not self._state_regexp.match(name): count = self._nstate_count while (f"_state_{count}" in self.states_names): count += 1 name = f"_state_{count}" self.states_name_map[old_name] = name self.states_names.add(name) return name def __call__(self): code = "" automaton = self.automaton for state in automaton.states: code += f"{self.clean_state_name(state)}:\n" code += f"\tfetch_next_symbol;\n" for i, (letter, nstate) in enumerate(automaton._transitions.get(state, {}).items()): nstate, action = next(iter(nstate.items())) if i == 0: code += "\tif " else: code += "\telse if " code += f"({self.condition_generator(letter[0])})\n" code += f"\t\tgoto {self.clean_state_name(nstate)};\n" l = list(automaton._transitions.get(state,{})) if l: if l[0].union(*l[1:]).complement(): # remaining cases possible: goto exit. code += "\tgoto __exit;\n" else: code += "\tgoto __exit;\n" code += "__exit:;\n" return code class GrepLikeAutomaton: """ Match the expression line by line. """ descr = "" pyreload = lambda self: f""" To restore the python Automaton used in this program, execute in Python: with open('path/to/obj.dump') as f: M = {self.__module__}.{self.__class__.__name__}.loads(f.read()) The automaton (or the underlying expression if provided) can be access through: M.expression # Exists only if the machine was build through a regexp. M.automaton # Always exists To recompile, executes: M.compile(name='{self.name}') """ def __init__(self, to_execute, minimize=True, name=None): """ to_execute can be either a deterministic Automaton or a string that will be compiled to regular expression. The default behavior is to minimize and normalize the automaton but you can prevent that by setting minimize to False. Provides dumping and loading facilities using pickle and dump64. """ self.data_state = {"to_execute" : to_execute, "minimize" : minimize, "name":name} self.expression = None if isinstance(to_execute, str): self.expression = to_execute to_execute = cre.compile(to_execute) if name is not None: self.name = name automaton = to_execute if minimize: automaton = automaton.concatenate(cre.compile("\n?")).expanded_utf8_chars().minimize() self.automaton = automaton def dumps(self): x = base64.encodebytes(pickle.dumps(self.data_state)) return x.decode().replace("\n","") @classmethod def loads(cls, s): d = pickle.loads(base64.decodebytes(s.encode())) return cls(**d) def __repr__(self): if self.expression is not None: expr = self.expression.replace("\\","\\\\") infos = f"The automaton was build from the expression `{expr}`" else: infos = "The automaton was directly provided." return f"a Grep Like automaton.\n{infos}\n{self.descr}" class SwitchLineCompilableMachine(LineMachine): def main_loop(self): return f""" switch (state){{ {self.switch()} }} """ class SwitchGrepLikeAutomaton(GrepLikeAutomaton, SwitchLineCompilableMachine): """ Will implement the transition table using a switch statement and with block of if then ... else if ... """ name = "swGrepLike" descr = "The transitions table is implemented using a switch over states of conditional statements." def reinit(self): return f"state={next(iter(self.automaton.initial_states))};" def accepting (self): return " || ".join(f"state=={i}" for i in self.automaton.final_states) def switch(self): tr = self.automaton._transitions switches = [] for s in tr: letters = list(tr[s].keys()) if len(letters) == 1 and s in tr[s][letters[0]]: continue # if s is a sinkstate we don't do anything actually switches.append(f"case {s}:") for i, l in enumerate(letters): target = next(iter(tr[s][l])) if not i: switches.append(f"\tif ({self.metaletters_to_conditions(l)}) state = {target};") continue switches.append(f"\telse if ({self.metaletters_to_conditions(l)}) state = {target};") switches.append("break;") return "\n\t\t\t\t".join(switches)