Mentions légales du service

Skip to content
Snippets Groups Projects
automata_machines.py 7.43 KiB
from compAut.metas import allIn, allExcept, metaLetter, metaObject
from compAut.models import Automaton
from compAut.logics.regularexpression import ClassicalRegularExpression
from .line_machines import SimpleLineMachine as LineMachine
import pickle
import re
import base64
cre = ClassicalRegularExpression()
def metaletters_to_conditions(variable, letter, threshold=5):
	"""
	Transform a MetaLetter condition into a string representing a C-condition.
	We decompose a meta letters disjunction of equality if they are less than threshold,
	otherwise, we check for ordered partition. This could probably be improved a lot.
	"""
	assert(isinstance(letter, metaLetter))
	if isinstance(letter, allExcept):
		return f"!({metaletters_to_conditions(variable, letter.complement())})"
	l = len(letter._fset)
	if l == 0:
		return "0"
	if l < threshold:
		return f'{" || ".join(f"{variable}=={l}" for l in letter)}'
	m = min(letter)
	M = max(letter)
	covers = set(range(m, M+1))
	errors = covers.difference(letter)
	if len(errors) > len(covers)/threshold:
		merror = min(errors)
		return f"(({metaletters_to_conditions(variable, letter.intersection(range(m, merror)), threshold=threshold)}) || ({metaletters_to_conditions(variable, letter.intersection(range(merror, M+1)), threshold=threshold)}))"
	res = f"{variable} >= {m} && {variable} <= {M}"
	if errors:
		res += f" && !({metaletters_to_conditions(variable, errors, threshold=threshold)})"
	return res

class automaton_transitions_template:
	"""
	Implement the transition function of a (register) automaton in C.
	TODO: precise the C-standard compatibility.

	The generated code, suppose existence of the following macros:
	* fetch_next_symbol: fetch a new symbol to process and quit the current process if no more is found.
	* symbol_type: a macro for the symbol type

	The current symbol is stored in undefined `current_symbol` variable which should be of symbol_type.
	The implementation of a register automaton require to have access to a pointer to `buffer` and
	to `offset`.

	`buffer` and `offset` are requires for registers manipulation.

	The code encode states within goto label.
	"""
	def __init__(self, automaton, encoder=lambda e:e.map(lambda e:e.encode()[0]), width=1, condition_generator=lambda m:metaletters_to_conditions("current_symbol", m)):
		"""
		* automaton: should be a CompAut Automaton
		* encoder: should map Python values to bytes, default value is the string to bytes encoder.
		* width: designate the stride of the automaton.
		* condition_generator: produces conditions from meta letter.
		"""
		if width not in (1, 2, 4, 8):
			raise NotImplementedError(f"width is used to determined type of variable")
		if not automaton.is_deterministic():
			raise NotImplementedError("automaton should be deterministic")
		self.width = width
		self.encoder = encoder
		if width == 1:
			alphabet = list(range(255))
		else:
			alphabet = None
		automaton = automaton.map_alphabet(encoder, alphabet=alphabet)
		self.automaton = automaton # automaton encoded and if width=1, an explicite alphabet provided
		self.condition_generator = condition_generator
		self.states_name_map = {}
		self.states_names = set()
		self._state_regexp = re.compile("^[\w_]+$", re.DOTALL | re.MULTILINE)
		self._nstate_count = 0

	def clean_state_name(self, name):
		"""
		Construct a valid state name from the one provided argument.
		"""
		name = repr(name)
		if name in self.states_name_map:
			return self.states_name_map[name]
		old_name = name
		if not name[0].isalpha():
			name = "_" + name
		if not self._state_regexp.match(name):
			count = self._nstate_count
			while (f"_state_{count}" in self.states_names):
				count += 1
			name = f"_state_{count}"
		self.states_name_map[old_name] = name
		self.states_names.add(name)
		return name

	def __call__(self):
		code = ""
		automaton = self.automaton
		for state in automaton.states:
			code += f"{self.clean_state_name(state)}:\n"
			code += f"\tfetch_next_symbol;\n"
			for i, (letter, nstate) in enumerate(automaton._transitions.get(state, {}).items()):
				nstate, action = next(iter(nstate.items()))
				if i == 0:
					code += "\tif "
				else:
					code += "\telse if "
				code += f"({self.condition_generator(letter[0])})\n"
				code += f"\t\tgoto {self.clean_state_name(nstate)};\n"
			l = list(automaton._transitions.get(state,{}))
			if l:
				if l[0].union(*l[1:]).complement():
					# remaining cases possible: goto exit.
					code += "\tgoto __exit;\n"
			else:
				code += "\tgoto __exit;\n"
		code += "__exit:;\n"
		return code

class GrepLikeAutomaton:
	"""
	Match the expression line by line.
	"""
	descr = ""
	pyreload = lambda self: f"""
To restore the python Automaton used in this program, execute in Python:

with open('path/to/obj.dump') as f:
	M = {self.__module__}.{self.__class__.__name__}.loads(f.read())

The automaton (or the underlying expression if provided) can be access through:
	M.expression # Exists only if the machine was build through a regexp.
	M.automaton # Always exists

To recompile, executes:
M.compile(name='{self.name}')
"""
	def __init__(self, to_execute, minimize=True, name=None):
		"""
		to_execute can be either a deterministic Automaton or a string that will be compiled
		to regular expression.

		The default behavior is to minimize and normalize the automaton but you can prevent that by
		setting minimize to False.

		Provides dumping and loading facilities using pickle and dump64.
		"""
		self.data_state = {"to_execute" : to_execute, "minimize" : minimize, "name":name}
		self.expression = None
		if isinstance(to_execute, str):
			self.expression = to_execute
			to_execute = cre.compile(to_execute)
		if name is not None:
			self.name = name
		automaton = to_execute
		if minimize:
			automaton = automaton.concatenate(cre.compile("\n?")).expanded_utf8_chars().minimize()
		self.automaton = automaton

	def dumps(self):
		x = base64.encodebytes(pickle.dumps(self.data_state))
		return x.decode().replace("\n","")

	@classmethod
	def loads(cls, s):
		d = pickle.loads(base64.decodebytes(s.encode()))
		return cls(**d)
	def __repr__(self):
		if self.expression is not None:
			expr = self.expression.replace("\\","\\\\")
			infos = f"The automaton was build from the expression `{expr}`"
		else:
			infos = "The automaton was directly provided."
		return f"a Grep Like automaton.\n{infos}\n{self.descr}"

class SwitchLineCompilableMachine(LineMachine):
	def main_loop(self):
		return f"""
			switch (state){{
				{self.switch()}
			}}
		"""

class SwitchGrepLikeAutomaton(GrepLikeAutomaton, SwitchLineCompilableMachine):
	"""
	Will implement the transition table using a switch statement and with
	block of if then ... else if  ...
	"""
	name = "swGrepLike"
	descr = "The transitions table is implemented using a switch over states of conditional statements."
	def reinit(self):
		return f"state={next(iter(self.automaton.initial_states))};"

	def accepting (self):
		return " || ".join(f"state=={i}" for i in self.automaton.final_states)

	def switch(self):
		tr = self.automaton._transitions
		switches = []
		for s in tr:
			letters = list(tr[s].keys())
			if len(letters) == 1 and s in tr[s][letters[0]]:
				continue # if s is a sinkstate we don't do anything actually
			switches.append(f"case {s}:")
			for i, l in enumerate(letters):
				target = next(iter(tr[s][l]))
				if not i:
					switches.append(f"\tif ({self.metaletters_to_conditions(l)}) state = {target};")
					continue
				switches.append(f"\telse if ({self.metaletters_to_conditions(l)}) state = {target};")
			switches.append("break;")
		return "\n\t\t\t\t".join(switches)