diff --git a/demos/brz/Brzozowski.ml b/demos/brz/Brzozowski.ml index 82c7e941121e38a8657fec293fc73b80daec5656..3440b8f75a027899f4aacb6c09159d844b270ad9 100644 --- a/demos/brz/Brzozowski.ml +++ b/demos/brz/Brzozowski.ml @@ -392,6 +392,18 @@ type dfa = { expressions. Then, we construct a DFA whose states are the reachable expressions and whose transitions correspond to derivation. *) +(* If one wishes to stop as soon as an accepting state is encountered, then a + final state need not have any outgoing transitions; that would be + pointless. However, in some applications, one might wish to record that an + accepting state was reached and nevertheless continue reading the input. In + that case, the following flag should be set to [true]. *) + +let accepting_state_can_have_successors = + false + +let may_have_successors (e : regexp) : bool = + accepting_state_can_have_successors || not (nullable e) + let dfa (e : regexp) : dfa = (* Discover and number the nonempty reachable expressions. The most nontrivial aspect of this phase is termination. The fact that expressions @@ -401,14 +413,15 @@ let dfa (e : regexp) : dfa = more expressions being identified, therefore smaller automata. *) let module G = struct type t = regexp - (* The successors of [e] are its derivatives along every character [a], - provided they are nonempty. *) let foreach_successor e f = - Char.foreach (fun a -> - let e' = delta a e in - if nonempty e' then - f e' - ) + if may_have_successors e then + (* The successors of [e] are its derivatives along every character + [a], provided they are nonempty. *) + Char.foreach (fun a -> + let e' = delta a e in + if nonempty e' then + f e' + ) (* The single root is [e], if it is nonempty. *) let foreach_root f = if nonempty e then @@ -425,7 +438,12 @@ let dfa (e : regexp) : dfa = (* The initial state is the encoding of [e]. *) let init = encode e in (* The transition function is as follows. 
*) - let transition q a = encode (delta a (decode q)) in + let transition q a = + if may_have_successors (decode q) then + encode (delta a (decode q)) + else + None + in (* We are done. *) { n; init; decode; transition }