diff --git a/src/Boolean.ml b/src/Boolean.ml new file mode 100644 index 0000000000000000000000000000000000000000..d4705f79b955b829f71cd448ad5bcd6cdafd77bc --- /dev/null +++ b/src/Boolean.ml @@ -0,0 +1,14 @@ +(* The Boolean lattice. *) + +type property = + bool + +let bottom = + false + +let equal (b1 : bool) (b2 : bool) = + b1 = b2 + +let is_maximal b = + b + diff --git a/src/Boolean.mli b/src/Boolean.mli new file mode 100644 index 0000000000000000000000000000000000000000..a68f433a0d87033ec2c954579b64f38893878df2 --- /dev/null +++ b/src/Boolean.mli @@ -0,0 +1,2 @@ +include Fix.PROPERTY with type property = bool + diff --git a/src/CompletedNat.ml b/src/CompletedNat.ml new file mode 100644 index 0000000000000000000000000000000000000000..760ce5dd0a7e3a8f448bd8e62a870239ee83d1ac --- /dev/null +++ b/src/CompletedNat.ml @@ -0,0 +1,64 @@ +(* The natural numbers, completed with [Infinity], and ordered towards + zero (i.e. [Infinity] is [bottom], [Finite 0] is [top]). *) + +type t = +| Finite of int +| Infinity + +type property = + t + +let equal p1 p2 = + match p1, p2 with + | Finite i1, Finite i2 -> + i1 = i2 + | Infinity, Infinity -> + true + | _, _ -> + false + +let bottom = + Infinity + +let is_maximal p = + match p with + | Finite 0 -> + true + | _ -> + false + +let min p1 p2 = + match p1, p2 with + | Finite i1, Finite i2 -> + if i1 <= i2 then p1 else p2 + | p, Infinity + | Infinity, p -> + p + +let min_lazy p1 p2 = + match p1 with + | Finite 0 -> + p1 + | _ -> + min p1 (Lazy.force p2) + +let add p1 p2 = + match p1, p2 with + | Finite i1, Finite i2 -> + Finite (i1 + i2) + | _, _ -> + Infinity + +let add_lazy p1 p2 = + match p1 with + | Infinity -> + Infinity + | _ -> + add p1 (Lazy.force p2) + +let print p = + match p with + | Finite i -> + string_of_int i + | Infinity -> + "infinity" diff --git a/src/CompletedNat.mli b/src/CompletedNat.mli new file mode 100644 index 0000000000000000000000000000000000000000..a0255a44cf1906b1a5e8874a2be8c531a0d0d1c5 --- /dev/null +++ b/src/CompletedNat.mli @@ -0,0 +1,16 @@ +(* The natural numbers, completed with [Infinity], and ordered towards + zero (i.e. [Infinity] is [bottom], [Finite 0] is [top]). *) + +type t = +| Finite of int +| Infinity + +include Fix.PROPERTY with type property = t + +val min: t -> t -> t +val add: t -> t -> t + +val min_lazy: t -> t Lazy.t -> t +val add_lazy: t -> t Lazy.t -> t + +val print: t -> string diff --git a/src/CompletedNatWitness.ml b/src/CompletedNatWitness.ml new file mode 100644 index 0000000000000000000000000000000000000000..9be019975f57a1fa0d7976beb12872a24741dfc0 --- /dev/null +++ b/src/CompletedNatWitness.ml @@ -0,0 +1,72 @@ +(* This is an enriched version of [CompletedNat], where we compute not just + numbers, but also lists of matching length. During the fixed point + computation, instead of manipulating actual lists, we manipulate only + recipes for constructing lists. These recipes can be evaluated by the user + after the fixed point has been reached. *) + +(* A property is either [Finite (n, xs)], where [n] is a natural number and + [xs] is a (recipe for constructing a) list of length [n]; or [Infinity]. *) + +type 'a t = +| Finite of int * 'a list Lazy.t +| Infinity + +let equal p1 p2 = + match p1, p2 with + | Finite (i1, _), Finite (i2, _) -> + i1 = i2 + | Infinity, Infinity -> + true + | _, _ -> + false + +let bottom = + Infinity + +let is_maximal p = + match p with + | Finite (0, _) -> + true + | _ -> + false + +let min p1 p2 = + match p1, p2 with + | Finite (i1, _), Finite (i2, _) -> + if i1 <= i2 then p1 else p2 + | p, Infinity + | Infinity, p -> + p + +let min_lazy p1 p2 = + match p1 with + | Finite (0, _) -> + p1 + | _ -> + min p1 (Lazy.force p2) + +let add p1 p2 = + match p1, p2 with + | Finite (i1, xs1), Finite (i2, xs2) -> + Finite ( + i1 + i2, + (* The only list operation in the code! *) + lazy (Lazy.force xs1 @ Lazy.force xs2) + ) + | _, _ -> + Infinity + +let add_lazy p1 p2 = + match p1 with + | Infinity -> + Infinity + | _ -> + add p1 (Lazy.force p2) + +let print conv p = + match p with + | Finite (i, xs) -> + string_of_int i ^ " " ^ + String.concat " " (List.map conv (Lazy.force xs)) + | Infinity -> + "infinity" diff --git a/src/CompletedNatWitness.mli b/src/CompletedNatWitness.mli new file mode 100644 index 0000000000000000000000000000000000000000..69a063fa2954f374b4474b8835b85275e87b364e --- /dev/null +++ b/src/CompletedNatWitness.mli @@ -0,0 +1,24 @@ +(* This is an enriched version of [CompletedNat], where we compute not just + numbers, but also lists of matching length. During the fixed point + computation, instead of manipulating actual lists, we manipulate only + recipes for constructing lists. These recipes can be evaluated by the user + after the fixed point has been reached. *) + +(* A property is either [Finite (n, xs)], where [n] is a natural number and + [xs] is a (recipe for constructing a) list of length [n]; or [Infinity]. *) + +type 'a t = +| Finite of int * 'a list Lazy.t +| Infinity + +val bottom: 'a t +val equal: 'a t -> 'b t -> bool +val is_maximal: 'a t -> bool + +val min: 'a t -> 'a t -> 'a t +val add: 'a t -> 'a t -> 'a t + +val min_lazy: 'a t -> 'a t Lazy.t -> 'a t +val add_lazy: 'a t -> 'a t Lazy.t -> 'a t + +val print: ('a -> string) -> 'a t -> string diff --git a/src/Maps.ml b/src/Maps.ml new file mode 100644 index 0000000000000000000000000000000000000000..7bd9374d688fe1cd19dcbda66d3ccda351a6f486 --- /dev/null +++ b/src/Maps.ml @@ -0,0 +1,163 @@ +(* BEGIN PERSISTENT_MAPS *) +module type PERSISTENT_MAPS = sig + type key + type 'data t + val empty: 'data t + val add: key -> 'data -> 'data t -> 'data t + val find: key -> 'data t -> 'data + val iter: (key -> 'data -> unit) -> 'data t -> unit +end +(* END PERSISTENT_MAPS *) + +(* BEGIN IMPERATIVE_MAPS *) +module type IMPERATIVE_MAPS = sig + type key + type 'data t + val create: unit -> 'data t + val clear: 'data t -> unit + val add: key -> 'data -> 'data t -> unit + val find: key -> 'data t -> 'data + val iter: (key -> 'data -> unit) -> 'data t -> unit +end +(* END IMPERATIVE_MAPS *) + +(* BEGIN IMPERATIVE_MAP *) +module type IMPERATIVE_MAP = sig + type key + type data + val set: key -> data -> unit + val get: key -> data option +end +(* END IMPERATIVE_MAP *) + +module PersistentMapsToImperativeMaps + (M : PERSISTENT_MAPS) + : IMPERATIVE_MAPS with type key = M.key + and type 'data t = 'data M.t ref += struct + + type key = + M.key + + type 'data t = + 'data M.t ref + + let create () = + ref M.empty + + let clear t = + t := M.empty + + let add k d t = + t := M.add k d !t + + let find k t = + M.find k !t + + let iter f t = + M.iter f !t + +end + +module ImperativeMapsToImperativeMap + (M : IMPERATIVE_MAPS) + (D : sig type data end) + : IMPERATIVE_MAP with type key = M.key + and type data = D.data += struct + + type key = + M.key + + type data = + D.data + + let m = + M.create() + + let set k d = + M.add k d m + + let get k = + try + Some (M.find k m) + with Not_found -> + None + +end + +module ConsecutiveIntegerKeysToImperativeMaps + (K : sig val n: int end) + : IMPERATIVE_MAPS with type key = int + and type 'data t = 'data option array + += struct + + open K + + type key = + int + + type 'data t = + 'data option array + + let create () = + Array.make n None + + let clear m = + Array.fill m 0 n None + + let add key data m = + m.(key) <- Some data + + let find key m = + match m.(key) with + | None -> + raise Not_found + | Some data -> + data + + let iter f m = + Array.iteri (fun key data -> + match data with + | None -> + () + | Some data -> + f key data + ) m + +end + +module HashTableAsImperativeMaps + (H : Hashtbl.HashedType) + : IMPERATIVE_MAPS with type key = H.t += struct + + include Hashtbl.Make(H) + + let create () = + create 1023 + + let add key data table = + add table key data + + let find table key = + find key table + +end + +module TrivialHashedType + (T : sig type t end) + : Hashtbl.HashedType with type t = T.t += struct + + include T + + let equal = + (=) + + let hash = + Hashtbl.hash + +end + diff --git a/src/Maps.mli b/src/Maps.mli new file mode 100644 index 0000000000000000000000000000000000000000..1dcfa8bb4cd8c54afe199b721ccbe197eac214f9 --- /dev/null +++ b/src/Maps.mli @@ -0,0 +1,77 @@ +(* This module defines three signatures for association maps, together + with a number of conversion functors. *) + +(* Following the convention of the ocaml standard library, the [find] + functions raise [Not_found] when the key is not a member of the + domain of the map. By contrast, [get] returns an option. *) + +(* BEGIN PERSISTENT_MAPS *) +module type PERSISTENT_MAPS = sig + type key + type 'data t + val empty: 'data t + val add: key -> 'data -> 'data t -> 'data t + val find: key -> 'data t -> 'data + val iter: (key -> 'data -> unit) -> 'data t -> unit +end +(* END PERSISTENT_MAPS *) + +(* BEGIN IMPERATIVE_MAPS *) +module type IMPERATIVE_MAPS = sig + type key + type 'data t + val create: unit -> 'data t + val clear: 'data t -> unit + val add: key -> 'data -> 'data t -> unit + val find: key -> 'data t -> 'data + val iter: (key -> 'data -> unit) -> 'data t -> unit +end +(* END IMPERATIVE_MAPS *) + +(* BEGIN IMPERATIVE_MAP *) +module type IMPERATIVE_MAP = sig + type key + type data + val set: key -> data -> unit + val get: key -> data option +end +(* END IMPERATIVE_MAP *) + +(* An implementation of persistent maps can be made to satisfy the interface + of imperative maps. An imperative map is represented as a persistent map, + wrapped within a reference cell. *) + +module PersistentMapsToImperativeMaps + (M : PERSISTENT_MAPS) + : IMPERATIVE_MAPS with type key = M.key + and type 'data t = 'data M.t ref + +(* An implementation of imperative maps can be made to satisfy the interface + of a single imperative map. This map is obtained via a single call to [create]. *) + +module ImperativeMapsToImperativeMap + (M : IMPERATIVE_MAPS) + (D : sig type data end) + : IMPERATIVE_MAP with type key = M.key + and type data = D.data + +(* An implementation of imperative maps as arrays is possible if keys + are consecutive integers. *) + +module ConsecutiveIntegerKeysToImperativeMaps + (K : sig val n: int end) + : IMPERATIVE_MAPS with type key = int + and type 'data t = 'data option array + +(* An implementation of imperative maps as a hash table. *) + +module HashTableAsImperativeMaps + (H : Hashtbl.HashedType) + : IMPERATIVE_MAPS with type key = H.t + +(* A trivial implementation of equality and hashing. *) + +module TrivialHashedType + (T : sig type t end) + : Hashtbl.HashedType with type t = T.t + diff --git a/src/conflict.ml b/src/conflict.ml index 3d0821172806563be987b58ff353e5dcd22dffad..afa899c4075fb5756a82a04959984ed8cc11e5e1 100644 --- a/src/conflict.ml +++ b/src/conflict.ml @@ -187,14 +187,14 @@ let rec follow1 tok derivation offset' = function at the moment, so let's skip it. *) derivation | (item, _, offset) :: configs -> - let _, _, rhs, pos, length = Item.def item in + let prod, _, rhs, pos, length = Item.def item in if offset = offset' then (* This is an epsilon transition. Attack a new line and add a comment that explains why the lookahead symbol is produced or inherited. *) - let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in + let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in if TerminalSet.mem tok first then @@ -304,7 +304,7 @@ let explain_reduce_item if pos < length then match rhs.(pos) with | Symbol.N nt -> - let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in + let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in let first : bool = TerminalSet.mem tok first in let lookahead' = if nullable then first || lookahead else first diff --git a/src/grammar.ml b/src/grammar.ml index 7e02f7f72086fa27e7766ee000867cef2febf3d0..3707801b75a2a7197c8488ba7ea66a7bb3c20e17 100644 --- a/src/grammar.ml +++ b/src/grammar.ml @@ -277,6 +277,18 @@ module TerminalSet = struct ) ) + (* The following definitions are used in the computation of FIRST sets + below. They are not exported outside of this file. *) + + type property = + t + + let bottom = + empty + + let is_maximal _ = + false + end (* Maps over terminals. *) @@ -492,6 +504,18 @@ module Production = struct in loop accu k + (* This funny variant is lazy. If at some point [f] does not demand its + second argument, then iteration stops. *) + let foldnt_lazy (nt : Nonterminal.t) (f : index -> 'a Lazy.t -> 'a) (seed : 'a) : 'a = + let k, k' = ntprods.(nt) in + let rec loop prod seed = + if prod < k' then + f prod (lazy (loop (prod + 1) seed)) + else + seed + in + loop k seed + (* Accessors. *) let def prod = @@ -666,37 +690,32 @@ module ProductionMap = struct end (* ------------------------------------------------------------------------ *) -(* Build the grammar's forward and backward reference graphs. +(* If requested, build and print the forward reference graph of the grammar. + There is an edge of a nonterminal symbol [nt1] to every nonterminal symbol + [nt2] that occurs in the definition of [nt1]. *) - In the backward reference graph, edges relate each nonterminal [nt] - to each of the nonterminals whose definition mentions [nt]. The - reverse reference graph is used in the computation of the nullable, - nonempty, and FIRST sets. +let () = + if Settings.graph then begin - The forward reference graph is unused but can be printed on demand. *) + (* Allocate. *) -let forward : NonterminalSet.t array = - Array.make Nonterminal.n NonterminalSet.empty + let forward : NonterminalSet.t array = + Array.make Nonterminal.n NonterminalSet.empty + in -let backward : NonterminalSet.t array = - Array.make Nonterminal.n NonterminalSet.empty + (* Populate. *) -let () = - Array.iter (fun (nt1, rhs) -> - Array.iter (function - | Symbol.T _ -> - () - | Symbol.N nt2 -> - forward.(nt1) <- NonterminalSet.add nt2 forward.(nt1); - backward.(nt2) <- NonterminalSet.add nt1 backward.(nt2) - ) rhs - ) Production.table + Array.iter (fun (nt1, rhs) -> + Array.iter (function + | Symbol.T _ -> + () + | Symbol.N nt2 -> + forward.(nt1) <- NonterminalSet.add nt2 forward.(nt1) + ) rhs + ) Production.table; -(* ------------------------------------------------------------------------ *) -(* If requested, dump the forward reference graph. *) + (* Print. *) -let () = - if Settings.graph then let module P = Dot.Print (struct type vertex = Nonterminal.t let name nt = @@ -714,37 +733,179 @@ let () = P.print f; close_out f + end + (* ------------------------------------------------------------------------ *) -(* Generic support for fixpoint computations. - - A fixpoint computation associates a property with every nonterminal. - A monotone function tells how properties are computed. [compute nt] - updates the property associated with nonterminal [nt] and returns a - flag that tells whether the property actually needed an update. The - state of the computation is maintained entirely inside [compute] and - is invisible here. - - Whenever a property of [nt] is updated, the properties of the - terminals whose definitions depend on [nt] are updated. The - dependency graph must be explicitly supplied. *) - -let fixpoint (dependencies : NonterminalSet.t array) (compute : Nonterminal.t -> bool) : unit = - let queue : Nonterminal.t Queue.t = Queue.create () in - let onqueue : bool array = Array.make Nonterminal.n true in - for i = 0 to Nonterminal.n - 1 do - Queue.add i queue - done; - Misc.qiter (fun nt -> - onqueue.(nt) <- false; - let changed = compute nt in - if changed then - NonterminalSet.iter (fun nt -> - if not onqueue.(nt) then begin - Queue.add nt queue; - onqueue.(nt) <- true - end - ) dependencies.(nt) - ) queue +(* Support for analyses of the grammar, expressed as fixed point computations. + We exploit the generic fixed point algorithm in [Fix]. *) + +(* We perform memoization only at nonterminal symbols. We assume that the + analysis of a symbol is the analysis of its definition (as opposed to, + say, a computation that depends on the occurrences of this symbol in + the grammar). *) + +module GenericAnalysis + (P : Fix.PROPERTY) + (S : sig + open P + + (* An analysis is specified by the following functions. *) + + (* [terminal] maps a terminal symbol to a property. *) + val terminal: Terminal.t -> property + + (* [disjunction] abstracts a binary alternative. That is, when we analyze + an alternative between several productions, we compute a property for + each of them independently, then we combine these properties using + [disjunction]. *) + val disjunction: property -> property Lazy.t -> property + + (* [P.bottom] should be a neutral element for [disjunction]. We use it in + the analysis of an alternative with zero branches. *) + + (* [conjunction] abstracts a binary sequence. That is, when we analyze a + sequence, we compute a property for each member independently, then we + combine these properties using [conjunction]. In general, conjunction + needs access to the first member of the sequence (a symbol), not just + to its analysis (a property). *) + val conjunction: Symbol.t -> property -> property Lazy.t -> property + + (* [epsilon] abstracts the empty sequence. It should be a neutral element + for [conjunction]. *) + val epsilon: property + + end) +: sig + open P + + (* The results of the analysis take the following form. *) + + (* To every nonterminal symbol, we associate a property. *) + val nonterminal: Nonterminal.t -> property + + (* To every symbol, we associate a property. *) + val symbol: Symbol.t -> property + + (* To every suffix of every production, we associate a property. + The offset [i], which determines the beginning of the suffix, + must be contained between [0] and [n], inclusive, where [n] + is the length of the production. *) + val production: Production.index -> int -> property + +end = struct + open P + + (* The following analysis functions are parameterized over [get], which allows + making a recursive call to the analysis at a nonterminal symbol. [get] maps + a nonterminal symbol to a property. *) + + (* Analysis of a symbol. *) + + let symbol sym get : property = + match sym with + | Symbol.T tok -> + S.terminal tok + | Symbol.N nt -> + (* Recursive call to the analysis, via [get]. *) + get nt + + (* Analysis of (a suffix of) a production [prod], starting at index [i]. *) + + let production prod i get : property = + let rhs = Production.rhs prod in + let n = Array.length rhs in + (* Conjunction over all symbols in the right-hand side. This can be viewed + as a version of [Array.fold_right], which does not necessarily begin at + index [0]. Note that, because [conjunction] is lazy, it is possible + to stop early. *) + let rec loop i = + if i = n then + S.epsilon + else + let sym = rhs.(i) in + S.conjunction sym + (symbol sym get) + (lazy (loop (i+1))) + in + loop i + + (* The analysis is the least fixed point of the following function, which + analyzes a nonterminal symbol by looking up and analyzing its definition + as a disjunction of conjunctions of symbols. *) + + let nonterminal nt get : property = + (* Disjunction over all productions for this nonterminal symbol. *) + Production.foldnt_lazy nt (fun prod rest -> + S.disjunction + (production prod 0 get) + rest + ) P.bottom + + (* The least fixed point is taken as follows. Note that it is computed + on demand, as [lfp] is called by the user. *) + + module F = + Fix.Make + (Maps.ConsecutiveIntegerKeysToImperativeMaps(Nonterminal)) + (P) + + let nonterminal = + F.lfp nonterminal + + (* The auxiliary functions can be published too. *) + + let symbol sym = + symbol sym nonterminal + + let production prod i = + production prod i nonterminal + +end + +(* ------------------------------------------------------------------------ *) +(* The computation of FOLLOW sets does not follow the above model. Instead, we + need to explicitly compute a system of equations over sets of terminal + symbols (in a first pass), then solve the constraints (in a second + pass). *) + +(* An equation's right-hand side is a set expression. *) + +type expr = +| EVar of Nonterminal.t +| EConstant of TerminalSet.t +| EUnion of expr * expr + +(* A system of equations is represented as an array, which maps nonterminal + symbols to expressions. *) + +type equations = + expr array + +(* This solver computes the least solution of a set of equations. *) + +let solve (eqs : equations) : Nonterminal.t -> TerminalSet.t = + + let rec expr e get = + match e with + | EVar nt -> + get nt + | EConstant c -> + c + | EUnion (e1, e2) -> + TerminalSet.union (expr e1 get) (expr e2 get) + in + + let nonterminal nt get = + expr eqs.(nt) get + in + + let module F = + Fix.Make + (Maps.ConsecutiveIntegerKeysToImperativeMaps(Nonterminal)) + (TerminalSet) + in + + F.lfp nonterminal (* ------------------------------------------------------------------------ *) (* Compute which nonterminals are nonempty, that is, recognize a @@ -753,80 +914,57 @@ let fixpoint (dependencies : NonterminalSet.t array) (compute : Nonterminal.t -> difference is in the base case: a single terminal symbol is not nullable, but is nonempty. *) -let compute (basecase : bool) : (bool array) * (Symbol.t -> bool) = - let property : bool array = - Array.make Nonterminal.n false - in - let symbol_has_property = function - | Symbol.T _ -> - basecase - | Symbol.N nt -> - property.(nt) - in - fixpoint backward (fun nt -> - if property.(nt) then - false (* no change *) - else - (* disjunction over all productions for this nonterminal *) - let updated = Production.foldnt nt false (fun prod accu -> - accu || - let rhs = Production.rhs prod in - (* conjunction over all symbols in the right-hand side *) - Array.fold_left (fun accu symbol -> - accu && symbol_has_property symbol - ) true rhs - ) in - property.(nt) <- updated; - updated - ); - property, symbol_has_property - -let (nonempty : bool array), _ = - compute true - -let (nullable : bool array), (nullable_symbol : Symbol.t -> bool) = - compute false +module NONEMPTY = + GenericAnalysis + (Boolean) + (struct + (* A terminal symbol is nonempty. *) + let terminal _ = true + (* An alternative is nonempty if at least one branch is nonempty. *) + let disjunction p q = p || (Lazy.force q) + (* A sequence is nonempty if both members are nonempty. *) + let conjunction _ p q = p && (Lazy.force q) + (* The sequence epsilon is nonempty. It generates the singleton + language {epsilon}. *) + let epsilon = true + end) + +module NULLABLE = + GenericAnalysis + (Boolean) + (struct + (* A terminal symbol is not nullable. *) + let terminal _ = false + (* An alternative is nullable if at least one branch is nullable. *) + let disjunction p q = p || (Lazy.force q) + (* A sequence is nullable if both members are nullable. *) + let conjunction _ p q = p && (Lazy.force q) + (* The sequence epsilon is nullable. *) + let epsilon = true + end) (* ------------------------------------------------------------------------ *) (* Compute FIRST sets. *) -let first = - Array.make Nonterminal.n TerminalSet.empty - -let first_symbol = function - | Symbol.T tok -> - TerminalSet.singleton tok - | Symbol.N nt -> - first.(nt) - -let nullable_first_rhs (rhs : Symbol.t array) (i : int) : bool * TerminalSet.t = - let length = Array.length rhs in - assert (i <= length); - let rec loop i toks = - if i = length then - true, toks - else - let symbol = rhs.(i) in - let toks = TerminalSet.union (first_symbol symbol) toks in - if nullable_symbol symbol then - loop (i+1) toks - else - false, toks - in - loop i TerminalSet.empty - -let () = - fixpoint backward (fun nt -> - let original = first.(nt) in - (* union over all productions for this nonterminal *) - let updated = Production.foldnt nt TerminalSet.empty (fun prod accu -> - let rhs = Production.rhs prod in - let _, toks = nullable_first_rhs rhs 0 in - TerminalSet.union toks accu - ) in - first.(nt) <- updated; - TerminalSet.compare original updated <> 0 - ) +module FIRST = + GenericAnalysis + (TerminalSet) + (struct + (* A terminal symbol has a singleton FIRST set. *) + let terminal = TerminalSet.singleton + (* The FIRST set of an alternative is the union of the FIRST sets. *) + let disjunction p q = TerminalSet.union p (Lazy.force q) + (* The FIRST set of a sequence is the union of: + the FIRST set of the first member, and + the FIRST set of the second member, if the first member is nullable. *) + let conjunction symbol p q = + if NULLABLE.symbol symbol then + TerminalSet.union p (Lazy.force q) + else + p + (* The FIRST set of the empty sequence is empty. *) + let epsilon = TerminalSet.empty + end) (* ------------------------------------------------------------------------ *) @@ -839,37 +977,71 @@ let () = be read. *) StringSet.iter (fun symbol -> let nt = Nonterminal.lookup symbol in - if not nonempty.(nt) then + if not (NONEMPTY.nonterminal nt) then Error.error (Nonterminal.positions nt) (Printf.sprintf "%s generates the empty language." (Nonterminal.print false nt)); - if TerminalSet.is_empty first.(nt) then + if TerminalSet.is_empty (FIRST.nonterminal nt) then Error.error (Nonterminal.positions nt) (Printf.sprintf "%s generates the language {epsilon}." (Nonterminal.print false nt)) ) Front.grammar.start_symbols; (* If a nonterminal symbol generates the empty language, issue a warning. *) for nt = Nonterminal.start to Nonterminal.n - 1 do - if not nonempty.(nt) then + if not (NONEMPTY.nonterminal nt) then Error.grammar_warning (Nonterminal.positions nt) (Printf.sprintf "%s generates the empty language." (Nonterminal.print false nt)); done +(* ------------------------------------------------------------------------ *) +(* For every nonterminal symbol [nt], compute a word of minimal length + generated by [nt]. This analysis subsumes [NONEMPTY] and [NULLABLE]. + Indeed, [nt] produces a nonempty language if only if the minimal length is + finite; [nt] is nullable if only if the minimal length is zero. *) + +(* This analysis is in principle more costly than the [NONEMPTY] and + [NULLABLE], so it is performed only on demand. In practice, it seems + to be very cheap: its cost is not measurable for any of the grammars + in our benchmark suite. *) + +module MINIMAL = + GenericAnalysis + (struct + include CompletedNatWitness + type property = Terminal.t t + end) + (struct + open CompletedNatWitness + (* A terminal symbol has length 1. *) + let terminal t = Finite (1, lazy [t]) + (* The length of an alternative is the minimum length of any branch. *) + let disjunction = min_lazy + (* The length of a sequence is the sum of the lengths of the members. *) + let conjunction _ = add_lazy + (* The epsilon sequence has length 0. *) + let epsilon = Finite (0, lazy []) + end) + (* ------------------------------------------------------------------------ *) (* Dump the analysis results. *) let () = Error.logG 2 (fun f -> - for nt = 0 to Nonterminal.n - 1 do + for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "nullable(%s) = %b\n" (Nonterminal.print false nt) - nullable.(nt) + (NULLABLE.nonterminal nt) done; - for nt = 0 to Nonterminal.n - 1 do + for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "first(%s) = %s\n" (Nonterminal.print false nt) - (TerminalSet.print first.(nt)) + (TerminalSet.print (FIRST.nonterminal nt)) + done; + for nt = Nonterminal.start to Nonterminal.n - 1 do + Printf.fprintf f "minimal(%s) = %s\n" + (Nonterminal.print false nt) + (CompletedNatWitness.print Terminal.print (MINIMAL.nonterminal nt)) done ) @@ -881,81 +1053,55 @@ let () = this is useful for the SLR(1) test. Thus, we perform this analysis only on demand. *) -let follow : TerminalSet.t array Lazy.t = - lazy ( - - let follow = - Array.make Nonterminal.n TerminalSet.empty - - and forward : NonterminalSet.t array = - Array.make Nonterminal.n NonterminalSet.empty +let follow : Nonterminal.t -> TerminalSet.t = - and backward : NonterminalSet.t array = - Array.make Nonterminal.n NonterminalSet.empty + (* First pass. Build a system of equations between sets of nonterminal + symbols. *) - in - - (* Iterate over all start symbols. *) - for nt = 0 to Nonterminal.start - 1 do - assert (Nonterminal.is_start nt); - (* Add # to FOLLOW(nt). *) - follow.(nt) <- TerminalSet.singleton Terminal.sharp - done; - (* We need to do this explicitly because our start productions are - of the form S' -> S, not S' -> S #, so # will not automatically - appear into FOLLOW(S) when the start productions are examined. *) - - (* Iterate over all productions. *) - Array.iter (fun (nt1, rhs) -> - (* Iterate over all nonterminal symbols [nt2] in the right-hand side. *) - Array.iteri (fun i symbol -> - match symbol with - | Symbol.T _ -> - () - | Symbol.N nt2 -> - let nullable, first = nullable_first_rhs rhs (i+1) in - (* The FIRST set of the remainder of the right-hand side - contributes to the FOLLOW set of [nt2]. *) - follow.(nt2) <- TerminalSet.union first follow.(nt2); - (* If the remainder of the right-hand side is nullable, - FOLLOW(nt1) contributes to FOLLOW(nt2). *) - if nullable then begin - forward.(nt1) <- NonterminalSet.add nt2 forward.(nt1); - backward.(nt2) <- NonterminalSet.add nt1 backward.(nt2) - end - ) rhs - ) Production.table; - - (* The fixpoint computation used here is not the most efficient - algorithm -- one could do better by first collapsing the - strongly connected components, then walking the graph in - topological order. But this will do. *) - - fixpoint forward (fun nt -> - let original = follow.(nt) in - (* union over all contributors *) - let updated = NonterminalSet.fold (fun nt' accu -> - TerminalSet.union follow.(nt') accu - ) backward.(nt) original in - follow.(nt) <- updated; - TerminalSet.compare original updated <> 0 - ); - - follow + let follow : equations = + Array.make Nonterminal.n (EConstant TerminalSet.empty) + in - ) + (* Iterate over all start symbols. *) + let sharp = EConstant (TerminalSet.singleton Terminal.sharp) in + for nt = 0 to Nonterminal.start - 1 do + assert (Nonterminal.is_start nt); + (* Add # to FOLLOW(nt). *) + follow.(nt) <- EUnion (sharp, follow.(nt)) + done; + (* We need to do this explicitly because our start productions are + of the form S' -> S, not S' -> S #, so # will not automatically + appear into FOLLOW(S) when the start productions are examined. *) + + (* Iterate over all productions. *) + Array.iteri (fun prod (nt1, rhs) -> + (* Iterate over all nonterminal symbols [nt2] in the right-hand side. *) + Array.iteri (fun i symbol -> + match symbol with + | Symbol.T _ -> + () + | Symbol.N nt2 -> + let nullable = NULLABLE.production prod (i+1) + and first = FIRST.production prod (i+1) in + (* The FIRST set of the remainder of the right-hand side + contributes to the FOLLOW set of [nt2]. *) + follow.(nt2) <- EUnion (EConstant first, follow.(nt2)); + (* If the remainder of the right-hand side is nullable, + FOLLOW(nt1) contributes to FOLLOW(nt2). *) + if nullable then + follow.(nt2) <- EUnion (EVar nt1, follow.(nt2)) + ) rhs + ) Production.table; -(* Define an accessor that triggers the computation of the FOLLOW sets - if it has not been performed already. *) + (* Second pass. Solve the equations (on demand). *) -let follow nt = - (Lazy.force follow).(nt) + solve follow (* At log level 2, display the FOLLOW sets. *) let () = Error.logG 2 (fun f -> - for nt = 0 to Nonterminal.n - 1 do + for nt = Nonterminal.start to Nonterminal.n - 1 do Printf.fprintf f "follow(%s) = %s\n" (Nonterminal.print false nt) (TerminalSet.print (follow nt)) @@ -974,14 +1120,15 @@ let tfollow : TerminalSet.t array Lazy.t = in (* Iterate over all productions. *) - Array.iter (fun (nt1, rhs) -> + Array.iteri (fun prod (nt1, rhs) -> (* Iterate over all terminal symbols [t2] in the right-hand side. *) Array.iteri (fun i symbol -> match symbol with | Symbol.N _ -> () | Symbol.T t2 -> - let nullable, first = nullable_first_rhs rhs (i+1) in + let nullable = NULLABLE.production prod (i+1) + and first = FIRST.production prod (i+1) in (* The FIRST set of the remainder of the right-hand side contributes to the FOLLOW set of [t2]. *) tfollow.(t2) <- TerminalSet.union first tfollow.(t2); @@ -1040,10 +1187,10 @@ let explain (tok : Terminal.t) (rhs : Symbol.t array) (i : int) = assert (Terminal.equal tok tok'); EObvious | Symbol.N nt -> - if TerminalSet.mem tok first.(nt) then + if TerminalSet.mem tok (FIRST.nonterminal nt) then EFirst (tok, nt) else begin - assert nullable.(nt); + assert (NULLABLE.nonterminal nt); match loop (i + 1) with | ENullable (symbols, e) -> ENullable (symbol :: symbols, e) @@ -1072,11 +1219,13 @@ let rec convert = function module Analysis = struct - let nullable = Array.get nullable + let nullable = NULLABLE.nonterminal - let first = Array.get first + let first = FIRST.nonterminal - let nullable_first_rhs = nullable_first_rhs + let nullable_first_prod prod i = + NULLABLE.production prod i, + FIRST.production prod i let explain_first_rhs (tok : Terminal.t) (rhs : Symbol.t array) (i : int) = convert (explain tok rhs i) diff --git a/src/grammar.mli b/src/grammar.mli index 605683680adbb725630fb95eaf45a87385c20256..27bc248e3c3c134c6837b56e5fc9fa6f58894beb 100644 --- a/src/grammar.mli +++ b/src/grammar.mli @@ -373,12 +373,12 @@ module Analysis : sig val first: Nonterminal.t -> TerminalSet.t - (* [nullable_first_rhs rhs i] considers the string of symbols found at - offset [i] in the array [rhs]. It returns its NULLABLE flag as well + (* [nullable_first_prod prod i] considers the suffix of the the production + [prod] defined by offset [i]. It returns its NULLABLE flag as well as its FIRST set. The offset [i] must be contained between [0] and - [n], where [n] is the length of [rhs], inclusive. *) + [n], inclusive, where [n] is the length of production [prod]. *) - val nullable_first_rhs: Symbol.t array -> int -> bool * TerminalSet.t + val nullable_first_prod: Production.index -> int -> bool * TerminalSet.t (* [explain_first_rhs tok rhs i] explains why the token [tok] appears in the FIRST set for the string of symbols found at offset [i] in diff --git a/src/invariant.ml b/src/invariant.ml index 68fc33756c52ae26605f8838700a7e87c0ff7f57..ee1439c1de9a678061ecd383b37e0feb55af2663 100644 --- a/src/invariant.ml +++ b/src/invariant.ml @@ -170,7 +170,9 @@ open StateLattice let stack_states : Lr1.node -> property = let module F = - Fix.Make(Lr1.ImperativeNodeMap)(StateLattice) + Fix.Make + (Maps.PersistentMapsToImperativeMaps(Lr1.NodeMap)) + (StateLattice) in F.lfp (fun node (get : Lr1.node -> property) -> diff --git a/src/item.ml b/src/item.ml index f5b1d1dd4d7710c3e32adb9876ab6c691fc03e20..7487e5c1df3dae8b3d3cf65fcbe1b0f163aeaa0c 100644 --- a/src/item.ml +++ b/src/item.ml @@ -171,7 +171,7 @@ module Closure (L : Lookahead.S) = struct let constant, transmits = if pos < length then - let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in + let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in L.constant first, nullable else (* No epsilon transitions leave this item. *) diff --git a/src/lr1.ml b/src/lr1.ml index c1c3719067b1eaa2e246b0afdc6ad152b941fb8c..d816d9630c9bbd2661a8a1c521ed282e528096a2 100644 --- a/src/lr1.ml +++ b/src/lr1.ml @@ -64,31 +64,6 @@ module NodeSet = module NodeMap = Map.Make (Node) -module ImperativeNodeMap = struct - - type key = - NodeMap.key - - type 'data t = - 'data NodeMap.t ref - - let create () = - ref NodeMap.empty - - let clear t = - t := NodeMap.empty - - let add k d t = - t := NodeMap.add k d !t - - let find k t = - NodeMap.find k !t - - let iter f t = - NodeMap.iter f !t - -end - (* ------------------------------------------------------------------------ *) (* Output debugging information if [--follow-construction] is enabled. *) diff --git a/src/lr1.mli b/src/lr1.mli index 53e2f899bb3e399eb7d737afeaf65c8e74bea6c7..626a6a116f962068684be4de705015c4b5aca477 100644 --- a/src/lr1.mli +++ b/src/lr1.mli @@ -25,8 +25,6 @@ module NodeSet : Set.S with type elt = node module NodeMap : Map.S with type key = node -module ImperativeNodeMap : Fix.IMPERATIVE_MAPS with type key = node - (* These are the automaton's entry states, indexed by the start productions. *) val entry: node ProductionMap.t