diff --git a/src/codeBackend.ml b/src/codeBackend.ml index 668e224250af5688ba3d6ed4bbd2c258d0994fa6..618b7a387cf836d013e526557229f49e9972dd81 100644 --- a/src/codeBackend.ml +++ b/src/codeBackend.ml @@ -1474,10 +1474,11 @@ let errorcasedef = The code initializes a parser environment, an empty stack, and invokes [run]. - 2015/11/04. If the state [s] can reduce an epsilon production, then the - initial stack should contain a sentinel cell with a valid [endp] field - at offset 1. Otherwise, the initial stack can be the unit value, as it - used to be. (Note that it would be OK to always have a sentinel.) *) + 2015/11/11. If the state [s] can reduce an epsilon production whose left-hand + symbol keeps track of its start or end position, or if [s] can reduce any + production that mentions [$endpos($0)], then the initial stack should contain + a sentinel cell with a valid [endp] field at offset 1. For simplicity, we + always create a sentinel cell. *) let entrydef s = let nt = Item.startnt (Lr1.start2item s) in @@ -1485,11 +1486,8 @@ let entrydef s = and lexbuf = "lexbuf" in let initial_stack = - if Lr1.has_beforeend s then - let initial_position = getendp in - etuple [ EUnit; initial_position ] - else - EUnit + let initial_position = getendp in + etuple [ EUnit; initial_position ] in { diff --git a/src/invariant.ml b/src/invariant.ml index e54d0842589e57b9941fc17b6b0a6beba6b58c6e..228051a6d97aa338a2c88ed480214e0c1c0cc016 100644 --- a/src/invariant.ml +++ b/src/invariant.ml @@ -597,6 +597,48 @@ let rewind node : instruction = (* ------------------------------------------------------------------------ *) +(* Machinery for the computation of which symbols must keep track of their + start or end positions. *) + +open Keyword + +type variable = + Symbol.t * where (* WhereStart or WhereEnd *) + +module M : Fix.IMPERATIVE_MAPS with type key = variable = struct + type key = variable + type 'data t = { + mutable startp: 'data SymbolMap.t; + mutable endp: 'data SymbolMap.t; + } + open SymbolMap + let create() = + { startp = empty; endp = empty } + let clear m = + m.startp <- empty; m.endp <- empty + let add (sym, where) data m = + match where with + | WhereStart -> + m.startp <- add sym data m.startp + | WhereEnd -> + m.endp <- add sym data m.endp + | WhereSymbolStart -> + assert false + let find (sym, where) m = + match where with + | WhereStart -> + find sym m.startp + | WhereEnd -> + find sym m.endp + | WhereSymbolStart -> + assert false + let iter f m = + iter (fun sym -> f (sym, WhereStart)) m.startp; + iter (fun sym -> f (sym, WhereEnd)) m.endp +end + +(* ------------------------------------------------------------------------ *) + (* We now determine which positions must be kept track of. For simplicity, we do this on a per-symbol basis. That is, for each symbol, either we never keep track of position information, or we always do. In fact, we do @@ -613,12 +655,12 @@ let rewind node : instruction = right-hand side (if there is one) must do so as well. That is, unless the right-hand side is empty. *) -(* 2015/11/04. When an epsilon production [prod] is reduced, the top stack cell - may be consulted for its end position. This implies that this cell must exist +(* 2015/11/11. When a production [prod] is reduced, the top stack cell may be + consulted for its end position. This implies that this cell must exist and must store an end position! Now, when does this happen? - 1- This happens if the left-hand symbol of the production, [nt prod], keeps - track of its start or end position. + 1- This happens if [prod] is an epsilon production and the left-hand symbol + of the production, [nt prod], keeps track of its start or end position. 2- This happens if the semantic action explicitly mentions the keyword [$endpos($0)]. @@ -630,96 +672,118 @@ let rewind node : instruction = the sentinel cell at the bottom of the stack must contain an end position. Point (b) doesn't concern us here, but point (a) does. We must implement the - constraint (1) \/ (2) -> (a). *) + constraint (1) \/ (2) -> (a). Point (b) is taken care of in the code back-end, + where, for simplicity, we always create a sentinel cell. *) -open Keyword +(* I will say that this is a lot more sophisticated than I would like. The code + back-end has been known for its efficiency and I am trying to maintain this + property -- in particular, I would like to keep track of no positions at all, + if the user doesn't use any position keyword. But I am suffering. *) -let startp = - ref SymbolSet.empty +module S = + FixSolver.Make(M)(Boolean) -let endp = - ref SymbolSet.empty - -let rec require where symbol = - let wherep = - match where with - | WhereStart -> - startp - | WhereEnd -> - endp - | WhereSymbolStart -> - assert false (* has been expanded away *) - in - if not (SymbolSet.mem symbol !wherep) then begin - wherep := SymbolSet.add symbol !wherep; - match symbol with - | Symbol.T _ -> - () - | Symbol.N nt -> - Production.iternt nt (require_aux where) - end - -and require_aux where prod = - let _nt, rhs = Production.def prod in - let length = Array.length rhs in - if length > 0 then - match where with - | WhereStart -> - require where rhs.(0) - | WhereEnd -> - require where rhs.(length - 1) - | WhereSymbolStart -> - assert false (* has been expanded away *) +let record_ConVar, record_VarVar, solve = + S.create() let () = + + (* We gather the constraints explained above in two loops. The first loop + looks at every (non-start) production [prod]. The second loop looks at + every (non-initial) state [s]. *) + Production.iterx (fun prod -> - let rhs = Production.rhs prod + + let nt, rhs = Production.def prod and ids = Production.identifiers prod and action = Production.action prod in + let length = Array.length rhs in + + if length > 0 then begin + (* If [nt] keeps track of its start position, then the first symbol + in the right-hand side must do so as well. *) + record_VarVar (Symbol.N nt, WhereStart) (rhs.(0), WhereStart); + (* If [nt] keeps track of its end position, then the last symbol + in the right-hand side must do so as well. *) + record_VarVar (Symbol.N nt, WhereEnd) (rhs.(length - 1), WhereEnd) + end; + KeywordSet.iter (function | SyntaxError -> - () + () | Position (Before, _, _) -> - (* Doing nothing here is OK because the presence of [$endpos($0)] - in a semantic action is taken account below when we look at - every state and check whether it can reduce a production whose - semantic action contains [$endpos($0)]. *) + (* Doing nothing here because [$endpos($0)] is dealt with in + the second loop. *) () - | Position (Left, where, _) -> - require_aux where prod + | Position (Left, _, _) -> + (* [$startpos] and [$endpos] have been expanded away. *) + assert false + | Position (RightNamed _, WhereSymbolStart, _) -> + (* [$symbolstartpos(x)] does not exist. *) + assert false | Position (RightNamed id, where, _) -> + (* If the semantic action mentions [$startpos($i)], then the + [i]-th symbol in the right-hand side must keep track of + its start position. Similarly for end positions. *) Array.iteri (fun i id' -> if id = id' then - require where rhs.(i) + record_ConVar true (rhs.(i), where) ) ids ) (Action.keywords action) - ); - Lr1.iterx (fun node -> - (* 2015/11/04. See above. *) - if Lr1.has_beforeend node then - let sym = Misc.unSome (Lr1.incoming_symbol node) in - require WhereEnd sym + + ); (* end of loop on productions *) + + Lr1.iterx (fun s -> + (* Let [sym] be the incoming symbol of state [s]. *) + let sym = Misc.unSome (Lr1.incoming_symbol s) in + + (* Condition (1) in the long comment above (2015/11/11). If an epsilon + production [prod] can be reduced in state [s], if its left-hand side + [nt] keeps track of its start or end position, then [sym] must keep + track of its end position. *) + TerminalMap.iter (fun _ prods -> + let prod = Misc.single prods in + let nt, rhs = Production.def prod in + let length = Array.length rhs in + if length = 0 then begin + record_VarVar (Symbol.N nt, WhereStart) (sym, WhereEnd); + record_VarVar (Symbol.N nt, WhereEnd) (sym, WhereEnd) + end + ) (Lr1.reductions s); + + (* Condition (2) in the long comment above (2015/11/11). If a production + can be reduced in state [s] and mentions [$endpos($0)], then [sym] + must keep track of its end position. *) + if Lr1.has_beforeend s then + record_ConVar true (sym, WhereEnd) + ) -let startp = - !startp +let track : variable -> bool = + solve() + +let startp symbol = + track (symbol, WhereStart) + +let endp symbol = + track (symbol, WhereEnd) + +let for_every_symbol (f : Symbol.t -> unit) : unit = + Terminal.iter (fun t -> f (Symbol.T t)); + Nonterminal.iter (fun nt -> f (Symbol.N nt)) -let endp = - !endp +let sum_over_every_symbol (f : Symbol.t -> bool) : int = + let c = ref 0 in + for_every_symbol (fun sym -> if f sym then c := !c + 1); + !c let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d symbols keep track of their start position.\n\ %d out of %d symbols keep track of their end position.\n" - (SymbolSet.cardinal startp) (Terminal.n + Nonterminal.n) - (SymbolSet.cardinal endp) (Terminal.n + Nonterminal.n)) - -let startp symbol = - SymbolSet.mem symbol startp - -let endp symbol = - SymbolSet.mem symbol endp + (sum_over_every_symbol startp) (Terminal.n + Nonterminal.n) + (sum_over_every_symbol endp) (Terminal.n + Nonterminal.n)) (* ------------------------------------------------------------------------- *) (* Miscellaneous. *)