lr1partial.ml 7.75 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15
open Grammar

16 17
exception Oops

18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
module Run (X : sig

  (* A restricted set of tokens of interest. *)

  val tokens: TerminalSet.t

  (* A state of the (merged) LR(1) automaton that we're trying to
     simulate. *)

  val goal: Lr1.node

end) = struct

  (* First, let's restrict our interest to the nodes of the merged
     LR(1) automaton that can reach the goal node. Some experiments
     show that this can involve one tenth to one half of all nodes.
     This optimization seems minor, but is easy to implement. *)

  let relevant =
    Lr1.reverse_dfs X.goal

  (* Second, all of the states that we shall consider are restricted
     to the set of tokens of interest. This is an important idea: by
     abstracting away some information, we make the construction much
     faster. *)

  let restrict =
    Lr0.restrict X.tokens

  (* Constructing the automaton. The automaton is represented as a
     graph. States are never merged -- this is a canonical LR(1)
     construction!

     As we go, we record the correspondence between nodes in this
     automaton and nodes in the merged LR(1) automaton. This allows
     us to tell when we have reached the desired place.

     This also allows us not to follow transitions that have already
     been eliminated, in the merged automaton, via resolution of
     shift/reduce conflicts. Whenever we follow a transition in the
     canonical LR(1) automaton, we check that the corresponding
     transition is legal in the merged LR(1) automaton.

     The automaton is explored breadth-first and shortest paths from
     every node to one of the start nodes are recorded. *)

  type node = {
      state: Lr0.lr1state;
      ancestor: (Symbol.t * node) option;
      shadow: Lr1.node;
    }

  (* A queue of pending nodes, whose successors should be explored. *)

  let queue : node Queue.t =
    Queue.create()

  (* Mapping of LR(0) state numbers to lists of nodes. *)

  let map : node list array =
78
    Array.make Lr0.n []
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108

  (* Exploring a state. This creates a new node, if necessary, and
     enqueues it for further exploration. *)

  exception Goal of node * Terminal.t

  let explore ancestor shadow (state : Lr0.lr1state) : unit =

    (* Find all existing nodes that share the same LR(0) core. *)

    let k = Lr0.core state in
    assert (k < Lr0.n);
    let similar = map.(k) in

    (* Check whether one of these nodes coincides with the candidate
       new node. If so, stop. This check requires comparing not only
       the states of the partial, canonical automaton, but also their
       shadows in the full, merged automaton. This is because a single
       state of the canonical automaton may be reached along several
       different paths, leading to distinct shadows in the merged
       automaton, and we must explore all of these paths in order to
       ensure that we eventually find a goal node. *)

    if not (List.exists (fun node ->
      Lr0.equal state node.state && shadow == node.shadow
    ) similar) then begin

      (* Otherwise, create a new node. *)

      let node = {
109 110 111
        state = state;
        ancestor = ancestor;
        shadow = shadow;
112 113 114 115 116 117
      } in

      map.(k) <- node :: similar;
      Queue.add node queue;

      (* Check whether this is a goal node. A node [N] is a goal node
118 119 120 121 122
         if (i) [N] has a conflict involving one of the tokens of
         interest and (ii) [N] corresponds to the goal node, that is,
         the path that leads to [N] in the canonical LR(1) automaton
         leads to the goal node in the merged LR(1) automaton. Note
         that these conditions do not uniquely define [N]. *)
123 124

      if shadow == X.goal then
125 126 127 128 129 130 131 132 133 134 135 136 137 138
        let can_reduce =
          ref TerminalSet.empty in
        let reductions1 : Production.index list TerminalMap.t =
          Lr1.reductions shadow in
        List.iter (fun (toks, prod) ->
          TerminalSet.iter (fun tok ->

            (* We are looking at a [(tok, prod)] pair -- a reduction
               in the canonical automaton state. *)

            (* Check that this reduction, which exists in the canonical
               automaton state, also exists in the merged automaton --
               that is, it wasn't suppressed by conflict resolution. *)

139
            if List.mem prod (TerminalMap.lookup tok reductions1) then
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161

              try
                let (_ : Lr1.node) =
                  SymbolMap.find (Symbol.T tok) (Lr1.transitions shadow)
                in
                (* Shift/reduce conflict. *)
                raise (Goal (node, tok))
              with Not_found ->
                let toks = !can_reduce in
                (* We rely on the property that [TerminalSet.add tok toks]
                   preserves physical equality when [tok] is a member of
                   [toks]. *)
                let toks' = TerminalSet.add tok toks in
                if toks == toks' then
                  (* Reduce/reduce conflict. *)
                  raise (Goal (node, tok))
                else
                  (* No conflict so far. *)
                  can_reduce := toks'

          ) toks
        ) (Lr0.reductions state)
162 163 164 165 166 167 168 169 170 171 172

    end

  (* Populate the queue with the start nodes. Until we find a goal
     node, take a node out the queue, construct the nodes that
     correspond to its successors, and enqueue them. *)

  let goal, token =
    try

      ProductionMap.iter (fun (prod : Production.index) (k : Lr0.node) ->
173 174 175 176 177 178 179
        let shadow = try
            ProductionMap.find prod Lr1.entry
          with Not_found ->
            assert false
        in
        if relevant shadow then
          explore None shadow (restrict (Lr0.start k))
180 181 182
      ) Lr0.entry;

      Misc.qiter (fun node ->
183 184 185 186 187 188 189 190
        SymbolMap.iter (fun symbol state ->
          try
            let shadow =
              SymbolMap.find symbol (Lr1.transitions node.shadow) in
            if relevant shadow then
              explore (Some (symbol, node)) shadow (restrict state)
          with Not_found ->
            (* No shadow. This can happen if a shift/reduce conflict
191 192
               was resolved in favor in reduce. Ignore that transition. *)
            ()
193
        ) (Lr0.transitions node.state)
194 195 196
      ) queue;

      (* We didn't find a goal node. This shouldn't happen! If the
197 198 199 200
         goal node in the merged LR(1) automaton has a conflict,
         then there should exist a node with a conflict in the
         canonical automaton as well. Otherwise, Pager's construction
         is incorrect. *)
201

202
      raise Oops
203 204 205

    with Goal (node, tok) ->
      node, tok
206

207 208 209 210 211 212 213 214
  (* Query the goal node that was found about the shortest path from
     it to one of the entry nodes. *)

  let source, path =

    let rec follow path node =
      match node.ancestor with
      | None ->
215
          Lr1.start2item node.shadow, Array.of_list path
216
      | Some (symbol, node) ->
217
          follow (symbol :: path) node
218 219 220 221 222 223 224 225
    in
    follow [] goal

  let goal =
    Lr0.export goal.state

end