InspectionTableInterpreter.ml 10.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU Library General Public License version 2, with a         *)
(*  special exception on linking, as described in the file LICENSE.           *)
(*                                                                            *)
(******************************************************************************)

14 15 16 17 18 19 20 21 22 23 24 25 26
(* -------------------------------------------------------------------------- *)

(* The type functor. *)

module Symbols (T : sig

  type 'a terminal
  type 'a nonterminal

end) = struct

  open T

27 28 29
  (* This should be the only place in the whole library (and generator!)
     where these types are defined. *)

30 31 32 33
  type 'a symbol =
    | T : 'a terminal -> 'a symbol
    | N : 'a nonterminal -> 'a symbol

34
  type xsymbol =
35 36 37 38 39 40 41 42
    | X : 'a symbol -> xsymbol

end

(* -------------------------------------------------------------------------- *)

(* The code functor. *)

43
module Make
44
  (TT : TableFormat.TABLES)
45 46
  (IT : InspectionTableFormat.TABLES
        with type 'a lr1state = int)
47 48 49 50
  (ET : EngineTypes.TABLE
        with type terminal = int
         and type nonterminal = int
         and type semantic_value = Obj.t)
POTTIER Francois's avatar
POTTIER Francois committed
51 52 53
  (E : sig
     type 'a env = (ET.state, ET.semantic_value, ET.token) EngineTypes.env
   end)
54
= struct
55

56
  (* Including [IT] is an easy way of inheriting the definitions of the types
57
     [symbol] and [xsymbol]. *)
58

59
  include IT
60

61 62 63
  (* This auxiliary function decodes a packed linearized array, as created by
     [TableBackend.linearize_and_marshal1]. Here, we read a row all at once. *)

POTTIER Francois's avatar
POTTIER Francois committed
64 65 66
  let read_packed_linearized
    (data, entry : PackedIntArray.t * PackedIntArray.t) (i : int) : int list
  =
67 68 69 70 71
    LinearizedArray.read_row_via
      (PackedIntArray.get data)
      (PackedIntArray.get entry)
      i

72 73 74
  (* This auxiliary function decodes a symbol. The encoding was done by
     [encode_symbol] or [encode_symbol_option] in the table back-end. *)

75
  let decode_symbol (symbol : int) : IT.xsymbol =
76 77 78 79 80 81 82 83
    (* If [symbol] is 0, then we have no symbol. This could mean e.g.
       that the function [incoming_symbol] has been applied to an
       initial state. In principle, this cannot happen. *)
    assert (symbol > 0);
    (* The low-order bit distinguishes terminal and nonterminal symbols. *)
    let kind = symbol land 1 in
    let symbol = symbol lsr 1 in
    if kind = 0 then
84
      IT.terminal (symbol - 1)
85
    else
86
      IT.nonterminal symbol
87

88 89 90 91 92 93
  (* These auxiliary functions convert a symbol to its integer code. For speed
     and for convenience, we use an unsafe type cast. This relies on the fact
     that the data constructors of the [terminal] and [nonterminal] GADTs are
     declared in an order that reflects their internal code. In the case of
     nonterminal symbols, we add [start] to account for the presence of the
     start symbols. *)
94

95
  let n2i (nt : 'a IT.nonterminal) : int =
96
    let answer = TT.start + Obj.magic nt in
POTTIER Francois's avatar
POTTIER Francois committed
97 98
    (* For safety, check that the above cast produced a correct result. *)
    assert (IT.nonterminal answer = X (N nt));
99 100
    answer

101
  let t2i (t : 'a IT.terminal) : int =
102
    let answer = Obj.magic t in
POTTIER Francois's avatar
POTTIER Francois committed
103 104
    (* For safety, check that the above cast produced a correct result. *)
    assert (IT.terminal answer = X (T t));
105 106
    answer

107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
  (* Ordering functions. *)

  let compare_terminals t1 t2 =
    (* Subtraction is safe because overflow is impossible. *)
    t2i t1 - t2i t2

  let compare_nonterminals nt1 nt2 =
    (* Subtraction is safe because overflow is impossible. *)
    n2i nt1 - n2i nt2

  let compare_symbols symbol1 symbol2 =
    match symbol1, symbol2 with
    | X (T _), X (N _) ->
        -1
    | X (N _), X (T _) ->
        1
    | X (T t1), X (T t2) ->
        compare_terminals t1 t2
    | X (N nt1), X (N nt2) ->
        compare_nonterminals nt1 nt2

128 129 130 131 132 133 134 135 136
  let compare_productions prod1 prod2 =
    (* Subtraction is safe because overflow is impossible. *)
    prod1 - prod2

  let compare_items (prod1, index1) (prod2, index2) =
    let c = compare_productions prod1 prod2 in
    (* Subtraction is safe because overflow is impossible. *)
    if c <> 0 then c else index1 - index2

137 138
  (* The function [incoming_symbol] goes through the tables [IT.lr0_core] and
     [IT.lr0_incoming]. This yields a representation of type [xsymbol], out of
139 140 141 142
     which we strip the [X] quantifier, so as to get a naked symbol. This last
     step is ill-typed and potentially dangerous. It is safe only because this
     function is used at type ['a lr1state -> 'a symbol], which forces an
     appropriate choice of ['a]. *)
143

144 145 146
  let incoming_symbol (s : 'a IT.lr1state) : 'a IT.symbol =
    let core = PackedIntArray.get IT.lr0_core s in
    let symbol = decode_symbol (PackedIntArray.get IT.lr0_incoming core) in
147
    match symbol with
148
    | IT.X symbol ->
149 150
        Obj.magic symbol

151
  (* The function [lhs] reads the table [TT.lhs] and uses [IT.nonterminal]
152 153 154
     to decode the symbol. *)

  let lhs prod =
155
    IT.nonterminal (PackedIntArray.get TT.lhs prod)
156

157
  (* The function [rhs] reads the table [IT.rhs] and uses [decode_symbol]
158
     to decode the symbol. *)
159 160

  let rhs prod =
161
    List.map decode_symbol (read_packed_linearized IT.rhs prod)
162 163

  (* The function [items] maps the LR(1) state [s] to its LR(0) core,
164
     then uses [core] as an index into the table [IT.lr0_items]. The
165 166 167
     items are then decoded by the function [export] below, which is
     essentially a copy of [Item.export]. *)

168 169 170 171
  type item =
      int * int

  let export t : item =
172 173 174 175
    (t lsr 7, t mod 128)

  let items s =
    (* Map [s] to its LR(0) core. *)
176 177 178
    let core = PackedIntArray.get IT.lr0_core s in
    (* Now use [core] to look up the table [IT.lr0_items]. *)
    List.map export (read_packed_linearized IT.lr0_items core)
179

180
  (* The function [nullable] maps the nonterminal symbol [nt] to its
181
     integer code, which it uses to look up the array [IT.nullable].
182 183
     This yields 0 or 1, which we map back to a Boolean result. *)

184 185 186 187
  let decode_bool i =
    assert (i = 0 || i = 1);
    i = 1

188
  let nullable nt =
189
    decode_bool (PackedIntArray.get1 IT.nullable (n2i nt))
190 191

  (* The function [first] maps the symbols [nt] and [t] to their integer
192
     codes, which it uses to look up the matrix [IT.first]. *)
193 194

  let first nt t =
195
    decode_bool (PackedIntArray.unflatten1 IT.first (n2i nt) (t2i t))
196

197 198 199 200 201 202 203
  let xfirst symbol t =
    match symbol with
    | X (T t') ->
        compare_terminals t t' = 0
    | X (N nt) ->
        first nt t

204
  (* The function [foreach_terminal] exploits the fact that the
205
     first component of [TT.error] is [Terminal.n - 1], i.e., the
206 207 208 209 210 211 212 213 214
     number of terminal symbols, including [error] but not [#]. *)

  let rec foldij i j f accu =
    if i = j then
      accu
    else
      foldij (i + 1) j f (f i accu)

  let foreach_terminal f accu =
215
    let n, _ = TT.error in
216
    foldij 0 n (fun i accu ->
217
      f (IT.terminal i) accu
218 219 220
    ) accu

  let foreach_terminal_but_error f accu =
221
    let n, _ = TT.error in
222
    foldij 0 n (fun i accu ->
223
      if i = TT.error_terminal then
224 225
        accu
      else
226
        f (IT.terminal i) accu
227 228
    ) accu

229 230
  (* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
231 232 233 234 235 236 237
  (* The following is the implementation of the function [feed]. This function
     is logically part of the LR engine, so it would be nice if it were placed
     in the module [Engine], but it must be placed here because, to ensure
     type safety, its arguments must be a symbol of type ['a symbol] and a
     semantic value of type ['a]. The type ['a symbol] is not available in
     [Engine]. It is available here. *)

238
  open EngineTypes
239 240
  open ET
  open E
241

242 243
  (* [feed] fails if the current state does not have an outgoing transition
     labeled with the desired symbol. This check is carried out at runtime. *)
244

245 246
  let feed_failure () =
    invalid_arg "feed: outgoing transition does not exist"
247

248 249 250 251 252 253 254 255 256 257 258 259 260 261
  (* Feeding a nonterminal symbol [nt]. Here, [nt] has type [nonterminal],
     which is a synonym for [int], and [semv] has type [semantic_value],
     which is a synonym for [Obj.t]. This type is unsafe, because pushing
     a semantic value of arbitrary type into the stack can later cause a
     semantic action to crash and burn. The function [feed] is given a safe
     type below. *)

  let feed_nonterminal
        (nt : nonterminal) startp (semv : semantic_value) endp (env : 'b env)
      : 'b env
  =
    (* Check if the source state has an outgoing transition labeled [nt].
       This is done by consulting the [goto] table. *)
    let source = env.current in
262 263
    match ET.maybe_goto_nt source nt with
    | None ->
264
        feed_failure()
265
    | Some target ->
266 267 268 269 270 271 272
        (* Push a new cell onto the stack, containing the identity of the state
           that we are leaving. The semantic value [semv] and positions [startp]
           and [endp] contained in the new cell are provided by the caller. *)
        let stack = { state = source; semv; startp; endp; next = env.stack } in
        (* Move to the target state. *)
        { env with stack; current = target }

273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
  let reduce   _env _prod = feed_failure()
  let initiate _env       = feed_failure()

  let feed_terminal
        (terminal : terminal) startp (semv : semantic_value) endp (env : 'b env)
      : 'b env
  =
    (* Check if the source state has an outgoing transition labeled [terminal].
       This is done by consulting the [action] table. *)
    let source = env.current in
    ET.action source terminal semv
      (fun env _please_discard _terminal semv target ->
        (* There is indeed a transition toward the state [target].
           Push a new cell onto the stack and move to the target state. *)
        let stack = { state = source; semv; startp; endp; next = env.stack } in
        { env with stack; current = target }
      ) reduce initiate env

  (* The type assigned to [feed] ensures that the type of the semantic value
     [semv] is appropriate: it must be the semantic-value type of the symbol
     [symbol]. *)

  let feed (symbol : 'a symbol) startp (semv : 'a) endp env =
296 297 298 299 300 301 302
    let semv : semantic_value = Obj.repr semv in
    match symbol with
    | N nt ->
        feed_nonterminal (n2i nt) startp semv endp env
    | T terminal ->
        feed_terminal (t2i terminal) startp semv endp env

303
end