inspectionTableInterpreter.ml 6.21 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(* -------------------------------------------------------------------------- *)

(* The type functor. *)

module Symbols (T : sig

  type 'a terminal
  type 'a nonterminal

end) = struct

  open T

14 15 16
  (* This should be the only place in the whole library (and generator!)
     where these types are defined. *)

17 18 19 20 21 22 23 24 25 26 27 28 29
  type 'a symbol =
    | T : 'a terminal -> 'a symbol
    | N : 'a nonterminal -> 'a symbol

  type xsymbol = 
    | X : 'a symbol -> xsymbol

end

(* -------------------------------------------------------------------------- *)

(* The code functor. *)

30 31 32 33 34
module Make
  (B : TableFormat.TABLES)
  (T : InspectionTableFormat.TABLES
       with type 'a lr1state = int)
= struct
35

36 37
  (* Including [T] is an easy way of inheriting the definitions of the types
     [symbol] and [xsymbol]. *)
38

39
  include T
40

41 42 43 44 45 46 47 48 49
  (* This auxiliary function decodes a packed linearized array, as created by
     [TableBackend.linearize_and_marshal1]. Here, we read a row all at once. *)

  let read_packed_linearized ((data, entry) : PackedIntArray.t * PackedIntArray.t) (i : int) : int list =
    LinearizedArray.read_row_via
      (PackedIntArray.get data)
      (PackedIntArray.get entry)
      i

50 51 52
  (* This auxiliary function decodes a symbol. The encoding was done by
     [encode_symbol] or [encode_symbol_option] in the table back-end. *)

53
  let decode_symbol (symbol : int) : T.xsymbol =
54 55 56 57 58 59 60 61 62 63 64 65
    (* If [symbol] is 0, then we have no symbol. This could mean e.g.
       that the function [incoming_symbol] has been applied to an
       initial state. In principle, this cannot happen. *)
    assert (symbol > 0);
    (* The low-order bit distinguishes terminal and nonterminal symbols. *)
    let kind = symbol land 1 in
    let symbol = symbol lsr 1 in
    if kind = 0 then
      T.terminal (symbol - 1)
    else
      T.nonterminal symbol

66 67 68 69 70 71
  (* These auxiliary functions convert a symbol to its integer code. For speed
     and for convenience, we use an unsafe type cast. This relies on the fact
     that the data constructors of the [terminal] and [nonterminal] GADTs are
     declared in an order that reflects their internal code. In the case of
     nonterminal symbols, we add [start] to account for the presence of the
     start symbols. *)
72 73 74 75 76 77

  let n2i (nt : 'a T.nonterminal) : int =
    let answer = B.start + Obj.magic nt in
    assert (T.nonterminal answer = X (N nt)); (* TEMPORARY roundtrip *)
    answer

78 79 80 81 82
  let t2i (t : 'a T.terminal) : int =
    let answer = Obj.magic t in
    assert (T.terminal answer = X (T t)); (* TEMPORARY roundtrip *)
    answer

83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
  (* Ordering functions. *)

  let compare_terminals t1 t2 =
    (* Subtraction is safe because overflow is impossible. *)
    t2i t1 - t2i t2

  let compare_nonterminals nt1 nt2 =
    (* Subtraction is safe because overflow is impossible. *)
    n2i nt1 - n2i nt2

  let compare_symbols symbol1 symbol2 =
    match symbol1, symbol2 with
    | X (T _), X (N _) ->
        -1
    | X (N _), X (T _) ->
        1
    | X (T t1), X (T t2) ->
        compare_terminals t1 t2
    | X (N nt1), X (N nt2) ->
        compare_nonterminals nt1 nt2

104 105 106 107 108 109 110 111 112
  let compare_productions prod1 prod2 =
    (* Subtraction is safe because overflow is impossible. *)
    prod1 - prod2

  let compare_items (prod1, index1) (prod2, index2) =
    let c = compare_productions prod1 prod2 in
    (* Subtraction is safe because overflow is impossible. *)
    if c <> 0 then c else index1 - index2

113 114
  (* The function [incoming_symbol] goes through the tables [T.lr0_core] and
     [T.lr0_incoming]. This yields a representation of type [xsymbol], out of
115 116 117 118
     which we strip the [X] quantifier, so as to get a naked symbol. This last
     step is ill-typed and potentially dangerous. It is safe only because this
     function is used at type ['a lr1state -> 'a symbol], which forces an
     appropriate choice of ['a]. *)
119

120 121 122 123
  let incoming_symbol (s : 'a T.lr1state) : 'a T.symbol =
    let core = PackedIntArray.get T.lr0_core s in
    let symbol = decode_symbol (PackedIntArray.get T.lr0_incoming core) in
    match symbol with
124 125 126
    | T.X symbol ->
        Obj.magic symbol

127
  (* The function [lhs] reads the table [B.lhs] and uses [T.nonterminal]
128 129 130
     to decode the symbol. *)

  let lhs prod =
131
    T.nonterminal (PackedIntArray.get B.lhs prod)
132

133
  (* The function [rhs] reads the table [T.rhs] and uses [decode_symbol]
134
     to decode the symbol. *)
135 136 137 138 139

  let rhs prod =
    List.map decode_symbol (read_packed_linearized T.rhs prod)

  (* The function [items] maps the LR(1) state [s] to its LR(0) core,
140
     then uses [core] as an index into the table [T.lr0_items]. The
141 142 143
     items are then decoded by the function [export] below, which is
     essentially a copy of [Item.export]. *)

144 145 146 147
  type item =
      int * int

  let export t : item =
148 149 150 151 152
    (t lsr 7, t mod 128)

  let items s =
    (* Map [s] to its LR(0) core. *)
    let core = PackedIntArray.get T.lr0_core s in
153
    (* Now use [core] to look up the table [T.lr0_items]. *)
154 155
    List.map export (read_packed_linearized T.lr0_items core)

156 157 158 159
  (* The function [nullable] maps the nonterminal symbol [nt] to its
     integer code, which it uses to look up the array [T.nullable].
     This yields 0 or 1, which we map back to a Boolean result. *)

160 161 162 163
  let decode_bool i =
    assert (i = 0 || i = 1);
    i = 1

164
  let nullable nt =
165 166 167 168 169 170 171
    decode_bool (PackedIntArray.get1 T.nullable (n2i nt))

  (* The function [first] maps the symbols [nt] and [t] to their integer
     codes, which it uses to look up the matrix [T.first]. *)

  let first nt t =
    decode_bool (PackedIntArray.unflatten1 T.first (n2i nt) (t2i t))
172

173 174 175 176 177 178 179
  let xfirst symbol t =
    match symbol with
    | X (T t') ->
        compare_terminals t t' = 0
    | X (N nt) ->
        first nt t

180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
  (* The function [foreach_terminal] exploits the fact that the
     first component of [B.error] is [Terminal.n - 1], i.e., the
     number of terminal symbols, including [error] but not [#]. *)

  let rec foldij i j f accu =
    if i = j then
      accu
    else
      foldij (i + 1) j f (f i accu)

  let foreach_terminal f accu =
    let n, _ = B.error in
    foldij 0 n (fun i accu ->
      f (T.terminal i) accu
    ) accu

  let foreach_terminal_but_error f accu =
    let n, _ = B.error in
    foldij 0 n (fun i accu ->
      if i = B.error_terminal then
        accu
      else
        f (T.terminal i) accu
    ) accu

205
end