engine.ml 11.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
open EngineTypes

(* The LR parsing engine. *)

(* This module is used:

   - at compile time, if so requested by the user, via the --interpret options;
   - at run time, in the table-based back-end. *)

module Make (T : TABLE) = struct

  (* This propagates type and exception definitions. *)

  include T

  let _eRR : exn =
    Error

  (* --------------------------------------------------------------------------- *)

  (* The type [void] is empty. Many of the functions below have return type
     [void]. This guarantees that they never return a value. Instead, they
     must stop by raising an exception: either [Accept] or [Error]. *)

  type void

  (* --------------------------------------------------------------------------- *)

POTTIER Francois's avatar
POTTIER Francois committed
29 30 31 32 33 34 35 36 37 38 39
  (* OK, OK. I said I would stop using [Obj.magic], yet here we go again.  I
     need to extend the type [T.token] with an extra element, which represents
     the [error] pseudo-token. I don't want to pay an extra box in memory or
     an extra field in the [env] record. (I have measured the cost of moving
     from 5 to 6 fields in this record to be 30%. This is more than I
     expected!) I don't want to add a branch to the type [T.token] because
     that would bother the user (that would be an incompatible change) and
     that would make some exhaustive case analyses appear non-exhaustive. So,
     here we go. We allocate a dummy box in memory and use its address as a
     unique value which cannot possibly be confused with a legit inhabitant of
     the type [token]. (Right?) *)
40 41 42 43 44 45

  let error_token : token =
    Obj.magic (ref 0xDEADBEEF)

  (* --------------------------------------------------------------------------- *)

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
  (* In the code-based back-end, the [run] function is sometimes responsible
     for pushing a new cell on the stack. This is motivated by code sharing
     concerns. In this interpreter, there is no such concern; [run]'s caller
     is always responsible for updating the stack. *)

  (* In the code-based back-end, there is a [run] function for each state
     [s]. This function can behave in two slightly different ways, depending
     on when it is invoked, or (equivalently) depending on [s].

     If [run] is invoked after shifting a terminal symbol (or, equivalently,
     if [s] has a terminal incoming symbol), then [run] discards a token,
     unless [s] has a default reduction on [#]. (Indeed, in that case,
     requesting the next token might drive the lexer off the end of the input
     stream.)

     If, on the other hand, [run] is invoked after performing a goto transition,
     or invoked directly by an entry point, then there is nothing to discard.

     These two cases are reflected in [CodeBackend.gettoken].

66 67 68 69
     Here, the code is structured in a slightly different way. It is up to the
     caller of [run] to indicate whether to discard a token, via the parameter
     [please_discard]. This flag is set when [s] is being entered by shifting
     a terminal symbol and [s] does not have a default reduction on [#]. *)
70 71 72 73 74

  let rec run env please_discard : void =

    (* Log the fact that we just entered this state. *)
    
75
    Log.state env.current;
76

77 78 79 80
    (* If [please_discard] is set, discard a token and fetch the next one.
       This is done by jumping to [discard], which itself will jump to
       [check_for_default_reduction]. If [please_discard] is not set, we skip
       [discard] and jump directly to [check_for_default_reduction]. *)
81 82

    if please_discard then
83 84 85 86 87 88
      discard env
    else
      check_for_default_reduction env

  (* [discard] takes a token off the input stream, queries the lexer
     for a new one, and stores it into [env.token], overwriting the
89
     previous token. *)
90 91 92 93

  and discard env =
    let lexbuf = env.lexbuf in
    let token = env.lexer lexbuf in
94
    let env = { env with token } in
95 96 97 98
    Log.lookahead_token lexbuf (T.token2terminal token);
    check_for_default_reduction env

  and check_for_default_reduction env =
99 100 101 102 103 104

    (* Examine what situation we are in. This case analysis is analogous to
       that performed in [CodeBackend.gettoken], in the sub-case where we do
       not have a terminal incoming symbol. *)

    T.default_reduction
105 106 107
      env.current
      reduce                (* there is a default reduction; perform it *)
      check_for_error_token (* there is none; continue below *)
108 109
      env

110
  and check_for_error_token env : void =
111 112 113 114 115

    (* There is no default reduction. Consult the current lookahead token
       so as to determine which action should be taken. *)

    (* Peeking at the first input token, without taking it off the input
116 117
       stream, is done by reading [env.token]. We are careful to first
       check whether this is the [error] token. *)
118

119 120
    (* Note that, if [please_discard] was true, then we have just called
       [discard], so the lookahead token cannot be [error]. *)
121

122
    if env.token == error_token then begin
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
      Log.resuming_error_handling();
      error env
    end
    else
      action env

  (* --------------------------------------------------------------------------- *)

  (* When [action] is invoked, we know that the current state does not have
     a default reduction. We also know that the current lookahead token is
     not [error]: it is a real token, stored in [env.token]. *)

  and action env : void =

    (* We consult the two-dimensional action table, indexed by the
       current state and the current lookahead token, in order to
       determine which action should be taken. *)

    let token = env.token in
    T.action
      env.current                    (* determines a row *)
      (T.token2terminal token)       (* determines a column *)
      (T.token2value token)
      shift                          (* shift continuation *)
      reduce                         (* reduce continuation *)
      initiate                       (* failure continuation *)
      env

  (* --------------------------------------------------------------------------- *)

  (* This function takes care of shift transitions along a terminal symbol.
     (Goto transitions are taken care of within [reduce] below.) The symbol
     can be either an actual token or the [error] pseudo-token. *)

  and shift env
      (please_discard : bool)
      (terminal : terminal)
      (value : semantic_value)
      (s' : state)
      : void =

    (* Log the transition. *)

    Log.shift terminal s';

    (* Push a new cell onto the stack, containing the identity of the
       state that we are leaving. *)

    let lexbuf = env.lexbuf in
172
    let stack = {
173 174 175 176 177
      state = env.current;
      semv = value;
      startp = lexbuf.Lexing.lex_start_p;
      endp = lexbuf.Lexing.lex_curr_p;
      next = env.stack;
178
    } in
179 180 181

    (* Switch to state [s']. *)

182 183
    let current = s' in
    let env = { env with stack; current } in
184 185 186 187 188 189 190 191 192 193 194 195 196
    run env please_discard

  (* --------------------------------------------------------------------------- *)

  (* This function takes care of reductions. *)

  and reduce env (prod : production) : void =

    (* Log a reduction event. *)

    Log.reduce_or_accept prod;

    (* Invoke the semantic action. The semantic action is responsible for
197 198
       truncating the stack and pushing a new cell onto the stack, which
       contains a new semantic value. It can raise [Accept] or [Error]. *)
199

200
    (* If the semantic action terminates normally, it returns a new stack,
201
       which becomes the current stack. *)
202

203 204 205 206 207 208
    (* If the semantic action raises [Error], we catch it immediately and
       initiate error handling. *)

    (* The apparently weird idiom used here is an encoding for a
       [let/unless] construct, which does not exist in ocaml. *)

209
    let success =
210
      try
211
	Some (T.semantic_action prod env)
212
      with Error ->
213 214 215 216
	None
    in
    match success with
    | Some stack ->
217

218 219 220
        (* By our convention, the semantic action has produced an updated
           stack. The state now found in the top stack cell is the return
           state. *)
221

222 223 224
        (* Perform a goto transition. The target state is determined
           by consulting the goto table at the return state and at
           production [prod]. *)
225

226 227 228
        let current = T.goto stack.state prod in
        let env = { env with stack; current } in
        run env false
229

230 231
    | None ->
        initiate env
232 233 234 235 236 237


  (* --------------------------------------------------------------------------- *)

  (* The following functions deal with errors. *)

238
  (* [initiate] initiates error handling. *)
239 240

  and initiate env : void =
241
    assert (env.token != error_token);
242
    Log.initiating_error_handling();
243
    let env = { env with token = error_token } in
244 245 246 247 248
    error env

  (* [error] handles errors. *)

  and error env : void =
249
    assert (env.token == error_token);
250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298

    (* Consult the column associated with the [error] pseudo-token in the
       action table. *)

    T.action
      env.current                    (* determines a row *)
      T.error_terminal               (* determines a column *)
      T.error_value
      error_shift                    (* shift continuation *)
      error_reduce                   (* reduce continuation *)
      error_fail                     (* failure continuation *)
      env

  and error_shift env please_discard terminal value s' =

    (* Here, [terminal] is [T.error_terminal], and [value] is [T.error_value]. *)

    assert (terminal = T.error_terminal && value = T.error_value);

    (* This state is capable of shifting the [error] token. *)

    Log.handling_error env.current;
    shift env please_discard terminal value s'

  and error_reduce env prod =

    (* This state is capable of performing a reduction on [error]. *)

    Log.handling_error env.current;
    reduce env prod

  and error_fail env =

    (* This state is unable to handle errors. Attempt to pop a stack
       cell. *)

    let cell = env.stack in
    let next = cell.next in
    if next == cell then

      (* The stack is empty. Die. *)

      raise _eRR

    else begin

      (* The stack is nonempty. Pop a cell, updating the current state
	 with that found in the popped cell, and try again. *)

299 300 301 302
      let env = { env with
        stack = next;
        current = cell.state
      } in
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
      error env

    end

  (* --------------------------------------------------------------------------- *)

  let entry
      (s : state)
      (lexer : Lexing.lexbuf -> token)
      (lexbuf : Lexing.lexbuf)
      : semantic_value =

    (* Build an empty stack. This is a dummy cell, which is its own
       successor. Its fields other than [next] contain dummy values. *)

    let rec empty = {
      state = s;                          (* dummy *)
      semv = T.error_value;               (* dummy *)
      startp = lexbuf.Lexing.lex_start_p; (* dummy *)
      endp = lexbuf.Lexing.lex_curr_p;    (* dummy *)
      next = empty;
    } in

    (* Perform an initial call to the lexer. *)

    let token : token =
      lexer lexbuf
    in

    (* Log our first lookahead token. *)

    Log.lookahead_token lexbuf (T.token2terminal token);

    (* Build an initial environment. *)

    let env = {
      lexer = lexer;
      lexbuf = lexbuf;
      token = token;
      stack = empty;
      current = s;
    } in

    (* Run. Catch [Accept], which represents normal termination. Let [Error]
       escape. *)

    try

      (* If ocaml offered a [match/with] construct with zero branches, this is
	 what we would use here, since the type [void] has zero cases. *)

      let (_ : void) = run env false in
      assert false (* cannot fail *)

    with
    | Accept v ->
	v

end