engine.ml 12 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
open EngineTypes

(* The LR parsing engine. *)

(* This module is used:

   - at compile time, if so requested by the user, via the --interpret options;
   - at run time, in the table-based back-end. *)

module Make (T : TABLE) = struct

  (* This propagates type and exception definitions. *)

  include T

16 17 18 19 20
  type dummy =
      (state, semantic_value, token) env
  type env =
      dummy

21 22
  (* --------------------------------------------------------------------------- *)

23 24 25
  (* A continuation is returned to the user when the parser pauses itself. In
     normal mode, this happens when the parser wishes to request another token.
     In error-handling mode, this happens when ... TEMPORARY *)
26

27 28 29
  type result =
    | InputNeeded of env
    | Rejected
30 31 32

  (* --------------------------------------------------------------------------- *)

POTTIER Francois's avatar
POTTIER Francois committed
33 34 35 36 37 38 39 40 41 42 43
  (* OK, OK. I said I would stop using [Obj.magic], yet here we go again.  I
     need to extend the type [T.token] with an extra element, which represents
     the [error] pseudo-token. I don't want to pay an extra box in memory or
     an extra field in the [env] record. (I have measured the cost of moving
     from 5 to 6 fields in this record to be 30%. This is more than I
     expected!) I don't want to add a branch to the type [T.token] because
     that would bother the user (that would be an incompatible change) and
     that would make some exhaustive case analyses appear non-exhaustive. So,
     here we go. We allocate a dummy box in memory and use its address as a
     unique value which cannot possibly be confused with a legit inhabitant of
     the type [token]. (Right?) *)
44 45 46 47 48 49

  let error_token : token =
    Obj.magic (ref 0xDEADBEEF)

  (* --------------------------------------------------------------------------- *)

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
  (* In the code-based back-end, the [run] function is sometimes responsible
     for pushing a new cell on the stack. This is motivated by code sharing
     concerns. In this interpreter, there is no such concern; [run]'s caller
     is always responsible for updating the stack. *)

  (* In the code-based back-end, there is a [run] function for each state
     [s]. This function can behave in two slightly different ways, depending
     on when it is invoked, or (equivalently) depending on [s].

     If [run] is invoked after shifting a terminal symbol (or, equivalently,
     if [s] has a terminal incoming symbol), then [run] discards a token,
     unless [s] has a default reduction on [#]. (Indeed, in that case,
     requesting the next token might drive the lexer off the end of the input
     stream.)

     If, on the other hand, [run] is invoked after performing a goto transition,
     or invoked directly by an entry point, then there is nothing to discard.

     These two cases are reflected in [CodeBackend.gettoken].

70 71 72 73
     Here, the code is structured in a slightly different way. It is up to the
     caller of [run] to indicate whether to discard a token, via the parameter
     [please_discard]. This flag is set when [s] is being entered by shifting
     a terminal symbol and [s] does not have a default reduction on [#]. *)
74

75
  let rec run env please_discard : result =
76 77 78

    (* Log the fact that we just entered this state. *)
    
79 80
    if log then
      Log.state env.current;
81

82 83 84 85 86
    (* If [please_discard] is set, we discard the current lookahead token and
       fetch the next one. In order to request a token from the user, we
       return an [InputNeeded] continuation, which, when invoked by the user,
       will take us to [discard]. If [please_discard] is not set, we skip this
       step and jump directly to [check_for_default_reduction]. *)
87 88

    if please_discard then
89
      InputNeeded env
90 91 92
    else
      check_for_default_reduction env

93 94
  (* [discard env triple] stores [triple] into [env.triple], overwriting
     the previous token. *)
95

96
  and discard env triple =
97 98 99 100
    if log then begin
      let (token, startp, endp) = triple in
      Log.lookahead_token (T.token2terminal token) startp endp
    end;
101
    let env = { env with triple } in
102 103 104
    check_for_default_reduction env

  and check_for_default_reduction env =
105 106 107 108 109 110

    (* Examine what situation we are in. This case analysis is analogous to
       that performed in [CodeBackend.gettoken], in the sub-case where we do
       not have a terminal incoming symbol. *)

    T.default_reduction
111 112 113
      env.current
      reduce                (* there is a default reduction; perform it *)
      check_for_error_token (* there is none; continue below *)
114 115
      env

116
  and check_for_error_token env =
117 118 119 120 121

    (* There is no default reduction. Consult the current lookahead token
       so as to determine which action should be taken. *)

    (* Peeking at the first input token, without taking it off the input
122
       stream, is done by reading [env.triple]. We are careful to first
123
       check whether this is the [error] token. *)
124

125 126
    (* Note that, if [please_discard] was true, then we have just called
       [discard], so the lookahead token cannot be [error]. *)
127

128
    let (token, _, _) = env.triple in
129
    if token == error_token then begin
130 131
      if log then
        Log.resuming_error_handling();
132 133 134 135
      error env
    end
    else

136 137 138
      (* We consult the two-dimensional action table, indexed by the
         current state and the current lookahead token, in order to
         determine which action should be taken. *)
139

140 141 142 143 144 145 146 147
      T.action
        env.current                    (* determines a row *)
        (T.token2terminal token)       (* determines a column *)
        (T.token2value token)
        shift                          (* shift continuation *)
        reduce                         (* reduce continuation *)
        initiate                       (* failure continuation *)
        env
148 149 150 151 152 153 154

  (* --------------------------------------------------------------------------- *)

  (* This function takes care of shift transitions along a terminal symbol.
     (Goto transitions are taken care of within [reduce] below.) The symbol
     can be either an actual token or the [error] pseudo-token. *)

155
  (* Here, the lookahead token CAN be [error]. *)
156

157 158 159 160
  and shift env
      (please_discard : bool)
      (terminal : terminal)
      (value : semantic_value)
161
      (s' : state) =
162 163 164

    (* Log the transition. *)

165 166
    if log then
      Log.shift terminal s';
167 168 169 170

    (* Push a new cell onto the stack, containing the identity of the
       state that we are leaving. *)

171
    let (_, startp, endp) = env.triple in
172
    let stack = {
173 174
      state = env.current;
      semv = value;
175 176
      startp;
      endp;
177
      next = env.stack;
178
    } in
179 180 181

    (* Switch to state [s']. *)

182 183
    let current = s' in
    let env = { env with stack; current } in
184 185 186 187 188 189
    run env please_discard

  (* --------------------------------------------------------------------------- *)

  (* This function takes care of reductions. *)

190
  (* Here, the lookahead token CAN be [error]. *)
191

192
  and reduce env (prod : production) =
193 194 195

    (* Log a reduction event. *)

196 197
    if log then
      Log.reduce_or_accept prod;
198 199

    (* Invoke the semantic action. The semantic action is responsible for
200 201
       truncating the stack and pushing a new cell onto the stack, which
       contains a new semantic value. It can raise [Accept] or [Error]. *)
202

203
    (* If the semantic action terminates normally, it returns a new stack,
204
       which becomes the current stack. *)
205

206 207 208 209 210 211
    (* If the semantic action raises [Error], we catch it immediately and
       initiate error handling. *)

    (* The apparently weird idiom used here is an encoding for a
       [let/unless] construct, which does not exist in ocaml. *)

212
    let success =
213
      try
214
	Some (T.semantic_action prod env)
215
      with Error ->
216 217 218 219
	None
    in
    match success with
    | Some stack ->
220

221 222 223
        (* By our convention, the semantic action has produced an updated
           stack. The state now found in the top stack cell is the return
           state. *)
224

225 226 227
        (* Perform a goto transition. The target state is determined
           by consulting the goto table at the return state and at
           production [prod]. *)
228

229 230 231
        let current = T.goto stack.state prod in
        let env = { env with stack; current } in
        run env false
232

233 234
    | None ->
        initiate env
235 236 237 238 239

  (* --------------------------------------------------------------------------- *)

  (* The following functions deal with errors. *)

240 241
  (* [initiate] initiates or resumes error handling. *)

242
  (* Here, the lookahead token CAN be [error]. *)
243

244
  and initiate env =
245
    Log.initiating_error_handling();
246 247
    let (_, startp, endp) = env.triple in
    let triple = (error_token, startp, endp) in
248
    let env = { env with triple } in
249 250 251 252
    error env

  (* [error] handles errors. *)

253
  and error env =
254
    assert (let (token, _, _) = env.triple in token == error_token);
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296

    (* Consult the column associated with the [error] pseudo-token in the
       action table. *)

    T.action
      env.current                    (* determines a row *)
      T.error_terminal               (* determines a column *)
      T.error_value
      error_shift                    (* shift continuation *)
      error_reduce                   (* reduce continuation *)
      error_fail                     (* failure continuation *)
      env

  and error_shift env please_discard terminal value s' =

    (* Here, [terminal] is [T.error_terminal], and [value] is [T.error_value]. *)

    assert (terminal = T.error_terminal && value = T.error_value);

    (* This state is capable of shifting the [error] token. *)

    Log.handling_error env.current;
    shift env please_discard terminal value s'

  and error_reduce env prod =

    (* This state is capable of performing a reduction on [error]. *)

    Log.handling_error env.current;
    reduce env prod

  and error_fail env =

    (* This state is unable to handle errors. Attempt to pop a stack
       cell. *)

    let cell = env.stack in
    let next = cell.next in
    if next == cell then

      (* The stack is empty. Die. *)

297
      Rejected
298 299 300 301 302 303

    else begin

      (* The stack is nonempty. Pop a cell, updating the current state
	 with that found in the popped cell, and try again. *)

304 305 306 307
      let env = { env with
        stack = next;
        current = cell.state
      } in
308 309 310 311 312 313
      error env

    end

  (* --------------------------------------------------------------------------- *)

314
  let start
315
      (s : state)
316
      (read : unit -> token * Lexing.position * Lexing.position)
317
      : semantic_value =
318
    
319 320 321 322 323 324
    (* Build an empty stack. This is a dummy cell, which is its own
       successor. Its fields other than [next] contain dummy values. *)

    let rec empty = {
      state = s;                          (* dummy *)
      semv = T.error_value;               (* dummy *)
325 326
      startp = Lexing.dummy_pos;          (* dummy *)
      endp = Lexing.dummy_pos;            (* dummy *)
327 328 329 330 331
      next = empty;
    } in

    (* Perform an initial call to the lexer. *)

332
    let triple = read() in
333 334 335

    (* Log our first lookahead token. *)

336
    let (token, startp, endp) = triple in
337
    Log.lookahead_token (T.token2terminal token) startp endp;
338 339 340 341

    (* Build an initial environment. *)

    let env = {
342
      triple;
343 344 345 346
      stack = empty;
      current = s;
    } in

347
    (* The main loop. *)
348

349 350 351 352 353 354 355 356
    let rec loop result =
      match result with
      | InputNeeded env ->
          let triple = read() in
          loop (discard env triple)
      | Rejected ->
          raise Error
    in
357

358
    (* Catch [Accept], which represents normal termination. Let [Error] escape. *)
359

360 361
    try
      loop (run env false)
362 363
    with
    | Accept v ->
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
	v    

  (* --------------------------------------------------------------------------- *)

  (* Wrapping a lexer and lexbuf as a revised lexer. *)

  let wrap lexer lexbuf : unit -> token * Lexing.position * Lexing.position =
    fun () ->
      let token = lexer lexbuf in
      let startp = lexbuf.Lexing.lex_start_p
      and endp = lexbuf.Lexing.lex_curr_p in
      token, startp, endp

  (* --------------------------------------------------------------------------- *)

  let entry
      (s : state)
      (lexer : Lexing.lexbuf -> token)
      (lexbuf : Lexing.lexbuf)
      : semantic_value =

    start s (wrap lexer lexbuf)
386 387 388

end