Engine.ml 31.9 KB
Newer Older
1 2 3 4 5 6
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
7
(*                                                                            *)
8 9 10 11 12 13
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU Library General Public License version 2, with a         *)
(*  special exception on linking, as described in the file LICENSE.           *)
(*                                                                            *)
(******************************************************************************)

POTTIER Francois's avatar
POTTIER Francois committed
14
type position = Lexing.position
15 16 17 18 19 20 21 22 23 24 25
open EngineTypes

(* The LR parsing engine. *)

(* This module is used:

   - at compile time, if so requested by the user, via the --interpret options;
   - at run time, in the table-based back-end. *)

module Make (T : TABLE) = struct

26 27 28
  (* This propagates type and exception definitions. The functions [number],
     [production_index], [find_production], too, are defined by this [include]
     declaration. *)
29 30 31

  include T

32
  type 'a env =
33
      (state, semantic_value, token) EngineTypes.env
34

POTTIER Francois's avatar
POTTIER Francois committed
35
  (* ------------------------------------------------------------------------ *)
36

37
  (* The type [checkpoint] represents an intermediate or final result of the
38
     parser. See [EngineTypes]. *)
39

40
  (* The type [checkpoint] is presented to the user as a private type (see
POTTIER Francois's avatar
POTTIER Francois committed
41 42 43 44
     [IncrementalEngine]). This prevents the user from manufacturing
     checkpoints (i.e., continuations) that do not make sense. (Such
     continuations could potentially violate the LR invariant and lead to
     crashes.) *)
45

46 47 48 49 50
  (* 2017/03/29 Although [checkpoint] is a private type, we now expose a
     constructor function, [input_needed]. This function allows manufacturing
     a checkpoint out of an environment. For this reason, the type [env] must
     also be parameterized with ['a]. *)

51
  type 'a checkpoint =
52 53 54 55
    | InputNeeded of 'a env
    | Shifting of 'a env * 'a env * bool
    | AboutToReduce of 'a env * production
    | HandlingError of 'a env
56
    | Accepted of 'a
57
    | Rejected
58

POTTIER Francois's avatar
POTTIER Francois committed
59
  (* ------------------------------------------------------------------------ *)
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

  (* In the code-based back-end, the [run] function is sometimes responsible
     for pushing a new cell on the stack. This is motivated by code sharing
     concerns. In this interpreter, there is no such concern; [run]'s caller
     is always responsible for updating the stack. *)

  (* In the code-based back-end, there is a [run] function for each state
     [s]. This function can behave in two slightly different ways, depending
     on when it is invoked, or (equivalently) depending on [s].

     If [run] is invoked after shifting a terminal symbol (or, equivalently,
     if [s] has a terminal incoming symbol), then [run] discards a token,
     unless [s] has a default reduction on [#]. (Indeed, in that case,
     requesting the next token might drive the lexer off the end of the input
     stream.)

POTTIER Francois's avatar
POTTIER Francois committed
76 77 78
     If, on the other hand, [run] is invoked after performing a goto
     transition, or invoked directly by an entry point, then there is nothing
     to discard.
79 80 81

     These two cases are reflected in [CodeBackend.gettoken].

82 83 84 85
     Here, the code is structured in a slightly different way. It is up to the
     caller of [run] to indicate whether to discard a token, via the parameter
     [please_discard]. This flag is set when [s] is being entered by shifting
     a terminal symbol and [s] does not have a default reduction on [#]. *)
86

87
  (* The following recursive group of functions are tail recursive, produce a
88 89 90
     checkpoint of type [semantic_value checkpoint], and cannot raise an
     exception. A semantic action can raise [Error], but this exception is
     immediately caught within [reduce]. *)
91

92
  let rec run env please_discard : semantic_value checkpoint =
93 94

    (* Log the fact that we just entered this state. *)
95

96 97
    if log then
      Log.state env.current;
98

99 100 101 102 103
    (* If [please_discard] is set, we discard the current lookahead token and
       fetch the next one. In order to request a token from the user, we
       return an [InputNeeded] continuation, which, when invoked by the user,
       will take us to [discard]. If [please_discard] is not set, we skip this
       step and jump directly to [check_for_default_reduction]. *)
104 105

    if please_discard then
106
      InputNeeded env
107 108 109
    else
      check_for_default_reduction env

110 111
  (* [discard env triple] stores [triple] into [env], overwriting the previous
     token. It is invoked by [offer], which itself is invoked by the user in
112
     response to an [InputNeeded] checkpoint. *)
113

114
  and discard env triple =
115 116 117 118
    if log then begin
      let (token, startp, endp) = triple in
      Log.lookahead_token (T.token2terminal token) startp endp
    end;
119
    let env = { env with error = false; triple } in
120 121 122
    check_for_default_reduction env

  and check_for_default_reduction env =
123 124 125 126 127 128

    (* Examine what situation we are in. This case analysis is analogous to
       that performed in [CodeBackend.gettoken], in the sub-case where we do
       not have a terminal incoming symbol. *)

    T.default_reduction
129
      env.current
130
      announce_reduce       (* there is a default reduction; perform it *)
131
      check_for_error_token (* there is none; continue below *)
132 133
      env

134
  and check_for_error_token env =
135 136 137 138 139

    (* There is no default reduction. Consult the current lookahead token
       so as to determine which action should be taken. *)

    (* Peeking at the first input token, without taking it off the input
140
       stream, is done by reading [env.triple]. We are careful to first
141
       check [env.error]. *)
142

143 144
    (* Note that, if [please_discard] was true, then we have just called
       [discard], so the lookahead token cannot be [error]. *)
145

146 147 148
    (* Returning [HandlingError env] is equivalent to calling [error env]
       directly, except it allows the user to regain control. *)

149
    if env.error then begin
150 151
      if log then
        Log.resuming_error_handling();
152
      HandlingError env
153 154
    end
    else
155
      let (token, _, _) = env.triple in
156

157 158 159
      (* We consult the two-dimensional action table, indexed by the
         current state and the current lookahead token, in order to
         determine which action should be taken. *)
160

161 162 163 164 165
      T.action
        env.current                    (* determines a row *)
        (T.token2terminal token)       (* determines a column *)
        (T.token2value token)
        shift                          (* shift continuation *)
166
        announce_reduce                (* reduce continuation *)
167 168
        initiate                       (* failure continuation *)
        env
169

POTTIER Francois's avatar
POTTIER Francois committed
170
  (* ------------------------------------------------------------------------ *)
171 172 173 174 175

  (* This function takes care of shift transitions along a terminal symbol.
     (Goto transitions are taken care of within [reduce] below.) The symbol
     can be either an actual token or the [error] pseudo-token. *)

176
  (* Here, the lookahead token CAN be [error]. *)
177

178 179 180 181
  and shift env
      (please_discard : bool)
      (terminal : terminal)
      (value : semantic_value)
182
      (s' : state) =
183 184 185

    (* Log the transition. *)

186 187
    if log then
      Log.shift terminal s';
188 189 190 191

    (* Push a new cell onto the stack, containing the identity of the
       state that we are leaving. *)

192
    let (_, startp, endp) = env.triple in
193
    let stack = {
194 195
      state = env.current;
      semv = value;
196 197
      startp;
      endp;
198
      next = env.stack;
199
    } in
200 201 202

    (* Switch to state [s']. *)

203 204 205 206 207 208 209 210 211 212
    let new_env = { env with stack; current = s' } in

    (* Expose the transition to the user. (In principle, we have a choice
       between exposing the transition before we take it, after we take
       it, or at some point in between. This affects the number and type
       of the parameters carried by [Shifting]. Here, we choose to expose
       the transition after we take it; this allows [Shifting] to carry
       only three parameters, whose meaning is simple.) *)

    Shifting (env, new_env, please_discard)
213

POTTIER Francois's avatar
POTTIER Francois committed
214
  (* ------------------------------------------------------------------------ *)
215

216
  (* The function [announce_reduce] stops the parser and returns a checkpoint
217 218
     which allows the parser to be resumed by calling [reduce]. *)

219 220 221 222
  (* Only ordinary productions are exposed to the user. Start productions
     are not exposed to the user. Reducing a start production simply leads
     to the successful termination of the parser. *)

223
  and announce_reduce env (prod : production) =
224 225 226 227
    if T.is_start prod then
      accept env prod
    else
      AboutToReduce (env, prod)
228

229 230
  (* The function [reduce] takes care of reductions. It is invoked by
     [resume] after an [AboutToReduce] event has been produced. *)
231

232
  (* Here, the lookahead token CAN be [error]. *)
233

234 235
  (* The production [prod] CANNOT be a start production. *)

236
  and reduce env (prod : production) =
237 238 239

    (* Log a reduction event. *)

240 241
    if log then
      Log.reduce_or_accept prod;
242 243

    (* Invoke the semantic action. The semantic action is responsible for
244
       truncating the stack and pushing a new cell onto the stack, which
245
       contains a new semantic value. It can raise [Error]. *)
246

247
    (* If the semantic action terminates normally, it returns a new stack,
248
       which becomes the current stack. *)
249

250 251
    (* If the semantic action raises [Error], we catch it and initiate error
       handling. *)
252

253 254 255 256
    (* This [match/with/exception] construct requires OCaml 4.02. *)

    match T.semantic_action prod env with
    | stack ->
257

258 259 260
        (* By our convention, the semantic action has produced an updated
           stack. The state now found in the top stack cell is the return
           state. *)
261

262 263 264
        (* Perform a goto transition. The target state is determined
           by consulting the goto table at the return state and at
           production [prod]. *)
265

266
        let current = T.goto_prod stack.state prod in
267 268
        let env = { env with stack; current } in
        run env false
269

270
    | exception Error ->
271
        initiate env
272

273 274 275 276 277 278 279 280 281
  and accept env prod =
    (* Log an accept event. *)
    if log then
      Log.reduce_or_accept prod;
    (* Extract the semantic value out of the stack. *)
    let v = env.stack.semv in
    (* Finish. *)
    Accepted v

POTTIER Francois's avatar
POTTIER Francois committed
282
  (* ------------------------------------------------------------------------ *)
283 284 285

  (* The following functions deal with errors. *)

286 287
  (* [initiate] initiates or resumes error handling. *)

288
  (* Here, the lookahead token CAN be [error]. *)
289

290
  and initiate env =
291 292
    if log then
      Log.initiating_error_handling();
293
    let env = { env with error = true } in
294
    HandlingError env
295 296 297

  (* [error] handles errors. *)

298
  and error env =
299
    assert env.error;
300 301 302 303 304 305 306 307 308 309 310 311 312 313 314

    (* Consult the column associated with the [error] pseudo-token in the
       action table. *)

    T.action
      env.current                    (* determines a row *)
      T.error_terminal               (* determines a column *)
      T.error_value
      error_shift                    (* shift continuation *)
      error_reduce                   (* reduce continuation *)
      error_fail                     (* failure continuation *)
      env

  and error_shift env please_discard terminal value s' =

POTTIER Francois's avatar
POTTIER Francois committed
315 316
    (* Here, [terminal] is [T.error_terminal],
       and [value] is [T.error_value]. *)
317 318 319 320 321

    assert (terminal = T.error_terminal && value = T.error_value);

    (* This state is capable of shifting the [error] token. *)

322 323
    if log then
      Log.handling_error env.current;
324 325 326 327 328 329
    shift env please_discard terminal value s'

  and error_reduce env prod =

    (* This state is capable of performing a reduction on [error]. *)

330 331
    if log then
      Log.handling_error env.current;
332
    reduce env prod
333 334 335
      (* Intentionally calling [reduce] instead of [announce_reduce].
         It does not seem very useful, and it could be confusing, to
         expose the reduction steps taken during error handling. *)
336 337 338 339 340 341 342 343 344 345 346 347

  and error_fail env =

    (* This state is unable to handle errors. Attempt to pop a stack
       cell. *)

    let cell = env.stack in
    let next = cell.next in
    if next == cell then

      (* The stack is empty. Die. *)

348
      Rejected
349 350 351 352

    else begin

      (* The stack is nonempty. Pop a cell, updating the current state
353
         with that found in the popped cell, and try again. *)
354

355 356 357 358
      let env = { env with
        stack = next;
        current = cell.state
      } in
359
      HandlingError env
360 361 362

    end

363 364
  (* End of the nest of tail recursive functions. *)

POTTIER Francois's avatar
POTTIER Francois committed
365 366
  (* ------------------------------------------------------------------------ *)
  (* ------------------------------------------------------------------------ *)
367

POTTIER Francois's avatar
POTTIER Francois committed
368
  (* The incremental interface. See [EngineTypes]. *)
369

POTTIER Francois's avatar
POTTIER Francois committed
370
  (* [start s] begins the parsing process. *)
371

POTTIER Francois's avatar
POTTIER Francois committed
372
  let start (s : state) (initial : position) : semantic_value checkpoint =
373

374 375 376 377 378
    (* Build an empty stack. This is a dummy cell, which is its own successor.
       Its [next] field WILL be accessed by [error_fail] if an error occurs and
       is propagated all the way until the stack is empty. Its [endp] field WILL
       be accessed (by a semantic action) if an epsilon production is reduced
       when the stack is empty. *)
379 380 381 382

    let rec empty = {
      state = s;                          (* dummy *)
      semv = T.error_value;               (* dummy *)
383 384
      startp = initial;                   (* dummy *)
      endp = initial;
385 386 387 388 389
      next = empty;
    } in

    (* Build an initial environment. *)

POTTIER Francois's avatar
POTTIER Francois committed
390 391 392 393 394 395 396 397
    (* Unfortunately, there is no type-safe way of constructing a
       dummy token. Tokens carry semantic values, which in general
       we cannot manufacture. This instance of [Obj.magic] could
       be avoided by adopting a different representation (e.g., no
       [env.error] field, and an option in the first component of
       [env.triple]), but I like this representation better. *)

    let dummy_token = Obj.magic () in
398
    let env = {
399
      error = false;
400
      triple = (dummy_token, initial, initial); (* dummy *)
401 402 403 404
      stack = empty;
      current = s;
    } in

405
    (* Begin parsing. *)
406

407 408 409 410 411
    (* The parameter [please_discard] here is [true], which means we know
       that we must read at least one token. This claim relies on the fact
       that we have ruled out the two special cases where a start symbol
       recognizes the empty language or the singleton language {epsilon}. *)

412
    run env true
413

414 415 416
  (* [offer checkpoint triple] is invoked by the user in response to a
     checkpoint of the form [InputNeeded env]. It checks that [checkpoint] is
     indeed of this form, and invokes [discard]. *)
417

418 419 420 421
  (* [resume checkpoint] is invoked by the user in response to a checkpoint of
     the form [AboutToReduce (env, prod)] or [HandlingError env]. It checks
     that [checkpoint] is indeed of this form, and invokes [reduce] or
     [error], as appropriate. *)
422

423
  (* In reality, [offer] and [resume] accept an argument of type
424 425 426
     [semantic_value checkpoint] and produce a checkpoint of the same type.
     The choice of [semantic_value] is forced by the fact that this is the
     parameter of the checkpoint [Accepted]. *)
427

428
  (* We change this as follows. *)
429

430
  (* We change the argument and result type of [offer] and [resume] from
431 432 433 434 435 436 437 438 439 440 441
     [semantic_value checkpoint] to ['a checkpoint]. This is safe, in this
     case, because we give the user access to values of type [t checkpoint]
     only if [t] is indeed the type of the eventual semantic value for this
     run. (More precisely, by examining the signatures [INCREMENTAL_ENGINE]
     and [INCREMENTAL_ENGINE_START], one finds that the user can build a value
     of type ['a checkpoint] only if ['a] is [semantic_value]. The table
     back-end goes further than this and produces versions of [start] composed
     with a suitable cast, which give the user access to a value of type
     [t checkpoint] where [t] is the type of the start symbol.) *)

  let offer : 'a . 'a checkpoint ->
POTTIER Francois's avatar
POTTIER Francois committed
442
                   token * position * position ->
443 444
                   'a checkpoint
  = function
445 446 447
    | InputNeeded env ->
        Obj.magic discard env
    | _ ->
POTTIER Francois's avatar
POTTIER Francois committed
448
        invalid_arg "offer expects InputNeeded"
449

450
  let resume : 'a . 'a checkpoint -> 'a checkpoint = function
451 452
    | HandlingError env ->
        Obj.magic error env
453 454
    | Shifting (_, env, please_discard) ->
        Obj.magic run env please_discard
455 456 457
    | AboutToReduce (env, prod) ->
        Obj.magic reduce env prod
    | _ ->
POTTIER Francois's avatar
POTTIER Francois committed
458
        invalid_arg "resume expects HandlingError | Shifting | AboutToReduce"
459

POTTIER Francois's avatar
POTTIER Francois committed
460 461
  (* ------------------------------------------------------------------------ *)
  (* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
462 463

  (* The traditional interface. See [EngineTypes]. *)
464

POTTIER Francois's avatar
POTTIER Francois committed
465
  (* ------------------------------------------------------------------------ *)
466

467
  (* Wrapping a lexer and lexbuf as a token supplier. *)
468

469
  type supplier =
POTTIER Francois's avatar
POTTIER Francois committed
470
    unit -> token * position * position
POTTIER Francois's avatar
POTTIER Francois committed
471

472
  let lexer_lexbuf_to_supplier
473 474
      (lexer : Lexing.lexbuf -> token)
      (lexbuf : Lexing.lexbuf)
475
  : supplier =
476 477 478 479 480 481
    fun () ->
      let token = lexer lexbuf in
      let startp = lexbuf.Lexing.lex_start_p
      and endp = lexbuf.Lexing.lex_curr_p in
      token, startp, endp

POTTIER Francois's avatar
POTTIER Francois committed
482
  (* ------------------------------------------------------------------------ *)
483

484 485 486 487
  (* The main loop repeatedly handles intermediate checkpoints, until a final
     checkpoint is obtained. This allows implementing the monolithic interface
     ([entry]) in terms of the incremental interface ([start], [offer],
     [handle], [reduce]). *)
488

POTTIER Francois's avatar
POTTIER Francois committed
489 490
  (* By convention, acceptance is reported by returning a semantic value,
     whereas rejection is reported by raising [Error]. *)
POTTIER Francois's avatar
POTTIER Francois committed
491

492 493 494 495
  (* [loop] is polymorphic in ['a]. No cheating is involved in achieving this.
     All of the cheating resides in the types assigned to [offer] and [handle]
     above. *)

496 497 498
  let rec loop : 'a . supplier -> 'a checkpoint -> 'a =
    fun read checkpoint ->
    match checkpoint with
499
    | InputNeeded _ ->
500 501
        (* The parser needs a token. Request one from the lexer,
           and offer it to the parser, which will produce a new
502
           checkpoint. Then, repeat. *)
POTTIER Francois's avatar
POTTIER Francois committed
503
        let triple = read() in
504 505
        let checkpoint = offer checkpoint triple in
        loop read checkpoint
506
    | Shifting _
507 508
    | AboutToReduce _
    | HandlingError _ ->
509 510
        (* The parser has suspended itself, but does not need
           new input. Just resume the parser. Then, repeat. *)
511 512
        let checkpoint = resume checkpoint in
        loop read checkpoint
POTTIER Francois's avatar
POTTIER Francois committed
513
    | Accepted v ->
514 515
        (* The parser has succeeded and produced a semantic value.
           Return this semantic value to the user. *)
POTTIER Francois's avatar
POTTIER Francois committed
516 517
        v
    | Rejected ->
518
        (* The parser rejects this input. Raise an exception. *)
POTTIER Francois's avatar
POTTIER Francois committed
519
        raise Error
520

POTTIER Francois's avatar
POTTIER Francois committed
521
  let entry (s : state) lexer lexbuf : semantic_value =
522 523
    let initial = lexbuf.Lexing.lex_curr_p in
    loop (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial)
524

POTTIER Francois's avatar
POTTIER Francois committed
525
  (* ------------------------------------------------------------------------ *)
526

527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545
  (* [loop_handle] stops if it encounters an error, and at this point, invokes
     its failure continuation, without letting Menhir do its own traditional
     error-handling (which involves popping the stack, etc.). *)

  let rec loop_handle succeed fail read checkpoint =
    match checkpoint with
    | InputNeeded _ ->
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop_handle succeed fail read checkpoint
    | Shifting _
    | AboutToReduce _ ->
        let checkpoint = resume checkpoint in
        loop_handle succeed fail read checkpoint
    | HandlingError _
    | Rejected ->
        (* The parser has detected an error. Invoke the failure continuation. *)
        fail checkpoint
    | Accepted v ->
546 547 548 549
        (* The parser has succeeded and produced a semantic value. Invoke the
           success continuation. *)
        succeed v

POTTIER Francois's avatar
POTTIER Francois committed
550
  (* ------------------------------------------------------------------------ *)
551 552 553 554

  (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair
     of checkpoints to the failure continuation.

POTTIER Francois's avatar
POTTIER Francois committed
555 556 557 558 559 560
     The first (and oldest) checkpoint is the last [InputNeeded] checkpoint
     that was encountered before the error was detected. The second (and
     newest) checkpoint is where the error was detected, as in [loop_handle].
     Going back to the first checkpoint can be thought of as undoing any
     reductions that were performed after seeing the problematic token. (These
     reductions must be default reductions or spurious reductions.) *)
561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577

  let rec loop_handle_undo succeed fail read (inputneeded, checkpoint) =
    match checkpoint with
    | InputNeeded _ ->
        (* Update the last recorded [InputNeeded] checkpoint. *)
        let inputneeded = checkpoint in
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop_handle_undo succeed fail read (inputneeded, checkpoint)
    | Shifting _
    | AboutToReduce _ ->
        let checkpoint = resume checkpoint in
        loop_handle_undo succeed fail read (inputneeded, checkpoint)
    | HandlingError _
    | Rejected ->
        fail inputneeded checkpoint
    | Accepted v ->
578
        succeed v
579

580 581 582 583 584 585 586 587 588 589 590 591 592 593
  (* For simplicity, we publish a version of [loop_handle_undo] that takes a
     single checkpoint as an argument, instead of a pair of checkpoints. We
     check that the argument is [InputNeeded _], and duplicate it. *)

  (* The parser cannot accept or reject before it asks for the very first
     character of input. (Indeed, we statically reject a symbol that
     generates the empty language or the singleton language {epsilon}.)
     So, the [start] checkpoint must match [InputNeeded _]. Hence, it is
     permitted to call [loop_handle_undo] with a [start] checkpoint. *)

  let loop_handle_undo succeed fail read checkpoint =
    assert (match checkpoint with InputNeeded _ -> true | _ -> false);
    loop_handle_undo succeed fail read (checkpoint, checkpoint)

POTTIER Francois's avatar
POTTIER Francois committed
594 595
  (* ------------------------------------------------------------------------ *)

596
  let rec shifts checkpoint =
POTTIER Francois's avatar
POTTIER Francois committed
597 598 599
    match checkpoint with
    | Shifting (env, _, _) ->
        (* The parser is about to shift, which means it is willing to
600 601 602
           consume the terminal symbol that we have fed it. Return the
           state just before this transition. *)
        Some env
POTTIER Francois's avatar
POTTIER Francois committed
603 604
    | AboutToReduce _ ->
        (* The parser wishes to reduce. Just follow. *)
605
        shifts (resume checkpoint)
POTTIER Francois's avatar
POTTIER Francois committed
606 607
    | HandlingError _ ->
        (* The parser fails, which means it rejects the terminal symbol
608
           that we have fed it. *)
609
        None
POTTIER Francois's avatar
POTTIER Francois committed
610 611 612 613 614 615 616 617
    | InputNeeded _
    | Accepted _
    | Rejected ->
        (* None of these cases can arise. Indeed, after a token is submitted
           to it, the parser must shift, reduce, or signal an error, before
           it can request another token or terminate. *)
        assert false

POTTIER Francois's avatar
POTTIER Francois committed
618 619 620
  let acceptable checkpoint token pos =
    let triple = (token, pos, pos) in
    let checkpoint = offer checkpoint triple in
621 622 623
    match shifts checkpoint with
    | None      -> false
    | Some _env -> true
POTTIER Francois's avatar
POTTIER Francois committed
624

POTTIER Francois's avatar
POTTIER Francois committed
625
  (* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
626

627 628 629 630 631 632 633 634 635 636 637 638 639
  (* The type ['a lr1state] describes the (non-initial) states of the LR(1)
     automaton. The index ['a] represents the type of the semantic value
     associated with the state's incoming symbol. *)

  (* The type ['a lr1state] is defined as an alias for [state], which itself
     is usually defined as [int] (see [TableInterpreter]). So, ['a lr1state]
     is technically a phantom type, but should really be thought of as a GADT
     whose data constructors happen to be represented as integers. It is
     presented to the user as an abstract type (see [IncrementalEngine]). *)

  type 'a lr1state =
      state

640 641 642 643 644 645
  let find_default_reduction (state : _ lr1state) : production option =
    T.default_reduction state
      (fun () prod -> Some prod)
      (fun () -> None)
      ()

POTTIER Francois's avatar
POTTIER Francois committed
646
  (* ------------------------------------------------------------------------ *)
647

648 649 650 651 652 653 654
  (* Stack inspection. *)

  (* We offer a read-only view of the parser's state as a stream of elements.
     Each element contains a pair of a (non-initial) state and a semantic
     value associated with (the incoming symbol of) this state. Note that the
     type [element] is an existential type. *)

655 656 657 658
  (* As of 2017/03/31, the type [stack] and the function [stack] are DEPRECATED.
     If desired, they could now be implemented outside Menhir, by relying on
     the functions [top] and [pop]. *)

659
  type element =
POTTIER Francois's avatar
POTTIER Francois committed
660
    | Element: 'a lr1state * 'a * position * position -> element
661

662 663
  open General

664 665 666
  type stack =
    element stream

667
  (* If [current] is the current state and [cell] is the top stack cell,
668
     then [stack cell current] is a view of the parser's state as a stream
669
     of elements. *)
670

671
  let rec stack cell current : element stream =
672 673 674 675 676 677 678 679 680 681 682 683
    lazy (
      (* The stack is empty iff the top stack cell is its own successor. In
         that case, the current state [current] should be an initial state
         (which has no incoming symbol).
         We do not allow the user to inspect this state. *)
      let next = cell.next in
      if next == cell then
        Nil
      else
        (* Construct an element containing the current state [current] as well
           as the semantic value contained in the top stack cell. This semantic
           value is associated with the incoming symbol of this state, so it
684 685 686 687 688 689
           makes sense to pair them together. The state has type ['a state] and
           the semantic value has type ['a], for some type ['a]. Here, the OCaml
           type-checker thinks ['a] is [semantic_value] and considers this code
           well-typed. Outside, we will use magic to provide the user with a way
           of inspecting states and recovering the value of ['a]. *)
        let element = Element (
690 691 692 693 694
          current,
          cell.semv,
          cell.startp,
          cell.endp
        ) in
695
        Cons (element, stack next cell.state)
696 697
    )

698 699
  let stack env : element stream =
    stack env.stack env.current
700

701 702 703 704 705 706 707 708 709 710 711 712
  (* As explained above, the function [top] allows access to the top stack
     element only if the stack is nonempty, i.e., only if the current state
     is not an initial state. *)

  let top env : element option =
    let cell = env.stack in
    let next = cell.next in
    if next == cell then
      None
    else
      Some (Element (env.current, cell.semv, cell.startp, cell.endp))

713 714 715 716
  (* [equal] compares the stacks for physical equality, and compares the
     current states via their numbers (this seems cleaner than using OCaml's
     polymorphic equality). *)

717 718 719 720 721 722
  (* The two fields that are not compared by [equal], namely [error] and
     [triple], are overwritten by the function [discard], which handles
     [InputNeeded] checkpoints. Thus, if [equal env1 env2] holds, then the
     checkpoints [input_needed env1] and [input_needed env2] are
     equivalent: they lead the parser to behave in the same way. *)

723 724 725 726
  let equal env1 env2 =
    env1.stack == env2.stack &&
    number env1.current = number env2.current

727 728
  let current_state_number env =
    number env.current
729

POTTIER Francois's avatar
POTTIER Francois committed
730
  (* ------------------------------------------------------------------------ *)
731 732 733 734 735 736

  (* Access to the position of the lookahead token. *)

  let positions { triple = (_, startp, endp); _ } =
    startp, endp

POTTIER Francois's avatar
POTTIER Francois committed
737
  (* ------------------------------------------------------------------------ *)
738 739 740 741 742 743

  (* Access to information about default reductions. *)

  (* We can make this a function of states, or a function of environments. For
     now, the latter appears simpler. *)

744
  let env_has_default_reduction env : bool =
745 746 747 748 749 750
    T.default_reduction
      env.current
      (fun _env _prod -> true)
      (fun _env -> false)
      env

POTTIER Francois's avatar
POTTIER Francois committed
751
  (* ------------------------------------------------------------------------ *)
752

POTTIER Francois's avatar
POTTIER Francois committed
753 754 755 756 757 758 759 760 761 762 763 764 765
  (* The following functions work at the level of environments (as opposed to
     checkpoints). The function [pop] causes the automaton to go back into the
     past, pretending that the last input symbol has never been read. The
     function [force_reduction] causes the automaton to re-interpret the past,
     by recognizing the right-hand side of a production and reducing this
     production. The function [feed] causes the automaton to progress into the
     future by pretending that a (terminal or nonterminal) symbol has been
     read. *)

  (* The function [feed] would ideally be defined here. However, for this
     function to be type-safe, the GADT ['a symbol] is needed. For this
     reason, we move its definition to [InspectionTableInterpreter], where
     the inspection API is available. *)
766

POTTIER Francois's avatar
POTTIER Francois committed
767 768
  (* [pop] pops one stack cell. It cannot go wrong. *)

769
  let pop (env : 'a env) : 'a env option =
770 771 772
    let cell = env.stack in
    let next = cell.next in
    if next == cell then
POTTIER Francois's avatar
POTTIER Francois committed
773
      (* The stack is empty. *)
774 775
      None
    else
POTTIER Francois's avatar
POTTIER Francois committed
776
      (* The stack is nonempty. Pop off one cell. *)
777 778
      Some { env with stack = next; current = cell.state }

POTTIER Francois's avatar
POTTIER Francois committed
779 780 781
  (* [force_reduction] is analogous to [reduce], except that it does not
     continue by calling [run env] or [initiate env]. Instead, it returns
     [env] to the user. *)
782

POTTIER Francois's avatar
POTTIER Francois committed
783 784 785
  (* [force_reduction] is dangerous insofar as it executes a semantic action.
     This semantic action could have side effects: nontermination, state,
     exceptions, input/output, etc. *)
786

787
  let force_reduction prod (env : 'a env) : 'a env =
788 789 790 791
    (* Check if this reduction is permitted. This check is REALLY important.
       The stack must have the correct shape: that is, it must be sufficiently
       high, and must contain semantic values of appropriate types, otherwise
       the semantic action will crash and burn. *)
POTTIER Francois's avatar
POTTIER Francois committed
792 793 794 795 796 797
    (* We currently check whether the current state is WILLING to reduce this
       production (i.e., there is a reduction action in the action table row
       associated with this state), whereas it would be more liberal to check
       whether this state is CAPABLE of reducing this production (i.e., the
       stack has an appropriate shape). We currently have no means of
       performing such a check. *)
798 799 800 801 802 803 804 805 806 807
    if not (T.may_reduce env.current prod) then
      invalid_arg "force_reduction: this reduction is not permitted in this state"
    else begin
      (* Invoke the semantic action. *)
      let stack = T.semantic_action prod env in
      (* Perform a goto transition. *)
      let current = T.goto_prod stack.state prod in
      { env with stack; current }
    end

POTTIER Francois's avatar
POTTIER Francois committed
808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824
  (* The environment manipulation functions -- [pop] and [force_reduction]
     above, plus [feed] -- manipulate the automaton's stack and current state,
     but do not affect the automaton's lookahead symbol. When the function
     [input_needed] is used to go back from an environment to a checkpoint
     (and therefore, resume normal parsing), the lookahead symbol is clobbered
     anyway, since the only action that the user can take is to call [offer].
     So far, so good. One problem, though, is that this call to [offer] may
     well place the automaton in a configuration of a state [s] and a
     lookahead symbol [t] that is normally unreachable. Also, perhaps the
     state [s] is a state where an input symbol normally is never demanded, so
     this [InputNeeded] checkpoint is fishy. There does not seem to be a deep
     problem here, but, when programming an error recovery strategy, one
     should pay some attention to this issue. Ideally, perhaps, one should use
     [input_needed] only in a state [s] where an input symbol is normally
     demanded, that is, a state [s] whose incoming symbol is a terminal symbol
     and which does not have a default reduction on [#]. *)

825
  let input_needed (env : 'a env) : 'a checkpoint =
826 827
    InputNeeded env

828 829
  (* The following functions are compositions of [top] and [pop]. *)

830
  let rec pop_many i env =
831 832 833 834 835 836
    if i = 0 then
      Some env
    else match pop env with
    | None ->
        None
    | Some env ->
837
        pop_many (i - 1) env
838 839

  let get i env =
840
    match pop_many i env with
841 842 843 844 845
    | None ->
        None
    | Some env ->
        top env

846
end