Engine.ml 26.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
open EngineTypes

(* The LR parsing engine. *)

(* This module is used:

   - at compile time, if so requested by the user, via the --interpret options;
   - at run time, in the table-based back-end. *)

module Make (T : TABLE) = struct

  (* This propagates type and exception definitions. *)

  include T

16
  type env =
17
      (state, semantic_value, token) EngineTypes.env
18

19 20
  (* --------------------------------------------------------------------------- *)

21
  (* The type [checkpoint] represents an intermediate or final result of the
22
     parser. See [EngineTypes]. *)
23

24 25
  (* The type [checkpoint] is presented to the user as a private type (see
     [IncrementalEngine]). This prevents the user from manufacturing checkpoints
26 27 28
     (i.e., continuations) that do not make sense. (Such continuations could
     potentially violate the LR invariant and lead to crashes.) *)

29
  type 'a checkpoint =
30
    | InputNeeded of env
31
    | Shifting of env * env * bool
32 33
    | AboutToReduce of env * production
    | HandlingError of env
34
    | Accepted of 'a
35
    | Rejected
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58

  (* --------------------------------------------------------------------------- *)

  (* In the code-based back-end, the [run] function is sometimes responsible
     for pushing a new cell on the stack. This is motivated by code sharing
     concerns. In this interpreter, there is no such concern; [run]'s caller
     is always responsible for updating the stack. *)

  (* In the code-based back-end, there is a [run] function for each state
     [s]. This function can behave in two slightly different ways, depending
     on when it is invoked, or (equivalently) depending on [s].

     If [run] is invoked after shifting a terminal symbol (or, equivalently,
     if [s] has a terminal incoming symbol), then [run] discards a token,
     unless [s] has a default reduction on [#]. (Indeed, in that case,
     requesting the next token might drive the lexer off the end of the input
     stream.)

     If, on the other hand, [run] is invoked after performing a goto transition,
     or invoked directly by an entry point, then there is nothing to discard.

     These two cases are reflected in [CodeBackend.gettoken].

59 60 61 62
     Here, the code is structured in a slightly different way. It is up to the
     caller of [run] to indicate whether to discard a token, via the parameter
     [please_discard]. This flag is set when [s] is being entered by shifting
     a terminal symbol and [s] does not have a default reduction on [#]. *)
63

64
  (* The following recursive group of functions are tail recursive, produce a
65 66 67
     checkpoint of type [semantic_value checkpoint], and cannot raise an
     exception. A semantic action can raise [Error], but this exception is
     immediately caught within [reduce]. *)
68

69
  let rec run env please_discard : semantic_value checkpoint =
70 71 72

    (* Log the fact that we just entered this state. *)
    
73 74
    if log then
      Log.state env.current;
75

76 77 78 79 80
    (* If [please_discard] is set, we discard the current lookahead token and
       fetch the next one. In order to request a token from the user, we
       return an [InputNeeded] continuation, which, when invoked by the user,
       will take us to [discard]. If [please_discard] is not set, we skip this
       step and jump directly to [check_for_default_reduction]. *)
81 82

    if please_discard then
83
      InputNeeded env
84 85 86
    else
      check_for_default_reduction env

87 88
  (* [discard env triple] stores [triple] into [env], overwriting the previous
     token. It is invoked by [offer], which itself is invoked by the user in
89
     response to an [InputNeeded] checkpoint. *)
90

91
  and discard env triple =
92 93 94 95
    if log then begin
      let (token, startp, endp) = triple in
      Log.lookahead_token (T.token2terminal token) startp endp
    end;
96
    let env = { env with error = false; triple } in
97 98 99
    check_for_default_reduction env

  and check_for_default_reduction env =
100 101 102 103 104 105

    (* Examine what situation we are in. This case analysis is analogous to
       that performed in [CodeBackend.gettoken], in the sub-case where we do
       not have a terminal incoming symbol. *)

    T.default_reduction
106
      env.current
107
      announce_reduce       (* there is a default reduction; perform it *)
108
      check_for_error_token (* there is none; continue below *)
109 110
      env

111
  and check_for_error_token env =
112 113 114 115 116

    (* There is no default reduction. Consult the current lookahead token
       so as to determine which action should be taken. *)

    (* Peeking at the first input token, without taking it off the input
117
       stream, is done by reading [env.triple]. We are careful to first
118
       check [env.error]. *)
119

120 121
    (* Note that, if [please_discard] was true, then we have just called
       [discard], so the lookahead token cannot be [error]. *)
122

123 124 125
    (* Returning [HandlingError env] is equivalent to calling [error env]
       directly, except it allows the user to regain control. *)

126
    if env.error then begin
127 128
      if log then
        Log.resuming_error_handling();
129
      HandlingError env
130 131
    end
    else
132
      let (token, _, _) = env.triple in
133

134 135 136
      (* We consult the two-dimensional action table, indexed by the
         current state and the current lookahead token, in order to
         determine which action should be taken. *)
137

138 139 140 141 142
      T.action
        env.current                    (* determines a row *)
        (T.token2terminal token)       (* determines a column *)
        (T.token2value token)
        shift                          (* shift continuation *)
143
        announce_reduce                (* reduce continuation *)
144 145
        initiate                       (* failure continuation *)
        env
146 147 148 149 150 151 152

  (* --------------------------------------------------------------------------- *)

  (* This function takes care of shift transitions along a terminal symbol.
     (Goto transitions are taken care of within [reduce] below.) The symbol
     can be either an actual token or the [error] pseudo-token. *)

153
  (* Here, the lookahead token CAN be [error]. *)
154

155 156 157 158
  and shift env
      (please_discard : bool)
      (terminal : terminal)
      (value : semantic_value)
159
      (s' : state) =
160 161 162

    (* Log the transition. *)

163 164
    if log then
      Log.shift terminal s';
165 166 167 168

    (* Push a new cell onto the stack, containing the identity of the
       state that we are leaving. *)

169
    let (_, startp, endp) = env.triple in
170
    let stack = {
171 172
      state = env.current;
      semv = value;
173 174
      startp;
      endp;
175
      next = env.stack;
176
    } in
177 178 179

    (* Switch to state [s']. *)

180 181 182 183 184 185 186 187 188 189
    let new_env = { env with stack; current = s' } in

    (* Expose the transition to the user. (In principle, we have a choice
       between exposing the transition before we take it, after we take
       it, or at some point in between. This affects the number and type
       of the parameters carried by [Shifting]. Here, we choose to expose
       the transition after we take it; this allows [Shifting] to carry
       only three parameters, whose meaning is simple.) *)

    Shifting (env, new_env, please_discard)
190 191 192

  (* --------------------------------------------------------------------------- *)

193
  (* The function [announce_reduce] stops the parser and returns a checkpoint
194 195
     which allows the parser to be resumed by calling [reduce]. *)

196 197 198 199
  (* Only ordinary productions are exposed to the user. Start productions
     are not exposed to the user. Reducing a start production simply leads
     to the successful termination of the parser. *)

200
  and announce_reduce env (prod : production) =
201 202 203 204
    if T.is_start prod then
      accept env prod
    else
      AboutToReduce (env, prod)
205

206 207
  (* The function [reduce] takes care of reductions. It is invoked by
     [resume] after an [AboutToReduce] event has been produced. *)
208

209
  (* Here, the lookahead token CAN be [error]. *)
210

211 212
  (* The production [prod] CANNOT be a start production. *)

213
  and reduce env (prod : production) =
214 215 216

    (* Log a reduction event. *)

217 218
    if log then
      Log.reduce_or_accept prod;
219 220

    (* Invoke the semantic action. The semantic action is responsible for
221
       truncating the stack and pushing a new cell onto the stack, which
222
       contains a new semantic value. It can raise [Error]. *)
223

224
    (* If the semantic action terminates normally, it returns a new stack,
225
       which becomes the current stack. *)
226

227 228
    (* If the semantic action raises [Error], we catch it and initiate error
       handling. *)
229

230 231 232 233
    (* This [match/with/exception] construct requires OCaml 4.02. *)

    match T.semantic_action prod env with
    | stack ->
234

235 236 237
        (* By our convention, the semantic action has produced an updated
           stack. The state now found in the top stack cell is the return
           state. *)
238

239 240 241
        (* Perform a goto transition. The target state is determined
           by consulting the goto table at the return state and at
           production [prod]. *)
242

243 244 245
        let current = T.goto stack.state prod in
        let env = { env with stack; current } in
        run env false
246

247
    | exception Error ->
248
        initiate env
249

250 251 252 253 254 255 256 257 258
  and accept env prod =
    (* Log an accept event. *)
    if log then
      Log.reduce_or_accept prod;
    (* Extract the semantic value out of the stack. *)
    let v = env.stack.semv in
    (* Finish. *)
    Accepted v

259 260 261 262
  (* --------------------------------------------------------------------------- *)

  (* The following functions deal with errors. *)

263 264
  (* [initiate] initiates or resumes error handling. *)

265
  (* Here, the lookahead token CAN be [error]. *)
266

267
  and initiate env =
268 269
    if log then
      Log.initiating_error_handling();
270
    let env = { env with error = true } in
271
    HandlingError env
272 273 274

  (* [error] handles errors. *)

275
  and error env =
276
    assert env.error;
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297

    (* Consult the column associated with the [error] pseudo-token in the
       action table. *)

    T.action
      env.current                    (* determines a row *)
      T.error_terminal               (* determines a column *)
      T.error_value
      error_shift                    (* shift continuation *)
      error_reduce                   (* reduce continuation *)
      error_fail                     (* failure continuation *)
      env

  and error_shift env please_discard terminal value s' =

    (* Here, [terminal] is [T.error_terminal], and [value] is [T.error_value]. *)

    assert (terminal = T.error_terminal && value = T.error_value);

    (* This state is capable of shifting the [error] token. *)

298 299
    if log then
      Log.handling_error env.current;
300 301 302 303 304 305
    shift env please_discard terminal value s'

  and error_reduce env prod =

    (* This state is capable of performing a reduction on [error]. *)

306 307
    if log then
      Log.handling_error env.current;
308
    reduce env prod
309 310 311
      (* Intentionally calling [reduce] instead of [announce_reduce].
         It does not seem very useful, and it could be confusing, to
         expose the reduction steps taken during error handling. *)
312 313 314 315 316 317 318 319 320 321 322 323

  and error_fail env =

    (* This state is unable to handle errors. Attempt to pop a stack
       cell. *)

    let cell = env.stack in
    let next = cell.next in
    if next == cell then

      (* The stack is empty. Die. *)

324
      Rejected
325 326 327 328 329 330

    else begin

      (* The stack is nonempty. Pop a cell, updating the current state
	 with that found in the popped cell, and try again. *)

331 332 333 334
      let env = { env with
        stack = next;
        current = cell.state
      } in
335
      HandlingError env
336 337 338

    end

339 340
  (* End of the nest of tail recursive functions. *)

POTTIER Francois's avatar
POTTIER Francois committed
341
  (* --------------------------------------------------------------------------- *)
342 343
  (* --------------------------------------------------------------------------- *)

POTTIER Francois's avatar
POTTIER Francois committed
344
  (* The incremental interface. See [EngineTypes]. *)
345

POTTIER Francois's avatar
POTTIER Francois committed
346
  (* [start s] begins the parsing process. *)
347

348
  let start (s : state) (initial : Lexing.position) : semantic_value checkpoint =
349
    
350 351 352 353 354
    (* Build an empty stack. This is a dummy cell, which is its own successor.
       Its [next] field WILL be accessed by [error_fail] if an error occurs and
       is propagated all the way until the stack is empty. Its [endp] field WILL
       be accessed (by a semantic action) if an epsilon production is reduced
       when the stack is empty. *)
355 356 357 358

    let rec empty = {
      state = s;                          (* dummy *)
      semv = T.error_value;               (* dummy *)
359 360
      startp = initial;                   (* dummy *)
      endp = initial;
361 362 363 364 365
      next = empty;
    } in

    (* Build an initial environment. *)

POTTIER Francois's avatar
POTTIER Francois committed
366 367 368 369 370 371 372 373
    (* Unfortunately, there is no type-safe way of constructing a
       dummy token. Tokens carry semantic values, which in general
       we cannot manufacture. This instance of [Obj.magic] could
       be avoided by adopting a different representation (e.g., no
       [env.error] field, and an option in the first component of
       [env.triple]), but I like this representation better. *)

    let dummy_token = Obj.magic () in
374
    let env = {
375
      error = false;
376
      triple = (dummy_token, initial, initial); (* dummy *)
377 378 379 380
      stack = empty;
      current = s;
    } in

381
    (* Begin parsing. *)
382

383 384 385 386 387
    (* The parameter [please_discard] here is [true], which means we know
       that we must read at least one token. This claim relies on the fact
       that we have ruled out the two special cases where a start symbol
       recognizes the empty language or the singleton language {epsilon}. *)

388
    run env true
389

390 391 392
  (* [offer checkpoint triple] is invoked by the user in response to a
     checkpoint of the form [InputNeeded env]. It checks that [checkpoint] is
     indeed of this form, and invokes [discard]. *)
393

394 395 396 397
  (* [resume checkpoint] is invoked by the user in response to a checkpoint of
     the form [AboutToReduce (env, prod)] or [HandlingError env]. It checks
     that [checkpoint] is indeed of this form, and invokes [reduce] or
     [error], as appropriate. *)
398

399
  (* In reality, [offer] and [resume] accept an argument of type
400 401 402
     [semantic_value checkpoint] and produce a checkpoint of the same type.
     The choice of [semantic_value] is forced by the fact that this is the
     parameter of the checkpoint [Accepted]. *)
403

404
  (* We change this as follows. *)
405

406
  (* We change the argument and result type of [offer] and [resume] from
407 408 409 410 411 412 413 414 415 416 417 418 419 420
     [semantic_value checkpoint] to ['a checkpoint]. This is safe, in this
     case, because we give the user access to values of type [t checkpoint]
     only if [t] is indeed the type of the eventual semantic value for this
     run. (More precisely, by examining the signatures [INCREMENTAL_ENGINE]
     and [INCREMENTAL_ENGINE_START], one finds that the user can build a value
     of type ['a checkpoint] only if ['a] is [semantic_value]. The table
     back-end goes further than this and produces versions of [start] composed
     with a suitable cast, which give the user access to a value of type
     [t checkpoint] where [t] is the type of the start symbol.) *)

  let offer : 'a . 'a checkpoint ->
                   token * Lexing.position * Lexing.position ->
                   'a checkpoint
  = function
421 422 423 424 425
    | InputNeeded env ->
        Obj.magic discard env
    | _ ->
        raise (Invalid_argument "offer expects InputNeeded")

426
  let resume : 'a . 'a checkpoint -> 'a checkpoint = function
427 428
    | HandlingError env ->
        Obj.magic error env
429 430
    | Shifting (_, env, please_discard) ->
        Obj.magic run env please_discard
431 432 433 434
    | AboutToReduce (env, prod) ->
        Obj.magic reduce env prod
    | _ ->
        raise (Invalid_argument "resume expects HandlingError | AboutToReduce")
435

POTTIER Francois's avatar
POTTIER Francois committed
436 437 438 439
  (* --------------------------------------------------------------------------- *)
  (* --------------------------------------------------------------------------- *)

  (* The traditional interface. See [EngineTypes]. *)
440 441 442

  (* --------------------------------------------------------------------------- *)

443
  (* Wrapping a lexer and lexbuf as a token supplier. *)
444

445
  type supplier =
POTTIER Francois's avatar
POTTIER Francois committed
446 447
    unit -> token * Lexing.position * Lexing.position

448
  let lexer_lexbuf_to_supplier
449 450
      (lexer : Lexing.lexbuf -> token)
      (lexbuf : Lexing.lexbuf)
451
  : supplier =
452 453 454 455 456 457 458 459
    fun () ->
      let token = lexer lexbuf in
      let startp = lexbuf.Lexing.lex_start_p
      and endp = lexbuf.Lexing.lex_curr_p in
      token, startp, endp

  (* --------------------------------------------------------------------------- *)

460 461 462 463
  (* The main loop repeatedly handles intermediate checkpoints, until a final
     checkpoint is obtained. This allows implementing the monolithic interface
     ([entry]) in terms of the incremental interface ([start], [offer],
     [handle], [reduce]). *)
464 465 466

  (* By convention, acceptance is reported by returning a semantic value, whereas
     rejection is reported by raising [Error]. *)
POTTIER Francois's avatar
POTTIER Francois committed
467

468 469 470 471
  (* [loop] is polymorphic in ['a]. No cheating is involved in achieving this.
     All of the cheating resides in the types assigned to [offer] and [handle]
     above. *)

472 473 474
  let rec loop : 'a . supplier -> 'a checkpoint -> 'a =
    fun read checkpoint ->
    match checkpoint with
475
    | InputNeeded _ ->
476 477
        (* The parser needs a token. Request one from the lexer,
           and offer it to the parser, which will produce a new
478
           checkpoint. Then, repeat. *)
POTTIER Francois's avatar
POTTIER Francois committed
479
        let triple = read() in
480 481
        let checkpoint = offer checkpoint triple in
        loop read checkpoint
482
    | Shifting _
483 484
    | AboutToReduce _
    | HandlingError _ ->
485 486
        (* The parser has suspended itself, but does not need
           new input. Just resume the parser. Then, repeat. *)
487 488
        let checkpoint = resume checkpoint in
        loop read checkpoint
POTTIER Francois's avatar
POTTIER Francois committed
489
    | Accepted v ->
490 491
        (* The parser has succeeded and produced a semantic value.
           Return this semantic value to the user. *)
POTTIER Francois's avatar
POTTIER Francois committed
492 493
        v
    | Rejected ->
494
        (* The parser rejects this input. Raise an exception. *)
POTTIER Francois's avatar
POTTIER Francois committed
495
        raise Error
496

POTTIER Francois's avatar
POTTIER Francois committed
497
  let entry (s : state) lexer lexbuf : semantic_value =
498 499
    let initial = lexbuf.Lexing.lex_curr_p in
    loop (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial)
500

501 502
  (* --------------------------------------------------------------------------- *)

503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
  (* [loop_handle] stops if it encounters an error, and at this point, invokes
     its failure continuation, without letting Menhir do its own traditional
     error-handling (which involves popping the stack, etc.). *)

  let rec loop_handle succeed fail read checkpoint =
    match checkpoint with
    | InputNeeded _ ->
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop_handle succeed fail read checkpoint
    | Shifting _
    | AboutToReduce _ ->
        let checkpoint = resume checkpoint in
        loop_handle succeed fail read checkpoint
    | HandlingError _
    | Rejected ->
        (* The parser has detected an error. Invoke the failure continuation. *)
        fail checkpoint
    | Accepted v ->
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553
        (* The parser has succeeded and produced a semantic value. Invoke the
           success continuation. *)
        succeed v

  (* --------------------------------------------------------------------------- *)

  (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair
     of checkpoints to the failure continuation.

     The first (and oldest) checkpoint is the last [InputNeeded] checkpoint that
     was encountered before the error was detected. The second (and newest)
     checkpoint is where the error was detected, as in [loop_handle]. Going back
     to the first checkpoint can be thought of as undoing any reductions that
     were performed after seeing the problematic token. (These reductions must
     be default reductions or spurious reductions.) *)

  let rec loop_handle_undo succeed fail read (inputneeded, checkpoint) =
    match checkpoint with
    | InputNeeded _ ->
        (* Update the last recorded [InputNeeded] checkpoint. *)
        let inputneeded = checkpoint in
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop_handle_undo succeed fail read (inputneeded, checkpoint)
    | Shifting _
    | AboutToReduce _ ->
        let checkpoint = resume checkpoint in
        loop_handle_undo succeed fail read (inputneeded, checkpoint)
    | HandlingError _
    | Rejected ->
        fail inputneeded checkpoint
    | Accepted v ->
554
        succeed v
555

556 557 558 559 560 561 562 563 564 565 566 567 568 569
  (* For simplicity, we publish a version of [loop_handle_undo] that takes a
     single checkpoint as an argument, instead of a pair of checkpoints. We
     check that the argument is [InputNeeded _], and duplicate it. *)

  (* The parser cannot accept or reject before it asks for the very first
     character of input. (Indeed, we statically reject a symbol that
     generates the empty language or the singleton language {epsilon}.)
     So, the [start] checkpoint must match [InputNeeded _]. Hence, it is
     permitted to call [loop_handle_undo] with a [start] checkpoint. *)

  let loop_handle_undo succeed fail read checkpoint =
    assert (match checkpoint with InputNeeded _ -> true | _ -> false);
    loop_handle_undo succeed fail read (checkpoint, checkpoint)

POTTIER Francois's avatar
POTTIER Francois committed
570 571 572 573 574 575 576 577 578 579
  (* ------------------------------------------------------------------------ *)

  (* [loop_test f checkpoint accu] assumes that [checkpoint] has been obtained
     by submitting a token to the parser. It runs the parser from [checkpoint],
     through an arbitrary number of reductions, until the parser either accepts
     this token (i.e., shifts) or rejects it (i.e., signals an error). If the
     parser decides to shift, then the accumulator is updated by applying the
     user function [f] to the [env] just before shifting and to the old [accu].
     Otherwise, the accumulator is not updated, i.e., [accu] is returned. *)

POTTIER Francois's avatar
POTTIER Francois committed
580 581
  (* This test causes some semantic actions to be run! The semantic actions
     should be side-effect free, or their side-effects should be harmless. *)
POTTIER Francois's avatar
POTTIER Francois committed
582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604

  let rec loop_test f checkpoint accu =
    match checkpoint with
    | Shifting (env, _, _) ->
        (* The parser is about to shift, which means it is willing to
           consume the terminal symbol that we have fed it. Update the
           accumulator with the state just before this transition. *)
        f env accu
    | AboutToReduce _ ->
        (* The parser wishes to reduce. Just follow. *)
        loop_test f (resume checkpoint) accu
    | HandlingError _ ->
        (* The parser fails, which means it rejects the terminal symbol
           that we have fed it. Do not update the accumulator. *)
        accu
    | InputNeeded _
    | Accepted _
    | Rejected ->
        (* None of these cases can arise. Indeed, after a token is submitted
           to it, the parser must shift, reduce, or signal an error, before
           it can request another token or terminate. *)
        assert false

605 606
  (* --------------------------------------------------------------------------- *)

POTTIER Francois's avatar
POTTIER Francois committed
607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
  (* The function [loop_test] can be used, after an error has been detected, to
     dynamically test which tokens would have been accepted at this point. We
     provide this test, ready for use. *)

  (* For completeness, one must undo any spurious reductions before carrying out
     this test -- that is, one must apply [acceptable] to the FIRST checkpoint
     that is passed by [loop_handle_undo] to its failure continuation. *)

  (* This test causes some semantic actions to be run! The semantic actions
     should be side-effect free, or their side-effects should be harmless. *)

  (* The position [pos] is used as the start and end positions of the
     hypothetical token, and may be picked up by the semantic actions. We
     suggest using the position where the error was detected. *)

  let acceptable checkpoint token pos =
    let triple = (token, pos, pos) in
    let checkpoint = offer checkpoint triple in
    loop_test (fun _env _accu -> true) checkpoint false

  (* --------------------------------------------------------------------------- *)

629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
  (* The type ['a lr1state] describes the (non-initial) states of the LR(1)
     automaton. The index ['a] represents the type of the semantic value
     associated with the state's incoming symbol. *)

  (* The type ['a lr1state] is defined as an alias for [state], which itself
     is usually defined as [int] (see [TableInterpreter]). So, ['a lr1state]
     is technically a phantom type, but should really be thought of as a GADT
     whose data constructors happen to be represented as integers. It is
     presented to the user as an abstract type (see [IncrementalEngine]). *)

  type 'a lr1state =
      state

  (* --------------------------------------------------------------------------- *)

644 645 646 647 648 649 650
  (* Stack inspection. *)

  (* We offer a read-only view of the parser's state as a stream of elements.
     Each element contains a pair of a (non-initial) state and a semantic
     value associated with (the incoming symbol of) this state. Note that the
     type [element] is an existential type. *)

651
  type element =
652 653
    | Element: 'a lr1state * 'a * Lexing.position * Lexing.position -> element

654 655
  open General

656 657 658
  type stack =
    element stream

659
  (* If [current] is the current state and [cell] is the top stack cell,
660
     then [stack cell current] is a view of the parser's state as a stream
661
     of elements. *)
662

663
  let rec stack cell current : element stream =
664 665 666 667 668 669 670 671 672 673 674 675
    lazy (
      (* The stack is empty iff the top stack cell is its own successor. In
         that case, the current state [current] should be an initial state
         (which has no incoming symbol).
         We do not allow the user to inspect this state. *)
      let next = cell.next in
      if next == cell then
        Nil
      else
        (* Construct an element containing the current state [current] as well
           as the semantic value contained in the top stack cell. This semantic
           value is associated with the incoming symbol of this state, so it
676 677 678 679 680 681
           makes sense to pair them together. The state has type ['a state] and
           the semantic value has type ['a], for some type ['a]. Here, the OCaml
           type-checker thinks ['a] is [semantic_value] and considers this code
           well-typed. Outside, we will use magic to provide the user with a way
           of inspecting states and recovering the value of ['a]. *)
        let element = Element (
682 683 684 685 686
          current,
          cell.semv,
          cell.startp,
          cell.endp
        ) in
687
        Cons (element, stack next cell.state)
688 689
    )

690 691
  let stack env : element stream =
    stack env.stack env.current
692

693 694 695 696 697 698 699
  (* --------------------------------------------------------------------------- *)

  (* Access to the position of the lookahead token. *)

  let positions { triple = (_, startp, endp); _ } =
    startp, endp

700 701 702 703 704 705 706 707 708 709 710 711 712 713
  (* --------------------------------------------------------------------------- *)

  (* Access to information about default reductions. *)

  (* We can make this a function of states, or a function of environments. For
     now, the latter appears simpler. *)

  let has_default_reduction env : bool =
    T.default_reduction
      env.current
      (fun _env _prod -> true)
      (fun _env -> false)
      env

714 715
end