Engine.ml 30.2 KB
Newer Older
1 2 3 4 5 6
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
POTTIER Francois's avatar
POTTIER Francois committed
7
(* *)
8 9 10 11 12 13
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU Library General Public License version 2, with a         *)
(*  special exception on linking, as described in the file LICENSE.           *)
(*                                                                            *)
(******************************************************************************)

POTTIER Francois's avatar
POTTIER Francois committed
14
type position = Lexing.position
15 16 17 18 19 20 21 22 23 24 25
open EngineTypes

(* The LR parsing engine. *)

(* This module is used:

   - at compile time, if so requested by the user, via the --interpret options;
   - at run time, in the table-based back-end. *)

module Make (T : TABLE) = struct

POTTIER Francois's avatar
POTTIER Francois committed
26 27
  (* This propagates type and exception definitions. The function [number],
     too, is defined by this [include] declaration. *)
28 29 30

  include T

31
  type 'a env =
32
      (state, semantic_value, token) EngineTypes.env
33

POTTIER Francois's avatar
POTTIER Francois committed
34
  (* ------------------------------------------------------------------------ *)
35

36
  (* The type [checkpoint] represents an intermediate or final result of the
37
     parser. See [EngineTypes]. *)
38

39
  (* The type [checkpoint] is presented to the user as a private type (see
POTTIER Francois's avatar
POTTIER Francois committed
40 41 42 43
     [IncrementalEngine]). This prevents the user from manufacturing
     checkpoints (i.e., continuations) that do not make sense. (Such
     continuations could potentially violate the LR invariant and lead to
     crashes.) *)
44

45 46 47 48 49
  (* 2017/03/29 Although [checkpoint] is a private type, we now expose a
     constructor function, [input_needed]. This function allows manufacturing
     a checkpoint out of an environment. For this reason, the type [env] must
     also be parameterized with ['a]. *)

50
  type 'a checkpoint =
51 52 53 54
    | InputNeeded of 'a env
    | Shifting of 'a env * 'a env * bool
    | AboutToReduce of 'a env * production
    | HandlingError of 'a env
55
    | Accepted of 'a
56
    | Rejected
57

POTTIER Francois's avatar
POTTIER Francois committed
58
  (* ------------------------------------------------------------------------ *)
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74

  (* In the code-based back-end, the [run] function is sometimes responsible
     for pushing a new cell on the stack. This is motivated by code sharing
     concerns. In this interpreter, there is no such concern; [run]'s caller
     is always responsible for updating the stack. *)

  (* In the code-based back-end, there is a [run] function for each state
     [s]. This function can behave in two slightly different ways, depending
     on when it is invoked, or (equivalently) depending on [s].

     If [run] is invoked after shifting a terminal symbol (or, equivalently,
     if [s] has a terminal incoming symbol), then [run] discards a token,
     unless [s] has a default reduction on [#]. (Indeed, in that case,
     requesting the next token might drive the lexer off the end of the input
     stream.)

POTTIER Francois's avatar
POTTIER Francois committed
75 76 77
     If, on the other hand, [run] is invoked after performing a goto
     transition, or invoked directly by an entry point, then there is nothing
     to discard.
78 79 80

     These two cases are reflected in [CodeBackend.gettoken].

81 82 83 84
     Here, the code is structured in a slightly different way. It is up to the
     caller of [run] to indicate whether to discard a token, via the parameter
     [please_discard]. This flag is set when [s] is being entered by shifting
     a terminal symbol and [s] does not have a default reduction on [#]. *)
85

86
  (* The following recursive group of functions are tail recursive, produce a
87 88 89
     checkpoint of type [semantic_value checkpoint], and cannot raise an
     exception. A semantic action can raise [Error], but this exception is
     immediately caught within [reduce]. *)
90

91
  let rec run env please_discard : semantic_value checkpoint =
92 93

    (* Log the fact that we just entered this state. *)
94

95 96
    if log then
      Log.state env.current;
97

98 99 100 101 102
    (* If [please_discard] is set, we discard the current lookahead token and
       fetch the next one. In order to request a token from the user, we
       return an [InputNeeded] continuation, which, when invoked by the user,
       will take us to [discard]. If [please_discard] is not set, we skip this
       step and jump directly to [check_for_default_reduction]. *)
103 104

    if please_discard then
105
      InputNeeded env
106 107 108
    else
      check_for_default_reduction env

109 110
  (* [discard env triple] stores [triple] into [env], overwriting the previous
     token. It is invoked by [offer], which itself is invoked by the user in
111
     response to an [InputNeeded] checkpoint. *)
112

113
  and discard env triple =
114 115 116 117
    if log then begin
      let (token, startp, endp) = triple in
      Log.lookahead_token (T.token2terminal token) startp endp
    end;
118
    let env = { env with error = false; triple } in
119 120 121
    check_for_default_reduction env

  and check_for_default_reduction env =
122 123 124 125 126 127

    (* Examine what situation we are in. This case analysis is analogous to
       that performed in [CodeBackend.gettoken], in the sub-case where we do
       not have a terminal incoming symbol. *)

    T.default_reduction
128
      env.current
129
      announce_reduce       (* there is a default reduction; perform it *)
130
      check_for_error_token (* there is none; continue below *)
131 132
      env

133
  and check_for_error_token env =
134 135 136 137 138

    (* There is no default reduction. Consult the current lookahead token
       so as to determine which action should be taken. *)

    (* Peeking at the first input token, without taking it off the input
139
       stream, is done by reading [env.triple]. We are careful to first
140
       check [env.error]. *)
141

142 143
    (* Note that, if [please_discard] was true, then we have just called
       [discard], so the lookahead token cannot be [error]. *)
144

145 146 147
    (* Returning [HandlingError env] is equivalent to calling [error env]
       directly, except it allows the user to regain control. *)

148
    if env.error then begin
149 150
      if log then
        Log.resuming_error_handling();
151
      HandlingError env
152 153
    end
    else
154
      let (token, _, _) = env.triple in
155

156 157 158
      (* We consult the two-dimensional action table, indexed by the
         current state and the current lookahead token, in order to
         determine which action should be taken. *)
159

160 161 162 163 164
      T.action
        env.current                    (* determines a row *)
        (T.token2terminal token)       (* determines a column *)
        (T.token2value token)
        shift                          (* shift continuation *)
165
        announce_reduce                (* reduce continuation *)
166 167
        initiate                       (* failure continuation *)
        env
168

POTTIER Francois's avatar
POTTIER Francois committed
169
  (* ------------------------------------------------------------------------ *)
170 171 172 173 174

  (* This function takes care of shift transitions along a terminal symbol.
     (Goto transitions are taken care of within [reduce] below.) The symbol
     can be either an actual token or the [error] pseudo-token. *)

175
  (* Here, the lookahead token CAN be [error]. *)
176

177 178 179 180
  and shift env
      (please_discard : bool)
      (terminal : terminal)
      (value : semantic_value)
181
      (s' : state) =
182 183 184

    (* Log the transition. *)

185 186
    if log then
      Log.shift terminal s';
187 188 189 190

    (* Push a new cell onto the stack, containing the identity of the
       state that we are leaving. *)

191
    let (_, startp, endp) = env.triple in
192
    let stack = {
193 194
      state = env.current;
      semv = value;
195 196
      startp;
      endp;
197
      next = env.stack;
198
    } in
199 200 201

    (* Switch to state [s']. *)

202 203 204 205 206 207 208 209 210 211
    let new_env = { env with stack; current = s' } in

    (* Expose the transition to the user. (In principle, we have a choice
       between exposing the transition before we take it, after we take
       it, or at some point in between. This affects the number and type
       of the parameters carried by [Shifting]. Here, we choose to expose
       the transition after we take it; this allows [Shifting] to carry
       only three parameters, whose meaning is simple.) *)

    Shifting (env, new_env, please_discard)
212

POTTIER Francois's avatar
POTTIER Francois committed
213
  (* ------------------------------------------------------------------------ *)
214

215
  (* The function [announce_reduce] stops the parser and returns a checkpoint
216 217
     which allows the parser to be resumed by calling [reduce]. *)

218 219 220 221
  (* Only ordinary productions are exposed to the user. Start productions
     are not exposed to the user. Reducing a start production simply leads
     to the successful termination of the parser. *)

222
  and announce_reduce env (prod : production) =
223 224 225 226
    if T.is_start prod then
      accept env prod
    else
      AboutToReduce (env, prod)
227

228 229
  (* The function [reduce] takes care of reductions. It is invoked by
     [resume] after an [AboutToReduce] event has been produced. *)
230

231
  (* Here, the lookahead token CAN be [error]. *)
232

233 234
  (* The production [prod] CANNOT be a start production. *)

235
  and reduce env (prod : production) =
236 237 238

    (* Log a reduction event. *)

239 240
    if log then
      Log.reduce_or_accept prod;
241 242

    (* Invoke the semantic action. The semantic action is responsible for
243
       truncating the stack and pushing a new cell onto the stack, which
244
       contains a new semantic value. It can raise [Error]. *)
245

246
    (* If the semantic action terminates normally, it returns a new stack,
247
       which becomes the current stack. *)
248

249 250
    (* If the semantic action raises [Error], we catch it and initiate error
       handling. *)
251

252 253 254 255
    (* This [match/with/exception] construct requires OCaml 4.02. *)

    match T.semantic_action prod env with
    | stack ->
256

257 258 259
        (* By our convention, the semantic action has produced an updated
           stack. The state now found in the top stack cell is the return
           state. *)
260

261 262 263
        (* Perform a goto transition. The target state is determined
           by consulting the goto table at the return state and at
           production [prod]. *)
264

265
        let current = T.goto_prod stack.state prod in
266 267
        let env = { env with stack; current } in
        run env false
268

269
    | exception Error ->
270
        initiate env
271

272 273 274 275 276 277 278 279 280
  and accept env prod =
    (* Log an accept event. *)
    if log then
      Log.reduce_or_accept prod;
    (* Extract the semantic value out of the stack. *)
    let v = env.stack.semv in
    (* Finish. *)
    Accepted v

POTTIER Francois's avatar
POTTIER Francois committed
281
  (* ------------------------------------------------------------------------ *)
282 283 284

  (* The following functions deal with errors. *)

285 286
  (* [initiate] initiates or resumes error handling. *)

287
  (* Here, the lookahead token CAN be [error]. *)
288

289
  and initiate env =
290 291
    if log then
      Log.initiating_error_handling();
292
    let env = { env with error = true } in
293
    HandlingError env
294 295 296

  (* [error] handles errors. *)

297
  and error env =
298
    assert env.error;
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313

    (* Consult the column associated with the [error] pseudo-token in the
       action table. *)

    T.action
      env.current                    (* determines a row *)
      T.error_terminal               (* determines a column *)
      T.error_value
      error_shift                    (* shift continuation *)
      error_reduce                   (* reduce continuation *)
      error_fail                     (* failure continuation *)
      env

  and error_shift env please_discard terminal value s' =

POTTIER Francois's avatar
POTTIER Francois committed
314 315
    (* Here, [terminal] is [T.error_terminal],
       and [value] is [T.error_value]. *)
316 317 318 319 320

    assert (terminal = T.error_terminal && value = T.error_value);

    (* This state is capable of shifting the [error] token. *)

321 322
    if log then
      Log.handling_error env.current;
323 324 325 326 327 328
    shift env please_discard terminal value s'

  and error_reduce env prod =

    (* This state is capable of performing a reduction on [error]. *)

329 330
    if log then
      Log.handling_error env.current;
331
    reduce env prod
332 333 334
      (* Intentionally calling [reduce] instead of [announce_reduce].
         It does not seem very useful, and it could be confusing, to
         expose the reduction steps taken during error handling. *)
335 336 337 338 339 340 341 342 343 344 345 346

  and error_fail env =

    (* This state is unable to handle errors. Attempt to pop a stack
       cell. *)

    let cell = env.stack in
    let next = cell.next in
    if next == cell then

      (* The stack is empty. Die. *)

347
      Rejected
348 349 350 351

    else begin

      (* The stack is nonempty. Pop a cell, updating the current state
352
         with that found in the popped cell, and try again. *)
353

354 355 356 357
      let env = { env with
        stack = next;
        current = cell.state
      } in
358
      HandlingError env
359 360 361

    end

362 363
  (* End of the nest of tail recursive functions. *)

POTTIER Francois's avatar
POTTIER Francois committed
364 365
  (* ------------------------------------------------------------------------ *)
  (* ------------------------------------------------------------------------ *)
366

POTTIER Francois's avatar
POTTIER Francois committed
367
  (* The incremental interface. See [EngineTypes]. *)
368

POTTIER Francois's avatar
POTTIER Francois committed
369
  (* [start s] begins the parsing process. *)
370

POTTIER Francois's avatar
POTTIER Francois committed
371
  let start (s : state) (initial : position) : semantic_value checkpoint =
372

373 374 375 376 377
    (* Build an empty stack. This is a dummy cell, which is its own successor.
       Its [next] field WILL be accessed by [error_fail] if an error occurs and
       is propagated all the way until the stack is empty. Its [endp] field WILL
       be accessed (by a semantic action) if an epsilon production is reduced
       when the stack is empty. *)
378 379 380 381

    let rec empty = {
      state = s;                          (* dummy *)
      semv = T.error_value;               (* dummy *)
382 383
      startp = initial;                   (* dummy *)
      endp = initial;
384 385 386 387 388
      next = empty;
    } in

    (* Build an initial environment. *)

POTTIER Francois's avatar
POTTIER Francois committed
389 390 391 392 393 394 395 396
    (* Unfortunately, there is no type-safe way of constructing a
       dummy token. Tokens carry semantic values, which in general
       we cannot manufacture. This instance of [Obj.magic] could
       be avoided by adopting a different representation (e.g., no
       [env.error] field, and an option in the first component of
       [env.triple]), but I like this representation better. *)

    let dummy_token = Obj.magic () in
397
    let env = {
398
      error = false;
399
      triple = (dummy_token, initial, initial); (* dummy *)
400 401 402 403
      stack = empty;
      current = s;
    } in

404
    (* Begin parsing. *)
405

406 407 408 409 410
    (* The parameter [please_discard] here is [true], which means we know
       that we must read at least one token. This claim relies on the fact
       that we have ruled out the two special cases where a start symbol
       recognizes the empty language or the singleton language {epsilon}. *)

411
    run env true
412

413 414 415
  (* [offer checkpoint triple] is invoked by the user in response to a
     checkpoint of the form [InputNeeded env]. It checks that [checkpoint] is
     indeed of this form, and invokes [discard]. *)
416

417 418 419 420
  (* [resume checkpoint] is invoked by the user in response to a checkpoint of
     the form [AboutToReduce (env, prod)] or [HandlingError env]. It checks
     that [checkpoint] is indeed of this form, and invokes [reduce] or
     [error], as appropriate. *)
421

422
  (* In reality, [offer] and [resume] accept an argument of type
423 424 425
     [semantic_value checkpoint] and produce a checkpoint of the same type.
     The choice of [semantic_value] is forced by the fact that this is the
     parameter of the checkpoint [Accepted]. *)
426

427
  (* We change this as follows. *)
428

429
  (* We change the argument and result type of [offer] and [resume] from
430 431 432 433 434 435 436 437 438 439 440
     [semantic_value checkpoint] to ['a checkpoint]. This is safe, in this
     case, because we give the user access to values of type [t checkpoint]
     only if [t] is indeed the type of the eventual semantic value for this
     run. (More precisely, by examining the signatures [INCREMENTAL_ENGINE]
     and [INCREMENTAL_ENGINE_START], one finds that the user can build a value
     of type ['a checkpoint] only if ['a] is [semantic_value]. The table
     back-end goes further than this and produces versions of [start] composed
     with a suitable cast, which give the user access to a value of type
     [t checkpoint] where [t] is the type of the start symbol.) *)

  let offer : 'a . 'a checkpoint ->
POTTIER Francois's avatar
POTTIER Francois committed
441
                   token * position * position ->
442 443
                   'a checkpoint
  = function
444 445 446
    | InputNeeded env ->
        Obj.magic discard env
    | _ ->
POTTIER Francois's avatar
POTTIER Francois committed
447
        invalid_arg "offer expects InputNeeded"
448

449
  let resume : 'a . 'a checkpoint -> 'a checkpoint = function
450 451
    | HandlingError env ->
        Obj.magic error env
452 453
    | Shifting (_, env, please_discard) ->
        Obj.magic run env please_discard
454 455 456
    | AboutToReduce (env, prod) ->
        Obj.magic reduce env prod
    | _ ->
POTTIER Francois's avatar
POTTIER Francois committed
457
        invalid_arg "resume expects HandlingError | AboutToReduce"
458

POTTIER Francois's avatar
POTTIER Francois committed
459 460
  (* ------------------------------------------------------------------------ *)
  (* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
461 462

  (* The traditional interface. See [EngineTypes]. *)
463

POTTIER Francois's avatar
POTTIER Francois committed
464
  (* ------------------------------------------------------------------------ *)
465

466
  (* Wrapping a lexer and lexbuf as a token supplier. *)
467

468
  type supplier =
POTTIER Francois's avatar
POTTIER Francois committed
469
    unit -> token * position * position
POTTIER Francois's avatar
POTTIER Francois committed
470

471
  let lexer_lexbuf_to_supplier
472 473
      (lexer : Lexing.lexbuf -> token)
      (lexbuf : Lexing.lexbuf)
474
  : supplier =
475 476 477 478 479 480
    fun () ->
      let token = lexer lexbuf in
      let startp = lexbuf.Lexing.lex_start_p
      and endp = lexbuf.Lexing.lex_curr_p in
      token, startp, endp

POTTIER Francois's avatar
POTTIER Francois committed
481
  (* ------------------------------------------------------------------------ *)
482

483 484 485 486
  (* The main loop repeatedly handles intermediate checkpoints, until a final
     checkpoint is obtained. This allows implementing the monolithic interface
     ([entry]) in terms of the incremental interface ([start], [offer],
     [handle], [reduce]). *)
487

POTTIER Francois's avatar
POTTIER Francois committed
488 489
  (* By convention, acceptance is reported by returning a semantic value,
     whereas rejection is reported by raising [Error]. *)
POTTIER Francois's avatar
POTTIER Francois committed
490

491 492 493 494
  (* [loop] is polymorphic in ['a]. No cheating is involved in achieving this.
     All of the cheating resides in the types assigned to [offer] and [handle]
     above. *)

495 496 497
  let rec loop : 'a . supplier -> 'a checkpoint -> 'a =
    fun read checkpoint ->
    match checkpoint with
498
    | InputNeeded _ ->
499 500
        (* The parser needs a token. Request one from the lexer,
           and offer it to the parser, which will produce a new
501
           checkpoint. Then, repeat. *)
POTTIER Francois's avatar
POTTIER Francois committed
502
        let triple = read() in
503 504
        let checkpoint = offer checkpoint triple in
        loop read checkpoint
505
    | Shifting _
506 507
    | AboutToReduce _
    | HandlingError _ ->
508 509
        (* The parser has suspended itself, but does not need
           new input. Just resume the parser. Then, repeat. *)
510 511
        let checkpoint = resume checkpoint in
        loop read checkpoint
POTTIER Francois's avatar
POTTIER Francois committed
512
    | Accepted v ->
513 514
        (* The parser has succeeded and produced a semantic value.
           Return this semantic value to the user. *)
POTTIER Francois's avatar
POTTIER Francois committed
515 516
        v
    | Rejected ->
517
        (* The parser rejects this input. Raise an exception. *)
POTTIER Francois's avatar
POTTIER Francois committed
518
        raise Error
519

POTTIER Francois's avatar
POTTIER Francois committed
520
  let entry (s : state) lexer lexbuf : semantic_value =
521 522
    let initial = lexbuf.Lexing.lex_curr_p in
    loop (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial)
523

POTTIER Francois's avatar
POTTIER Francois committed
524
  (* ------------------------------------------------------------------------ *)
525

526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
  (* [loop_handle] stops if it encounters an error, and at this point, invokes
     its failure continuation, without letting Menhir do its own traditional
     error-handling (which involves popping the stack, etc.). *)

  let rec loop_handle succeed fail read checkpoint =
    match checkpoint with
    | InputNeeded _ ->
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop_handle succeed fail read checkpoint
    | Shifting _
    | AboutToReduce _ ->
        let checkpoint = resume checkpoint in
        loop_handle succeed fail read checkpoint
    | HandlingError _
    | Rejected ->
        (* The parser has detected an error. Invoke the failure continuation. *)
        fail checkpoint
    | Accepted v ->
545 546 547 548
        (* The parser has succeeded and produced a semantic value. Invoke the
           success continuation. *)
        succeed v

POTTIER Francois's avatar
POTTIER Francois committed
549
  (* ------------------------------------------------------------------------ *)
550 551 552 553

  (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair
     of checkpoints to the failure continuation.

POTTIER Francois's avatar
POTTIER Francois committed
554 555 556 557 558 559
     The first (and oldest) checkpoint is the last [InputNeeded] checkpoint
     that was encountered before the error was detected. The second (and
     newest) checkpoint is where the error was detected, as in [loop_handle].
     Going back to the first checkpoint can be thought of as undoing any
     reductions that were performed after seeing the problematic token. (These
     reductions must be default reductions or spurious reductions.) *)
560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576

  let rec loop_handle_undo succeed fail read (inputneeded, checkpoint) =
    match checkpoint with
    | InputNeeded _ ->
        (* Update the last recorded [InputNeeded] checkpoint. *)
        let inputneeded = checkpoint in
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop_handle_undo succeed fail read (inputneeded, checkpoint)
    | Shifting _
    | AboutToReduce _ ->
        let checkpoint = resume checkpoint in
        loop_handle_undo succeed fail read (inputneeded, checkpoint)
    | HandlingError _
    | Rejected ->
        fail inputneeded checkpoint
    | Accepted v ->
577
        succeed v
578

579 580 581 582 583 584 585 586 587 588 589 590 591 592
  (* For simplicity, we publish a version of [loop_handle_undo] that takes a
     single checkpoint as an argument, instead of a pair of checkpoints. We
     check that the argument is [InputNeeded _], and duplicate it. *)

  (* The parser cannot accept or reject before it asks for the very first
     character of input. (Indeed, we statically reject a symbol that
     generates the empty language or the singleton language {epsilon}.)
     So, the [start] checkpoint must match [InputNeeded _]. Hence, it is
     permitted to call [loop_handle_undo] with a [start] checkpoint. *)

  let loop_handle_undo succeed fail read checkpoint =
    assert (match checkpoint with InputNeeded _ -> true | _ -> false);
    loop_handle_undo succeed fail read (checkpoint, checkpoint)

POTTIER Francois's avatar
POTTIER Francois committed
593 594 595 596 597 598 599 600 601 602
  (* ------------------------------------------------------------------------ *)

  (* [loop_test f checkpoint accu] assumes that [checkpoint] has been obtained
     by submitting a token to the parser. It runs the parser from [checkpoint],
     through an arbitrary number of reductions, until the parser either accepts
     this token (i.e., shifts) or rejects it (i.e., signals an error). If the
     parser decides to shift, then the accumulator is updated by applying the
     user function [f] to the [env] just before shifting and to the old [accu].
     Otherwise, the accumulator is not updated, i.e., [accu] is returned. *)

POTTIER Francois's avatar
POTTIER Francois committed
603 604
  (* This test causes some semantic actions to be run! The semantic actions
     should be side-effect free, or their side-effects should be harmless. *)
POTTIER Francois's avatar
POTTIER Francois committed
605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627

  let rec loop_test f checkpoint accu =
    match checkpoint with
    | Shifting (env, _, _) ->
        (* The parser is about to shift, which means it is willing to
           consume the terminal symbol that we have fed it. Update the
           accumulator with the state just before this transition. *)
        f env accu
    | AboutToReduce _ ->
        (* The parser wishes to reduce. Just follow. *)
        loop_test f (resume checkpoint) accu
    | HandlingError _ ->
        (* The parser fails, which means it rejects the terminal symbol
           that we have fed it. Do not update the accumulator. *)
        accu
    | InputNeeded _
    | Accepted _
    | Rejected ->
        (* None of these cases can arise. Indeed, after a token is submitted
           to it, the parser must shift, reduce, or signal an error, before
           it can request another token or terminate. *)
        assert false

POTTIER Francois's avatar
POTTIER Francois committed
628
  (* ------------------------------------------------------------------------ *)
629

POTTIER Francois's avatar
POTTIER Francois committed
630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649
  (* The function [loop_test] can be used, after an error has been detected, to
     dynamically test which tokens would have been accepted at this point. We
     provide this test, ready for use. *)

  (* For completeness, one must undo any spurious reductions before carrying out
     this test -- that is, one must apply [acceptable] to the FIRST checkpoint
     that is passed by [loop_handle_undo] to its failure continuation. *)

  (* This test causes some semantic actions to be run! The semantic actions
     should be side-effect free, or their side-effects should be harmless. *)

  (* The position [pos] is used as the start and end positions of the
     hypothetical token, and may be picked up by the semantic actions. We
     suggest using the position where the error was detected. *)

  let acceptable checkpoint token pos =
    let triple = (token, pos, pos) in
    let checkpoint = offer checkpoint triple in
    loop_test (fun _env _accu -> true) checkpoint false

POTTIER Francois's avatar
POTTIER Francois committed
650
  (* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
651

652 653 654 655 656 657 658 659 660 661 662 663 664
  (* The type ['a lr1state] describes the (non-initial) states of the LR(1)
     automaton. The index ['a] represents the type of the semantic value
     associated with the state's incoming symbol. *)

  (* The type ['a lr1state] is defined as an alias for [state], which itself
     is usually defined as [int] (see [TableInterpreter]). So, ['a lr1state]
     is technically a phantom type, but should really be thought of as a GADT
     whose data constructors happen to be represented as integers. It is
     presented to the user as an abstract type (see [IncrementalEngine]). *)

  type 'a lr1state =
      state

665 666 667 668 669 670
  let find_default_reduction (state : _ lr1state) : production option =
    T.default_reduction state
      (fun () prod -> Some prod)
      (fun () -> None)
      ()

POTTIER Francois's avatar
POTTIER Francois committed
671
  (* ------------------------------------------------------------------------ *)
672

673 674 675 676 677 678 679
  (* Stack inspection. *)

  (* We offer a read-only view of the parser's state as a stream of elements.
     Each element contains a pair of a (non-initial) state and a semantic
     value associated with (the incoming symbol of) this state. Note that the
     type [element] is an existential type. *)

680
  type element =
POTTIER Francois's avatar
POTTIER Francois committed
681
    | Element: 'a lr1state * 'a * position * position -> element
682

683 684
  open General

685 686 687
  type stack =
    element stream

688
  (* If [current] is the current state and [cell] is the top stack cell,
689
     then [stack cell current] is a view of the parser's state as a stream
690
     of elements. *)
691

692
  let rec stack cell current : element stream =
693 694 695 696 697 698 699 700 701 702 703 704
    lazy (
      (* The stack is empty iff the top stack cell is its own successor. In
         that case, the current state [current] should be an initial state
         (which has no incoming symbol).
         We do not allow the user to inspect this state. *)
      let next = cell.next in
      if next == cell then
        Nil
      else
        (* Construct an element containing the current state [current] as well
           as the semantic value contained in the top stack cell. This semantic
           value is associated with the incoming symbol of this state, so it
705 706 707 708 709 710
           makes sense to pair them together. The state has type ['a state] and
           the semantic value has type ['a], for some type ['a]. Here, the OCaml
           type-checker thinks ['a] is [semantic_value] and considers this code
           well-typed. Outside, we will use magic to provide the user with a way
           of inspecting states and recovering the value of ['a]. *)
        let element = Element (
711 712 713 714 715
          current,
          cell.semv,
          cell.startp,
          cell.endp
        ) in
716
        Cons (element, stack next cell.state)
717 718
    )

719 720
  let stack env : element stream =
    stack env.stack env.current
721

POTTIER Francois's avatar
POTTIER Francois committed
722
  (* ------------------------------------------------------------------------ *)
723 724 725 726 727 728

  (* Access to the position of the lookahead token. *)

  let positions { triple = (_, startp, endp); _ } =
    startp, endp

POTTIER Francois's avatar
POTTIER Francois committed
729
  (* ------------------------------------------------------------------------ *)
730 731 732 733 734 735 736 737 738 739 740 741 742

  (* Access to information about default reductions. *)

  (* We can make this a function of states, or a function of environments. For
     now, the latter appears simpler. *)

  let has_default_reduction env : bool =
    T.default_reduction
      env.current
      (fun _env _prod -> true)
      (fun _env -> false)
      env

POTTIER Francois's avatar
POTTIER Francois committed
743
  (* ------------------------------------------------------------------------ *)
744

745
  (* TEMPORARY comment *)
746 747
  (* TEMPORARY add calls to new [Log] functions? : log [pop], [feed], [force] *)

748
  let pop (env : 'a env) : 'a env option =
749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765
    let cell = env.stack in
    let next = cell.next in
    if next == cell then
      None
    else
      Some { env with stack = next; current = cell.state }

  (* TEMPORARY potential danger:
     - should invoke this ONLY when the stack shape allows this reduction!
       otherwise the semantic action could crash.
       (checked at runtime; raises Invalid_argument)
     - semantic action can raise [Error] *)

  (* This function is analogous to [reduce], except that it does not continue
     by calling [run env] or [initiate env]. Instead, it returns [env] to the
     user, or raises [Error]. *)

766
  let force_reduction prod (env : 'a env) : 'a env =
767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784
    (* Check if this reduction is permitted. This check is REALLY important.
       The stack must have the correct shape: that is, it must be sufficiently
       high, and must contain semantic values of appropriate types, otherwise
       the semantic action will crash and burn. *)
    if not (T.may_reduce env.current prod) then
      invalid_arg "force_reduction: this reduction is not permitted in this state"
    else begin
      (* Log a reduction event. *)
      if log then
        Log.reduce_or_accept prod;
      (* Invoke the semantic action. *)
      let stack = T.semantic_action prod env in
      (* Perform a goto transition. *)
      let current = T.goto_prod stack.state prod in
      { env with stack; current }
    end

  (* TEMPORARY potential danger:
785 786 787 788
     - messing up the lookahead (i.e. moving to a state where the lookahead
       symbol cannot be [t], yet is [t]) (or moving to a state where we
       we should not ask for one more symbol, yet constructing [InputNeeded])
       -- NOT PREVENTED
789 790 791 792 793 794 795
     - violates the invariant that an input token is normally demanded only
       in a state [s] whose incoming symbol is a terminal symbol
       and which does not have a default reduction on [#]
       (so the lookahead can still be messed up)
       (not really problematic? but worth noting)
     - for type safety, should correlate 'a env with 'a checkpoint
   *)
796
  let input_needed (env : 'a env) : 'a checkpoint =
797 798
    InputNeeded env

799
end