referenceInterpreter.ml 11 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16
open Grammar
open Cst

17 18
(* ------------------------------------------------------------------------ *)

19 20 21 22 23 24 25
(* Set up all of the information required by the LR engine. Everything is
   read directly from [Grammar] and [Lr1]. *)

module T = struct

  type state =
      Lr1.node
26 27 28

  let number =
    Lr1.number
29

30 31 32 33 34 35
  type token =
      Terminal.t

  type terminal =
      Terminal.t

36 37 38
  type nonterminal =
      Nonterminal.t

39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
  type semantic_value =
      cst

  let token2terminal (token : token) : terminal =
    token

  let token2value (token : token) : semantic_value =
    CstTerminal token

  let error_terminal =
    Terminal.error

  let error_value =
    CstError

54 55 56
  let foreach_terminal =
    Terminal.foldx

57 58 59 60 61 62
  type production =
      Production.index

  let default_reduction (s : state) defred nodefred env =
    match Invariant.has_default_reduction s with
    | Some (prod, _) ->
63
        defred env prod
64
    | None ->
65
        nodefred env
66 67 68 69 70 71 72 73 74 75

  let action (s : state) (tok : terminal) value shift reduce fail env =

    (* Check whether [s] has an outgoing shift transition along [tok]. *)

    try

      let s' : state = SymbolMap.find (Symbol.T tok) (Lr1.transitions s) in

      (* There is such a transition. Return either [ShiftDiscard] or
76 77
         [ShiftNoDiscard], depending on the existence of a default
         reduction on [#] at [s']. *)
78 79 80

      match Invariant.has_default_reduction s' with
      | Some (_, toks) when TerminalSet.mem Terminal.sharp toks ->
81
          shift env false tok value s'
82
      | _ ->
83
          shift env true tok value s'
84

85 86 87 88 89
    (* There is no such transition. Look for a reduction. *)

    with Not_found ->
      try

90 91
        let prod = Misc.single (TerminalMap.find tok (Lr1.reductions s)) in
        reduce env prod
92 93 94 95

      (* There is no reduction either. Fail. *)

      with Not_found ->
96
        fail env
97

98
  let goto_nt (s : state) (nt : nonterminal) : state =
99
    try
100
      SymbolMap.find (Symbol.N nt) (Lr1.transitions s)
101 102 103
    with Not_found ->
      assert false

104 105 106
  let goto_prod (s : state) (prod : production) : state =
    goto_nt s (Production.nt prod)

107 108 109 110 111 112
  let maybe_goto_nt (s : state) (nt : nonterminal) : state option =
    try
      Some (SymbolMap.find (Symbol.N nt) (Lr1.transitions s))
    with Not_found ->
      None

113 114 115 116
  open MenhirLib.EngineTypes

  exception Error

117 118 119
  (* By convention, a semantic action returns a new stack. It does not
     affect [env]. *)

120 121 122
  let is_start =
    Production.is_start

123
  type semantic_action =
124
      (state, semantic_value, token) env -> (state, semantic_value) stack
125 126 127

  let semantic_action (prod : production) : semantic_action =
    fun env ->
128 129 130
      assert (not (Production.is_start prod));

      (* Reduce. Pop a suffix of the stack, and use it to construct a
131
         new concrete syntax tree node. *)
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192

      let n = Production.length prod in

      let values : semantic_value array =
        Array.make n CstError (* dummy *)
      and startp =
        ref Lexing.dummy_pos
      and endp=
        ref Lexing.dummy_pos
      and current =
        ref env.current
      and stack =
        ref env.stack
      in

      (* We now enter a loop to pop [k] stack cells and (after that) push
         a new cell onto the stack. *)

      (* This loop does not update [env.current]. Instead, the state in
         the newly pushed stack cell will be used (by our caller) as a
         basis for a goto transition, and [env.current] will be updated
         (if necessary) then. *)

      for k = n downto 1 do

        (* Fetch a semantic value. *)

        values.(k - 1) <- !stack.semv;

        (* Pop one cell. The stack must be non-empty. As we pop a cell,
           change the automaton's current state to the one stored within
           the cell. (It is sufficient to do this only when [k] is 1,
           since the last write overwrites any and all previous writes.)
           If this is the first (last) cell that we pop, update [endp]
           ([startp]). *)

        let next = !stack.next in
        assert (!stack != next);
        if k = n then begin
          endp := !stack.endp
        end;
        if k = 1 then begin
          current := !stack.state;
          startp := !stack.startp
        end;
        stack := next

      done;

      (* Done popping. *)

      (* Construct and push a new stack cell. The associated semantic
         value is a new concrete syntax tree. *)

      {
        state = !current;
        semv = CstNonTerminal (prod, values);
        startp = !startp;
        endp = !endp;
        next = !stack
      }
193

194
  (* The logging functions that follow are called only if [log] is [true]. *)
195

196 197 198 199 200
  module Log = struct

    open Printf

    let state s =
201 202
      fprintf stderr "State %d:" (Lr1.number s);
      prerr_newline()
203 204

    let shift tok s' =
205 206
      fprintf stderr "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s');
      prerr_newline()
207 208

    let reduce_or_accept prod =
209 210 211 212 213 214 215
      match Production.classify prod with
      | Some _ ->
         fprintf stderr "Accepting";
         prerr_newline()
      | None ->
         fprintf stderr "Reducing production %s" (Production.print prod);
         prerr_newline()
216

217
    let lookahead_token tok startp endp =
218 219 220 221 222
      fprintf stderr "Lookahead token is now %s (%d-%d)"
        (Terminal.print tok)
        startp.Lexing.pos_cnum
        endp.Lexing.pos_cnum;
      prerr_newline()
223 224

    let initiating_error_handling () =
225 226
      fprintf stderr "Initiating error handling";
      prerr_newline()
227 228

    let resuming_error_handling () =
229 230
      fprintf stderr "Resuming error handling";
      prerr_newline()
231 232

    let handling_error s =
233 234
      fprintf stderr "Handling error in state %d" (Lr1.number s);
      prerr_newline()
235 236 237 238 239

  end

end

240 241
(* ------------------------------------------------------------------------ *)

242 243 244 245
(* Define a palatable user entry point. *)

let interpret log nt lexer lexbuf =

246 247 248 249 250 251 252 253
  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
      let log = log
    end)
  in
254

255
  (* Run it. *)
256 257

  try
258
    Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf)
259 260
  with T.Error ->
    None
261

262 263
(* ------------------------------------------------------------------------ *)

264 265
(* Another entry point, used internally by [LRijkstra] to check that the
   sentences that [LRijkstra] produces do lead to an error in the expected
266 267
   state. *)

268 269
open MenhirLib.General (* streams *)

270 271
type spurious_reduction =
  Lr1.node * Production.index
272 273

type target =
274
  Lr1.node * spurious_reduction list
275

276 277 278 279 280 281 282
type check_error_path_outcome =
  (* Bad: the input was read past its end. *)
| OInputReadPastEnd
  (* Bad: a syntax error occurred before all of the input was read. *)
| OInputNotFullyConsumed
  (* Bad: the parser unexpectedly accepted (part of) this input. *)
| OUnexpectedAccept
283 284
  (* Good: a syntax error occurred after reading the last input token. We
     report in which state the error took place, as well as a list of spurious
285 286 287 288
     reductions. A non-default reduction that takes place after looking at the
     last input token (i.e., the erroneous token) is spurious. Furthermore, any
     reduction that takes place after a spurious reduction is itself spurious.
     We note that a spurious reduction can take place only in a non-canonical
289 290
     LR automaton. *)
| OK of target
291

292
let check_error_path log nt input =
293 294 295 296 297 298

  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
299
      let log = log
300 301 302
    end)
  in

303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
  (* Determine the initial state. *)

  let entry = Lr1.entry_of_nt nt in

  (* This function helps extract the current parser state out of [env].
     It may become unnecessary if the [Engine] API offers it. *)

  let current env =
    (* Peek at the stack. If empty, then we must be in the initial state. *)
    match Lazy.force (E.stack env) with
    | Nil ->
        entry
    | Cons (E.Element (s, _, _, _), _) ->
        s
  in

319
  (* Set up a function that delivers tokens one by one. *)
320

321 322 323 324 325 326 327 328 329
  let input = ref input in
  let next () =
    match !input with
    | [] ->
        None
    | t :: ts ->
        input := ts;
        Some t
  in
330

331 332 333 334
  let looking_at_last_token () : bool =
    !input = []
  in

335
  (* Run. We wish to stop at the first error (without handling the error
336 337 338 339
     in any way) and report in which state the error occurred. A clean way
     of doing this is to use the incremental API, as follows. The main loop
     resembles the [loop] function in [Engine]. *)

340 341 342 343
  (* Another reason why we write our own loop is that we wish to detect
     spurious reductions. We accumulate these reductions in [spurious], a
     (reversed) list of productions. *)

344 345
  let rec loop (checkpoint : cst E.checkpoint) (spurious : spurious_reduction list) =
    match checkpoint with
346
    | E.InputNeeded _ ->
347 348 349 350
      begin match next() with
      | None ->
        OInputReadPastEnd
      | Some t ->
351
        loop (E.offer checkpoint (t, Lexing.dummy_pos, Lexing.dummy_pos)) spurious
352 353
      end
    | E.Shifting _ ->
354
      loop (E.resume checkpoint) spurious
355 356
    | E.AboutToReduce (env, prod) ->
        (* If we have requested the last input token and if this is not
357 358 359
           a default reduction, then this is a spurious reduction.
           Furthermore, if a spurious reduction has taken place already,
           then this is also a spurious reduction. *)
360
        let spurious =
361 362 363
          if looking_at_last_token() && not (E.has_default_reduction env)
          || spurious <> []
          then
364
            (current env, prod) :: spurious
365 366 367
          else
            spurious
        in
368
        loop (E.resume checkpoint) spurious
369 370 371 372
    | E.HandlingError env ->
        (* Check that all of the input has been read. Otherwise, the error
           has occurred sooner than expected. *)
        if !input = [] then
373 374
          (* Return the current state and the list of spurious reductions. *)
          OK (current env, List.rev spurious)
375 376 377 378 379 380 381 382 383 384
        else
          OInputNotFullyConsumed
    | E.Accepted _ ->
        (* The parser has succeeded. This is unexpected. *)
        OUnexpectedAccept
    | E.Rejected ->
        (* The parser rejects this input. This should not happen; we
           should observe [HandlingError _] first. *)
        assert false
  in
385

386
  loop (E.start entry Lexing.dummy_pos) []