referenceInterpreter.ml 11.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16
open Grammar
open Cst

17 18
(* ------------------------------------------------------------------------ *)

19 20 21 22 23 24 25
(* Set up all of the information required by the LR engine. Everything is
   read directly from [Grammar] and [Lr1]. *)

module T = struct

  type state =
      Lr1.node
26 27 28

  let number =
    Lr1.number
29

30 31 32 33 34 35
  type token =
      Terminal.t

  type terminal =
      Terminal.t

36 37 38
  type nonterminal =
      Nonterminal.t

39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
  type semantic_value =
      cst

  let token2terminal (token : token) : terminal =
    token

  let token2value (token : token) : semantic_value =
    CstTerminal token

  let error_terminal =
    Terminal.error

  let error_value =
    CstError

54 55 56
  let foreach_terminal =
    Terminal.foldx

57 58 59
  type production =
      Production.index

60 61 62
  let production_index = Production.p2i
  let find_production  = Production.i2p

63
  let default_reduction (s : state) defred nodefred env =
64
    match Default.has_default_reduction s with
65
    | Some (prod, _) ->
66
        defred env prod
67
    | None ->
68
        nodefred env
69 70 71 72 73 74 75 76 77 78

  let action (s : state) (tok : terminal) value shift reduce fail env =

    (* Check whether [s] has an outgoing shift transition along [tok]. *)

    try

      let s' : state = SymbolMap.find (Symbol.T tok) (Lr1.transitions s) in

      (* There is such a transition. Return either [ShiftDiscard] or
79 80
         [ShiftNoDiscard], depending on the existence of a default
         reduction on [#] at [s']. *)
81

82
      match Default.has_default_reduction s' with
83
      | Some (_, toks) when TerminalSet.mem Terminal.sharp toks ->
84
          shift env false tok value s'
85
      | _ ->
86
          shift env true tok value s'
87

88 89 90 91 92
    (* There is no such transition. Look for a reduction. *)

    with Not_found ->
      try

93 94
        let prod = Misc.single (TerminalMap.find tok (Lr1.reductions s)) in
        reduce env prod
95 96 97 98

      (* There is no reduction either. Fail. *)

      with Not_found ->
99
        fail env
100

101
  let goto_nt (s : state) (nt : nonterminal) : state =
102
    try
103
      SymbolMap.find (Symbol.N nt) (Lr1.transitions s)
104 105 106
    with Not_found ->
      assert false

107 108 109
  let goto_prod (s : state) (prod : production) : state =
    goto_nt s (Production.nt prod)

110 111 112 113 114 115
  let maybe_goto_nt (s : state) (nt : nonterminal) : state option =
    try
      Some (SymbolMap.find (Symbol.N nt) (Lr1.transitions s))
    with Not_found ->
      None

116 117 118 119
  open MenhirLib.EngineTypes

  exception Error

120 121 122
  (* By convention, a semantic action returns a new stack. It does not
     affect [env]. *)

123 124 125
  let is_start =
    Production.is_start

126
  type semantic_action =
127
      (state, semantic_value, token) env -> (state, semantic_value) stack
128 129 130

  let semantic_action (prod : production) : semantic_action =
    fun env ->
131 132 133
      assert (not (Production.is_start prod));

      (* Reduce. Pop a suffix of the stack, and use it to construct a
134
         new concrete syntax tree node. *)
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195

      let n = Production.length prod in

      let values : semantic_value array =
        Array.make n CstError (* dummy *)
      and startp =
        ref Lexing.dummy_pos
      and endp=
        ref Lexing.dummy_pos
      and current =
        ref env.current
      and stack =
        ref env.stack
      in

      (* We now enter a loop to pop [k] stack cells and (after that) push
         a new cell onto the stack. *)

      (* This loop does not update [env.current]. Instead, the state in
         the newly pushed stack cell will be used (by our caller) as a
         basis for a goto transition, and [env.current] will be updated
         (if necessary) then. *)

      for k = n downto 1 do

        (* Fetch a semantic value. *)

        values.(k - 1) <- !stack.semv;

        (* Pop one cell. The stack must be non-empty. As we pop a cell,
           change the automaton's current state to the one stored within
           the cell. (It is sufficient to do this only when [k] is 1,
           since the last write overwrites any and all previous writes.)
           If this is the first (last) cell that we pop, update [endp]
           ([startp]). *)

        let next = !stack.next in
        assert (!stack != next);
        if k = n then begin
          endp := !stack.endp
        end;
        if k = 1 then begin
          current := !stack.state;
          startp := !stack.startp
        end;
        stack := next

      done;

      (* Done popping. *)

      (* Construct and push a new stack cell. The associated semantic
         value is a new concrete syntax tree. *)

      {
        state = !current;
        semv = CstNonTerminal (prod, values);
        startp = !startp;
        endp = !endp;
        next = !stack
      }
196

197 198
  let may_reduce node prod =
    Lr1.NodeSet.mem node (Lr1.production_where prod)
199

200
  (* The logging functions that follow are called only if [log] is [true]. *)
201

202 203 204 205 206
  module Log = struct

    open Printf

    let state s =
207 208
      fprintf stderr "State %d:" (Lr1.number s);
      prerr_newline()
209 210

    let shift tok s' =
211 212
      fprintf stderr "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s');
      prerr_newline()
213 214

    let reduce_or_accept prod =
215 216 217 218 219 220 221
      match Production.classify prod with
      | Some _ ->
         fprintf stderr "Accepting";
         prerr_newline()
      | None ->
         fprintf stderr "Reducing production %s" (Production.print prod);
         prerr_newline()
222

223
    let lookahead_token tok startp endp =
224 225 226 227 228
      fprintf stderr "Lookahead token is now %s (%d-%d)"
        (Terminal.print tok)
        startp.Lexing.pos_cnum
        endp.Lexing.pos_cnum;
      prerr_newline()
229 230

    let initiating_error_handling () =
231 232
      fprintf stderr "Initiating error handling";
      prerr_newline()
233 234

    let resuming_error_handling () =
235 236
      fprintf stderr "Resuming error handling";
      prerr_newline()
237 238

    let handling_error s =
239 240
      fprintf stderr "Handling error in state %d" (Lr1.number s);
      prerr_newline()
241 242 243 244 245

  end

end

246 247
(* ------------------------------------------------------------------------ *)

248 249 250 251
(* Define a palatable user entry point. *)

let interpret log nt lexer lexbuf =

252 253 254 255 256 257 258 259
  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
      let log = log
    end)
  in
260

261
  (* Run it. *)
262 263

  try
264
    Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf)
265 266
  with T.Error ->
    None
267

268 269
(* ------------------------------------------------------------------------ *)

270 271
(* Another entry point, used internally by [LRijkstra] to check that the
   sentences that [LRijkstra] produces do lead to an error in the expected
272 273
   state. *)

274 275
type spurious_reduction =
  Lr1.node * Production.index
276 277

type target =
278
  Lr1.node * spurious_reduction list
279

280 281 282 283 284 285 286
type check_error_path_outcome =
  (* Bad: the input was read past its end. *)
| OInputReadPastEnd
  (* Bad: a syntax error occurred before all of the input was read. *)
| OInputNotFullyConsumed
  (* Bad: the parser unexpectedly accepted (part of) this input. *)
| OUnexpectedAccept
287 288
  (* Good: a syntax error occurred after reading the last input token. We
     report in which state the error took place, as well as a list of spurious
289 290 291 292
     reductions. A non-default reduction that takes place after looking at the
     last input token (i.e., the erroneous token) is spurious. Furthermore, any
     reduction that takes place after a spurious reduction is itself spurious.
     We note that a spurious reduction can take place only in a non-canonical
293 294
     LR automaton. *)
| OK of target
295

296
let check_error_path log nt input =
297 298 299 300 301 302

  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
303
      let log = log
304 305 306
    end)
  in

307 308 309 310 311 312 313 314 315
  (* Determine the initial state. *)

  let entry = Lr1.entry_of_nt nt in

  (* This function helps extract the current parser state out of [env].
     It may become unnecessary if the [Engine] API offers it. *)

  let current env =
    (* Peek at the stack. If empty, then we must be in the initial state. *)
316 317
    match E.top env with
    | None ->
318
        entry
319
    | Some (E.Element (s, _, _, _)) ->
320 321 322
        s
  in

323
  (* Set up a function that delivers tokens one by one. *)
324

325 326 327 328 329 330 331 332 333
  let input = ref input in
  let next () =
    match !input with
    | [] ->
        None
    | t :: ts ->
        input := ts;
        Some t
  in
334

335 336 337 338
  let looking_at_last_token () : bool =
    !input = []
  in

339
  (* Run. We wish to stop at the first error (without handling the error
340 341 342 343
     in any way) and report in which state the error occurred. A clean way
     of doing this is to use the incremental API, as follows. The main loop
     resembles the [loop] function in [Engine]. *)

344 345 346 347
  (* Another reason why we write our own loop is that we wish to detect
     spurious reductions. We accumulate these reductions in [spurious], a
     (reversed) list of productions. *)

348 349
  let rec loop (checkpoint : cst E.checkpoint) (spurious : spurious_reduction list) =
    match checkpoint with
350
    | E.InputNeeded _ ->
351 352 353 354
      begin match next() with
      | None ->
        OInputReadPastEnd
      | Some t ->
355
        loop (E.offer checkpoint (t, Lexing.dummy_pos, Lexing.dummy_pos)) spurious
356 357
      end
    | E.Shifting _ ->
358
      loop (E.resume checkpoint) spurious
359 360
    | E.AboutToReduce (env, prod) ->
        (* If we have requested the last input token and if this is not
361 362 363
           a default reduction, then this is a spurious reduction.
           Furthermore, if a spurious reduction has taken place already,
           then this is also a spurious reduction. *)
364
        let spurious =
365
          if looking_at_last_token() && not (E.env_has_default_reduction env)
366 367
          || spurious <> []
          then
368
            (current env, prod) :: spurious
369 370 371
          else
            spurious
        in
372
        loop (E.resume checkpoint) spurious
373 374 375 376
    | E.HandlingError env ->
        (* Check that all of the input has been read. Otherwise, the error
           has occurred sooner than expected. *)
        if !input = [] then
377 378
          (* Return the current state and the list of spurious reductions. *)
          OK (current env, List.rev spurious)
379 380 381 382 383 384 385 386 387 388
        else
          OInputNotFullyConsumed
    | E.Accepted _ ->
        (* The parser has succeeded. This is unexpected. *)
        OUnexpectedAccept
    | E.Rejected ->
        (* The parser rejects this input. This should not happen; we
           should observe [HandlingError _] first. *)
        assert false
  in
389

390
  loop (E.start entry Lexing.dummy_pos) []