referenceInterpreter.ml 10.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16
open Grammar
open Cst

17 18
(* ------------------------------------------------------------------------ *)

19 20 21 22 23 24 25
(* Set up all of the information required by the LR engine. Everything is
   read directly from [Grammar] and [Lr1]. *)

module T = struct

  type state =
      Lr1.node
26 27 28

  let number =
    Lr1.number
29

30 31 32 33 34 35
  type token =
      Terminal.t

  type terminal =
      Terminal.t

36 37 38
  type nonterminal =
      Nonterminal.t

39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
  type semantic_value =
      cst

  let token2terminal (token : token) : terminal =
    token

  let token2value (token : token) : semantic_value =
    CstTerminal token

  let error_terminal =
    Terminal.error

  let error_value =
    CstError

  type production =
      Production.index

  let default_reduction (s : state) defred nodefred env =
    match Invariant.has_default_reduction s with
    | Some (prod, _) ->
60
        defred env prod
61
    | None ->
62
        nodefred env
63 64 65 66 67 68 69 70 71 72

  let action (s : state) (tok : terminal) value shift reduce fail env =

    (* Check whether [s] has an outgoing shift transition along [tok]. *)

    try

      let s' : state = SymbolMap.find (Symbol.T tok) (Lr1.transitions s) in

      (* There is such a transition. Return either [ShiftDiscard] or
73 74
         [ShiftNoDiscard], depending on the existence of a default
         reduction on [#] at [s']. *)
75 76 77

      match Invariant.has_default_reduction s' with
      | Some (_, toks) when TerminalSet.mem Terminal.sharp toks ->
78
          shift env false tok value s'
79
      | _ ->
80
          shift env true tok value s'
81

82 83 84 85 86
    (* There is no such transition. Look for a reduction. *)

    with Not_found ->
      try

87 88
        let prod = Misc.single (TerminalMap.find tok (Lr1.reductions s)) in
        reduce env prod
89 90 91 92

      (* There is no reduction either. Fail. *)

      with Not_found ->
93
        fail env
94

95
  let goto_nt (s : state) (nt : nonterminal) : state =
96
    try
97
      SymbolMap.find (Symbol.N nt) (Lr1.transitions s)
98 99 100
    with Not_found ->
      assert false

101 102 103
  let goto_prod (s : state) (prod : production) : state =
    goto_nt s (Production.nt prod)

104 105 106 107
  open MenhirLib.EngineTypes

  exception Error

108 109 110
  (* By convention, a semantic action returns a new stack. It does not
     affect [env]. *)

111 112 113
  let is_start =
    Production.is_start

114
  type semantic_action =
115
      (state, semantic_value, token) env -> (state, semantic_value) stack
116 117 118

  let semantic_action (prod : production) : semantic_action =
    fun env ->
119 120 121
      assert (not (Production.is_start prod));

      (* Reduce. Pop a suffix of the stack, and use it to construct a
122
         new concrete syntax tree node. *)
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183

      let n = Production.length prod in

      let values : semantic_value array =
        Array.make n CstError (* dummy *)
      and startp =
        ref Lexing.dummy_pos
      and endp=
        ref Lexing.dummy_pos
      and current =
        ref env.current
      and stack =
        ref env.stack
      in

      (* We now enter a loop to pop [k] stack cells and (after that) push
         a new cell onto the stack. *)

      (* This loop does not update [env.current]. Instead, the state in
         the newly pushed stack cell will be used (by our caller) as a
         basis for a goto transition, and [env.current] will be updated
         (if necessary) then. *)

      for k = n downto 1 do

        (* Fetch a semantic value. *)

        values.(k - 1) <- !stack.semv;

        (* Pop one cell. The stack must be non-empty. As we pop a cell,
           change the automaton's current state to the one stored within
           the cell. (It is sufficient to do this only when [k] is 1,
           since the last write overwrites any and all previous writes.)
           If this is the first (last) cell that we pop, update [endp]
           ([startp]). *)

        let next = !stack.next in
        assert (!stack != next);
        if k = n then begin
          endp := !stack.endp
        end;
        if k = 1 then begin
          current := !stack.state;
          startp := !stack.startp
        end;
        stack := next

      done;

      (* Done popping. *)

      (* Construct and push a new stack cell. The associated semantic
         value is a new concrete syntax tree. *)

      {
        state = !current;
        semv = CstNonTerminal (prod, values);
        startp = !startp;
        endp = !endp;
        next = !stack
      }
184

185
  (* The logging functions that follow are called only if [log] is [true]. *)
186

187 188 189 190 191
  module Log = struct

    open Printf

    let state s =
192 193
      fprintf stderr "State %d:" (Lr1.number s);
      prerr_newline()
194 195

    let shift tok s' =
196 197
      fprintf stderr "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s');
      prerr_newline()
198 199

    let reduce_or_accept prod =
200 201 202 203 204 205 206
      match Production.classify prod with
      | Some _ ->
         fprintf stderr "Accepting";
         prerr_newline()
      | None ->
         fprintf stderr "Reducing production %s" (Production.print prod);
         prerr_newline()
207

208
    let lookahead_token tok startp endp =
209 210 211 212 213
      fprintf stderr "Lookahead token is now %s (%d-%d)"
        (Terminal.print tok)
        startp.Lexing.pos_cnum
        endp.Lexing.pos_cnum;
      prerr_newline()
214 215

    let initiating_error_handling () =
216 217
      fprintf stderr "Initiating error handling";
      prerr_newline()
218 219

    let resuming_error_handling () =
220 221
      fprintf stderr "Resuming error handling";
      prerr_newline()
222 223

    let handling_error s =
224 225
      fprintf stderr "Handling error in state %d" (Lr1.number s);
      prerr_newline()
226 227 228 229 230

  end

end

231 232
(* ------------------------------------------------------------------------ *)

233 234 235 236
(* Define a palatable user entry point. *)

let interpret log nt lexer lexbuf =

237 238 239 240 241 242 243 244
  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
      let log = log
    end)
  in
245

246
  (* Run it. *)
247 248

  try
249
    Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf)
250 251
  with T.Error ->
    None
252

253 254
(* ------------------------------------------------------------------------ *)

255 256
(* Another entry point, used internally by [LRijkstra] to check that the
   sentences that [LRijkstra] produces do lead to an error in the expected
257 258
   state. *)

259 260
open MenhirLib.General (* streams *)

261 262
type spurious_reduction =
  Lr1.node * Production.index
263 264

type target =
265
  Lr1.node * spurious_reduction list
266

267 268 269 270 271 272 273
type check_error_path_outcome =
  (* Bad: the input was read past its end. *)
| OInputReadPastEnd
  (* Bad: a syntax error occurred before all of the input was read. *)
| OInputNotFullyConsumed
  (* Bad: the parser unexpectedly accepted (part of) this input. *)
| OUnexpectedAccept
274 275
  (* Good: a syntax error occurred after reading the last input token. We
     report in which state the error took place, as well as a list of spurious
276 277 278 279
     reductions. A non-default reduction that takes place after looking at the
     last input token (i.e., the erroneous token) is spurious. Furthermore, any
     reduction that takes place after a spurious reduction is itself spurious.
     We note that a spurious reduction can take place only in a non-canonical
280 281
     LR automaton. *)
| OK of target
282

283
let check_error_path log nt input =
284 285 286 287 288 289

  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
290
      let log = log
291 292 293
    end)
  in

294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
  (* Determine the initial state. *)

  let entry = Lr1.entry_of_nt nt in

  (* This function helps extract the current parser state out of [env].
     It may become unnecessary if the [Engine] API offers it. *)

  let current env =
    (* Peek at the stack. If empty, then we must be in the initial state. *)
    match Lazy.force (E.stack env) with
    | Nil ->
        entry
    | Cons (E.Element (s, _, _, _), _) ->
        s
  in

310
  (* Set up a function that delivers tokens one by one. *)
311

312 313 314 315 316 317 318 319 320
  let input = ref input in
  let next () =
    match !input with
    | [] ->
        None
    | t :: ts ->
        input := ts;
        Some t
  in
321

322 323 324 325
  let looking_at_last_token () : bool =
    !input = []
  in

326
  (* Run. We wish to stop at the first error (without handling the error
327 328 329 330
     in any way) and report in which state the error occurred. A clean way
     of doing this is to use the incremental API, as follows. The main loop
     resembles the [loop] function in [Engine]. *)

331 332 333 334
  (* Another reason why we write our own loop is that we wish to detect
     spurious reductions. We accumulate these reductions in [spurious], a
     (reversed) list of productions. *)

335 336
  let rec loop (checkpoint : cst E.checkpoint) (spurious : spurious_reduction list) =
    match checkpoint with
337
    | E.InputNeeded _ ->
338 339 340 341
      begin match next() with
      | None ->
        OInputReadPastEnd
      | Some t ->
342
        loop (E.offer checkpoint (t, Lexing.dummy_pos, Lexing.dummy_pos)) spurious
343 344
      end
    | E.Shifting _ ->
345
      loop (E.resume checkpoint) spurious
346 347
    | E.AboutToReduce (env, prod) ->
        (* If we have requested the last input token and if this is not
348 349 350
           a default reduction, then this is a spurious reduction.
           Furthermore, if a spurious reduction has taken place already,
           then this is also a spurious reduction. *)
351
        let spurious =
352 353 354
          if looking_at_last_token() && not (E.has_default_reduction env)
          || spurious <> []
          then
355
            (current env, prod) :: spurious
356 357 358
          else
            spurious
        in
359
        loop (E.resume checkpoint) spurious
360 361 362 363
    | E.HandlingError env ->
        (* Check that all of the input has been read. Otherwise, the error
           has occurred sooner than expected. *)
        if !input = [] then
364 365
          (* Return the current state and the list of spurious reductions. *)
          OK (current env, List.rev spurious)
366 367 368 369 370 371 372 373 374 375
        else
          OInputNotFullyConsumed
    | E.Accepted _ ->
        (* The parser has succeeded. This is unexpected. *)
        OUnexpectedAccept
    | E.Rejected ->
        (* The parser rejects this input. This should not happen; we
           should observe [HandlingError _] first. *)
        assert false
  in
376

377
  loop (E.start entry Lexing.dummy_pos) []