referenceInterpreter.ml 9.36 KB
Newer Older
1 2 3
open Grammar
open Cst

4 5
(* ------------------------------------------------------------------------ *)

6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
(* Set up all of the information required by the LR engine. Everything is
   read directly from [Grammar] and [Lr1]. *)

module T = struct

  type state =
      Lr1.node
   
  type token =
      Terminal.t

  type terminal =
      Terminal.t

  type semantic_value =
      cst

  let token2terminal (token : token) : terminal =
    token

  let token2value (token : token) : semantic_value =
    CstTerminal token

  let error_terminal =
    Terminal.error

  let error_value =
    CstError

  type production =
      Production.index

  let default_reduction (s : state) defred nodefred env =
    match Invariant.has_default_reduction s with
    | Some (prod, _) ->
	defred env prod
    | None ->
	nodefred env

  let action (s : state) (tok : terminal) value shift reduce fail env =

    (* Check whether [s] has an outgoing shift transition along [tok]. *)

    try

      let s' : state = SymbolMap.find (Symbol.T tok) (Lr1.transitions s) in

      (* There is such a transition. Return either [ShiftDiscard] or
	 [ShiftNoDiscard], depending on the existence of a default
	 reduction on [#] at [s']. *)

      match Invariant.has_default_reduction s' with
      | Some (_, toks) when TerminalSet.mem Terminal.sharp toks ->
	  shift env false tok value s'
      | _ ->
	  shift env true tok value s'
	  
    (* There is no such transition. Look for a reduction. *)

    with Not_found ->
      try

	let prod = Misc.single (TerminalMap.find tok (Lr1.reductions s)) in
	reduce env prod

      (* There is no reduction either. Fail. *)

      with Not_found ->
	fail env

  let goto (s : state) (prod : production) : state =
    try
      SymbolMap.find (Symbol.N (Production.nt prod)) (Lr1.transitions s)
    with Not_found ->
      assert false

  open MenhirLib.EngineTypes

  exception Error

86 87 88
  (* By convention, a semantic action returns a new stack. It does not
     affect [env]. *)

89 90 91
  let is_start =
    Production.is_start

92
  type semantic_action =
93
      (state, semantic_value, token) env -> (state, semantic_value) stack
94 95 96

  let semantic_action (prod : production) : semantic_action =
    fun env ->
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
      assert (not (Production.is_start prod));

      (* Reduce. Pop a suffix of the stack, and use it to construct a
	 new concrete syntax tree node. *)

      let n = Production.length prod in

      let values : semantic_value array =
        Array.make n CstError (* dummy *)
      and startp =
        ref Lexing.dummy_pos
      and endp=
        ref Lexing.dummy_pos
      and current =
        ref env.current
      and stack =
        ref env.stack
      in

      (* We now enter a loop to pop [k] stack cells and (after that) push
         a new cell onto the stack. *)

      (* This loop does not update [env.current]. Instead, the state in
         the newly pushed stack cell will be used (by our caller) as a
         basis for a goto transition, and [env.current] will be updated
         (if necessary) then. *)

      for k = n downto 1 do

        (* Fetch a semantic value. *)

        values.(k - 1) <- !stack.semv;

        (* Pop one cell. The stack must be non-empty. As we pop a cell,
           change the automaton's current state to the one stored within
           the cell. (It is sufficient to do this only when [k] is 1,
           since the last write overwrites any and all previous writes.)
           If this is the first (last) cell that we pop, update [endp]
           ([startp]). *)

        let next = !stack.next in
        assert (!stack != next);
        if k = n then begin
          endp := !stack.endp
        end;
        if k = 1 then begin
          current := !stack.state;
          startp := !stack.startp
        end;
        stack := next

      done;

      (* Done popping. *)

      (* Construct and push a new stack cell. The associated semantic
         value is a new concrete syntax tree. *)

      {
        state = !current;
        semv = CstNonTerminal (prod, values);
        startp = !startp;
        endp = !endp;
        next = !stack
      }
162

163
  (* The logging functions that follow are called only if [log] is [true]. *)
164

165 166 167 168 169
  module Log = struct

    open Printf

    let state s =
170 171
      fprintf stderr "State %d:" (Lr1.number s);
      prerr_newline()
172 173

    let shift tok s' =
174 175
      fprintf stderr "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s');
      prerr_newline()
176 177

    let reduce_or_accept prod =
178 179 180 181 182 183 184
      match Production.classify prod with
      | Some _ ->
         fprintf stderr "Accepting";
         prerr_newline()
      | None ->
         fprintf stderr "Reducing production %s" (Production.print prod);
         prerr_newline()
185

186
    let lookahead_token tok startp endp =
187 188 189 190 191
      fprintf stderr "Lookahead token is now %s (%d-%d)"
        (Terminal.print tok)
        startp.Lexing.pos_cnum
        endp.Lexing.pos_cnum;
      prerr_newline()
192 193

    let initiating_error_handling () =
194 195
      fprintf stderr "Initiating error handling";
      prerr_newline()
196 197

    let resuming_error_handling () =
198 199
      fprintf stderr "Resuming error handling";
      prerr_newline()
200 201

    let handling_error s =
202 203
      fprintf stderr "Handling error in state %d" (Lr1.number s);
      prerr_newline()
204 205 206 207 208

  end

end

209 210
(* ------------------------------------------------------------------------ *)

211 212 213 214
(* Define a palatable user entry point. *)

let interpret log nt lexer lexbuf =

215 216 217 218 219 220 221 222
  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
      let log = log
    end)
  in
223

224
  (* Run it. *)
225 226

  try
227
    Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf)
228 229
  with T.Error ->
    None
230

231 232
(* ------------------------------------------------------------------------ *)

233 234
(* Another entry point, used internally by [LRijkstra] to check that the
   sentences that [LRijkstra] produces do lead to an error in the expected
235 236
   state. *)

237 238
open MenhirLib.General (* streams *)

239 240
type spurious_reduction =
  Lr1.node * Production.index
241 242

type target =
243
  Lr1.node * spurious_reduction list
244

245 246 247 248 249 250 251
type check_error_path_outcome =
  (* Bad: the input was read past its end. *)
| OInputReadPastEnd
  (* Bad: a syntax error occurred before all of the input was read. *)
| OInputNotFullyConsumed
  (* Bad: the parser unexpectedly accepted (part of) this input. *)
| OUnexpectedAccept
252 253 254 255 256 257 258
  (* Good: a syntax error occurred after reading the last input token. We
     report in which state the error took place, as well as a list of spurious
     reductions. A spurious reduction is a non-default reduction that takes
     place after looking at the last input token -- the erroneous token. 
     We note that a spurious reduction can happen only in a non-canonical
     LR automaton. *)
| OK of target
259 260

let check_error_path nt input =
261 262 263 264 265 266 267 268 269 270

  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
      let log = false
    end)
  in

271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
  (* Determine the initial state. *)

  let entry = Lr1.entry_of_nt nt in

  (* This function helps extract the current parser state out of [env].
     It may become unnecessary if the [Engine] API offers it. *)

  let current env =
    (* Peek at the stack. If empty, then we must be in the initial state. *)
    match Lazy.force (E.stack env) with
    | Nil ->
        entry
    | Cons (E.Element (s, _, _, _), _) ->
        s
  in

287
  (* Set up a function that delivers tokens one by one. *)
288

289 290 291 292 293 294 295 296 297
  let input = ref input in
  let next () =
    match !input with
    | [] ->
        None
    | t :: ts ->
        input := ts;
        Some t
  in
298

299 300 301 302
  let looking_at_last_token () : bool =
    !input = []
  in

303
  (* Run. We wish to stop at the first error (without handling the error
304 305 306 307
     in any way) and report in which state the error occurred. A clean way
     of doing this is to use the incremental API, as follows. The main loop
     resembles the [loop] function in [Engine]. *)

308 309 310 311
  (* Another reason why we write our own loop is that we wish to detect
     spurious reductions. We accumulate these reductions in [spurious], a
     (reversed) list of productions. *)

312
  let rec loop (result : cst E.result) (spurious : spurious_reduction list) =
313 314
    match result with
    | E.InputNeeded _ ->
315 316 317 318 319 320 321 322 323 324 325 326 327 328
      begin match next() with
      | None ->
        OInputReadPastEnd
      | Some t ->
        let dummy = Lexing.dummy_pos in
        loop (E.offer result (t, dummy, dummy)) spurious
      end
    | E.Shifting _ ->
      loop (E.resume result) spurious
    | E.AboutToReduce (env, prod) ->
        (* If we have requested the last input token and if this is not
           a default reduction, then this is a spurious reduction. *)
        let spurious =
          if looking_at_last_token() && not (E.has_default_reduction env) then
329
            (current env, prod) :: spurious
330 331 332 333
          else
            spurious
        in
        loop (E.resume result) spurious
334 335 336 337
    | E.HandlingError env ->
        (* Check that all of the input has been read. Otherwise, the error
           has occurred sooner than expected. *)
        if !input = [] then
338 339
          (* Return the current state and the list of spurious reductions. *)
          OK (current env, List.rev spurious)
340 341 342 343 344 345 346 347 348 349
        else
          OInputNotFullyConsumed
    | E.Accepted _ ->
        (* The parser has succeeded. This is unexpected. *)
        OUnexpectedAccept
    | E.Rejected ->
        (* The parser rejects this input. This should not happen; we
           should observe [HandlingError _] first. *)
        assert false
  in
350

351
  loop (E.start entry) []
352