referenceInterpreter.ml 8.1 KB
Newer Older
1 2 3
open Grammar
open Cst

4 5
(* ------------------------------------------------------------------------ *)

6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
(* Set up all of the information required by the LR engine. Everything is
   read directly from [Grammar] and [Lr1]. *)

module T = struct

  type state =
      Lr1.node
   
  type token =
      Terminal.t

  type terminal =
      Terminal.t

  type semantic_value =
      cst

  let token2terminal (token : token) : terminal =
    token

  let token2value (token : token) : semantic_value =
    CstTerminal token

  let error_terminal =
    Terminal.error

  let error_value =
    CstError

  type production =
      Production.index

  let default_reduction (s : state) defred nodefred env =
    match Invariant.has_default_reduction s with
    | Some (prod, _) ->
	defred env prod
    | None ->
	nodefred env

  let action (s : state) (tok : terminal) value shift reduce fail env =

    (* Check whether [s] has an outgoing shift transition along [tok]. *)

    try

      let s' : state = SymbolMap.find (Symbol.T tok) (Lr1.transitions s) in

      (* There is such a transition. Return either [ShiftDiscard] or
	 [ShiftNoDiscard], depending on the existence of a default
	 reduction on [#] at [s']. *)

      match Invariant.has_default_reduction s' with
      | Some (_, toks) when TerminalSet.mem Terminal.sharp toks ->
	  shift env false tok value s'
      | _ ->
	  shift env true tok value s'
	  
    (* There is no such transition. Look for a reduction. *)

    with Not_found ->
      try

	let prod = Misc.single (TerminalMap.find tok (Lr1.reductions s)) in
	reduce env prod

      (* There is no reduction either. Fail. *)

      with Not_found ->
	fail env

  let goto (s : state) (prod : production) : state =
    try
      SymbolMap.find (Symbol.N (Production.nt prod)) (Lr1.transitions s)
    with Not_found ->
      assert false

  open MenhirLib.EngineTypes

  exception Error

86 87 88
  (* By convention, a semantic action returns a new stack. It does not
     affect [env]. *)

89 90 91
  let is_start =
    Production.is_start

92
  type semantic_action =
93
      (state, semantic_value, token) env -> (state, semantic_value) stack
94 95 96

  let semantic_action (prod : production) : semantic_action =
    fun env ->
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
      assert (not (Production.is_start prod));

      (* Reduce. Pop a suffix of the stack, and use it to construct a
	 new concrete syntax tree node. *)

      let n = Production.length prod in

      let values : semantic_value array =
        Array.make n CstError (* dummy *)
      and startp =
        ref Lexing.dummy_pos
      and endp=
        ref Lexing.dummy_pos
      and current =
        ref env.current
      and stack =
        ref env.stack
      in

      (* We now enter a loop to pop [k] stack cells and (after that) push
         a new cell onto the stack. *)

      (* This loop does not update [env.current]. Instead, the state in
         the newly pushed stack cell will be used (by our caller) as a
         basis for a goto transition, and [env.current] will be updated
         (if necessary) then. *)

      for k = n downto 1 do

        (* Fetch a semantic value. *)

        values.(k - 1) <- !stack.semv;

        (* Pop one cell. The stack must be non-empty. As we pop a cell,
           change the automaton's current state to the one stored within
           the cell. (It is sufficient to do this only when [k] is 1,
           since the last write overwrites any and all previous writes.)
           If this is the first (last) cell that we pop, update [endp]
           ([startp]). *)

        let next = !stack.next in
        assert (!stack != next);
        if k = n then begin
          endp := !stack.endp
        end;
        if k = 1 then begin
          current := !stack.state;
          startp := !stack.startp
        end;
        stack := next

      done;

      (* Done popping. *)

      (* Construct and push a new stack cell. The associated semantic
         value is a new concrete syntax tree. *)

      {
        state = !current;
        semv = CstNonTerminal (prod, values);
        startp = !startp;
        endp = !endp;
        next = !stack
      }
162

163
  (* The logging functions that follow are called only if [log] is [true]. *)
164

165 166 167 168 169
  module Log = struct

    open Printf

    let state s =
170 171
      fprintf stderr "State %d:" (Lr1.number s);
      prerr_newline()
172 173

    let shift tok s' =
174 175
      fprintf stderr "Shifting (%s) to state %d" (Terminal.print tok) (Lr1.number s');
      prerr_newline()
176 177

    let reduce_or_accept prod =
178 179 180 181 182 183 184
      match Production.classify prod with
      | Some _ ->
         fprintf stderr "Accepting";
         prerr_newline()
      | None ->
         fprintf stderr "Reducing production %s" (Production.print prod);
         prerr_newline()
185

186
    let lookahead_token tok startp endp =
187 188 189 190 191
      fprintf stderr "Lookahead token is now %s (%d-%d)"
        (Terminal.print tok)
        startp.Lexing.pos_cnum
        endp.Lexing.pos_cnum;
      prerr_newline()
192 193

    let initiating_error_handling () =
194 195
      fprintf stderr "Initiating error handling";
      prerr_newline()
196 197

    let resuming_error_handling () =
198 199
      fprintf stderr "Resuming error handling";
      prerr_newline()
200 201

    let handling_error s =
202 203
      fprintf stderr "Handling error in state %d" (Lr1.number s);
      prerr_newline()
204 205 206 207 208

  end

end

209 210
(* ------------------------------------------------------------------------ *)

211 212 213 214
(* Define a palatable user entry point. *)

let interpret log nt lexer lexbuf =

215 216 217 218 219 220 221 222
  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
      let log = log
    end)
  in
223

224
  (* Run it. *)
225 226

  try
227
    Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf)
228 229
  with T.Error ->
    None
230

231 232
(* ------------------------------------------------------------------------ *)

233 234
(* Another entry point, used internally by [LRijkstra] to check that the
   sentences that [LRijkstra] produces do lead to an error in the expected
235 236
   state. *)

237 238 239 240 241 242 243 244 245 246 247 248 249
open MenhirLib.General (* streams *)

type check_error_path_outcome =
  (* Bad: the input was read past its end. *)
| OInputReadPastEnd
  (* Bad: a syntax error occurred before all of the input was read. *)
| OInputNotFullyConsumed
  (* Bad: the parser unexpectedly accepted (part of) this input. *)
| OUnexpectedAccept
  (* Good: a syntax error occurred after reading the last input token. *)
| OK of Lr1.node

let check_error_path nt input =
250 251 252 253 254 255 256 257 258 259

  (* Instantiate the LR engine. *)

  let module E =
    MenhirLib.Engine.Make (struct
      include T
      let log = false
    end)
  in

260
  (* Set up a function that delivers tokens one by one. *)
261

262 263 264 265 266 267 268 269 270
  let input = ref input in
  let next () =
    match !input with
    | [] ->
        None
    | t :: ts ->
        input := ts;
        Some t
  in
271

272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
  (* Run it. We wish to stop at the first error (without handling the error
     in any way) and report in which state the error occurred. A clean way
     of doing this is to use the incremental API, as follows. The main loop
     resembles the [loop] function in [Engine]. *)

  let entry = Lr1.entry_of_nt nt in

  let rec loop (result : cst E.result) =
    match result with
    | E.InputNeeded _ ->
        begin match next() with
        | None ->
            OInputReadPastEnd
        | Some t ->
            let dummy = Lexing.dummy_pos in
            loop (E.offer result (t, dummy, dummy))
        end
    | E.Shifting _
    | E.AboutToReduce _ ->
        loop (E.resume result)
    | E.HandlingError env ->
        (* Check that all of the input has been read. Otherwise, the error
           has occurred sooner than expected. *)
        if !input = [] then
          (* Return the current state. This is done by peeking at the stack.
             If the stack is empty, then we must be in the initial state. *)
          OK (
            match Lazy.force (E.stack env) with
            | Nil ->
                entry
            | Cons (E.Element (s, _, _, _), _) ->
                s
          )
        else
          OInputNotFullyConsumed
    | E.Accepted _ ->
        (* The parser has succeeded. This is unexpected. *)
        OUnexpectedAccept
    | E.Rejected ->
        (* The parser rejects this input. This should not happen; we
           should observe [HandlingError _] first. *)
        assert false
  in
315

316
  loop (E.start entry)
317