(* This module is in charge of handling the [--interpret] option,
   if it is present. *)

open Grammar
module I = Invariant (* artificial dependency; ensures that [Invariant] runs first *)

(* --------------------------------------------------------------------------- *)

(* A sentence is a pair of an optional non-terminal start symbol and a
   list of terminal symbols. The start symbol is [None] when the sentence
   does not explicitly name one; in that case, the grammar's unique start
   symbol is used instead (see [start] below). *)

type sentence =
    Nonterminal.t option * Terminal.t list

(* --------------------------------------------------------------------------- *)

(* [stream] turns a finite list of terminals into a stream of terminals,
   represented as a function that yields one token (with dummy positions)
   each time it is invoked. *)

exception EndOfStream

let stream (toks : Terminal.t list) : unit -> Terminal.t * Lexing.position * Lexing.position =
  let remaining = ref toks in
  fun () ->
    let tok =
      match !remaining with
      | tok :: rest ->

          (* Consume one token off the list, and return it. *)

          remaining := rest;
          tok

      | [] ->

          (* The finite list has been exhausted. Two plausible behaviors come
             to mind.

             The first consists in raising an exception: we are then creating
             a finite stream, and it is up to the parser not to read past its
             end.

             The second consists in returning a designated token: we are then
             creating an infinite, eventually constant, stream.

             The choice between these behaviors is somewhat arbitrary, and so
             is, in the second case, the choice of the designated token. We
             adopt the second behavior if and only if the grammar has an EOF
             token, and use EOF as the designated token. Again, this is
             arbitrary, and could be changed in the future. *)

          begin match Terminal.eof with
          | Some eof ->
              eof
          | None ->
              raise EndOfStream
          end
    in

    (* For now, return dummy positions. *)

    tok, Lexing.dummy_pos, Lexing.dummy_pos

(* --------------------------------------------------------------------------- *)

(* [start sentence] returns the start symbol that we should use to interpret
   the sentence [sentence]. *)

(* If a start symbol was explicitly provided as part of the sentence, we use
   it. Otherwise, we use the grammar's unique start symbol, if there is
   one; if there are several, we report an error at the positions [poss]. *)

let start poss ((nto, _) : sentence) : Nonterminal.t =
  match nto with
  | Some nt ->
      nt
  | None ->
      (* No explicit start symbol: the grammar must have exactly one
         entry point, otherwise we cannot guess which one is intended. *)
      match ProductionMap.is_singleton Lr1.entry with
      | None ->
          Error.error poss
            "Because the grammar has multiple start symbols, each of the\n\
             sentences provided on the standard input channel must be of the\n\
             form: <start symbol>: <token>*"
      | Some (prod, _) ->
          (* The unique start production must be classified as a
             nonterminal symbol. *)
          match Production.classify prod with
          | Some nt ->
              nt
          | None ->
              assert false

(* --------------------------------------------------------------------------- *)

(* [interpret sentence] interprets the sentence [sentence] by running the
   reference interpreter, and prints a one-line verdict (possibly followed
   by a concrete syntax tree) on the standard output channel. *)

let interpret ((_, toks) as sentence) : unit =

  let nt = start [] sentence in

  (* Run the reference interpreter. This can produce a concrete syntax tree
     ([Some cst]), fail with a parser error ([None]), or fail with a lexer error
     ([EndOfStream]). *)

  (* In either case, we produce just one line of output, so it should be clear
     to the user which outcomes correspond to which sentences (should multiple
     sentences be supplied). *)

  begin try
    match
      MenhirLib.Convert.Simplified.traditional2revised
        (ReferenceInterpreter.interpret Settings.trace nt)
        (stream toks)
    with

    | Some cst ->

        (* Success. *)

        Printf.printf "ACCEPT";
        if Settings.interpret_show_cst then begin
          print_newline();
          Cst.show stdout cst
        end

    | None ->

        (* Parser failure. *)

        Printf.printf "REJECT"

  with EndOfStream ->

    (* Lexer failure: the parser read past the end of the finite stream. *)

    Printf.printf "OVERSHOOT"

  end;
  print_newline()

(* --------------------------------------------------------------------------- *)

(* [interpret_error] interprets a sentence, expecting it to end in an error. *)

(* [succeed s] reports that the sentence ends with a syntax error in state
   [s], then exits with code 0. *)

let succeed s =
  Printf.printf
    "OK %d\n# This sentence ends with a syntax error in state %d.\n%!"
    s s;
  exit 0

(* [fail msg] reports the unexpected outcome [msg], then exits with code 1. *)

let fail msg =
  let () = Printf.printf "BAD\n# %s.\n%!" msg in
  exit 1

(* [interpret_error sentence] checks that the sentence [sentence] ends in a
   syntax error. It prints a verdict and exits, via [succeed] (reporting the
   state where the error occurs) or [fail]. *)

let interpret_error ((_, toks) as sentence) =
  let nt = start [] sentence in
  let open ReferenceInterpreter in
  match check_error_path nt toks with
  | OInputReadPastEnd ->
      fail "No syntax error occurred"
  | OInputNotFullyConsumed ->
      fail "A syntax error occurred before the last token was reached"
  | OUnexpectedAccept ->
      fail "No syntax error occurred; in fact, the input was accepted"
  | OK state ->
      succeed (Lr1.number state)

(* [convert_located_sentence] is analogous to [interpret_error]. It converts
   a (located) sentence to a state, which it returns. Instead of exiting upon
   failure, it signals an error at the positions [poss] and returns a dummy
   state number. *)

let convert_located_sentence (poss, ((_, toks) as sentence)) =
  let nt = start poss sentence in
  let open ReferenceInterpreter in
  match check_error_path nt toks with
  | OInputReadPastEnd ->
      Error.signal poss "No syntax error occurred";
      -1 (* dummy *)
  | OInputNotFullyConsumed ->
      Error.signal poss "A syntax error occurred before the last token was reached";
      -1 (* dummy *)
  | OUnexpectedAccept ->
      Error.signal poss "No syntax error occurred; in fact, the input was accepted";
      -1 (* dummy *)
  | OK state ->
      Lr1.number state

(* [convert_entry] converts every sentence of an entry to a state number,
   leaving the associated message unchanged. *)
let convert_entry (sentences, message) =
  (List.map convert_located_sentence sentences, message)

(* --------------------------------------------------------------------------- *)

(* [setup()] returns a function [read] which reads one sentence from the
   standard input channel. *)

let setup () : unit -> sentence option =

  let open Lexing in
  let lexbuf = from_channel stdin in
  (* Label the lexbuf's positions with a recognizable file name. *)
  lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = "(stdin)" };

  (* [read()] returns [Some sentence], or [None] at end of input; it aborts
     with an error message if the input is ill-formed. *)
  let read () =
    try
      SentenceParser.optional_sentence SentenceLexer.lex lexbuf
    with Parsing.Parse_error ->
      Error.error (Positions.lexbuf lexbuf) "Ill-formed input sentence."
  in

  read

(* --------------------------------------------------------------------------- *)

(* If [--interpret] is set, interpret the sentences found on the standard
   input channel, then stop, without generating a parser. *)

(* We read a series of sentences from the standard input channel. To allow
   interactive use, we interpret each sentence as soon as it is read. *)

let () =
  if Settings.interpret then
    let read = setup() in
    while true do
      match read() with
      | None ->
          (* End of input: stop. *)
          exit 0
      | Some sentence ->
          interpret sentence
    done
(* --------------------------------------------------------------------------- *)

(* If [--interpret-error] is set, interpret one sentence found on the standard
   input channel, then stop, without generating a parser. *)

(* We read just one sentence, confirm that this sentence ends in an error, and
   (if that is the case) display the number of the state that is reached. *)

(* Read a single sentence and check that it ends in an error. *)
let () =
  if Settings.interpret_error then begin
    let read = setup() in
    match read() with
    | Some sentence ->
        (* never returns *)
        interpret_error sentence
    | None ->
        (* abnormal: no input *)
        exit 1
  end
(* --------------------------------------------------------------------------- *)

(* If [--compile-errors <filename>] is set, compile the error message
   descriptions found in file [filename] down to OCaml code, then stop. *)

(* TEMPORARY old
let read filename =
  let lexbuf = Lexing.from_string (IO.read_whole_file filename) in
  lexbuf.Lexing.lex_curr_p <-
    { 
      Lexing.pos_fname = filename; 
      Lexing.pos_lnum  = 1;
      Lexing.pos_bol   = 0; 
      Lexing.pos_cnum  = 0
    };
  try
    SentenceParser.file SentenceLexer.lex lexbuf
  with Parsing.Parse_error ->
    Error.error (Positions.lexbuf lexbuf) "Ill-formed error message description file."
      (* TEMPORARY could improve this message... *)

let () =
  Settings.compile_errors |> Option.iter (fun filename ->
    (* Read the file. *)
    let entries = read filename in
    (* Convert every sentence to a state number. This can signal errors.
       We report all of them. *)
    let _entries = List.map convert_entry entries in
    if Error.errors() then exit 1;
    exit 0
  )
*)
(* If [--compile-errors <filename>] is set, read the error message
   description file [filename], convert its sentences to state numbers,
   and stop. *)
let () =
  Settings.compile_errors |> Option.iter (fun filename ->
    (* Read the file. (Don't bother closing it.) *)
    let c = open_in filename in
    let lexbuf = Lexing.from_channel c in
    (* Initialize the lexbuf's position, so error messages carry the right
       file name and line numbers. *)
    lexbuf.Lexing.lex_curr_p <-
    { 
      Lexing.pos_fname = filename; 
      Lexing.pos_lnum  = 1;
      Lexing.pos_bol   = 0; 
      Lexing.pos_cnum  = 0
    };
    (* [loop accu] reads a series of entries -- each a nonempty list of
       sentences followed by a block of text -- until end of file, and
       returns them in order. *)
    let rec loop accu =
      (* Try to read a non-empty series of non-empty sentences. *)
      SentenceLexer.skip lexbuf;
      match SentenceParser.entry1 SentenceLexer.lex lexbuf with
      | [] ->
          (* We have read [EOF]. Stop. *)
          List.rev accu
      | sentences ->
          (* We have read at least one sentence. Now, read a block of text. *)
          SentenceLexer.skip lexbuf;
          let text = SentenceLexer.block (Buffer.create 512) lexbuf in
          (* Continue. *)
          loop ((sentences, text) :: accu)
    in
    let entries = loop [] in
    (* Convert every sentence to a state number. This can signal errors.
       We report all of them. *)
    let _entries = List.map convert_entry entries in
    if Error.errors() then exit 1;
    exit 0
  )