(* interpret.ml *)
(* This module is in charge of handling the [--interpret] option,
   if it is present. *)

open Grammar
module I = Invariant (* artificial dependency; ensures that [Invariant] runs first *)

(* --------------------------------------------------------------------------- *)

(* A sentence is a pair of an optional non-terminal start symbol and a
   list of terminal symbols. *)

(* The first component, when present, is the explicit start symbol given in
   the input; the second component is the sequence of input tokens. *)
type sentence =
    Nonterminal.t option * Terminal.t list

(* Debugging.

let print_sentence (nto, terminals) : string =
  let b = Buffer.create 128 in
  Option.iter (fun nt ->
    Printf.bprintf b "%s: " (Nonterminal.print true nt)
  ) nto;
  List.iter (fun t ->
    Printf.bprintf b "%s " (Terminal.print t)
  ) terminals;
  Printf.bprintf b "\n";
  Buffer.contents b

let print_sentence sentence : unit =
  print_string (print_sentence sentence)

let print_located_sentence (_, sentence) : unit =
  print_sentence sentence

*)

(* --------------------------------------------------------------------------- *)

(* [stream] turns a finite list of terminals into a stream of terminals. *)

exception EndOfStream

let stream (toks : Terminal.t list) : unit -> Terminal.t * Lexing.position * Lexing.position =
  (* The mutable state of the stream: the tokens not yet consumed. *)
  let remaining = ref toks in
  fun () ->
    let tok =
      match !remaining with
      | tok :: rest ->
          (* At least one token is left: consume it and return it. *)
          remaining := rest;
          tok
      | [] ->
          (* The finite list has been exhausted. Two plausible behaviors come
             to mind: raise an exception, which creates a truly finite stream
             that the parser must not read past; or return a designated token
             forever, which creates an infinite, eventually constant, stream.
             The choice between these behaviors is somewhat arbitrary, and,
             in the second case, so is the choice of the designated token.
             Here, we adopt the second behavior if and only if the grammar
             has an EOF token, and we use EOF as the designated token. This
             could be changed in the future. *)
          match Terminal.eof with
          | Some eof ->
              eof
          | None ->
              raise EndOfStream
    in
    (* Positions are not tracked for now; supply dummies. *)
    tok, Lexing.dummy_pos, Lexing.dummy_pos

(* --------------------------------------------------------------------------- *)

(* [start sentence] returns the start symbol that we should use to interpret
   the sentence [sentence]. *)

(* If a start symbol was explicitly provided as part of the sentence, we use
   it. Otherwise, we use the grammar's unique start symbol, if there is
   one. *)

let start poss ((nto, _) : sentence) : Nonterminal.t =
  match nto with
  | Some nt ->
      (* The sentence carries an explicit start symbol: use it. *)
      nt
  | None ->
      (* Otherwise, the grammar must have exactly one entry point. *)
      match ProductionMap.is_singleton Lr1.entry with
      | Some (prod, _) ->
          begin match Production.classify prod with
          | Some nt ->
              nt
          | None ->
              (* An entry production necessarily reduces to a start symbol. *)
              assert false
          end
      | None ->
          Error.error poss
            "Because the grammar has multiple start symbols, each of the\n\
             sentences provided on the standard input channel must be of the\n\
             form: <start symbol>: <token>*"

(* --------------------------------------------------------------------------- *)

(* [interpret] interprets a sentence. *)

let interpret ((_, toks) as sentence) : unit =

  let nt = start [] sentence in

  (* Run the reference interpreter. This can produce a concrete syntax tree
     ([Some cst]), fail with a parser error ([None]), or fail with a lexer
     error ([EndOfStream]). In every case, exactly one line of output is
     produced, so that, should multiple sentences be supplied, it is clear
     which outcome corresponds to which sentence. *)

  begin try
    match
      MenhirLib.Convert.Simplified.traditional2revised
        (ReferenceInterpreter.interpret Settings.trace nt)
        (stream toks)
    with
    | Some cst ->
        (* Success: announce it, and possibly display the tree. *)
        print_string "ACCEPT";
        if Settings.interpret_show_cst then begin
          print_newline();
          Cst.show stdout cst
        end
    | None ->
        (* Parser failure. *)
        print_string "REJECT"
  with EndOfStream ->
    (* Lexer failure: the input was exhausted before the parser stopped. *)
    print_string "OVERSHOOT"
  end;
  print_newline()

(* --------------------------------------------------------------------------- *)

(* [interpret_error_aux] interprets a sentence, expecting it to end in an
   error. Failure or success is reported via two continuations. *)

let interpret_error_aux poss ((_, terminals) as sentence) fail succeed =
  let nt = start poss sentence in
  let open ReferenceInterpreter in
  (* Check whether this sentence takes the automaton into an error state. *)
  match check_error_path nt terminals with
  | OK state ->
      succeed state
  | OInputReadPastEnd ->
      fail "No syntax error occurs."
  | OInputNotFullyConsumed ->
      fail "A syntax error occurs before the last token is reached."
  | OUnexpectedAccept ->
      fail "No syntax error occurs; in fact, this input is accepted."

(* --------------------------------------------------------------------------- *)

(* [interpret_error] interprets a sentence, expecting it to end in an error.
   Failure or success is reported on the standard output channel. This is
   used by [--interpret-error]. *)

let fail message =
  (* Print the diagnosis, flush, and exit abnormally. *)
  Printf.printf "BAD\n# %s\n%!" message;
  exit 1

let succeed node =
  (* Map the Lr1 node to its external number, report it, and exit. *)
  let n = Lr1.number node in
  Printf.printf
    "OK %d\n# This sentence ends with a syntax error in state %d.\n%!"
    n n;
  exit 0

(* [interpret_error sentence] checks that [sentence] ends in a syntax error
   and reports the outcome on the standard output channel. It never returns:
   both continuations call [exit]. *)
let interpret_error sentence =
  interpret_error_aux [] sentence fail succeed

(* --------------------------------------------------------------------------- *)

(* [convert_located_sentence] interprets a (located) sentence, expecting it to
   end in an error, and returns the state in which the error is obtained. This
   is used by [--compile-errors]. *)

let convert_located_sentence (poss, sentence) =
  (* On failure, signal an error and produce an empty list of results,
     so that the overall analysis can continue. *)
  let fail msg =
    Error.signal poss (Printf.sprintf
      "This sentence does not end with a syntax error, as desired.\n%s"
      msg
    );
    []
  in
  (* On success, produce a singleton: this sentence, paired with the state
     in which the error is obtained. *)
  let succeed s = [ (poss, sentence), s ] in
  interpret_error_aux poss sentence fail succeed

(* [convert_entry] converts every sentence in an entry to a state, keeping
   the associated message. *)
let convert_entry (sentences, message) =
  let converted = List.map convert_located_sentence sentences in
  (List.flatten converted, message)

(* --------------------------------------------------------------------------- *)

(* [setup()] returns a function [read] which reads one sentence from the
   standard input channel. *)

let setup () : unit -> sentence option =
  let open Lexing in
  let lexbuf = from_channel stdin in
  (* Give the lexbuf a readable file name, for use in error messages. *)
  lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = "(stdin)" };
  fun () ->
    try
      SentenceParser.optional_sentence SentenceLexer.lex lexbuf
    with Parsing.Parse_error ->
      Error.error (Positions.lexbuf lexbuf) "Ill-formed input sentence."

(* --------------------------------------------------------------------------- *)

(* If [--interpret] is set, interpret the sentences found on the standard
   input channel, then stop, without generating a parser. *)

(* We read a series of sentences from the standard input channel. To allow
   interactive use, we interpret each sentence as soon as it is read. *)

let () =
  if Settings.interpret then
    let read = setup() in
    let rec loop () =
      match read() with
      | None ->
          (* End of input: we are done. *)
          exit 0
      | Some sentence ->
          interpret sentence;
          loop ()
    in
    loop ()
(* --------------------------------------------------------------------------- *)

(* If [--interpret-error] is set, interpret one sentence found on the standard
   input channel, then stop, without generating a parser. *)

(* We read just one sentence, confirm that this sentence ends in an error, and
   (if that is the case) display the number of the state that is reached. *)

let () =
  if Settings.interpret_error then
    match (setup()) () with
    | None ->
        (* Abnormal: the input channel contained no sentence at all. *)
        exit 1
    | Some sentence ->
        (* [interpret_error] never returns. *)
        interpret_error sentence
(* --------------------------------------------------------------------------- *)

(* If [--compile-errors <filename>] is set, compile the error message
   descriptions found in file [filename] down to OCaml code, then stop. *)

let () =
  Settings.compile_errors |> Option.iter (fun filename ->

    (* Read and segment the file. Each segment is a pair of a string and a
       lexbuf. *)
    let segments = Segment.segment filename in

    (* Process the segments, two by two. We expect one segment to contain
       a non-empty series of sentences, and the next segment to contain
       free-form text. *)
    let rec loop accu segments =
      match segments with
      | [] ->
          List.rev accu
      | [ (_, lexbuf) ] ->
          (* A sentence segment without a following message segment:
             we are desynchronized. *)
          Error.signal
            (Positions.one (Lexing.lexeme_end_p lexbuf))
            "Syntax error: missing a final message. I may be desynchronized.";
          List.rev accu
      | (_, lexbuf) :: (text, _) :: rest ->
          (* Read a non-empty series of located sentences. *)
          begin match SentenceParser.entry SentenceLexer.lex lexbuf with
          | exception Parsing.Parse_error ->
              (* Report an error, but continue anyway. *)
              Error.signal
                (Positions.one (Lexing.lexeme_start_p lexbuf))
                "Syntax error: ill-formed sentence.";
              loop accu rest
          | sentences ->
              loop ((sentences, text) :: accu) rest
          end
    in
    let entries = loop [] segments in
    if Error.errors() then exit 1;
    (* Although we try to report several errors, [SentenceLexer.lex] may
       abort the whole process after just one error. This could be improved. *)

    (* Convert every sentence to a state number. We signal an error if a
       sentence does not end in an error, as expected. *)
    let entries = List.map convert_entry entries in
    if Error.errors() then exit 1;

    (* Build a mapping of states to located sentences. This allows us to
       detect if two sentences lead to the same state. *)
    let _mapping =
      List.fold_left (fun mapping (sentences_and_states, _message) ->
        List.fold_left (fun mapping (sentence2, s) ->
          match Lr1.NodeMap.find s mapping with
          | exception Not_found ->
              (* First sentence that reaches this state: record it. *)
              Lr1.NodeMap.add s sentence2 mapping
          | sentence1 ->
              (* Two distinct sentences lead to the same state: complain. *)
              Error.signal (fst sentence1 @ fst sentence2)
                (Printf.sprintf
                   "Redundancy: these sentences both cause an error in state %d."
                   (Lr1.number s));
              mapping
        ) mapping sentences_and_states
      ) Lr1.NodeMap.empty entries
    in
    if Error.errors() then exit 1;

    exit 0
  )

(* Artificial dependency on [Segment] — presumably, like the [Invariant]
   alias at the top of this file, this ensures that the module is linked
   and its side effects run. NOTE(review): confirm intent. *)
module S = Segment