interpret.ml 17.2 KB
Newer Older
1 2 3 4 5 6 7
(* This module is in charge of handling the [--interpret] option,
   if it is present. *)

module I = Invariant (* artificial dependency; ensures that [Invariant] runs first *)

(* --------------------------------------------------------------------------- *)

8 9 10 11 12 13 14 15 16 17 18
(* The following definitions are in sync with [SentenceParser]. *)

open Grammar
type terminals = Terminal.t list
type sentence = Nonterminal.t option * terminals
type located_sentence = Positions.positions * sentence
type message = string

(* A run is a series of sentences together with an error message. *)

type run = located_sentence list * message
19

20 21
(* A targeted sentence is a located sentence together with the state
   into which it leads. *)
22

23 24 25 26 27 28 29 30
type targeted_sentence = located_sentence * Lr1.node

(* A targeted run is a series of targeted sentences together with an error
   message. *)

type targeted_run = targeted_sentence list * message

(* --------------------------------------------------------------------------- *)
POTTIER Francois's avatar
POTTIER Francois committed
31

32
(* Display and debugging. *)
33 34 35 36

let print_sentence (nto, terminals) : string =
  let b = Buffer.create 128 in
  Option.iter (fun nt ->
37
    Printf.bprintf b "%s: " (Nonterminal.print false nt)
38 39 40 41 42 43 44
  ) nto;
  List.iter (fun t ->
    Printf.bprintf b "%s " (Terminal.print t)
  ) terminals;
  Printf.bprintf b "\n";
  Buffer.contents b

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
(* --------------------------------------------------------------------------- *)

(* [stream] turns a finite list of terminals into a stream of terminals. *)

exception EndOfStream

let stream (toks : Terminal.t list) : unit -> Terminal.t * Lexing.position * Lexing.position =
  let toks = ref toks in
  fun () ->

    let tok =
      match !toks with
      | tok :: more ->

	  (* Take a token off the list, and return it. *)

	  toks := more;
	  tok

      | [] ->

	  (* The finite list has been exhausted. Here, two plausible behaviors
	     come to mind.

	     The first behavior consists in raising an exception. In that case,
	     we are creating a finite stream, and it is up to the parser to not
	     read past its end.

	     The second behavior consists in returning a designated token. In
	     that case, we are creating an infinite, eventually constant,
	     stream.

	     The choice between these two behaviors is somewhat arbitrary;
	     furthermore, in the second case, the choice of the designated
	     token is arbitrary as well. Here, we adopt the second behavior if
	     and only if the grammar has an EOF token, and we use EOF as the
	     designated token. Again, this is arbitrary, and could be changed
	     in the future. *)

	  match Terminal.eof with
	  | Some eof ->
	      eof
	  | None ->
	      raise EndOfStream

    in

    (* For now, return dummy positions. *)

    tok, Lexing.dummy_pos, Lexing.dummy_pos

(* --------------------------------------------------------------------------- *)

98 99 100 101 102 103 104
(* [start sentence] returns the start symbol that we should use to interpret
   the sentence [sentence]. *)

(* If a start symbol was explicitly provided as part of the sentence, we use
   it. Otherwise, we use the grammar's unique start symbol, if there is
   one. *)

105
let start poss ((nto, _) : sentence) : Nonterminal.t =
106 107 108 109 110 111
  match nto with
  | Some nt ->
      nt
  | None ->
      match ProductionMap.is_singleton Lr1.entry with
      | None ->
112
          Error.error poss
113 114 115 116 117 118 119 120 121
            "Because the grammar has multiple start symbols, each of the\n\
             sentences provided on the standard input channel must be of the\n\
             form: <start symbol>: <token>*"
      | Some (prod, _) ->
          match Production.classify prod with
          | Some nt ->
              nt
          | None ->
              assert false
122

123
(* --------------------------------------------------------------------------- *)
124

125
(* [interpret] interprets a sentence. *)
126

127
let interpret ((_, toks) as sentence) : unit =
128

129
  let nt = start [] sentence in
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172

  (* Run the reference interpreter. This can produce a concrete syntax tree
     ([Some cst]), fail with a parser error ([None]), or fail with a lexer error
     ([EndOfStream]). *)

  (* In either case, we produce just one line of output, so it should be clear
     to the user which outcomes correspond to which sentences (should multiple
     sentences be supplied). *)

  begin try
    match
      MenhirLib.Convert.Simplified.traditional2revised
	(ReferenceInterpreter.interpret Settings.trace nt)
	(stream toks)
    with

    | Some cst ->

	(* Success. *)

	Printf.printf "ACCEPT";
	if Settings.interpret_show_cst then begin
	  print_newline();
	  Cst.show stdout cst
	end

    | None ->

	(* Parser failure. *)

	Printf.printf "REJECT"

  with EndOfStream ->

    (* Lexer failure. *)
    
    Printf.printf "OVERSHOOT"

  end;
  print_newline()

(* --------------------------------------------------------------------------- *)

173 174
(* [interpret_error_aux] interprets a sentence, expecting it to end in an
   error. Failure or success is reported via two continuations. *)
175

176 177
let interpret_error_aux poss ((_, terminals) as sentence) fail succeed =
  let nt = start poss sentence in
178
  let open ReferenceInterpreter in
179
  match check_error_path nt terminals with
180
  | OInputReadPastEnd ->
181
      fail "No syntax error occurs."
182
  | OInputNotFullyConsumed ->
183
      fail "A syntax error occurs before the last token is reached."
184
  | OUnexpectedAccept ->
185
      fail "No syntax error occurs; in fact, this input is accepted."
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
  | OK s' ->
      succeed nt terminals s'

(* --------------------------------------------------------------------------- *)

(* This default error message is produced by [--list-errors] when it creates a
   [.messages] file, and is recognized by [--compare-errors] when it compares
   two such files. *)

let default_message =
  "<YOUR SYNTAX ERROR MESSAGE HERE>\n"

(* [print_messages_item] displays one data item. The item is of the form [nt,
   w, s'], which means that beginning at the start symbol [nt], the sentence
   [w] ends in an error in state [s']. The display obeys the [.messages] file
   format. *)

let print_messages_item (nt, w, s') : unit =
  (* Print the sentence, followed with a few comments, followed with a
     blank line, followed with a proposed error message, followed with
     another blank line. *)
  Printf.printf
    "%s##\n## Ends in an error in state: %d.\n##\n%s\n%s\n"
    (print_sentence (Some nt, w))
    (Lr1.number s')
    (* [Lr0.print] or [Lr0.print_closure] could be used here. The latter
       could sometimes be helpful, but is usually intolerably verbose. *)
    (Lr0.print "## " (Lr1.state s'))
    default_message
215

216
(* --------------------------------------------------------------------------- *)
217

218 219 220 221 222
(* [interpret_error] interprets a sentence, expecting it to end in an error.
   Failure or success is reported on the standard output channel. This is
   used by [--interpret-error]. *)

let fail msg =
223
  Error.error [] msg
224

225 226
let succeed nt terminals s' =
  print_messages_item (nt, terminals, s');
227 228 229 230 231 232 233
  exit 0

let interpret_error sentence =
  interpret_error_aux [] sentence fail succeed

(* --------------------------------------------------------------------------- *)

234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
(* [target_sentence] interprets a (located) sentence, expecting it to end in
   an error, computes the state in which the error is obtained, and constructs
   a targeted sentence. *)

let fail poss msg =
  Error.signal poss (Printf.sprintf
    "This sentence does not end with a syntax error, as desired.\n%s"
    msg
  );
  [] (* dummy result *)

let target_sentence : located_sentence -> targeted_sentence list =
  fun (poss, sentence) ->
    interpret_error_aux poss sentence
      (fail poss)
249
      (fun _nt _terminals s' -> [ (poss, sentence), s' ])
250 251 252 253 254 255 256 257 258 259

let target_run : run -> targeted_run =
  fun (sentences, message) ->
    List.flatten (List.map target_sentence sentences), message

let target_runs : run list -> targeted_run list =
  fun runs ->
    let runs = List.map target_run runs in
    if Error.errors() then exit 1;
    runs
260

261
(* --------------------------------------------------------------------------- *)
262

263 264
(* [setup()] returns a function [read] which reads one sentence from the
   standard input channel. *)
265

266
let setup () : unit -> sentence option =
267

268 269 270
  let open Lexing in
  let lexbuf = from_channel stdin in
  lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = "(stdin)" };
271

272 273
  let read () =
    try
274
      SentenceParser.optional_sentence SentenceLexer.lex lexbuf
275 276 277
    with Parsing.Parse_error ->
      Error.error (Positions.lexbuf lexbuf) "Ill-formed input sentence."
  in
278

279 280 281 282 283 284
  read

(* --------------------------------------------------------------------------- *)

(* If [--interpret] is set, interpret the sentences found on the standard
   input channel, then stop, without generating a parser. *)
285

286 287 288 289 290 291 292
(* We read a series of sentences from the standard input channel. To allow
   interactive use, we interpret each sentence as soon as it is read. *)

let () =
  if Settings.interpret then
    let read = setup() in
    while true do
293 294
      match read() with
      | None ->
295
  	  exit 0
296
      | Some sentence ->
297 298
	  interpret sentence
    done
299

300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
(* --------------------------------------------------------------------------- *)

(* If [--interpret-error] is set, interpret one sentence found on the standard
   input channel, then stop, without generating a parser. *)

(* We read just one sentence, confirm that this sentence ends in an error, and
   (if that is the case) display the number of the state that is reached. *)

let () =
  if Settings.interpret_error then
    let read = setup() in
    match read() with
    | None ->
      exit 1 (* abnormal: no input *)
    | Some sentence ->
        interpret_error sentence (* never returns *)
316

317 318
(* --------------------------------------------------------------------------- *)

319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
(* Reading a [.messages] file. *)

let read_messages filename : run list =
  (* Read and segment the file. *)
  let segments : (string * Lexing.lexbuf) list = Segment.segment filename in
  (* Process the segments, two by two. We expect one segment to contain
     a non-empty series of sentences, and the next segment to contain
     free-form text. *)
  let rec loop accu segments =
    match segments with
    | [] ->
        List.rev accu
    | (_, lexbuf) :: [] ->
        (* Oops, we are desynchronized. *)
        Error.signal
          (Positions.one (Lexing.lexeme_end_p lexbuf))
          "Syntax error: missing a final message. I may be desynchronized.";
        List.rev accu
    | (_, lexbuf) :: (text, _) :: segments ->
        (* Read a non-empty series of located sentences. *)
        match SentenceParser.entry SentenceLexer.lex lexbuf with
        | exception Parsing.Parse_error ->
            (* Report an error. *)
            Error.signal
              (Positions.one (Lexing.lexeme_start_p lexbuf))
              "Syntax error: ill-formed sentence.";
            (* Continue anyway. *)
            loop accu segments
        | sentences ->
            loop ((sentences, text) :: accu) segments
  in
  let runs = loop [] segments in
  if Error.errors() then exit 1;
  (* Although we try to report several errors, [SentenceLexer.lex] may
     abort the whole process after just one error. This could be improved. *)
  runs

(* --------------------------------------------------------------------------- *)

358
(* [message_table] converts a list of targeted runs to a table (a mapping) of
359 360
   states to located sentences and messages. Optionally, it can detect that
   two sentences lead to the same state, and report an error. *)
361 362

let message_table (detect_redundancy : bool) (runs : targeted_run list)
363
  : (located_sentence * message) Lr1.NodeMap.t =
364 365

  let table =
366
    List.fold_left (fun table (sentences_and_states, message) ->
367 368
      List.fold_left (fun table (sentence2, s) ->
        match Lr1.NodeMap.find s table with
369
        | sentence1, _ ->
370 371 372 373 374 375 376
            if detect_redundancy then
              Error.signal (fst sentence1 @ fst sentence2)
                (Printf.sprintf
                   "Redundancy: these sentences both cause an error in state %d."
                   (Lr1.number s));
            table
        | exception Not_found ->
377
            Lr1.NodeMap.add s (sentence2, message) table
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
      ) table sentences_and_states
    ) Lr1.NodeMap.empty runs
  in
  if Error.errors() then exit 1;
  table

(* --------------------------------------------------------------------------- *)

(* [compile_runs] converts a list of targeted runs to OCaml code that encodes
   a mapping of state numbers to error messages. The code is sent to the
   standard output channel. *)

let compile_runs filename (runs : targeted_run list) : unit =

  (* We wish to produce a function that maps a state number to a message.
     By convention, we call this function [message]. *)

  let name = "message" in

  let open IL in
  let open CodeBits in
  let default = {
    branchpat  = PWildcard;
    branchbody = eraisenotfound
  (* The default branch raises an exception, which can be caught by
     the user, who can then produce a generic error message. *)
  } in
  let branches =
    List.fold_left (fun branches (sentences_and_states, message) ->
      (* Create an or-pattern for these states. *)
      let states = List.map (fun (_, s) ->
        pint (Lr1.number s)
      ) sentences_and_states in
      (* Map all these states to this message. *)
      { branchpat = POr states;
        branchbody = EStringConst message } :: branches
    ) [ default ] runs
  in
  let messagedef = {
    valpublic = true;
    valpat = PVar name;
    valval = EFun ([ PVar "s" ], EMatch (EVar "s", branches))
  } in
  let program = [
    SIComment (Printf.sprintf
      "This file was auto-generated based on \"%s\"." filename);
    SIComment (Printf.sprintf
      "Please note that the function [%s] can raise [Not_found]." name);
    SIValDefs (false,
      [ messagedef ]);
  ] in

  (* Write this program to the standard output channel. *)

  let module P = Printer.Make (struct
    let f = stdout
    let locate_stretches = None
  end) in
  P.program program

(* --------------------------------------------------------------------------- *)

440 441 442 443 444
(* If [--compile-errors <filename>] is set, compile the error message
   descriptions found in file [filename] down to OCaml code, then stop. *)

let () =
  Settings.compile_errors |> Option.iter (fun filename ->
445

446 447
    (* Read the file. *)
    let runs = read_messages filename in
448

449 450
    (* Convert every sentence to a state number. We signal an error if a
       sentence does not end in an error, as expected. *)
451
    let runs = target_runs runs in
452 453 454

    (* Build a mapping of states to located sentences. This allows us to
       detect if two sentences lead to the same state. *)
455
    let _ = message_table true runs in
456

457 458 459 460 461 462
    (* In principle, we would like to check whether this set of sentences is
       complete (i.e., covers all states where an error can arise), but this
       may be costly -- it requires running [LRijkstra]. Instead, we offer a
       separate facility for comparing two [.messages] files, one of which can
       be produced via [--list-errors]. This can be used to ensure
       completeness. *)
POTTIER Francois's avatar
POTTIER Francois committed
463

464 465 466
    (* Now, compile this information down to OCaml code. We wish to
       produce a function that maps a state number to a message. By
       convention, we call this function [message]. *)
467
    compile_runs filename runs;
468

469 470 471
    exit 0
  )

472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
(* --------------------------------------------------------------------------- *)

(* If two [--compare-errors <filename>] directives are provided, compare the
   two message descriptions files, and stop. We wish to make sure that every
   state that appears on the left-hand side appears on the right-hand side as
   well. *)

let () =
  Settings.compare_errors |> Option.iter (fun (filename1, filename2) ->

    (* Read and convert both files, as above. *)
    let runs1 = read_messages filename1
    and runs2 = read_messages filename2 in
    let runs1 = target_runs runs1
    and runs2 = target_runs runs2 in (* here, it would be OK to ignore errors *)
    let table1 = message_table false runs1
    and table2 = message_table false runs2 in
    
    (* Check that the domain of [table1] is a subset of the domain of [table2]. *)
    table1 |> Lr1.NodeMap.iter (fun s ((poss1, _), _) ->
      if not (Lr1.NodeMap.mem s table2) then
        Error.signal poss1 (Printf.sprintf
          "This sentence leads to an error in state %d.\n\
           No sentence that leads to this state exists in \"%s\"."
          (Lr1.number s) filename2
        )
    );

    (* Check that [table1] is a subset of [table2], that is, for every state
       [s] in the domain of [table1], [s] is mapped by [table1] and [table2]
502 503 504 505 506
       to the same error message. As an exception, if the message found in
       [table1] is the default message, then no comparison takes place. This
       allows using [--list-errors] and [--compare-errors] in conjunction to
       ensure that a [.messages] file is complete, without seeing warnings
       about different messages. *)
507
    table1 |> Lr1.NodeMap.iter (fun s ((poss1, _), message1) ->
508 509 510 511 512 513 514 515 516 517 518
      if message1 <> default_message then
        try
          let (poss2, _), message2 = Lr1.NodeMap.find s table2 in
          if message1 <> message2 then
            Error.warning (poss1 @ poss2) (Printf.sprintf
              "These sentences lead to an error in state %d.\n\
               The corresponding messages in \"%s\" and \"%s\" differ."
              (Lr1.number s) filename1 filename2
            )
        with Not_found ->
          ()
519 520 521 522 523 524 525
    );

    if Error.errors() then exit 1;
    exit 0

  )