Commit ce21b6c1 authored by Guillaume Melquiond's avatar Guillaume Melquiond

Remove handcrafted error messages (fix issues #150 and #172).

Short story: it was a source of bugs, there was only one handcrafted
message over 650, and, to quote François Pottier, "you seem to have
misunderstood what the various commands do".

Long story: the proper steps to update the error messages after modifying
the parser are

1. update the old states with --update-errors
2. generate the new states with --list-errors
3. compare the old and new states with --compare-errors
4. manually reconcile the differences between the old and new states
5. write error messages for the new states
6. add %on_error_reduce and go back to step 1, if step 5 is too hard
7. check that the error messages for the old states are still meaningful
8. check that the set of states is both correct (--compile-errors) and
   complete (--compare-errors)

We were doing only step 1 and half of step 8. Doing the other half of
step 8 would have prevented issue #172 from occurring. But that would have
meant doing step 4 after each parser modification, which was never done.
Note that step 2 is so expensive that it is impossible to perform step 8
during continuous integration.

Given the work needed to update the error messages after a syntax change,
I don't think we can reliably use them until WhyML no longer evolves.
parent 201d8696
......@@ -198,8 +198,6 @@ pvsbin/
/src/parser/parser.ml
/src/parser/parser.mli
/src/parser/parser.conflicts
/src/parser/handcrafted.messages.temp
/src/parser/parser_messages.ml
# /src/why3doc/
/src/why3doc/doc_lexer.ml
......
......@@ -19,8 +19,6 @@ else
HIDE = @
endif
.DELETE_ON_ERROR:
# install the binaries
DESTDIR =
......@@ -178,7 +176,6 @@ LIBGENERATED = src/util/config.ml \
src/util/json_lexer.ml \
src/parser/lexer.ml \
src/parser/parser.mli src/parser/parser.ml \
src/parser/parser_messages.ml \
src/driver/driver_parser.mli src/driver/driver_parser.ml \
src/driver/driver_lexer.ml \
src/driver/parse_smtv2_model_parser.mli src/driver/parse_smtv2_model_parser.ml \
......@@ -207,7 +204,7 @@ LIB_MLW = ity expr pdecl eval_match typeinv vc pmodule dexpr \
pinterp mltree compile mlinterp pdriver cprinter ml_printer \
ocaml_printer cakeml_printer
LIB_PARSER = ptree glob typing parser_messages parser typing report lexer
LIB_PARSER = ptree glob typing parser typing lexer
LIB_TRANSFORM = simplify_formula inlining split_goal \
args_wrapper detect_polymorphism reduction_engine compute \
......@@ -279,33 +276,6 @@ src/session/compress.ml: config.status src/session/compress_none.ml
cp src/session/compress_none.ml $@
endif
.PHONY: initialize_messages
initialize_messages: src/parser/parser.mly
$(MENHIR) --explain --strict src/parser/parser.mly --list-errors > src/parser/handcrafted.messages
src/parser/parser_messages.ml: src/parser/parser.mly src/parser/handcrafted.messages
$(MENHIR) --explain --strict src/parser/parser.mly --update-errors \
src/parser/handcrafted.messages > src/parser/handcrafted.messages.temp;
# If the update fails, the exit code is 0 so we also try to compile-errors directly
# before replacing handcrafted.messages. If the following line fails, exit code
# 1 is returned stopping the command. And empty file parser_messages.ml is
# deleted by .DELETE_ON_ERROR. This behavior ensures that second run of make
# will not continue with a fake parser_messages.ml.
$(MENHIR) --explain --strict src/parser/parser.mly --compile-errors \
src/parser/handcrafted.messages.temp > src/parser/parser_messages.ml;
if cmp -s src/parser/handcrafted.messages.temp src/parser/handcrafted.messages; then \
echo "parser_messages.ml OK"; \
else \
echo "Regenerating handcrafted.messages"; \
rm -f src/parser/parser_messages.ml; \
cp src/parser/handcrafted.messages.temp src/parser/handcrafted.messages; \
rm -f src/parser/handcrafted.messages.temp; \
fi
clean ::
rm -f src/parser/handcrafted.messages.temp
# hide deprecated warnings for strings
src/util/strings.cmo: WARNINGS:=$(WARNINGS)-3
......
#!/bin/sh
# This checks specifically error messages
dir=`dirname $0`
updateoracle=false
files=""
while test $# != 0; do
case "$1" in
"-update-oracle")
updateoracle=true;;
"-"*)
printf "unknown option: %s\n" "$1"
printf "usage: parsing-bench [-update-oracle] <files>\n"
printf " <files> must be given without the '.mlw' suffix\n"
printf " if <files> empty, use all files from directory 'parsing/bad'\n"
exit 2;;
*)
files="$files $1"
esac
shift
done
if test "$files" = "" ; then
files="$dir/parsing/bad/*.mlw"
fi
run () {
printf " $1 "
f="$1"
$dir/../bin/why3prove.opt --parse-only "$1.mlw" 2> "$f.out"
if cmp "$f.oracle" "$f.out" > /dev/null 2>&1 ; then
echo "ok"
else
if $updateoracle; then
echo "Updating oracle for $1"
mv "$f.out" "$f.oracle"
else
echo "FAILED!"
echo "diff is the following:"
diff "$f.oracle" "$f.out"
fi
fi
}
for file in $files; do
filedir=`dirname $file`
filebase=`basename $file .mlw`
printf "Parsing files\n";
run $filedir/$filebase
done
File "bench/parsing/bad/module_name.mlw", line 1, characters 7-11:
syntax error:
module name expected.
found "name".
expecting a capitalized identifier (token UIDENT_NQ)
module Strange
let range a = absurd || (a = 3)
end
This diff is collapsed.
......@@ -265,70 +265,33 @@ rule token = parse
let debug = Debug.register_info_flag "print_modules"
~desc:"Print@ program@ modules@ after@ typechecking."
exception Error of string
let build_parsing_function entry lb = Loc.with_location (entry token) lb
let () = Exn_printer.register (fun fmt exn -> match exn with
(* This ad hoc switch allows to not edit the automatically generated
handcrafted.messages in ad hoc ways. *)
| Error s when s = "<YOUR SYNTAX ERROR MESSAGE HERE>\n" ->
Format.fprintf fmt "syntax error"
| Error s -> Format.fprintf fmt "syntax error:\n %s" s
| _ -> raise exn)
let parse_term = build_parsing_function Parser.term_eof
(* Associate each token to a text representing it *)
let match_tokens t =
(* TODO generate this automatically *)
match t with
| None -> assert false
| Some _t -> "NOT IMPLEMENTED"
let parse_term_list = build_parsing_function Parser.term_comma_list_eof
let build_parsing_function (parser_entry: Lexing.position -> 'a) lb =
(* This records the last token which was read (for error messages) *)
let last = ref None in
let module I = Parser.MenhirInterpreter in
let checkpoint = parser_entry lb.Lexing.lex_curr_p
and supplier =
I.lexer_lexbuf_to_supplier (fun x -> let t = token x in last := Some t; t) lb
and succeed t = t
and fail checkpoint =
let t = Lexing.lexeme lb in
let token = match_tokens !last in
let fname = lb.Lexing.lex_curr_p.Lexing.pos_fname in
(* TODO/FIXME: ad-hoc fix for TryWhy3/Str incompatibility *)
let s = if Strings.has_prefix "/trywhy3_input." fname
then "<YOUR SYNTAX ERROR MESSAGE HERE>\n"
else Report.report (t, token) checkpoint in
(* Typing.close_file is supposedly done at the end of the file in
parsing.mly. If there is a syntax error, we still need to close it (to
be able to reload). *)
Loc.with_location (fun _x ->
(try ignore(Typing.close_file ()) with
| _e -> ());
raise (Error s)) lb
in
I.loop_handle succeed fail supplier checkpoint
let parse_qualid = build_parsing_function Parser.qualid_eof
let parse_list_ident = build_parsing_function Parser.ident_comma_list_eof
let parse_list_qualid = build_parsing_function Parser.qualid_comma_list_eof
open Wstdlib
open Ident
open Theory
open Pmodule
let parse_term lb =
build_parsing_function Parser.Incremental.term_eof lb
let parse_term_list lb = build_parsing_function Parser.Incremental.term_comma_list_eof lb
let parse_qualid lb = build_parsing_function Parser.Incremental.qualid_eof lb
let parse_list_ident lb = build_parsing_function Parser.Incremental.ident_comma_list_eof lb
let parse_list_qualid lb = build_parsing_function Parser.Incremental.qualid_comma_list_eof lb
let read_channel env path file c =
let lb = Lexing.from_channel c in
Loc.set_file file lb;
Typing.open_file env path;
let mm = build_parsing_function Parser.Incremental.mlw_file lb in
let mm =
try
build_parsing_function Parser.mlw_file lb
with
e -> ignore (Typing.discard_file ()); raise e
in
if path = [] && Debug.test_flag debug then begin
let print_m _ m = Format.eprintf "%a@\n@." print_module m in
let add_m _ m mm = Mid.add m.mod_theory.th_name m mm in
......
......@@ -147,7 +147,7 @@
let error_loc loc = Loc.error ~loc Error
let () = Exn_printer.register (fun fmt exn -> match exn with
| Error -> Format.fprintf fmt "syntax error %s" (Parser_messages.message 1)
| Error -> Format.fprintf fmt "syntax error"
| _ -> raise exn)
%}
......
(*
This file is completely copied from Compcert's preparser. Reference manual of
menhir takes this as example.
*)
open Parser.MenhirInterpreter
module S = MenhirLib.General (* Streams *)
(* -------------------------------------------------------------------------- *)
(* There are places where we may hit an internal error and we would like to fail
abruptly because "this cannot happen". Yet, it is safer when shipping to
silently cover up for our internal error. Thus, we typically use an idiom of
the form [if debug then assert false else <some default value>]. *)
let debug = false
(* -------------------------------------------------------------------------- *)
(* [stack checkpoint] extracts the parser's stack out of a checkpoint. *)
let stack checkpoint =
match checkpoint with
| HandlingError env ->
stack env
| _ ->
assert false (* this cannot happen, I promise *)
(* -------------------------------------------------------------------------- *)
(* [state checkpoint] extracts the number of the current state out of a
parser checkpoint. *)
let state checkpoint : int =
match Lazy.force (stack checkpoint) with
| S.Nil ->
(* Hmm... The parser is in its initial state. Its number is
usually 0. This is a BIG HACK. TEMPORARY *)
0
| S.Cons (Element (s, _, _, _), _) ->
number s
(* -------------------------------------------------------------------------- *)
(* We allow an error message to contain the special form $i. We only use it to
print what was read. *)
let fragment text message =
try
let i = int_of_string (Str.matched_group 1 message) in
match i with
| 0 -> fst text
| _ -> snd text
with
| Failure _ ->
(* In principle, this should not happen, but if it does, let's cover up
for our internal error. *)
if debug then assert false else "???"
| Not_found ->
(* In principle, this should not happen, but if it does, let's cover up
for our internal error. *)
if debug then assert false else "???"
let fragments text (message : string) : string =
Str.global_substitute
(Str.regexp "\\$\\([0-9]+\\)")
(fragment text)
message
let report text checkpoint : string =
(* Find out in which state the parser failed. *)
let s : int = state checkpoint in
(* Choose an error message, based on the state number [s].
Then, customize it, based on dynamic information. *)
let message = try
Parser_messages.message s |>
fragments text
with Not_found ->
(* If the state number cannot be found -- which, in principle,
should not happen, since our list of erroneous states is
supposed to be complete! -- produce a generic message. *)
Printf.sprintf "This is an unknown syntax error (%d).\n\
Please report.\n" s
in
(* Construct the full error message. *)
Printf.sprintf "%s" message
val report: string * string ->
'a Parser.MenhirInterpreter.checkpoint -> string
......@@ -1398,6 +1398,10 @@ let close_file () =
assert (not (Stack.is_empty state) && (Stack.top state).muc = None);
(Stack.pop state).file
let discard_file () =
assert (not (Stack.is_empty state));
ignore (Stack.pop state)
let open_module ({id_str = nm; id_loc = loc} as id) =
assert (not (Stack.is_empty state) && (Stack.top state).muc = None);
let slice = Stack.top state in
......
......@@ -17,6 +17,8 @@ val open_file : Env.env -> Env.pathname -> unit
val close_file : unit -> Pmodule.pmodule Wstdlib.Mstr.t
val discard_file : unit -> unit
val open_module : Ptree.ident -> unit
val close_module : Loc.position -> unit
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment