Commit 68532c3b authored by Sylvain Dailler's avatar Sylvain Dailler

Tentatively add new error handling mode of menhir

This commit tries to add the new parsing errors from menhir. It changes
the lexer so that precise error made from the provided handcrafted.messages
can be displayed on failure.
Also fix #111

This adds a rule for src/parser/ which either compile
the messages or regenerates them. It needs regeneration when changes are
added in .mly or .messages file. This also adds option .DELETE_ON_ERROR.

* src/parser/lexer.mll
The parser now uses loop_handle function from menhir (see menhir ref.

* src/parser/
Added file from Compcert's preparser to report error from "checkpoint". It
allows to use $0 notation in handcrafted.messages

* src/parser/handcrafted.messages
To be edited file for error messages.

* bench/*
Adding a parsing-bench for parsing errors following the model of ce-bench:
an oracle is provided so that error messages are not lost.
parent cc1f3f37
......@@ -19,6 +19,8 @@ else
HIDE = @
# install the binaries
......@@ -178,11 +180,13 @@ GENERATED =
LIBGENERATED = src/util/ \
src/util/ src/util/ \
src/util/ src/util/ src/parser/ \
src/parser/ \
src/util/json_parser.mli src/util/ \
src/util/ \
src/parser/ \
src/parser/parser.mli src/parser/ \
src/parser/ \
src/driver/driver_parser.mli src/driver/ \
src/driver/ \
src/driver/parse_smtv2_model_parser.mli src/driver/ \
......@@ -211,7 +215,7 @@ LIB_MLW = ity expr pdecl eval_match typeinv vc pmodule dexpr \
pinterp mltree compile mlinterp pdriver cprinter ocaml_printer \
LIB_PARSER = ptree glob typing parser lexer
LIB_PARSER = ptree glob typing parser_messages parser typing report lexer
LIB_TRANSFORM = simplify_formula inlining split_goal \
args_wrapper detect_polymorphism reduction_engine compute \
......@@ -281,6 +285,33 @@ src/session/ config.status src/session/
cp src/session/ $@
.PHONY: initialize_messages
initialize_messages: src/parser/parser.mly
$(MENHIR) --explain --strict src/parser/parser.mly --list-errors > src/parser/handcrafted.messages
src/parser/ src/parser/parser.mly src/parser/handcrafted.messages
$(MENHIR) --explain --strict src/parser/parser.mly --update-errors \
src/parser/handcrafted.messages > src/parser/handcrafted.messages.temp;
# If the update fails, the exit code is 0 so we also try to compile-errors directly
# before replacing handcrafted.messages. If the following line fails, exit code
# 1 is returned stopping the command. And empty file is
# deleted by .DELETE_ON_ERROR. This behavior ensures that second run of make
# will not continue with a fake
$(MENHIR) --explain --strict src/parser/parser.mly --compile-errors \
src/parser/handcrafted.messages.temp > src/parser/;
if cmp -s src/parser/handcrafted.messages.temp src/parser/handcrafted.messages; then \
echo " OK"; \
else \
echo "Regenerating handcrafted.messages"; \
rm -f src/parser/; \
cp src/parser/handcrafted.messages.temp src/parser/handcrafted.messages; \
rm -f src/parser/handcrafted.messages.temp; \
clean ::
rm -f src/parser/handcrafted.messages.temp
# hide deprecated warnings for strings
src/util/strings.cmo: WARNINGS:=$(WARNINGS)-3
# This checks specifically error messages
dir=`dirname $0`
while test $# != 0; do
case "$1" in
printf "unknown option: %s\n" "$1"
printf "usage: parsing-bench [-update-oracle] <files>\n"
printf " <files> must be given without the '.mlw' suffix\n"
printf " if <files> empty, use all files from directory 'parsing/bad'\n"
exit 2;;
files="$files $1"
if test "$files" = "" ; then
run () {
printf " $1 "
$dir/../bin/why3prove.opt --parse-only "$1.mlw" 2> "$f.out"
if cmp "$" "$f.out" > /dev/null 2>&1 ; then
echo "ok"
if $updateoracle; then
echo "Updating oracle for $1"
mv "$f.out" "$"
echo "FAILED!"
echo "diff is the following:"
diff "$" "$f.out"
for file in $files; do
filedir=`dirname $file`
filebase=`basename $file .mlw`
printf "Parsing files\n";
run $filedir/$filebase
File "bench/parsing/bad/module_name.mlw", line 1, characters 7-11:
syntax error:
module name expected.
found "name".
expecting a capitalized identifier (token UIDENT_NQ)
module Strange
let range a = absurd || (a = 3)
This diff is collapsed.
......@@ -257,34 +257,66 @@ rule token = parse
let debug = Debug.register_info_flag "print_modules"
~desc:"Print@ program@ modules@ after@ typechecking."
let parse_logic_file env path lb =
open_file token (Lexing.from_string "") (Typing.open_file env path);
Loc.with_location (logic_file token) lb;
Typing.close_file ()
exception Error of string
let parse_term lb = Loc.with_location (Parser.term_eof token) lb
let () = Exn_printer.register (fun fmt exn -> match exn with
(* This ad hoc switch allows to not edit the automatically generated
handcrafted.messages in ad hoc ways. *)
| Error s when s = "<YOUR SYNTAX ERROR MESSAGE HERE>\n" ->
Format.fprintf fmt "syntax error"
| Error s -> Format.fprintf fmt "syntax error:\n %s" s
| _ -> raise exn)
let parse_term_list lb = Loc.with_location (Parser.term_comma_list_eof token) lb
let parse_qualid lb = Loc.with_location (Parser.qualid_eof token) lb
let parse_list_ident lb = Loc.with_location (Parser.ident_comma_list_eof token) lb
let parse_list_qualid lb = Loc.with_location (Parser.qualid_comma_list_eof token) lb
(* Associate each token to a text representing it *)
let match_tokens t =
(* TODO generate this automatically *)
match t with
| None -> assert false
| Some _t -> "NOT IMPLEMENTED"
let build_parsing_function (parser_entry: Lexing.position -> 'a) lb =
(* This records the last token which was read (for error messages) *)
let last = ref None in
let module I = Parser.MenhirInterpreter in
let checkpoint = parser_entry lb.Lexing.lex_curr_p
and supplier =
I.lexer_lexbuf_to_supplier (fun x -> let t = token x in last := Some t; t) lb
and succeed t = t
and fail checkpoint =
let t = Lexing.lexeme lb in
let token = match_tokens !last in
let s = (t, token) checkpoint in
(* Typing.close_file is supposedly done at the end of the file in
parsing.mly. If there is a syntax error, we still need to close it (to
be able to reload). *)
Loc.with_location (fun _x ->
(try ignore(Typing.close_file ()) with
| _e -> ());
raise (Error s)) lb
I.loop_handle succeed fail supplier checkpoint
open Stdlib
open Ident
open Theory
open Pmodule
let parse_term lb =
build_parsing_function Parser.Incremental.term_eof lb
let parse_term_list lb = build_parsing_function Parser.Incremental.term_comma_list_eof lb
let parse_qualid lb = build_parsing_function Parser.Incremental.qualid_eof lb
let parse_list_ident lb = build_parsing_function Parser.Incremental.ident_comma_list_eof lb
let parse_list_qualid lb = build_parsing_function Parser.Incremental.qualid_comma_list_eof lb
let read_channel env path file c =
let lb = Lexing.from_channel c in
Loc.set_file file lb;
Typing.open_file env path;
let mm = Loc.with_location (mlw_file token) lb in
let mm = build_parsing_function Parser.Incremental.mlw_file lb in
if path = [] && Debug.test_flag debug then begin
let print_m _ m = Format.eprintf "%a@\n@." print_module m in
let add_m _ m mm = Mid.add m.mod_theory.th_name m mm in
......@@ -110,7 +110,7 @@
let error_loc loc = Loc.error ~loc Error
let () = Exn_printer.register (fun fmt exn -> match exn with
| Error -> Format.fprintf fmt "syntax error"
| Error -> Format.fprintf fmt "syntax error %s" (Parser_messages.message 1)
| _ -> raise exn)
This file is completely copied from Compcert's preparser. Reference manual of
menhir takes this as example.
open Parser.MenhirInterpreter
module S = MenhirLib.General (* Streams *)
(* -------------------------------------------------------------------------- *)
(* There are places where we may hit an internal error and we would like to fail
abruptly because "this cannot happen". Yet, it is safer when shipping to
silently cover up for our internal error. Thus, we typically use an idiom of
the form [if debug then assert false else <some default value>]. *)
let debug = false
(* -------------------------------------------------------------------------- *)
(* [stack checkpoint] extracts the parser's stack out of a checkpoint. *)
let stack checkpoint =
match checkpoint with
| HandlingError env ->
stack env
| _ ->
assert false (* this cannot happen, I promise *)
(* -------------------------------------------------------------------------- *)
(* [state checkpoint] extracts the number of the current state out of a
parser checkpoint. *)
let state checkpoint : int =
match Lazy.force (stack checkpoint) with
| S.Nil ->
(* Hmm... The parser is in its initial state. Its number is
usually 0. This is a BIG HACK. TEMPORARY *)
| S.Cons (Element (s, _, _, _), _) ->
number s
(* -------------------------------------------------------------------------- *)
(* We allow an error message to contain the special form $i. We only use it to
print what was read. *)
let fragment text message =
let i = int_of_string (Str.matched_group 1 message) in
match i with
| 0 -> fst text
| _ -> snd text
| Failure _ ->
(* In principle, this should not happen, but if it does, let's cover up
for our internal error. *)
if debug then assert false else "???"
| Not_found ->
(* In principle, this should not happen, but if it does, let's cover up
for our internal error. *)
if debug then assert false else "???"
let fragments text (message : string) : string =
(Str.regexp "\\$\\([0-9]+\\)")
(fragment text)
let report text checkpoint : string =
(* Find out in which state the parser failed. *)
let s : int = state checkpoint in
(* Choose an error message, based on the state number [s].
Then, customize it, based on dynamic information. *)
let message = try
Parser_messages.message s |>
fragments text
with Not_found ->
(* If the state number cannot be found -- which, in principle,
should not happen, since our list of erroneous states is
supposed to be complete! -- produce a generic message. *)
Printf.sprintf "This is an unknown syntax error (%d).\n\
Please report.\n" s
(* Construct the full error message. *)
Printf.sprintf "%s" message
val report: string * string ->
'a Parser.MenhirInterpreter.checkpoint -> string
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment