Commit b79569f4 authored by POTTIER Francois's avatar POTTIER Francois

Updated [ReferenceInterpreter] to produce a list of spurious reductions.

parent 7d8b7d27
......@@ -1001,17 +1001,19 @@ let () =
(* ------------------------------------------------------------------------ *)
(* The following code validates the fact that an error can be triggered in
state [s'] by beginning in the initial state [s] and reading the
sequence of terminal symbols [w]. We use this for debugging purposes. *)
state [s'] by beginning at the start symbol [nt] and reading the
sequence of terminal symbols [w]. We use this for debugging purposes.
Furthermore, this gives us a list of spurious reductions, which we use
to produce a comment. *)
let fail msg =
Printf.eprintf "LRijkstra: internal error: %s.\n%!" msg;
false
exit 1
let validate s s' w : bool =
let validate nt s' w : ReferenceInterpreter.target =
let open ReferenceInterpreter in
match
check_error_path (Lr1.nt_of_entry s) (W.elements w)
check_error_path nt (W.elements w)
with
| OInputReadPastEnd ->
fail "input was read past its end"
......@@ -1019,13 +1021,15 @@ let validate s s' w : bool =
fail "input was not fully consumed"
| OUnexpectedAccept ->
fail "input was unexpectedly accepted"
| OK state ->
Lr1.Node.compare state s' = 0 ||
fail (
Printf.sprintf "error occurred in state %d instead of %d"
(Lr1.number state)
(Lr1.number s')
)
| OK ((state, _) as target) ->
if Lr1.Node.compare state s' <> 0 then
fail (
Printf.sprintf "error occurred in state %d instead of %d"
(Lr1.number state)
(Lr1.number s')
)
else
target
(* ------------------------------------------------------------------------ *)
......@@ -1111,11 +1115,13 @@ end)
let explored =
ref 0
(* We wish to store a set of triples [nt, w, s'], meaning that an error can be
triggered in state [s'] by beginning in the initial state that corresponds
to [nt] and by reading the sequence of terminal symbols [w]. We wish to
store at most one such triple for every state [s'], so we organize the data
as a set [domain] of states [s'] and a list [data] of triples [nt, w, s']. *)
(* We wish to store a set of triples [nt, w, (s', spurious)], meaning that an
error can be triggered in state [s'] by beginning in the initial state that
corresponds to [nt] and by reading the sequence of terminal symbols [w]. We
wish to store at most one such triple for every state [s'], so we organize
the data as a set [domain] of states [s'] and a list [data] of triples [nt,
w, (s', spurious)]. The list [spurious] documents the spurious reductions
that are performed by the parser at the end. *)
(* We could print this data as we go, which would naturally result in sorting
the output by increasing word sizes. However, it seems preferable to sort
......@@ -1126,7 +1132,7 @@ let explored =
let domain =
ref Lr1.NodeSet.empty
let data : (Nonterminal.t * W.word * Lr1.node) list ref =
let data : (Nonterminal.t * W.word * ReferenceInterpreter.target) list ref =
ref []
(* The set [reachable] stores every reachable state (regardless of whether an
......@@ -1148,12 +1154,13 @@ let _, _ =
to this error. *)
let (s, _), ws = A.reverse path in
let w = List.fold_right W.append ws (W.singleton z) in
(* Check that the reference interpreter confirms our finding. *)
assert (validate s s' w);
(* Store this new data. *)
(* Check that the reference interpreter confirms our finding.
At the same time, compute a list of spurious reductions. *)
let nt = Lr1.nt_of_entry s in
let target = validate nt s' w in
(* Store this new data. *)
domain := Lr1.NodeSet.add s' !domain;
data := (nt, w, s') :: !data
data := (nt, w, target) :: !data
end
)
......@@ -1165,7 +1172,7 @@ let () =
let c = Nonterminal.compare nt1 nt2 in
if c <> 0 then c else W.compare w2 w1
)
|> List.map (fun (nt, w, s') -> (nt, W.elements w, s'))
|> List.map (fun (nt, w, target) -> (nt, W.elements w, target))
|> List.iter Interpret.print_messages_item
(* ------------------------------------------------------------------------ *)
......
......@@ -18,14 +18,21 @@ type message =
type run =
located_sentence or_comment list * message
(* A targeted sentence is a located sentence together with the state
into which it leads. *)
(* A targeted sentence is a located sentence together with the target into
which it leads. A target tells us which state a sentence leads to, as well
as which spurious reductions are performed at the end. *)
type target =
ReferenceInterpreter.target
let target2state (s, _spurious) =
s
type maybe_targeted_sentence =
located_sentence * Lr1.node option
located_sentence * target option
type targeted_sentence =
located_sentence * Lr1.node
located_sentence * target
(* A targeted run is a series of targeted sentences or comments together with
an error message. *)
......@@ -198,8 +205,8 @@ let interpret_error_aux poss ((_, terminals) as sentence) fail succeed =
fail "A syntax error occurs before the last token is reached."
| OUnexpectedAccept ->
fail "No syntax error occurs; in fact, this input is accepted."
| OK s' ->
succeed nt terminals s'
| OK target ->
succeed nt terminals target
(* --------------------------------------------------------------------------- *)
......@@ -214,11 +221,12 @@ let default_message =
comments. [os'] may be [None], in which case the auto-generated comment
is just a warning that this sentence does not end in an error. *)
let print_messages_auto (nt, sentence, os') : unit =
let print_messages_auto (nt, sentence, otarget) : unit =
(* Print the sentence, followed with auto-generated comments. *)
print_string (print_sentence (Some nt, sentence));
match os' with
| Some s' ->
match otarget with
| Some (s', _spurious) ->
(* TEMPORARY warn about spurious reductions *)
Printf.printf
"##\n## Ends in an error in state: %d.\n##\n%s##\n"
(Lr1.number s')
......@@ -230,13 +238,14 @@ let print_messages_auto (nt, sentence, os') : unit =
"##\n## WARNING: This sentence does NOT end with a syntax error, as it should.\n##\n"
(* [print_messages_item] displays one data item. The item is of the form [nt,
sentence, s'], which means that beginning at the start symbol [nt], the
sentence [sentence] ends in an error in state [s']. The display obeys the
[.messages] file format. *)
sentence, target], which means that beginning at the start symbol [nt], the
sentence [sentence] ends in an error in the target state given by [target].
[target] also contains information about which spurious reductions are
performed at the end. The display obeys the [.messages] file format. *)
let print_messages_item (nt, sentence, s') : unit =
let print_messages_item (nt, sentence, target) : unit =
(* Print the sentence, followed with auto-generated comments. *)
print_messages_auto (nt, sentence, Some s');
print_messages_auto (nt, sentence, Some target);
(* Then, print a proposed error message, between two blank lines. *)
Printf.printf "\n%s\n" default_message
......@@ -270,8 +279,8 @@ let write_run : maybe_targeted_run -> unit =
let fail msg =
Error.error [] msg
let succeed nt terminals s' =
print_messages_item (nt, terminals, s');
let succeed nt terminals target =
print_messages_item (nt, terminals, target);
exit 0
let interpret_error sentence =
......@@ -296,7 +305,7 @@ let target_sentence signal : located_sentence -> maybe_targeted_sentence =
None
)
(* success: *)
(fun _nt _terminals s' -> Some s')
(fun _nt _terminals target -> Some target)
let target_run_1 signal : run -> maybe_targeted_run =
fun (sentences, message) ->
......@@ -425,7 +434,8 @@ let message_table (detect_redundancy : bool) (runs : filtered_targeted_run list)
let table =
List.fold_left (fun table (sentences_and_states, message) ->
List.fold_left (fun table (sentence2, s) ->
List.fold_left (fun table (sentence2, target) ->
let s = target2state target in
match Lr1.NodeMap.find s table with
| sentence1, _ ->
if detect_redundancy then
......@@ -466,7 +476,8 @@ let compile_runs filename (runs : filtered_targeted_run list) : unit =
let branches =
List.fold_left (fun branches (sentences_and_states, message) ->
(* Create an or-pattern for these states. *)
let states = List.map (fun (_, s) ->
let states = List.map (fun (_, target) ->
let s = target2state target in
pint (Lr1.number s)
) sentences_and_states in
(* Map all these states to this message. *)
......
......@@ -9,11 +9,13 @@
val default_message: string
(* [print_messages_item] displays one data item. The item is of the form [nt,
sentence, s'], which means that beginning at the start symbol [nt], the
sentence [sentence] ends in an error in state [s']. The display obeys the
[.messages] file format. *)
sentence, target], which means that beginning at the start symbol [nt], the
sentence [sentence] ends in an error in the target state given by [target].
[target] also contains information about which spurious reductions are
performed at the end. The display obeys the [.messages] file format. *)
open Grammar
val print_messages_item: Nonterminal.t * Terminal.t list * Lr1.node -> unit
val print_messages_item:
Nonterminal.t * Terminal.t list * ReferenceInterpreter.target -> unit
......@@ -236,6 +236,12 @@ let interpret log nt lexer lexbuf =
open MenhirLib.General (* streams *)
type spurious_reductions =
Production.index list
type target =
Lr1.node * spurious_reductions
type check_error_path_outcome =
(* Bad: the input was read past its end. *)
| OInputReadPastEnd
......@@ -243,8 +249,13 @@ type check_error_path_outcome =
| OInputNotFullyConsumed
(* Bad: the parser unexpectedly accepted (part of) this input. *)
| OUnexpectedAccept
(* Good: a syntax error occurred after reading the last input token. *)
| OK of Lr1.node
(* Good: a syntax error occurred after reading the last input token. We
report in which state the error took place, as well as a list of spurious
reductions. A spurious reduction is a non-default reduction that takes
place after looking at the last input token -- the erroneous token.
We note that a spurious reduction can happen only in a non-canonical
LR automaton. *)
| OK of target
let check_error_path nt input =
......@@ -269,39 +280,57 @@ let check_error_path nt input =
Some t
in
let looking_at_last_token () : bool =
!input = []
in
(* Run it. We wish to stop at the first error (without handling the error
in any way) and report in which state the error occurred. A clean way
of doing this is to use the incremental API, as follows. The main loop
resembles the [loop] function in [Engine]. *)
(* Another reason why we write our own loop is that we wish to detect
spurious reductions. We accumulate these reductions in [spurious], a
(reversed) list of productions. *)
let entry = Lr1.entry_of_nt nt in
let rec loop (result : cst E.result) =
let rec loop (result : cst E.result) (spurious : Production.index list) =
match result with
| E.InputNeeded _ ->
begin match next() with
| None ->
OInputReadPastEnd
| Some t ->
let dummy = Lexing.dummy_pos in
loop (E.offer result (t, dummy, dummy))
end
| E.Shifting _
| E.AboutToReduce _ ->
loop (E.resume result)
begin match next() with
| None ->
OInputReadPastEnd
| Some t ->
let dummy = Lexing.dummy_pos in
loop (E.offer result (t, dummy, dummy)) spurious
end
| E.Shifting _ ->
loop (E.resume result) spurious
| E.AboutToReduce (env, prod) ->
(* If we have requested the last input token and if this is not
a default reduction, then this is a spurious reduction. *)
let spurious =
if looking_at_last_token() && not (E.has_default_reduction env) then
prod :: spurious
else
spurious
in
loop (E.resume result) spurious
| E.HandlingError env ->
(* Check that all of the input has been read. Otherwise, the error
has occurred sooner than expected. *)
if !input = [] then
(* Return the current state. This is done by peeking at the stack.
If the stack is empty, then we must be in the initial state. *)
OK (
let s =
match Lazy.force (E.stack env) with
| Nil ->
entry
| Cons (E.Element (s, _, _, _), _) ->
s
)
in
OK (s, List.rev spurious)
else
OInputNotFullyConsumed
| E.Accepted _ ->
......@@ -313,5 +342,5 @@ let check_error_path nt input =
assert false
in
loop (E.start entry)
loop (E.start entry) []
......@@ -21,7 +21,14 @@ val interpret:
(* This variant of the reference interpreter is used internally by us. We use
it to debug [LRijkstra]. It checks that a sentence leads to a syntax error
in the expected state. *)
in the expected state. It is also used by several of the command line
options [--interpret-error], [--compile-errors], etc. *)
type spurious_reductions =
Production.index list
type target =
Lr1.node * spurious_reductions
type check_error_path_outcome =
(* Bad: the input was read past its end. *)
......@@ -30,8 +37,13 @@ type check_error_path_outcome =
| OInputNotFullyConsumed
(* Bad: the parser unexpectedly accepted (part of) this input. *)
| OUnexpectedAccept
(* Good: a syntax error occurred after reading the last input token. *)
| OK of Lr1.node
(* Good: a syntax error occurred after reading the last input token. We
report in which state the error took place, as well as a list of spurious
reductions. A spurious reduction is a non-default reduction that takes
place after looking at the last input token -- the erroneous token.
We note that a spurious reduction can happen only in a non-canonical
LR automaton. *)
| OK of target
val check_error_path:
Nonterminal.t -> (* initial non-terminal symbol *)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment