Commit 78a9fdc5 authored by POTTIER Francois's avatar POTTIER Francois

Modify [ErrorReporting] to include positions as part of explanations.

Modify [Engine] to give access to the positions of the lookahead token. Use a tainted dummy position to help debug.
parent 385ae35f
(* An explanation is a description of what the parser has recognized in the
recent past and what it expects next. The type is parameterized over the
type ['item] of items and the type ['symbol] of symbols. *)
type ('item, 'symbol) explanation = {
(* An explanation is based on an item. *)
item: 'item;
(* A past. This is a non-empty sequence of (terminal and non-terminal)
symbols, each of which corresponds to a range of the input file. Each
symbol is paired with the two positions that delimit its range. These
symbols correspond to the first half (up to the bullet) of the item's
right-hand side. In short, they represent what we have recognized in
the recent past. *)
past: ('symbol * Lexing.position * Lexing.position) list;
(* A future. This is a non-empty sequence of (terminal and non-terminal)
symbols. These symbols correspond to the second half (after the bullet)
of the item's right-hand side. In short, they represent what we expect
to recognize in the future, if this item is a good prediction. *)
future: 'symbol list;
(* A goal. This is a non-terminal symbol. It corresponds to the item's
left-hand side. In short, it represents the reduction that we will
be able to perform if we successfully recognize this future. *)
goal: 'symbol
}
module Make
(I : IncrementalEngine.EVERYTHING)
(User : sig
......@@ -50,34 +78,6 @@ module Make
we would have to reduce before we can shift [t].) *)
index < length && xfirst (List.nth rhs index) t
(* An explanation is a description of what the parser has recognized in the
recent past and what it expects next. The type is parameterized over the
type ['symbol] of symbols. *)
type 'symbol explanation = {
(* An explanation is based on an item. *)
item: item;
(* A past. This is a non-empty sequence of (terminal and non-terminal)
symbols, each of which corresponds to a range of the input file. Each
symbol is paired with the two positions that delimit its range. These
symbols correspond to the first half (up to the bullet) of the item's
right-hand side. In short, they represent what we have recognized in
the recent past. *)
past: ('symbol * Lexing.position * Lexing.position) list;
(* A future. This is a non-empty sequence of (terminal and non-terminal)
symbols. These symbols correspond to the second half (after the bullet)
of the item's right-hand side. In short, they represent what we expect
to recognize in the future, if this item is a good prediction. *)
future: 'symbol list;
(* A goal. This is a non-terminal symbol. It corresponds to the item's
left-hand side. In short, it represents the reduction that we will
be able to perform if we successfully recognize this future. *)
goal: 'symbol
}
let compare_explanations x1 x2 =
let c = compare_items x1.item x2.item in
(* TEMPORARY checking that if [c] is 0 then the positions are the same *)
......@@ -89,10 +89,7 @@ module Make
);
c
(* We build lists of explanations. These explanations may originate in
distinct LR(1) states. *)
(* [marry past stack] TEMPORARY *)
(* [marry past stack] TEMPORARY comment *)
let rec marry past stack =
match past, stack with
......@@ -108,8 +105,8 @@ module Make
offering the terminal symbol [t] to the parser. It runs the parser,
through an arbitrary number of reductions, until the parser either
accepts this token (i.e., shifts) or rejects it (i.e., signals an
error). If the parser decides to shift, then the shift items found in the
LR(1) state before the shift are used to produce new explanations. *)
error). If the parser decides to shift, then the shift items found
in the LR(1) state before the shift are used to produce new explanations. *)
(* It is desirable that the semantic actions be side-effect free, or
that their side-effects be harmless (replayable). *)
......@@ -152,18 +149,21 @@ module Make
it can request another token or terminate. *)
assert false
(* [investigate result] assumes that [result] is of the form [InputNeeded _].
For every terminal symbol [t], it investigates how the parser reacts when
fed the symbol [t], and returns a list of explanations. *)
(* [investigate pos result] assumes that [result] is of the form
[InputNeeded _]. For every terminal symbol [t], it investigates
how the parser reacts when fed the symbol [t], and returns a list
of explanations. The position [pos] is where a syntax error was
detected; it is used when manufacturing dummy tokens. This is
important because the position of the dummy token may end up in
the explanations that we produce. *)
let investigate (result : _ result) =
let investigate pos (result : _ result) =
weed compare_explanations (
foreach_terminal_but_error (fun symbol explanations ->
match symbol with
| X (N _) -> assert false
| X (T t) ->
(* Build a dummy token for the terminal symbol [t]. *)
let pos = Lexing.dummy_pos in
let token = (terminal2token t, pos, pos) in
(* Submit it to the parser. Accumulate explanations. *)
investigate t (offer result token) explanations
......@@ -188,7 +188,7 @@ module Make
this state and analyzes it in order to produce a meaningful
diagnostic. *)
exception Error of xsymbol explanation list
exception Error of (Lexing.position * Lexing.position) * (item, xsymbol) explanation list
(* TEMPORARY why loop-style? we should offer a simplified incremental API *)
......@@ -209,10 +209,12 @@ module Make
| AboutToReduce _ ->
let current = resume current in
loop read { checkpoint; current }
| HandlingError _ ->
(* The parser signals a syntax error. Go back to the checkpoint
and investigate. *)
raise (Error (investigate checkpoint))
| HandlingError env ->
(* The parser signals a syntax error. Note the position of the
problematic token, which is useful. Then, go back to the
checkpoint and investigate. *)
let (startp, _) as positions = positions env in
raise (Error (positions, investigate startp checkpoint))
| Accepted v ->
v
| Rejected ->
......
(* This module is part of MenhirLib. *)
(* An explanation is a description of what the parser has recognized in the
recent past and what it expects next. The type is parameterized over the
type ['item] of items and the type ['symbol] of symbols. *)
type ('item, 'symbol) explanation = {
(* An explanation is based on an item. *)
item: 'item;
(* A past. This is a non-empty sequence of (terminal and non-terminal)
symbols, each of which corresponds to a range of the input file. Each
symbol is paired with the two positions that delimit its range. These
symbols correspond to the first half (up to the bullet) of the item's
right-hand side. In short, they represent what we have recognized in
the recent past. *)
past: ('symbol * Lexing.position * Lexing.position) list;
(* A future. This is a non-empty sequence of (terminal and non-terminal)
symbols. These symbols correspond to the second half (after the bullet)
of the item's right-hand side. In short, they represent what we expect
to recognize in the future, if this item is a good prediction. *)
future: 'symbol list;
(* A goal. This is a non-terminal symbol. It corresponds to the item's
left-hand side. In short, it represents the reduction that we will
be able to perform if we successfully recognize this future. *)
goal: 'symbol
}
module Make
(I : IncrementalEngine.EVERYTHING)
(User : sig
......@@ -16,34 +44,6 @@ module Make
open I
(* An explanation is a description of what the parser has recognized in the
recent past and what it expects next. The type is parameterized over the
type ['symbol] of symbols. *)
type 'symbol explanation = {
(* An explanation is based on an item. *)
item: item;
(* A past. This is a non-empty sequence of (terminal and non-terminal)
symbols, each of which corresponds to a range of the input file. Each
symbol is paired with the two positions that delimit its range. These
symbols correspond to the first half (up to the bullet) of the item's
right-hand side. In short, they represent what we have recognized in
the recent past. *)
past: ('symbol * Lexing.position * Lexing.position) list;
(* A future. This is a non-empty sequence of (terminal and non-terminal)
symbols. These symbols correspond to the second half (after the bullet)
of the item's right-hand side. In short, they represent what we expect
to recognize in the future, if this item is a good prediction. *)
future: 'symbol list;
(* A goal. This is a non-terminal symbol. It corresponds to the item's
left-hand side. In short, it represents the reduction that we will
be able to perform if we successfully recognize this future. *)
goal: 'symbol
}
(* We build lists of explanations. These explanations may originate in
distinct LR(1) states. *)
......@@ -54,7 +54,7 @@ module Make
(* TEMPORARY *)
exception Error of xsymbol explanation list
exception Error of (Lexing.position * Lexing.position) * (item, xsymbol) explanation list
val entry: 'a I.result -> (Lexing.lexbuf -> token) -> Lexing.lexbuf -> 'a
......
......@@ -86,13 +86,20 @@ module type INCREMENTAL_ENGINE = sig
type stack =
element stream
(* The parser's stack, a stream of elements, can be examined. This stream is
empty if the parser is in an initial state; otherwise, it is non-empty.
The LR(1) automaton's current state is the one found in the top element
of the stack. *)
(* This is the parser's stack, a stream of elements. This stream is empty if
the parser is in an initial state; otherwise, it is non-empty. The LR(1)
automaton's current state is the one found in the top element of the
stack. *)
val stack: env -> stack
(* These are the start and end positions of the current lookahead token. It
is legal to invoke this function only after at least one token has been
offered to the parser via [offer]. In other words, it is illegal to
invoke it in an initial state. *)
val positions: env -> Lexing.position * Lexing.position
end
(* This signature is a fragment of the inspection API that is made available
......
......@@ -7,6 +7,16 @@ open EngineTypes
- at compile time, if so requested by the user, via the --interpret options;
- at run time, in the table-based back-end. *)
(* A tainted dummy position. In principle, it should never be exposed.
   Its file name and its negative character offset set it apart from
   [Lexing.dummy_pos], which helps debugging if it does leak. *)
let dummy_pos : Lexing.position =
  {
    Lexing.pos_cnum = -1;
    pos_bol = 0;
    pos_lnum = 0;
    pos_fname = "<MenhirLib.Engine>";
  }
module Make (T : TABLE) = struct
(* This propagates type and exception definitions. *)
......@@ -352,8 +362,8 @@ module Make (T : TABLE) = struct
let rec empty = {
state = s; (* dummy *)
semv = T.error_value; (* dummy *)
startp = Lexing.dummy_pos; (* dummy *)
endp = Lexing.dummy_pos; (* dummy *)
startp = dummy_pos; (* dummy *)
endp = dummy_pos; (* dummy *)
next = empty;
} in
......@@ -369,7 +379,7 @@ module Make (T : TABLE) = struct
let dummy_token = Obj.magic () in
let env = {
error = false;
triple = (dummy_token, Lexing.dummy_pos, Lexing.dummy_pos); (* dummy *)
triple = (dummy_token, dummy_pos, dummy_pos); (* dummy *)
stack = empty;
current = s;
} in
......@@ -552,5 +562,17 @@ module Make (T : TABLE) = struct
(* [stack env] exposes the parser's stack, as recorded in the environment
   [env], in the form of a stream of elements. This definition is not
   recursive: the [stack] applied in the body is a [stack] that is already
   in scope, which this definition shadows. It receives the [stack] and
   [current] fields of the environment. *)
let stack env : element stream =
stack env.stack env.current
(* --------------------------------------------------------------------------- *)
(* Access to the position of the lookahead token. *)
let positions env =
  (* [positions env] returns the start and end positions stored alongside
     the lookahead token in the [triple] field of [env]. *)
  let (_, startp, endp) = env.triple in
  (* In principle, once the lexer has been called at least once, neither
     position can be our dummy position. The dummy position risks exposure
     only in the very initial state, as produced by [start s] above; we
     declare this situation illegal. The test uses physical equality on
     purpose: we are looking for the [dummy_pos] value itself. *)
  assert (not (startp == dummy_pos || endp == dummy_pos));
  (startp, endp)
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment