Commit 09ea074c authored by POTTIER Francois

Merge branch 'canonical' into merr

parents 53e2c164 d1f461c9
2015/09/25:
Modified the meaning of --canonical to allow default reductions to take
place. This implies no loss of precision in terms of lookahead sets,
and should allow gaining more contextual information when a syntax
error is encountered. (It should also lead to a smaller automaton.)
2015/09/23:
New option --list-errors, which produces a list of input sentences which
are representative of all possible syntax errors. (Costly.)
......
......@@ -9,7 +9,9 @@
correlate with star size and alphabet size, etc.
create separate graphs for 3 modes: --lalr, pager, --canonical
In Menhir's bootstrapped parser:
use a canonical automaton?
clean up parserMessages.messages for the canonical automaton
Improve --interpret-error by printing not just the state number
but also its description (items)
* When dealing with errors, should we back up to the last shift action,
undoing any non-canonical reduce actions? if so, a lot of code is
......
File "nonsense.mly", line 1, characters 47-50:
Error: Ill-formed rule.
Either a list of formal parameters or a colon is expected at this point.
Either a parenthesized, comma-delimited list of formal parameters
or a colon is expected at this point.
Examples of well-formed rules:
main: e = expr EOL { e }
expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 }
......
File "rule-error.mly", line 11, characters 0-3:
Error: Ill-formed production.
A production is a sequence of producers, followed with a semantic action.
Examples of well-formed producers:
expr
option(COMMA)
separated_list(COMMA, expr)
e = expr
ds = declaration*
es = list(terminated(expr, SEMI))
es = list(e = expr SEMI { e })
xs = list(x = var { Some x } | WILDCARD { None })
Error: Either another production | ...
or a comma or a closing parenthesis
is expected at this point.
......@@ -63,7 +63,7 @@ stage1:
# descends into another directory when executing commands.
SRC := $(shell pwd)
FLAGS := -v -lg 1 -la 1 -lc 1 --table --infer --stdlib $(SRC) --strict --fixed-exception
FLAGS := -v -lg 1 -la 1 -lc 1 --table --infer --stdlib $(SRC) --strict --fixed-exception --canonical
stage2:
@$(OCAMLBUILD) -build-dir _stage2 -tag fancy_parser \
......
......@@ -206,7 +206,7 @@ optional_bar:
production_group:
productions = separated_nonempty_list(BAR, production)
action = ACTION
oprec2 = precedence?
oprec2 = ioption(precedence)
{
(* If multiple productions share a single semantic action, check
that all of them bind the same names. *)
......@@ -238,7 +238,7 @@ production_group:
precedence declaration. */
production:
producers = producer* oprec = precedence?
producers = producer* oprec = ioption(precedence)
{ producers,
oprec,
ParserAux.current_reduce_precedence(),
......
......@@ -765,11 +765,14 @@ let errorpeeker node =
not do anything about it for the moment. (Furthermore, someone who
uses precedence declarations is looking for trouble anyway.)
20120525: if [--canonical] has been specified, then we disallow
default reductions on a normal token, because we do not want to
introduce any spurious actions into the automaton. We do still
allow default reductions on "#", since they are needed for the
automaton to terminate properly. *)
Between 2012/05/25 and 2015/09/25, if [--canonical] was specified, then
we disallowed default reductions on a normal token, because we did not
want to introduce any spurious actions into the automaton. We still
allowed default reductions on "#", since they are needed for the automaton
to terminate properly. From 2015/09/25 on, we again always allow default
reductions, as they seem to be beneficial when explaining syntax errors. *)
(**)
let (has_default_reduction : Lr1.node -> (Production.index * TerminalSet.t) option), hdrcount =
Misc.tabulateo Lr1.number Lr1.fold Lr1.n (fun s ->
......@@ -778,27 +781,14 @@ let (has_default_reduction : Lr1.node -> (Production.index * TerminalSet.t) opti
None
else
match ProductionMap.is_singleton (Lr1.invert (Lr1.reductions s)) with
| Some (_, toks) as reduction
when SymbolMap.purelynonterminal (Lr1.transitions s) ->
if TerminalSet.mem Terminal.sharp toks then
(* Perform default reduction on "#". *)
reduction
else begin
(* Perform default reduction, unless [--canonical] has been specified. *)
match Settings.construction_mode with
| Settings.ModeCanonical ->
None
| Settings.ModeInclusionOnly
| Settings.ModePager
| Settings.ModeLALR ->
reduction
end
| Some _
let reduction = ProductionMap.is_singleton (Lr1.invert (Lr1.reductions s)) in
match reduction with
| Some _ ->
if SymbolMap.purelynonterminal (Lr1.transitions s)
then reduction
else None
| None ->
None
reduction
)
......
......@@ -6,14 +6,20 @@ Either a declaration or %% is expected at this point.
# ----------------------------------------------------------------------------
HEADER UID
Either another declaration or %% is expected at this point.
# ----------------------------------------------------------------------------
TYPE UID
TYPE OCAMLTYPE TYPE
TYPE OCAMLTYPE UID PREC
TYPE OCAMLTYPE UID RPAREN
TYPE OCAMLTYPE UID LPAREN TYPE
TYPE OCAMLTYPE UID COMMA TYPE
TYPE OCAMLTYPE UID LPAREN UID UID
TYPE OCAMLTYPE UID LPAREN UID COMMA TYPE
TYPE OCAMLTYPE UID PLUS RPAREN
Ill-formed %type declaration.
Examples of well-formed declarations:
......@@ -71,15 +77,15 @@ Examples of well-formed declarations:
# ----------------------------------------------------------------------------
HEADER UID
PERCENTPERCENT TYPE
Either another declaration or %% is expected at this point.
Either a rule or %% is expected at this point.
# ----------------------------------------------------------------------------
PERCENTPERCENT TYPE
PERCENTPERCENT UID COLON ACTION TYPE
Either a rule or %% is expected at this point.
Either another rule or %% is expected at this point.
# ----------------------------------------------------------------------------
......@@ -99,7 +105,8 @@ Examples of well-formed rules:
PERCENTPERCENT UID UID
Ill-formed rule.
Either a list of formal parameters or a colon is expected at this point.
Either a parenthesized, comma-delimited list of formal parameters
or a colon is expected at this point.
Examples of well-formed rules:
main: e = expr EOL { e }
expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 }
......@@ -133,39 +140,39 @@ Examples of well-formed rules:
# ----------------------------------------------------------------------------
PERCENTPERCENT UID COLON ACTION RPAREN
PERCENTPERCENT UID COLON ACTION TYPE
# These sentences are distinct in the non-canonical automaton,
# but lead to the same state in the canonical automaton.
Either another rule or %% is expected at this point.
# ----------------------------------------------------------------------------
PERCENTPERCENT UID COLON UID TYPE
PERCENTPERCENT UID COLON LID TYPE
PERCENTPERCENT UID COLON LID EQUAL TYPE
PERCENTPERCENT UID COLON UID PLUS TYPE
PERCENTPERCENT UID COLON UID LPAREN UID TYPE
# Above: this sentence seems tricky. All we know is we have just read a
# symbol, and in front of us could be many things (comma, closing parenthesis,
# identifier, modifier, %prec keyword, etc.). We expect to reduce this to
# actual or lax_actual. Let's just back up to a safe level of abstraction and
# say this is an ill-formed production.
PERCENTPERCENT UID COLON UID LPAREN ACTION UID
# Above: another tricky case. Looking at the description of this state, it seems that
# only COMMA and RPAREN can follow here. But in fact, other tokens are
# possible, such as BAR, simply because they will NOT take us into this state.
# This is a manifestation of the fact that non-canonical automata ``reduce too
# far'' and can enter a state that they should not enter, before they detect
# the error. Again, let's back up to a safe (?) level of abstraction.
PERCENTPERCENT UID COLON LID EQUAL UID PLUS TYPE
Ill-formed production.
A production is a sequence of producers, followed with a semantic action.
Examples of well-formed producers:
expr
option(COMMA)
separated_list(COMMA, expr)
e = expr
ds = declaration*
es = list(terminated(expr, SEMI))
es = list(e = expr SEMI { e })
xs = list(x = var { Some x } | WILDCARD { None })
# The following sentences are similar, but have an open parenthesis.
# Suggesting that a parenthesis could be closed seems a safe bet. These
# sentences are otherwise tricky. In front of us could be many things (comma,
# closing parenthesis, identifier, modifier, %prec keyword, etc.). We expect
# to reduce to actual or lax_actual. Let's just back up to a safe level of
# abstraction and say this is an ill-formed production.
PERCENTPERCENT UID COLON UID LPAREN UID TYPE
PERCENTPERCENT UID COLON UID LPAREN UID STAR TYPE
PERCENTPERCENT UID COLON UID LPAREN UID LPAREN LID RPAREN TYPE
# Above two sentences: again cases where we have two different possible contexts,
# with different expectations as to what comes next. Again, let's back up.
PERCENTPERCENT UID COLON UID LPAREN LID TYPE
PERCENTPERCENT UID COLON UID LPAREN ACTION BAR TYPE
Ill-formed production.
Maybe you meant to close a parenthesis at this point?
A production is a sequence of producers, followed with a semantic action.
Examples of well-formed producers:
expr
......@@ -179,7 +186,27 @@ Examples of well-formed producers:
# ----------------------------------------------------------------------------
PERCENTPERCENT UID COLON UID LPAREN ACTION UID
# In the non-canonical automaton, this is a tricky case where, looking at the
# description of the state, it seems that only COMMA and RPAREN can follow
# here. But in fact, other tokens are possible, such as BAR, simply because
# they will NOT take us into this state. In the canonical automaton, the list
# of possibilities is explicit in the lookahead sets.
PERCENTPERCENT UID COLON UID LPAREN ACTION PREC UID UID
# In the first case above, we may expect a %prec annotation, whereas in the
# second case above, we have just seen it. In the error message, we merge
# these two situations and do not mention the possibility of a %prec
# annotation.
Either another production | ...
or a comma or a closing parenthesis
is expected at this point.
# ----------------------------------------------------------------------------
PERCENTPERCENT UID COLON PREC TYPE
PERCENTPERCENT UID COLON UID LPAREN ACTION PREC TYPE
PERCENTPERCENT UID COLON ACTION PREC TYPE
Ill-formed %prec annotation.
A symbol is expected at this point.
......@@ -224,4 +251,35 @@ PERCENTPERCENT UID COLON ACTION PREC UID TYPE
Either another rule
or another production | ...
is expected at this point.
Examples of well-formed rules:
option(X): { None } | x = X { Some x }
# ----------------------------------------------------------------------------
TYPE OCAMLTYPE UID LPAREN UID LPAREN TYPE
PERCENTPERCENT UID COLON UID LPAREN UID LPAREN TYPE
Ill-formed list of actual parameters.
A comma-delimited list of actual parameters is expected at this point.
Examples of well-formed actual parameters:
expr
expr+
option(expr)
separated_list(COMMA, expr)
# Omitting the fact that an anonymous rule is a valid actual parameter...
# Also omitting the subtle distinctions between lax_actual, actual, etc.
# ----------------------------------------------------------------------------
TYPE OCAMLTYPE UID LPAREN UID PLUS UID
Ill-formed list of actual parameters.
A modifier, a closing parenthesis, or a comma is expected at this point.
A modifier is * or + or ?.
Examples of well-formed actual parameters:
expr
expr+
option(expr)
separated_list(COMMA, expr)
......@@ -28,9 +28,8 @@ let version =
type construction_mode =
| ModeCanonical (* --canonical: canonical Knuth LR(1) automaton *)
| ModeInclusionOnly (* --no-pager : states are merged when there is an inclusion
relationship, default reductions are used *)
| ModePager (* normal mode: states are merged as per Pager's criterion,
default reductions are used *)
relationship *)
| ModePager (* normal mode: states are merged as per Pager's criterion *)
| ModeLALR (* --lalr : states are merged as in an LALR generator,
i.e. as soon as they have the same LR(0) core *)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment