Commit 9511cd9b authored by POTTIER Francois's avatar POTTIER Francois

Improve the cycle detection code and add detection of hidden left recursion.

parent f6cd9aee
......@@ -42,23 +42,42 @@ module Run () = struct
check that a grammar is cycle-free, it suffices to check that the relation
R is acyclic. *)
(* Here is the relation R: *)
let successors (yield : Nonterminal.t -> unit) (nt : Nonterminal.t) : unit =
(* The relation R is defined as follows. Upon first reading, take
[require_nullable_suffix] to be [true] and
[require_nonempty_prefix] to be [false]. *)
let nullable_suffix prod i =
let nullable, _ = Analysis.nullable_first_prod prod i in
nullable
let successors
~require_nullable_suffix
~require_nonempty_prefix
(yield : Production.index -> Nonterminal.t -> unit)
(nt : Nonterminal.t) : unit
=
Production.iternt nt begin fun prod ->
let rhs = Production.rhs prod in
let n = Array.length rhs in
let nullable_prefix = ref true in
Production.rhs prod |> Array.iteri begin fun i symbol ->
match symbol with
let i = ref 0 in
while !nullable_prefix && !i < n do
match rhs.(Misc.postincrement i) with
| Symbol.T _ ->
nullable_prefix := false
| Symbol.N nt' ->
let nullable_suffix, _ = Analysis.nullable_first_prod prod (i + 1) in
if !nullable_prefix && nullable_suffix then
yield nt';
nullable_prefix := !nullable_prefix && Analysis.nullable nt'
end
if (not require_nullable_suffix || nullable_suffix prod !i)
&& (not require_nonempty_prefix || !i > 1) then
yield prod nt';
nullable_prefix := Analysis.nullable nt'
done
end
(* This adapter hides [prod] from the user function [yield]. *)
let adapt successors yield nt =
successors (fun _prod nt' -> yield nt') nt
(* A detailed explanation of cycles whose length is greater than one. *)
let show_cycle nts nt =
......@@ -89,16 +108,20 @@ let fail nts nt =
(* To detect a cycle in a relation, we use the combinator [defensive_fix] that
is provided by the library Fix. We define a function of type [Nonterminal.t
-> unit] that computes nothing but calls itself recursively according to the
pattern defined by the function [successors] above. Then, we evaluate this
function everywhere. If there is a cycle, it is detected and reported. *)
-> unit] that computes nothing but calls itself recursively according to
the pattern defined by the relation R. Then, we evaluate this function
everywhere. If there is a cycle, it is detected and reported. *)
(* The claim that "a cyclic grammar is ambiguous" implicitly assumes that
every nonterminal symbol is reachable and inhabited. *)
let () =
let module M = Fix.Memoize.ForType(Nonterminal) in
let check = M.defensive_fix successors in
let successors_R =
successors ~require_nullable_suffix:true ~require_nonempty_prefix:false
|> adapt
in
let check = M.defensive_fix successors_R in
try
Nonterminal.iter check
with M.Cycle (nts, nt) ->
......@@ -106,4 +129,82 @@ let () =
(* -------------------------------------------------------------------------- *)
(* Another anomaly that we wish to detect is hidden left recursion. In the
paper "Looping LR parsers" (1988), Soisalon-Soininen and Tarhio define
hidden left recursion (although they do not explicitly use this term)
and point out that: 1- a grammar that has hidden left recursion cannot
be LR(k) for any k, and (worse) 2- if the shift/reduce conflict that it
causes is resolved in favor in reduction, then the deterministic parser
that is constructed can diverge by entering an infinite sequence of
reductions. Conversely, they show if a grammar exhibits no cycle and
no hidden left recursion, then the parser must terminate, regardless of
how conflicts are resolved. *)
(* One possible definition of hidden left recursion, given by Nederhof and
Sarbo in the paper "Increasing the Applicability of LR Parsing" (1993), is
the existence of a production A -> B alpha where B is nullable and alpha
expands (in zero or more steps) to A beta. *)
(* Let us define a relation S as follows. A S B holds if and only if there is
a production A -> alpha B beta where alpha is nullable. This relation can
be viewed as the disjoint union of two smaller relations L and H, defined
as follows:
- A L B holds if and only if there is a production A -> B beta;
- A H B holds if and only if there is a production A -> alpha B beta
where alpha is nullable but is not epsilon.
A cycle in the relation L is fine: it represents ordinary left recursion.
A cycle that involves at least one H edge and any number of L and H edges,
however, denotes hidden left recursion. *)
(* An error message. *)
let fail prod =
let nt, rhs = Production.def prod in
let positions = Production.positions prod in
Error.error positions
"the grammar exhibits hidden left recursion: in the production\n\
%s,\n\
the nonterminal symbol %s is nullable,\n\
and the remainder of the right-hand side expands to a sentential form\n\
that begins with the nonterminal symbol %s.\n\
This implies that the grammar is not LR(k) for any k."
(Production.print prod)
(Symbol.print rhs.(0))
(Symbol.print (Symbol.N nt))
(* Furthermore, this creates a shift/reduce conflict, which, if it were
resolved in favor of reduction, would cause the parser to diverge. *)
(* To detect hidden left recursion in linear time, we first compute the
strongly connected components of the relation S. Then, we check every edge
in the relation H. If the source and destination vertices of this edge lie
in the same component, then we have detected hidden left recursion. *)
let () =
let module T = Tarjan.Run (struct
type node = Nonterminal.t
let n = Nonterminal.n
let index = Nonterminal.n2i
let iter = Nonterminal.iter
(* The relation S is computed as follows. *)
let successors =
successors ~require_nullable_suffix:false ~require_nonempty_prefix:false
|> adapt
end) in
(* The relation H is computed as follows. *)
let successors_H =
successors ~require_nullable_suffix:false ~require_nonempty_prefix:true in
(* Iterate on every edge in the relation H. *)
Nonterminal.iter begin fun nt ->
nt |> successors_H begin fun prod nt' ->
(* If the source vertex [nt] and the destination vertex [nt'] lie in
the same component, then we have detected hidden left recursion. *)
if T.representative nt = T.representative nt' then
fail prod
end
end
(* -------------------------------------------------------------------------- *)
end (* Run *)
......@@ -356,6 +356,24 @@
(rule (alias forbidden-pos)
(action (diff forbidden-pos.exp forbidden-pos.out)))
(rule (target hidden-left-recursion-one.out)
(deps hidden-left-recursion-one.mly)
(action
(with-outputs-to hidden-left-recursion-one.out
(with-accepted-exit-codes (not 0) (run menhir %{deps})))))
(rule (alias hidden-left-recursion-one)
(action (diff hidden-left-recursion-one.exp hidden-left-recursion-one.out)))
(rule (target hidden-left-recursion-two.out)
(deps hidden-left-recursion-two.mly)
(action
(with-outputs-to hidden-left-recursion-two.out
(with-accepted-exit-codes (not 0) (run menhir %{deps})))))
(rule (alias hidden-left-recursion-two)
(action (diff hidden-left-recursion-two.exp hidden-left-recursion-two.out)))
(rule (target ill-formed-attribute.out) (deps ill-formed-attribute.mly)
(action
(with-outputs-to ill-formed-attribute.out
......@@ -1461,6 +1479,8 @@
(alias forbidden-dollar)
(alias forbidden-dollar-bis)
(alias forbidden-pos)
(alias hidden-left-recursion-one)
(alias hidden-left-recursion-two)
(alias ill-formed-attribute)
(alias ill-formed-prec)
(alias ill-sorted-attribute)
......
File "hidden-left-recursion-one.mly", line 25, characters 2-7:
Error: the grammar exhibits hidden left recursion: in the production
s -> a s C,
the nonterminal symbol a is nullable,
and the remainder of the right-hand side expands to a sentential form
that begins with the nonterminal symbol s.
This implies that the grammar is not LR(k) for any k.
File "hidden-left-recursion-two.mly", line 11, characters 2-6:
Error: the grammar exhibits hidden left recursion: in the production
b -> option(A) a,
the nonterminal symbol option(A) is nullable,
and the remainder of the right-hand side expands to a sentential form
that begins with the nonterminal symbol b.
This implies that the grammar is not LR(k) for any k.
%token A B C
%start<unit> a
%%
a:
A* b C {}
b:
B C
| A? a {}
......@@ -6535,32 +6535,6 @@
(diff logtk.0.8.1-parse_theory.conflicts.exp
logtk.0.8.1-parse_theory.conflicts)))
(rule (target loop.opp.out) (deps loop.mly)
(action
(with-outputs-to loop.opp.out
(with-accepted-exit-codes 0 (run menhir --only-preprocess %{deps})))))
(rule (alias loop) (action (diff loop.opp.exp loop.opp.out)))
(rule
(targets loop.out loop.automaton loop.automaton.resolved loop.conflicts
loop.timings)
(deps loop.mly)
(action
(with-outputs-to loop.out
(with-accepted-exit-codes 0
(run menhir --dump --dump-resolved --explain -lg 2 -la 2 -lc 2
--timings-to loop.timings %{deps})))))
(rule (alias loop) (action (diff loop.exp loop.out)))
(rule (alias loop) (action (diff loop.automaton.exp loop.automaton)))
(rule (alias loop)
(action (diff loop.automaton.resolved.exp loop.automaton.resolved)))
(rule (alias loop) (action (diff loop.conflicts.exp loop.conflicts)))
(rule (target lr-but-not-lalr.opp.out) (deps lr-but-not-lalr.mly)
(action
(with-outputs-to lr-but-not-lalr.opp.out
......@@ -12358,7 +12332,6 @@
(alias llparse)
(alias logic_parser)
(alias logtk.0.8.1-parse_theory)
(alias loop)
(alias lr-but-not-lalr)
(alias ltlparser)
(alias lustre-v6.1.737-lv6parser)
......
State 0:
## Known stack suffix:
##
## LR(1) items:
phrase' -> . phrase [ # ]
## Transitions:
-- On s shift to state 1
-- On phrase shift to state 3
-- On a shift to state 4
## Reductions:
-- On B
-- reduce production a ->
State 1:
## Known stack suffix:
## s
## LR(1) items:
phrase -> s . EOF [ # ]
## Transitions:
-- On EOF shift to state 2
## Reductions:
State 2:
## Known stack suffix:
## s EOF
## LR(1) items:
phrase -> s EOF . [ # ]
## Transitions:
## Reductions:
-- On #
-- reduce production phrase -> s EOF
State 3:
## Known stack suffix:
## phrase
## LR(1) items:
phrase' -> phrase . [ # ]
## Transitions:
## Reductions:
-- On #
-- accept phrase
State 4:
## Known stack suffix:
## a
## LR(1) items:
s -> a . s C [ EOF C ]
## Transitions:
-- On s shift to state 5
-- On a shift to state 4
## Reductions:
-- On B
-- reduce production a ->
State 5:
## Known stack suffix:
## a s
## LR(1) items:
s -> a s . C [ EOF C ]
## Transitions:
-- On C shift to state 6
## Reductions:
State 6:
## Known stack suffix:
## a s C
## LR(1) items:
s -> a s C . [ EOF C ]
## Transitions:
## Reductions:
-- On EOF C
-- reduce production s -> a s C
State 0:
## Known stack suffix:
##
## LR(1) items:
phrase' -> . phrase [ # ]
## Transitions:
-- On s shift to state 1
-- On phrase shift to state 3
-- On a shift to state 4
## Default reduction:
-- After reading the next token, reduce production a ->
State 1:
## Known stack suffix:
## s
## LR(1) items:
phrase -> s . EOF [ # ]
## Transitions:
-- On EOF shift to state 2
## Reductions:
State 2:
## Known stack suffix:
## s EOF
## LR(1) items:
phrase -> s EOF . [ # ]
## Transitions:
## Default reduction:
-- Without reading the next token, reduce production phrase -> s EOF
State 3:
## Known stack suffix:
## phrase
## LR(1) items:
phrase' -> phrase . [ # ]
## Transitions:
## Default reduction:
-- Without reading the next token, accept phrase
State 4:
## Known stack suffix:
## a
## LR(1) items:
s -> a . s C [ EOF C ]
## Transitions:
-- On s shift to state 5
-- On a shift to state 4
## Default reduction:
-- After reading the next token, reduce production a ->
State 5:
## Known stack suffix:
## a s
## LR(1) items:
s -> a s . C [ EOF C ]
## Transitions:
-- On C shift to state 6
## Reductions:
State 6:
## Known stack suffix:
## a s C
## LR(1) items:
s -> a s C . [ EOF C ]
## Transitions:
## Default reduction:
-- After reading the next token, reduce production s -> a s C
Grammar has 3 nonterminal symbols, among which 1 start symbols.
Grammar has 3 terminal symbols.
Grammar has 4 productions.
nullable(s) = false
nullable(phrase) = false
nullable(a) = true
first(s) = B
first(phrase) = B
first(a) =
minimal(s) = (* 1 *) B
minimal(phrase) = (* 2 *) B EOF
minimal(a) = (* 0 *)
follow(s) = EOF C
follow(phrase) = #
follow(a) = B
Built an LR(0) automaton with 8 states.
The grammar is not SLR(1) -- 2 states have a conflict.
The construction mode is pager.
Built an LR(1) automaton with 8 states.
2 shift/reduce conflicts were silently solved.
Only 7 states remain after resolving shift/reduce conflicts.
File "loop.mly", line 26, characters 2-3:
Warning: production s -> B is never reduced.
Warning: in total, 1 production is never reduced.
5 out of 7 states have a default reduction.
2 out of 7 states are represented.
0 out of 9 symbols keep track of their start position.
0 out of 9 symbols keep track of their end position.
4 out of 5 productions exploit shiftreduce optimization.
0 out of 7 states can peek at an error.
26 functions before inlining, 2 functions after inlining.
%start phrase
%token B
%token C
%token EOF
%nonassoc B
%nonassoc reduce
%type <unit> phrase
%%
phrase:
_1 = s _2 = EOF
{ ()}
s:
_1 = a _2 = s _3 = C
{ ()}
| _1 = B
{ ()}
a:
%prec reduce
{ ()}
%%
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment