Commit 23947ec1 authored by POTTIER Francois's avatar POTTIER Francois
Browse files

Add a new loop detection algorithm.

The code in Item that used to detect so-called "epsilon-cycles" is removed, as
it detected only loops of the form A -> B beta where beta is nullable, whereas
the new code detects loops of the more general form A -> alpha B beta where
alpha and beta are nullable.
parent 14e17a0f
Pipeline #200236 passed with stages
in 59 seconds
(******************************************************************************)
(* *)
(* Menhir *)
(* *)
(* François Pottier, Inria Paris *)
(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
(* *)
(* Copyright Inria. All rights reserved. This file is distributed under the *)
(* terms of the GNU General Public License version 2, as described in the *)
(* file LICENSE. *)
(* *)
(******************************************************************************)
open Grammar
module Run () = struct
(* -------------------------------------------------------------------------- *)
(* Let us write A -> alpha when there exists a production A -> alpha, and let
us write beta => gamma when the sentential form beta expands (in one step)
to gamma. *)
(* According to Aho and Ullman ("The Theory of Parsing, Translation, and
Compiling -- Volume I: Parsing", page 150), a grammar is cycle-free if
there is no derivation of the form A =>+ A. In other words, there is a
cycle when a nonterminal symbol A expands, in one or more steps, to itself. *)
(* Under the assumption that every nonterminal symbol is reachable and
generates a nonempty language, the presence of a cycle implies that the
grammar is infinitely ambiguous: for some inputs, there is an infinite
number of parse trees. *)
(* We reject such a grammar, on two grounds: first, it seems pathological, and
is likely the result of a mistake; second, the algorithm that we use to
speed up closure computations (in the module Item) does not tolerate the
presence of certain cycles. *)
(* Let us define a relation R as follows: A R B holds if and only if there is
a production A -> alpha B beta where alpha and beta are nullable. Then, it
is not difficult to see that the relations =>+ and R+ coincide. That is, to
check that a grammar is cycle-free, it suffices to check that the relation
R is acyclic. *)
(* Here is the relation R: *)
(* [successors yield nt] enumerates the successors of [nt] with respect to the
   relation R. It applies [yield] to every nonterminal symbol [nt'] that
   appears in some production of [nt] at a position where everything to its
   left and everything to its right is nullable. Nothing is done to remove
   duplicates, so a symbol can be yielded more than once. *)
let successors (yield : Nonterminal.t -> unit) (nt : Nonterminal.t) : unit =
  Production.iternt nt (fun prod ->
    (* [prefix_ok] remains true as long as every symbol to the left of the
       current position is a nullable nonterminal symbol. *)
    let prefix_ok = ref true in
    let visit i symbol =
      match symbol with
      | Symbol.T _ ->
          (* Once a terminal symbol has been seen, the prefix is no longer
             considered nullable. *)
          prefix_ok := false
      | Symbol.N nt' ->
          (* The suffix begins just after the current symbol. *)
          let suffix_ok = fst (Analysis.nullable_first_prod prod (i + 1)) in
          if !prefix_ok && suffix_ok then yield nt';
          (* The current symbol becomes part of the prefix from now on. *)
          if !prefix_ok && not (Analysis.nullable nt') then
            prefix_ok := false
    in
    Array.iteri visit (Production.rhs prod))
(* A detailed explanation of cycles whose length is greater than one. *)
(* [show_cycle nts nt] produces a textual description of the cycle formed by
   the list [nts], whose first element must be [nt]. If the cycle has length
   one, the result is the empty string. Otherwise, the result is a sentence of
   the form "a expands to b,\nwhich expands to a.\n", where the first symbol
   is repeated at the end so as to close the cycle. *)
let show_cycle nts nt =
  assert (List.hd nts = nt);
  match nts with
  | [ _ ] ->
      (* A cycle of length one needs no further explanation. *)
      ""
  | _ ->
      let name = Nonterminal.print false in
      begin match nts @ [ nt ] with
      | [] ->
          (* The list above has at least one element, namely [nt]. *)
          assert false
      | first :: rest ->
          Misc.with_buffer 1024 (fun b ->
            Printf.bprintf b "%s" (name first);
            (* Every step but the last one is followed by a continuation;
               the last step ends the sentence. *)
            let rec steps = function
              | [] ->
                  ()
              | [ last ] ->
                  Printf.bprintf b " expands to %s" (name last);
                  Printf.bprintf b ".\n"
              | x :: xs ->
                  Printf.bprintf b " expands to %s" (name x);
                  Printf.bprintf b ",\nwhich";
                  steps xs
            in
            steps rest)
      end
(* To detect a cycle in a relation, we use the combinator [defensive_fix] that
is provided by the library Fix. We define a function of type [Nonterminal.t
-> unit] that computes nothing but calls itself recursively according to the
pattern defined by the function [successors] above. Then, we evaluate this
function everywhere. If there is a cycle, it is detected and reported. *)
(* The claim that "a cyclic grammar is ambiguous" implicitly assumes that
every nonterminal symbol is reachable and inhabited. *)
(* Evaluate the memoizing function [check] at every nonterminal symbol. If
   the exception [M.Cycle] is raised, report an error at the positions of the
   nonterminal symbols that participate in the cycle. *)
let () =
  let module M = Fix.Memoize.ForType(Nonterminal) in
  let check = M.defensive_fix successors in
  match Nonterminal.iter check with
  | () ->
      (* No cycle was detected. *)
      ()
  | exception M.Cycle (nts, nt) ->
      let positions =
        nts |> List.map Nonterminal.positions |> List.concat
      in
      let culprit = Nonterminal.print false nt
      and explanation = show_cycle nts nt in
      Error.error positions
        "the grammar is cyclic:\n\
         the nonterminal symbol %s expands to itself.\n%s\
         A cyclic grammar is ambiguous."
        culprit
        explanation
(* -------------------------------------------------------------------------- *)
end (* Run *)
(******************************************************************************)
(* *)
(* Menhir *)
(* *)
(* François Pottier, Inria Paris *)
(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
(* *)
(* Copyright Inria. All rights reserved. This file is distributed under the *)
(* terms of the GNU General Public License version 2, as described in the *)
(* file LICENSE. *)
(* *)
(******************************************************************************)
(* [Run] detects and rejects certain anomalies in the grammar, which cause the
grammar to be outside of the class LR(1). The result signature is empty:
the functor is applied only for the check that takes place when it is
applied. *)
module Run () : sig end
......@@ -74,12 +74,6 @@ let () =
(* [equal item1 item2] tells whether the items [item1] and [item2] are equal.
   The comparison is performed by the operator [=]. *)
let equal (item1 : t) (item2: t) =
item1 = item2
(* Position. *)
(* [positions item] returns the positions of the production that underlies
   [item]. The production is the first component of [export item]; the
   second component is ignored. *)
let positions (item : t) =
let prod, _ = export item in
Production.positions prod
(* [def item] looks up the production associated with this item in the
grammar and returns [prod, nt, rhs, pos, length], where [prod] is
the production's index, [nt] and [rhs] represent the production,
......@@ -258,86 +252,16 @@ module Closure (L : Lookahead.S) = struct
) mapping.(Production.p2i prod)
)
(* Detect and reject cycles of transitions that transmit a lookahead
set.
We need to ensure that there are no such cycles in order to be
able to traverse these transitions in topological order.
Each such cycle corresponds to a set of productions of the form
A1 -> A2, A2 -> A3, ..., An -> A1 (modulo nullable
trailers). Such cycles are unlikely to occur in realistic
grammars, so our current approach is to reject the grammar if
such a cycle exists. Actually, according to DeRemer and Pennello
(1982), such a cycle is exactly an includes cycle, and implies
that the grammar is not LR(k) for any k, unless A1, ..., An are
in fact uninhabited. In other words, this is a pathological
case. *)
(* Yes, indeed, this is called a cycle in Aho & Ullman's book,
and a loop in Grune & Jacobs' book. It is not difficult to
see that (provided all symbols are inhabited) the grammar
is infinitely ambiguous if and only if there is a loop. *)
module P = struct
type foo = node
type node = foo
let n =
!count
(* We can be certain that there are no cycles of transitions that transmit a
lookahead set. This guarantees that we can traverse these transitions in
a topological order.
let index node =
node.num
let iter f =
Array.iter (fun nodes ->
Array.iter f nodes
) mapping
let successors f node =
if node.epsilon_transmits then
List.iter f node.epsilon_transitions
end
module T = Tarjan.Run (P)
let cycle scc =
let items = List.map (fun node -> node.item) scc in
let positions = List.flatten (List.map positions items) in
let names = String.concat "\n" (List.map print items) in
Error.error
positions
"the grammar is ambiguous.\n\
The following items participate in an epsilon-cycle:\n\
%s" names
let () =
P.iter (fun node ->
let scc = T.scc node in
match scc with
| [] ->
()
| [ node ] ->
(* This is a strongly connected component of one node. Check
whether it carries a self-loop. Forbidding self-loops is not
strictly required by the code that follows, but is consistent
with the fact that we forbid cycles of length greater than 1. *)
P.successors (fun successor ->
if successor.num = node.num then
cycle scc
) node
| _ ->
(* This is a strongly connected component of at least two
elements. *)
cycle scc
)
Indeed, if there was such a cycle, then every item in this cycle would
have to be of the form A -> . B beta, where beta is nullable. DeRemer and
Pennello (1982) call this an includes cycle. An includes cycle is a
special case of a cycle, as defined by Aho and Ullman. The module
LoopDetection detects and rejects cycles, so we can be assured at this
point that no such cycle exists. *)
(* Closure computation. *)
......
......@@ -16,6 +16,13 @@ open Grammar
module InfiniteArray =
MenhirLib.InfiniteArray
(* ------------------------------------------------------------------------ *)
(* Perform loop detection before attempting to build the LR(0) automaton. *)
(* The functor [LoopDetection.Run] is applied here only so that it runs at
   this point; its result is not needed, so it is bound to an anonymous
   module. *)
module _ =
LoopDetection.Run()
(* ------------------------------------------------------------------------ *)
(* Symbolic lookahead information. *)
......
File "cycle-length-one.mly", line 7, characters 0-1:
Error: the grammar is cyclic:
the nonterminal symbol a expands to itself.
A cyclic grammar is ambiguous.
%start a
%type <unit> a
%token A B C
%%
a:
B {}
| A? a C? {}
File "cycle-length-three.mly", line 7, characters 0-1:
File "cycle-length-three.mly", line 11, characters 0-1:
File "cycle-length-three.mly", line 15, characters 0-1:
Error: the grammar is cyclic:
the nonterminal symbol a expands to itself.
a expands to b,
which expands to c,
which expands to a.
A cyclic grammar is ambiguous.
%start a
%type <unit> a
%token A B C
%%
a:
A? b C? | B
{}
b:
A* c d
{}
c:
a C*
{}
d:
B?
{}
File "cycle-length-two.mly", line 7, characters 0-1:
File "cycle-length-two.mly", line 10, characters 0-1:
Error: the grammar is cyclic:
the nonterminal symbol a expands to itself.
a expands to b,
which expands to a.
A cyclic grammar is ambiguous.
......@@ -167,6 +167,30 @@
(rule (alias crazy_long_production)
(action (diff crazy_long_production.exp crazy_long_production.out)))
(rule (target cycle-length-one.out) (deps cycle-length-one.mly)
(action
(with-outputs-to cycle-length-one.out
(with-accepted-exit-codes (not 0) (run menhir %{deps})))))
(rule (alias cycle-length-one)
(action (diff cycle-length-one.exp cycle-length-one.out)))
(rule (target cycle-length-three.out) (deps cycle-length-three.mly)
(action
(with-outputs-to cycle-length-three.out
(with-accepted-exit-codes (not 0) (run menhir %{deps})))))
(rule (alias cycle-length-three)
(action (diff cycle-length-three.exp cycle-length-three.out)))
(rule (target cycle-length-two.out) (deps cycle-length-two.mly)
(action
(with-outputs-to cycle-length-two.out
(with-accepted-exit-codes (not 0) (run menhir %{deps})))))
(rule (alias cycle-length-two)
(action (diff cycle-length-two.exp cycle-length-two.out)))
(rule (target cyclic-parameterized-definition.out)
(deps cyclic-parameterized-definition.mly)
(action
......@@ -254,14 +278,6 @@
(rule (alias either-public)
(action (diff either-public.exp either-public.out)))
(rule (target epsilon-cycle.out) (deps epsilon-cycle.mly)
(action
(with-outputs-to epsilon-cycle.out
(with-accepted-exit-codes (not 0) (run menhir %{deps})))))
(rule (alias epsilon-cycle)
(action (diff epsilon-cycle.exp epsilon-cycle.out)))
(rule (target equals-expected.out) (deps equals-expected.mly)
(action
(with-outputs-to equals-expected.out
......@@ -1422,6 +1438,9 @@
(alias constant-arity-in-multiple-def-1)
(alias constant-inline-in-multiple-def-1)
(alias crazy_long_production)
(alias cycle-length-one)
(alias cycle-length-three)
(alias cycle-length-two)
(alias cyclic-parameterized-definition)
(alias declaration-error-a)
(alias declaration-error-b)
......@@ -1432,7 +1451,6 @@
(alias either-another-production)
(alias either-another-rule)
(alias either-public)
(alias epsilon-cycle)
(alias equals-expected)
(alias error-reserved)
(alias expansion-capture)
......
File "epsilon-cycle.mly", line 8, characters 2-3:
File "epsilon-cycle.mly", line 11, characters 2-3:
Error: the grammar is ambiguous.
The following items participate in an epsilon-cycle:
a -> . b
b -> . a
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment