diff --git a/CHANGES.md b/CHANGES.md
index 7c46c2fcb9a74b6680b0b7d5168b99a4e999f423..7c74d00e55797bbbfb75cbeb3439a9d7b030b931 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,11 @@
# Changes
+## 2020/03/04
+
+* Fix the static check that is performed prior to expanding the parameterized
+ nonterminal symbols. The previous check was incorrect: it would sometimes
+ accept a grammar whose expansion would diverge.
+
## 2020/02/11
* Re-implement Menhir's default algorithm for constructing LR(1) automata,
diff --git a/src/CheckSafeParameterizedGrammar.ml b/src/CheckSafeParameterizedGrammar.ml
index 984b07738a7c1c5248f1e43d4e607751a5fb6919..1fa5e22752cfc772875172b2345c44d5c69bbcd7 100644
--- a/src/CheckSafeParameterizedGrammar.ml
+++ b/src/CheckSafeParameterizedGrammar.ml
@@ -73,23 +73,54 @@ let formals : formal array =
there is a "dangerous" edge from the formal parameter F/i to the formal G/j.
This reflects the fact that there is a flow from F/i to G/j. This flow is
"dangerous" in the sense that it is size-increasing: X is transformed to
- H(..., X, ...). *)
+ H(..., X, ...). In this example, there should also be a safe edge from the
+ formal F/i to the formal H/k.
+
+ More generally, an occurrence of X in the right-hand side can occur deeply
+ nested in a context K:
+
+ F(..., X, ...):
+ ... K[X] ...
+
+ In that case, we must create as many edges as the context K is deep, and
+ all of these edges should be dangerous, except the one that corresponds
+ to the innermost layer of K, which is safe.
+
+ Another way of putting this is, when the left-hand side is:
+
+ F(..., X, ...): # where X is the i-th formal parameter of F
+
+ and the right-hand side contains a possibly-nested occurrence of:
+
+ G(..., K[X], ...) # where K[X] is the j-th actual parameter of G
+
+ then we must create an edge of F/i to G/j, and this edge is safe if and
+ only if the context K is empty, i.e., X occurs at depth 0 in K[x]. *)
+
+(* The code below has quadratic complexity because [Parameters.occurs_deep]
+ is expensive. In principle, we could achieve linear complexity by first
+ annotating subterm (bottom-up) with a Boolean flag that indicates whether
+ [x] occurs (shallowly/deeply) in it; that would allow us to implement
+ [occurs_deep] in constant time. However, in practice, quadratic complexity
+ is probably good enough. *)
type edge =
| Safe
| Dangerous
-let successors_parameter (f : edge -> formal -> unit) x (param : parameter) =
+let rec successors_parameter (f : edge -> formal -> unit) x (param : parameter) =
match param with
| ParameterVar _ ->
(* This is not an application. No successors. *)
()
| ParameterApp (sym, params) ->
let nt = value sym in
- (* If [x] occurs in the [i]-th actual parameter of this application,
- then there is an edge to the formal [nt, i]. Whether it is a safe
- or dangerous edge depends on whether [x] occurs shallow or deep. *)
List.iteri (fun i param ->
+ (* Check, recursively, the applications that appear inside [param]. *)
+ successors_parameter f x param;
+ (* If [x] occurs in the [i]-th actual parameter of this application,
+ then there is an edge to the formal [nt, i]. Whether it is a safe
+ or dangerous edge depends on whether [x] occurs shallow or deep. *)
if Parameters.occurs_shallow x param then
f Safe (nt, i)
else if Parameters.occurs_deep x param then
diff --git a/test/static/bad/expansion_diverges.exp b/test/static/bad/expansion_diverges.exp
new file mode 100644
index 0000000000000000000000000000000000000000..84ee7bff40d774d36fb513743e0f7da73c678b74
--- /dev/null
+++ b/test/static/bad/expansion_diverges.exp
@@ -0,0 +1,3 @@
+Error: the parameterized nonterminal symbols in this grammar
+cannot be expanded away: expansion would not terminate.
+The first formal parameter of "seq(_)" grows without bound.
diff --git a/test/static/bad/expansion_diverges.mly b/test/static/bad/expansion_diverges.mly
new file mode 100644
index 0000000000000000000000000000000000000000..2faad3f84aa9c608cf8db7f2ae0b4e8c82278673
--- /dev/null
+++ b/test/static/bad/expansion_diverges.mly
@@ -0,0 +1,22 @@
+(* This grammar must be rejected, as the expansion of parameterized
+ nonterminal symbols does not terminate: seq(t) calls seq(wrap(t)).
+
+ This problem has been reported by Stéphane Lescuyer
+ and studied by Yann Régis-Gianas.
+
+ See https://gitlab.inria.fr/fpottier/menhir/issues/4
+
+ *)
+
+%token A
+
+%%
+
+start:
+ seq(A) {}
+
+wrap(t):
+ t {}
+
+seq(t):
+| wrap(seq(wrap(t))) {}
diff --git a/test/static/src/dune.auto b/test/static/src/dune.auto
index f16064e49269172122baa952c0139e2953ebb4ed..f04ef34ba9336337874baf361ed514d0e8268c68 100644
--- a/test/static/src/dune.auto
+++ b/test/static/src/dune.auto
@@ -8133,6 +8133,14 @@
(rule (alias expansion-capture)
(action (diff ../bad/expansion-capture.exp expansion-capture.out)))
+(rule (target expansion_diverges.out) (deps ../bad/expansion_diverges.mly)
+ (action
+ (with-outputs-to expansion_diverges.out
+ (chdir ../bad (with-accepted-exit-codes (not 0) (run menhir %{deps}))))))
+
+(rule (alias expansion_diverges)
+ (action (diff ../bad/expansion_diverges.exp expansion_diverges.out)))
+
(rule (target expression.out) (deps ../bad/expression.mly)
(action
(with-outputs-to expression.out
@@ -9421,6 +9429,7 @@
(alias error-reserved)
(alias execparser)
(alias expansion-capture)
+ (alias expansion_diverges)
(alias expression)
(alias expression-expected)
(alias expressions)