Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
POTTIER Francois
menhir
Commits
9511cd9b
Commit
9511cd9b
authored
Jan 04, 2021
by
POTTIER Francois
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Improve the cycle detection code and add detection of hidden left recursion.
parent
f6cd9aee
Changes
12
Hide whitespace changes
Inline
Sidebyside
Showing
12 changed files
with
160 additions
and
239 deletions
+160
239
src/LoopDetection.ml
src/LoopDetection.ml
+115
14
test/static/bad/dune.auto
test/static/bad/dune.auto
+20
0
test/static/bad/hiddenleftrecursionone.exp
test/static/bad/hiddenleftrecursionone.exp
+7
0
test/static/bad/hiddenleftrecursionone.mly
test/static/bad/hiddenleftrecursionone.mly
+0
0
test/static/bad/hiddenleftrecursiontwo.exp
test/static/bad/hiddenleftrecursiontwo.exp
+7
0
test/static/bad/hiddenleftrecursiontwo.mly
test/static/bad/hiddenleftrecursiontwo.mly
+11
0
test/static/good/dune.auto
test/static/good/dune.auto
+0
27
test/static/good/loop.automaton.exp
test/static/good/loop.automaton.exp
+0
73
test/static/good/loop.automaton.resolved.exp
test/static/good/loop.automaton.resolved.exp
+0
68
test/static/good/loop.conflicts.exp
test/static/good/loop.conflicts.exp
+0
0
test/static/good/loop.exp
test/static/good/loop.exp
+0
31
test/static/good/loop.opp.exp
test/static/good/loop.opp.exp
+0
26
No files found.
src/LoopDetection.ml
View file @
9511cd9b
...
...
@@ 42,23 +42,42 @@ module Run () = struct
check that a grammar is cyclefree, it suffices to check that the relation
R is acyclic. *)
(* Here is the relation R: *)
let
successors
(
yield
:
Nonterminal
.
t
>
unit
)
(
nt
:
Nonterminal
.
t
)
:
unit
=
(* The relation R is defined as follows. Upon first reading, take
[require_nullable_suffix] to be [true] and
[require_nonempty_prefix] to be [false]. *)
let
nullable_suffix
prod
i
=
let
nullable
,
_
=
Analysis
.
nullable_first_prod
prod
i
in
nullable
let
successors
~
require_nullable_suffix
~
require_nonempty_prefix
(
yield
:
Production
.
index
>
Nonterminal
.
t
>
unit
)
(
nt
:
Nonterminal
.
t
)
:
unit
=
Production
.
iternt
nt
begin
fun
prod
>
let
rhs
=
Production
.
rhs
prod
in
let
n
=
Array
.
length
rhs
in
let
nullable_prefix
=
ref
true
in
Production
.
rhs
prod
>
Array
.
iteri
begin
fun
i
symbol
>
match
symbol
with
let
i
=
ref
0
in
while
!
nullable_prefix
&&
!
i
<
n
do
match
rhs
.
(
Misc
.
postincrement
i
)
with

Symbol
.
T
_
>
nullable_prefix
:=
false

Symbol
.
N
nt'
>
let
nullable_suffix
,
_
=
Analysis
.
nullable_first_prod
prod
(
i
+
1
)
in
if
!
nullable_prefix
&&
nullable_suffix
then
yield
nt'
;
nullable_prefix
:=
!
nullable_prefix
&&
Analysis
.
nullable
nt'
end
if
(
not
require_nullable_suffix

nullable_suffix
prod
!
i
)
&&
(
not
require_nonempty_prefix

!
i
>
1
)
then
yield
prod
nt'
;
nullable_prefix
:=
Analysis
.
nullable
nt'
done
end
(* This adapter hides [prod] from the user function [yield]. *)
let
adapt
successors
yield
nt
=
successors
(
fun
_prod
nt'
>
yield
nt'
)
nt
(* A detailed explanation of cycles whose length is greater than one. *)
let
show_cycle
nts
nt
=
...
...
@@ 89,16 +108,20 @@ let fail nts nt =
(* To detect a cycle in a relation, we use the combinator [defensive_fix] that
is provided by the library Fix. We define a function of type [Nonterminal.t
> unit] that computes nothing but calls itself recursively according to
the
pattern defined by the
func
tion
[successors] above
. Then, we evaluate this
function
everywhere. If there is a cycle, it is detected and reported. *)
> unit] that computes nothing but calls itself recursively according to
the
pattern defined by the
rela
tion
R
. Then, we evaluate this
function
everywhere. If there is a cycle, it is detected and reported. *)
(* The claim that "a cyclic grammar is ambiguous" implicitly assumes that
every nonterminal symbol is reachable and inhabited. *)
let
()
=
let
module
M
=
Fix
.
Memoize
.
ForType
(
Nonterminal
)
in
let
check
=
M
.
defensive_fix
successors
in
let
successors_R
=
successors
~
require_nullable_suffix
:
true
~
require_nonempty_prefix
:
false
>
adapt
in
let
check
=
M
.
defensive_fix
successors_R
in
try
Nonterminal
.
iter
check
with
M
.
Cycle
(
nts
,
nt
)
>
...
...
@@ 106,4 +129,82 @@ let () =
(*  *)
(* Another anomaly that we wish to detect is hidden left recursion. In the
paper "Looping LR parsers" (1988), SoisalonSoininen and Tarhio define
hidden left recursion (although they do not explicitly use this term)
and point out that: 1 a grammar that has hidden left recursion cannot
be LR(k) for any k, and (worse) 2 if the shift/reduce conflict that it
causes is resolved in favor in reduction, then the deterministic parser
that is constructed can diverge by entering an infinite sequence of
reductions. Conversely, they show if a grammar exhibits no cycle and
no hidden left recursion, then the parser must terminate, regardless of
how conflicts are resolved. *)
(* One possible definition of hidden left recursion, given by Nederhof and
Sarbo in the paper "Increasing the Applicability of LR Parsing" (1993), is
the existence of a production A > B alpha where B is nullable and alpha
expands (in zero or more steps) to A beta. *)
(* Let us define a relation S as follows. A S B holds if and only if there is
a production A > alpha B beta where alpha is nullable. This relation can
be viewed as the disjoint union of two smaller relations L and H, defined
as follows:
 A L B holds if and only if there is a production A > B beta;
 A H B holds if and only if there is a production A > alpha B beta
where alpha is nullable but is not epsilon.
A cycle in the relation L is fine: it represents ordinary left recursion.
A cycle that involves at least one H edge and any number of L and H edges,
however, denotes hidden left recursion. *)
(* An error message. *)
let
fail
prod
=
let
nt
,
rhs
=
Production
.
def
prod
in
let
positions
=
Production
.
positions
prod
in
Error
.
error
positions
"the grammar exhibits hidden left recursion: in the production
\n
\
%s,
\n
\
the nonterminal symbol %s is nullable,
\n
\
and the remainder of the righthand side expands to a sentential form
\n
\
that begins with the nonterminal symbol %s.
\n
\
This implies that the grammar is not LR(k) for any k."
(
Production
.
print
prod
)
(
Symbol
.
print
rhs
.
(
0
))
(
Symbol
.
print
(
Symbol
.
N
nt
))
(* Furthermore, this creates a shift/reduce conflict, which, if it were
resolved in favor of reduction, would cause the parser to diverge. *)
(* To detect hidden left recursion in linear time, we first compute the
strongly connected components of the relation S. Then, we check every edge
in the relation H. If the source and destination vertices of this edge lie
in the same component, then we have detected hidden left recursion. *)
let
()
=
let
module
T
=
Tarjan
.
Run
(
struct
type
node
=
Nonterminal
.
t
let
n
=
Nonterminal
.
n
let
index
=
Nonterminal
.
n2i
let
iter
=
Nonterminal
.
iter
(* The relation S is computed as follows. *)
let
successors
=
successors
~
require_nullable_suffix
:
false
~
require_nonempty_prefix
:
false
>
adapt
end
)
in
(* The relation H is computed as follows. *)
let
successors_H
=
successors
~
require_nullable_suffix
:
false
~
require_nonempty_prefix
:
true
in
(* Iterate on every edge in the relation H. *)
Nonterminal
.
iter
begin
fun
nt
>
nt
>
successors_H
begin
fun
prod
nt'
>
(* If the source vertex [nt] and the destination vertex [nt'] lie in
the same component, then we have detected hidden left recursion. *)
if
T
.
representative
nt
=
T
.
representative
nt'
then
fail
prod
end
end
(*  *)
end
(* Run *)
test/static/bad/dune.auto
View file @
9511cd9b
...
...
@@ 356,6 +356,24 @@
(rule (alias forbiddenpos)
(action (diff forbiddenpos.exp forbiddenpos.out)))
(rule (target hiddenleftrecursionone.out)
(deps hiddenleftrecursionone.mly)
(action
(withoutputsto hiddenleftrecursionone.out
(withacceptedexitcodes (not 0) (run menhir %{deps})))))
(rule (alias hiddenleftrecursionone)
(action (diff hiddenleftrecursionone.exp hiddenleftrecursionone.out)))
(rule (target hiddenleftrecursiontwo.out)
(deps hiddenleftrecursiontwo.mly)
(action
(withoutputsto hiddenleftrecursiontwo.out
(withacceptedexitcodes (not 0) (run menhir %{deps})))))
(rule (alias hiddenleftrecursiontwo)
(action (diff hiddenleftrecursiontwo.exp hiddenleftrecursiontwo.out)))
(rule (target illformedattribute.out) (deps illformedattribute.mly)
(action
(withoutputsto illformedattribute.out
...
...
@@ 1461,6 +1479,8 @@
(alias forbiddendollar)
(alias forbiddendollarbis)
(alias forbiddenpos)
(alias hiddenleftrecursionone)
(alias hiddenleftrecursiontwo)
(alias illformedattribute)
(alias illformedprec)
(alias illsortedattribute)
...
...
test/static/bad/hiddenleftrecursionone.exp
0 → 100644
View file @
9511cd9b
File "hiddenleftrecursionone.mly", line 25, characters 27:
Error: the grammar exhibits hidden left recursion: in the production
s > a s C,
the nonterminal symbol a is nullable,
and the remainder of the righthand side expands to a sentential form
that begins with the nonterminal symbol s.
This implies that the grammar is not LR(k) for any k.
test/static/
good/loop
.mly
→
test/static/
bad/hiddenleftrecursionone
.mly
View file @
9511cd9b
File moved
test/static/bad/hiddenleftrecursiontwo.exp
0 → 100644
View file @
9511cd9b
File "hiddenleftrecursiontwo.mly", line 11, characters 26:
Error: the grammar exhibits hidden left recursion: in the production
b > option(A) a,
the nonterminal symbol option(A) is nullable,
and the remainder of the righthand side expands to a sentential form
that begins with the nonterminal symbol b.
This implies that the grammar is not LR(k) for any k.
test/static/bad/hiddenleftrecursiontwo.mly
0 → 100644
View file @
9511cd9b
%
token
A
B
C
%
start
<
unit
>
a
%%
a
:
A
*
b
C
{}
b
:
B
C

A
?
a
{}
test/static/good/dune.auto
View file @
9511cd9b
...
...
@@ 6535,32 +6535,6 @@
(diff logtk.0.8.1parse_theory.conflicts.exp
logtk.0.8.1parse_theory.conflicts)))
(rule (target loop.opp.out) (deps loop.mly)
(action
(withoutputsto loop.opp.out
(withacceptedexitcodes 0 (run menhir onlypreprocess %{deps})))))
(rule (alias loop) (action (diff loop.opp.exp loop.opp.out)))
(rule
(targets loop.out loop.automaton loop.automaton.resolved loop.conflicts
loop.timings)
(deps loop.mly)
(action
(withoutputsto loop.out
(withacceptedexitcodes 0
(run menhir dump dumpresolved explain lg 2 la 2 lc 2
timingsto loop.timings %{deps})))))
(rule (alias loop) (action (diff loop.exp loop.out)))
(rule (alias loop) (action (diff loop.automaton.exp loop.automaton)))
(rule (alias loop)
(action (diff loop.automaton.resolved.exp loop.automaton.resolved)))
(rule (alias loop) (action (diff loop.conflicts.exp loop.conflicts)))
(rule (target lrbutnotlalr.opp.out) (deps lrbutnotlalr.mly)
(action
(withoutputsto lrbutnotlalr.opp.out
...
...
@@ 12358,7 +12332,6 @@
(alias llparse)
(alias logic_parser)
(alias logtk.0.8.1parse_theory)
(alias loop)
(alias lrbutnotlalr)
(alias ltlparser)
(alias lustrev6.1.737lv6parser)
...
...
test/static/good/loop.automaton.exp
deleted
100644 → 0
View file @
f6cd9aee
State 0:
## Known stack suffix:
##
## LR(1) items:
phrase' > . phrase [ # ]
## Transitions:
 On s shift to state 1
 On phrase shift to state 3
 On a shift to state 4
## Reductions:
 On B
 reduce production a >
State 1:
## Known stack suffix:
## s
## LR(1) items:
phrase > s . EOF [ # ]
## Transitions:
 On EOF shift to state 2
## Reductions:
State 2:
## Known stack suffix:
## s EOF
## LR(1) items:
phrase > s EOF . [ # ]
## Transitions:
## Reductions:
 On #
 reduce production phrase > s EOF
State 3:
## Known stack suffix:
## phrase
## LR(1) items:
phrase' > phrase . [ # ]
## Transitions:
## Reductions:
 On #
 accept phrase
State 4:
## Known stack suffix:
## a
## LR(1) items:
s > a . s C [ EOF C ]
## Transitions:
 On s shift to state 5
 On a shift to state 4
## Reductions:
 On B
 reduce production a >
State 5:
## Known stack suffix:
## a s
## LR(1) items:
s > a s . C [ EOF C ]
## Transitions:
 On C shift to state 6
## Reductions:
State 6:
## Known stack suffix:
## a s C
## LR(1) items:
s > a s C . [ EOF C ]
## Transitions:
## Reductions:
 On EOF C
 reduce production s > a s C
test/static/good/loop.automaton.resolved.exp
deleted
100644 → 0
View file @
f6cd9aee
State 0:
## Known stack suffix:
##
## LR(1) items:
phrase' > . phrase [ # ]
## Transitions:
 On s shift to state 1
 On phrase shift to state 3
 On a shift to state 4
## Default reduction:
 After reading the next token, reduce production a >
State 1:
## Known stack suffix:
## s
## LR(1) items:
phrase > s . EOF [ # ]
## Transitions:
 On EOF shift to state 2
## Reductions:
State 2:
## Known stack suffix:
## s EOF
## LR(1) items:
phrase > s EOF . [ # ]
## Transitions:
## Default reduction:
 Without reading the next token, reduce production phrase > s EOF
State 3:
## Known stack suffix:
## phrase
## LR(1) items:
phrase' > phrase . [ # ]
## Transitions:
## Default reduction:
 Without reading the next token, accept phrase
State 4:
## Known stack suffix:
## a
## LR(1) items:
s > a . s C [ EOF C ]
## Transitions:
 On s shift to state 5
 On a shift to state 4
## Default reduction:
 After reading the next token, reduce production a >
State 5:
## Known stack suffix:
## a s
## LR(1) items:
s > a s . C [ EOF C ]
## Transitions:
 On C shift to state 6
## Reductions:
State 6:
## Known stack suffix:
## a s C
## LR(1) items:
s > a s C . [ EOF C ]
## Transitions:
## Default reduction:
 After reading the next token, reduce production s > a s C
test/static/good/loop.conflicts.exp
deleted
100644 → 0
View file @
f6cd9aee
test/static/good/loop.exp
deleted
100644 → 0
View file @
f6cd9aee
Grammar has 3 nonterminal symbols, among which 1 start symbols.
Grammar has 3 terminal symbols.
Grammar has 4 productions.
nullable(s) = false
nullable(phrase) = false
nullable(a) = true
first(s) = B
first(phrase) = B
first(a) =
minimal(s) = (* 1 *) B
minimal(phrase) = (* 2 *) B EOF
minimal(a) = (* 0 *)
follow(s) = EOF C
follow(phrase) = #
follow(a) = B
Built an LR(0) automaton with 8 states.
The grammar is not SLR(1)  2 states have a conflict.
The construction mode is pager.
Built an LR(1) automaton with 8 states.
2 shift/reduce conflicts were silently solved.
Only 7 states remain after resolving shift/reduce conflicts.
File "loop.mly", line 26, characters 23:
Warning: production s > B is never reduced.
Warning: in total, 1 production is never reduced.
5 out of 7 states have a default reduction.
2 out of 7 states are represented.
0 out of 9 symbols keep track of their start position.
0 out of 9 symbols keep track of their end position.
4 out of 5 productions exploit shiftreduce optimization.
0 out of 7 states can peek at an error.
26 functions before inlining, 2 functions after inlining.
test/static/good/loop.opp.exp
deleted
100644 → 0
View file @
f6cd9aee
%start phrase
%token B
%token C
%token EOF
%nonassoc B
%nonassoc reduce
%type <unit> phrase
%%
phrase:
_1 = s _2 = EOF
{ ()}
s:
_1 = a _2 = s _3 = C
{ ()}
 _1 = B
{ ()}
a:
%prec reduce
{ ()}
%%
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment