Commit 6d3f5375 by POTTIER Francois

Added recognition and on-the-fly expansion (during parsing) of anonymous rules.

It remains to document this feature and to produce better names.
parent 267ac77b
2015/02/11:
Added support for anonymous rules. This allows writing, e.g.,
list(e = expression SEMI { e })
whereas previously one should have written
list(terminated(e, SEMI)).
2015/02/09:
Moved all of the demos to ocamlbuild (instead of make).
......
* Document anonymous rules.
Produce well-chosen (predictable) names for anonymous rules.
* ErrorReporting: experiment with merging several symbols
so as to reduce the noise (e.g. operators, expressions,
etc.). Or just print the same way, but don't quotient
......
Warning: you are using the standard library and/or the %inline keyword. We
recommend switching on --infer in order to avoid obscure type error messages.
/* Test of the new anonymous rule syntax. */
%token<int> A B C D EOF
%start<int list> phrase
%%
phrase:
xs = list(x = A | x = B | x = C { x })
ys = list(x = D y = D { x + y })
EOF
{ xs @ ys }
Warning: you are using the standard library and/or the %inline keyword. We
recommend switching on --infer in order to avoid obscure type error messages.
%start phrase
%token <int> EOF
%token <int> D
%token <int> C
%token <int> B
%token <int> A
%type <int list> phrase
%%
phrase:
| xs = list___anonymous_0_ ys = list___anonymous_1_ EOF
{ xs @ ys }
list___anonymous_1_:
|
{ [] }
| x0 = D y0 = D xs = list___anonymous_1_
{let x =
let y = y0 in
let x = x0 in
( x + y )
in
x :: xs }
list___anonymous_0_:
|
{ [] }
| x0 = A xs = list___anonymous_0_
{let x =
let x = x0 in
( x )
in
x :: xs }
| x0 = B xs = list___anonymous_0_
{let x =
let x = x0 in
( x )
in
x :: xs }
| x0 = C xs = list___anonymous_0_
{let x =
let x = x0 in
( x )
in
x :: xs }
%%
Warning: you are using the standard library and/or the %inline keyword. We
recommend switching on --infer in order to avoid obscure type error messages.
%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL
%left PLUS MINUS /* lowest precedence */
%left TIMES DIV /* medium precedence */
%nonassoc UMINUS /* highest precedence */
%start <int> main
(* The calc demo, with an anonymous rule for binary operators. *)
%%
main:
| e = expr EOL
{ e }
expr:
| i = INT
{ i }
| LPAREN e = expr RPAREN
{ e }
| e1 = expr
op = anonymous(PLUS { (+) } | MINUS { (-) } | TIMES { ( * ) } | DIV { (/) })
e2 = expr
{ op e1 e2 }
| MINUS e = expr %prec UMINUS
{ - e }
Warning: you are using the standard library and/or the %inline keyword. We
recommend switching on --infer in order to avoid obscure type error messages.
%start main
%token RPAREN
%token LPAREN
%token <int> INT
%token EOL
%token PLUS
%token MINUS
%token TIMES
%token DIV
%left PLUS MINUS
%left TIMES DIV
%nonassoc UMINUS
%type <int> main
%%
main:
| e = expr EOL
{ e }
expr:
| i = INT
{ i }
| LPAREN e = expr RPAREN
{ e }
| e1 = expr PLUS e2 = expr
{let op =
let x =
( (+) )
in
( x )
in
op e1 e2 }
| e1 = expr MINUS e2 = expr
{let op =
let x =
( (-) )
in
( x )
in
op e1 e2 }
| e1 = expr TIMES e2 = expr
{let op =
let x =
( ( * ) )
in
( x )
in
op e1 e2 }
| e1 = expr DIV e2 = expr
{let op =
let x =
( (/) )
in
( x )
in
op e1 e2 }
| MINUS e = expr %prec UMINUS
{ - e }
%%
......@@ -63,7 +63,7 @@ grammar:
{
pg_filename = ""; (* filled in by the caller *)
pg_declarations = List.flatten ds;
pg_rules = rs;
pg_rules = rs @ ParserAux.rules();
pg_trailer = t
}
}
......@@ -111,11 +111,11 @@ declaration:
])
}
| TYPE t = OCAMLTYPE ss = clist(actual_parameter) %prec decl
| TYPE t = OCAMLTYPE ss = clist(strict_actual) %prec decl
{ List.map (Positions.map (fun nt -> DType (t, nt)))
(List.map Parameters.with_pos ss) }
| TYPE OCAMLTYPE clist(actual_parameter) error
| TYPE OCAMLTYPE clist(strict_actual) error
| TYPE OCAMLTYPE error
| TYPE error
{ Error.error (Positions.two $startpos $endpos) (String.concat "\n" [
......@@ -224,6 +224,7 @@ rule:
symbol = symbol /* the symbol that is being defined */
params = plist(symbol) /* formal parameters */
COLON
optional_bar
branches = branches
{
let public, inline = flags in
......@@ -240,7 +241,7 @@ rule:
{ Error.error (Positions.two $startpos $endpos) "syntax error inside the definition of a nonterminal symbol." }
%inline branches:
optional_bar prods = separated_nonempty_list(BAR, production_group)
prods = separated_nonempty_list(BAR, production_group)
{ List.flatten prods }
flags:
......@@ -307,27 +308,64 @@ production:
Because both [ioption] and [terminated] are defined as inlined by
the standard library, this definition expands to two productions,
one of which begins with id = LID, the other of which begins with
p = actual_parameter. The token LID is in FIRST(actual_parameter),
p = actual. The token LID is in FIRST(actual),
but the LR(1) formalism can deal with that. If [option] was used
instead of [ioption], an LR(1) conflict would arise -- looking
ahead at LID would not allow determining whether to reduce an
empty [option] or to shift. */
producer:
| id = ioption(terminated(LID, EQUAL)) p = actual_parameter
| id = ioption(terminated(LID, EQUAL)) p = actual
{ id, p }
/* ------------------------------------------------------------------------- */
/* The syntax of actual parameters allows applications, whereas the syntax
of formal parameters does not. It also allows use of the "?", "+", and
"*" shortcuts. */
actual_parameter:
symbol = symbol actuals = plist(actual_parameter)
/* The ideal syntax of actual parameters includes:
1. a symbol, optionally applied to a list of actual parameters;
2. an actual parameter followed with a modifier;
3. an anonymous rule. (Not delimited by parentheses! Otherwise
one would often end up writing two pairs of parentheses.) */
/* In order to avoid a few ambiguities, we restrict this ideal syntax as
follows:
a. Within a %type declaration, we use [strict_actual], which
allows 1- and 2- (this is undocumented; the documentation says we
require a symbol) but not 3-, which would not make semantic sense
anyway.
b. Within a producer, we use [actual], which allows 1- and
2- but not 3-. Case 3- is allowed by switching to [lax_actual]
within the actual arguments of an application, which are clearly
delimited by parentheses and commas.
c. In front of a modifier, we can never allow [lax_actual],
as this would create an ambiguity: basically, [A | B?] could be
interpreted either as [(A | B)?] or as [A | (B?)].
*/
%inline generic_actual(A, B):
(* 1- *)
symbol = symbol actuals = plist(A)
{ Parameters.app symbol actuals }
| p = actual_parameter m = modifier
(* 2- *)
| p = B m = modifier
{ ParameterApp (m, [ p ]) }
strict_actual:
p = generic_actual(strict_actual, strict_actual)
{ p }
actual:
p = generic_actual(lax_actual, actual)
{ p }
/* [lax_actual] accepts cases 1- and 2- (through [generic_actual]) and,
   in addition, case 3-: an anonymous rule, written as a list of branches. */
lax_actual:
p = generic_actual(lax_actual, /* cannot be lax_ */ actual)
{ p }
(* 3- *)
| /* leading bar disallowed */
branches = branches
{ (* The position is built from the start and end positions of this
     production. The branches are handed to [ParserAux.anonymous], which
     returns a symbol; the result is a plain reference to that symbol. *)
let position = position (with_poss $startpos $endpos ()) in
let symbol = ParserAux.anonymous position branches in
ParameterVar (with_pos position symbol) }
/* ------------------------------------------------------------------------- */
/* Formal or actual parameter lists are delimited with parentheses and
separated with commas. They are optional. */
......
......@@ -64,3 +64,40 @@ let override pos o1 o2 =
o2
| _, None ->
o1
(* Support for on-the-fly expansion of anonymous rules. Whenever such
a rule is encountered, we create a fresh non-terminal symbol, add
a definition of this symbol to a global variable, and return a
reference to this symbol. Quick and dirty. So, in the end, clean. *)
(* [fresh()] produces a name of the form [__anonymous_<n>], where [<n>] is
   obtained from a private counter (initialized to 0) through
   [Misc.postincrement]. *)
let fresh : unit -> string =
  let counter = ref 0 in
  fun () ->
    let n = Misc.postincrement counter in
    Printf.sprintf "__anonymous_%d" n
(* The definitions of the anonymous rules recorded so far. [anonymous]
   prepends to this list, so the most recent definition comes first.
   This reference is shadowed further down by the function [rules]. *)
let rules =
ref []
(* [anonymous pos branches] records a new rule whose branches are
   [branches] and whose sole position is [pos], under a freshly generated
   name, and returns that name. *)
let anonymous pos branches =
  (* The name under which the new definition is known. *)
  let nt = fresh() in
  (* Build the definition and push it onto the global store in one step.
     The rule is not public, is marked %inline, and has no parameters. *)
  rules := {
    pr_public_flag = false;
    pr_inline_flag = true;
    pr_nt = nt;
    pr_positions = [ pos ]; (* this list is not allowed to be empty *)
    pr_parameters = [];
    pr_branches = branches
  } :: !rules;
  (* The caller refers to the definition through this name. *)
  nt
(* [rules()] hands back every definition recorded so far and empties the
   store. Within this body, [rules] still denotes the reference defined
   earlier, since this [let] is not recursive. *)
let rules () =
  let recorded = !rules in
  (* Start afresh, in case several .mly files have to be read. *)
  rules := [];
  recorded
......@@ -23,3 +23,11 @@ val check_production_group:
val override: Positions.t -> 'a option -> 'a option -> 'a option
(* Support for on-the-fly expansion of anonymous rules. When such a
rule is encountered, invoke [anonymous], which creates a fresh
non-terminal symbol, records the definition of this symbol to a
global variable, and returns this symbol. In the end, invoke
[rules], so as to obtain a list of all recorded definitions. *)
(* [anonymous pos branches] records a definition made of [branches], at
   position [pos], under a fresh non-terminal symbol, and returns the name
   of this symbol. *)
val anonymous: Positions.t -> parameterized_branch list -> string
(* [rules()] returns the definitions recorded so far and resets the
   global state. *)
val rules: unit -> parameterized_rule list
......@@ -5,6 +5,18 @@
%%
(* ------------------------------------------------------------------------- *)
(* The identity. *)
(* [anonymous(X)] is the same as [X]. *)
(* This can be useful because it allows placing an anonymous sub-rule in
the middle of a rule, as in [foo anonymous(bar { ... } | quux { ...}) baz]. *)
%public %inline anonymous(X):
x = X
{ x }
(* ------------------------------------------------------------------------- *)
(* Options. *)
(* [option(X)] recognizes either nothing or [X]. It produces a value
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment