Commit 6d3f5375 authored by POTTIER Francois's avatar POTTIER Francois

Added recognition and on-the-fly expansion (during parsing) of anonymous rules.

It remains to document this feature and to produce better names.
parent 267ac77b
2015/02/11:
Added support for anonymous rules. This allows writing, e.g.,
list(e = expression SEMI { e })
whereas previously one would have had to write
list(terminated(expression, SEMI)).
2015/02/09: 2015/02/09:
Moved all of the demos to ocamlbuild (instead of make). Moved all of the demos to ocamlbuild (instead of make).
......
* Document anonymous rules.
Produce well-chosen (predictable) names for anonymous rules.
* ErrorReporting: experiment with merging several symbols * ErrorReporting: experiment with merging several symbols
so as to reduce the noise (e.g. operators, expressions, so as to reduce the noise (e.g. operators, expressions,
etc.). Or just print the same way, but don't quotient etc.). Or just print the same way, but don't quotient
......
Warning: you are using the standard library and/or the %inline keyword. We
recommend switching on --infer in order to avoid obscure type error messages.
/* Test of the new anonymous rule syntax. */
%token<int> A B C D EOF
%start<int list> phrase
%%
phrase:
xs = list(x = A | x = B | x = C { x })
ys = list(x = D y = D { x + y })
EOF
{ xs @ ys }
Warning: you are using the standard library and/or the %inline keyword. We
recommend switching on --infer in order to avoid obscure type error messages.
%start phrase
%token <int> EOF
%token <int> D
%token <int> C
%token <int> B
%token <int> A
%type <int list> phrase
%%
phrase:
| xs = list___anonymous_0_ ys = list___anonymous_1_ EOF
{ xs @ ys }
list___anonymous_1_:
|
{ [] }
| x0 = D y0 = D xs = list___anonymous_1_
{let x =
let y = y0 in
let x = x0 in
( x + y )
in
x :: xs }
list___anonymous_0_:
|
{ [] }
| x0 = A xs = list___anonymous_0_
{let x =
let x = x0 in
( x )
in
x :: xs }
| x0 = B xs = list___anonymous_0_
{let x =
let x = x0 in
( x )
in
x :: xs }
| x0 = C xs = list___anonymous_0_
{let x =
let x = x0 in
( x )
in
x :: xs }
%%
Warning: you are using the standard library and/or the %inline keyword. We
recommend switching on --infer in order to avoid obscure type error messages.
%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL
%left PLUS MINUS /* lowest precedence */
%left TIMES DIV /* medium precedence */
%nonassoc UMINUS /* highest precedence */
%start <int> main
(* The calc demo, with an anonymous rule for binary operators. *)
%%
main:
| e = expr EOL
{ e }
expr:
| i = INT
{ i }
| LPAREN e = expr RPAREN
{ e }
| e1 = expr
op = anonymous(PLUS { (+) } | MINUS { (-) } | TIMES { ( * ) } | DIV { (/) })
e2 = expr
{ op e1 e2 }
| MINUS e = expr %prec UMINUS
{ - e }
Warning: you are using the standard library and/or the %inline keyword. We
recommend switching on --infer in order to avoid obscure type error messages.
%start main
%token RPAREN
%token LPAREN
%token <int> INT
%token EOL
%token PLUS
%token MINUS
%token TIMES
%token DIV
%left PLUS MINUS
%left TIMES DIV
%nonassoc UMINUS
%type <int> main
%%
main:
| e = expr EOL
{ e }
expr:
| i = INT
{ i }
| LPAREN e = expr RPAREN
{ e }
| e1 = expr PLUS e2 = expr
{let op =
let x =
( (+) )
in
( x )
in
op e1 e2 }
| e1 = expr MINUS e2 = expr
{let op =
let x =
( (-) )
in
( x )
in
op e1 e2 }
| e1 = expr TIMES e2 = expr
{let op =
let x =
( ( * ) )
in
( x )
in
op e1 e2 }
| e1 = expr DIV e2 = expr
{let op =
let x =
( (/) )
in
( x )
in
op e1 e2 }
| MINUS e = expr %prec UMINUS
{ - e }
%%
...@@ -63,7 +63,7 @@ grammar: ...@@ -63,7 +63,7 @@ grammar:
{ {
pg_filename = ""; (* filled in by the caller *) pg_filename = ""; (* filled in by the caller *)
pg_declarations = List.flatten ds; pg_declarations = List.flatten ds;
pg_rules = rs; pg_rules = rs @ ParserAux.rules();
pg_trailer = t pg_trailer = t
} }
} }
...@@ -111,11 +111,11 @@ declaration: ...@@ -111,11 +111,11 @@ declaration:
]) ])
} }
| TYPE t = OCAMLTYPE ss = clist(actual_parameter) %prec decl | TYPE t = OCAMLTYPE ss = clist(strict_actual) %prec decl
{ List.map (Positions.map (fun nt -> DType (t, nt))) { List.map (Positions.map (fun nt -> DType (t, nt)))
(List.map Parameters.with_pos ss) } (List.map Parameters.with_pos ss) }
| TYPE OCAMLTYPE clist(actual_parameter) error | TYPE OCAMLTYPE clist(strict_actual) error
| TYPE OCAMLTYPE error | TYPE OCAMLTYPE error
| TYPE error | TYPE error
{ Error.error (Positions.two $startpos $endpos) (String.concat "\n" [ { Error.error (Positions.two $startpos $endpos) (String.concat "\n" [
...@@ -224,6 +224,7 @@ rule: ...@@ -224,6 +224,7 @@ rule:
symbol = symbol /* the symbol that is being defined */ symbol = symbol /* the symbol that is being defined */
params = plist(symbol) /* formal parameters */ params = plist(symbol) /* formal parameters */
COLON COLON
optional_bar
branches = branches branches = branches
{ {
let public, inline = flags in let public, inline = flags in
...@@ -240,7 +241,7 @@ rule: ...@@ -240,7 +241,7 @@ rule:
{ Error.error (Positions.two $startpos $endpos) "syntax error inside the definition of a nonterminal symbol." } { Error.error (Positions.two $startpos $endpos) "syntax error inside the definition of a nonterminal symbol." }
%inline branches: %inline branches:
optional_bar prods = separated_nonempty_list(BAR, production_group) prods = separated_nonempty_list(BAR, production_group)
{ List.flatten prods } { List.flatten prods }
flags: flags:
...@@ -307,27 +308,64 @@ production: ...@@ -307,27 +308,64 @@ production:
Because both [ioption] and [terminated] are defined as inlined by Because both [ioption] and [terminated] are defined as inlined by
the standard library, this definition expands to two productions, the standard library, this definition expands to two productions,
one of which begins with id = LID, the other of which begins with one of which begins with id = LID, the other of which begins with
p = actual_parameter. The token LID is in FIRST(actual_parameter), p = actual. The token LID is in FIRST(actual),
but the LR(1) formalism can deal with that. If [option] was used but the LR(1) formalism can deal with that. If [option] was used
instead of [ioption], an LR(1) conflict would arise -- looking instead of [ioption], an LR(1) conflict would arise -- looking
ahead at LID would not allow determining whether to reduce an ahead at LID would not allow determining whether to reduce an
empty [option] or to shift. */ empty [option] or to shift. */
producer: producer:
| id = ioption(terminated(LID, EQUAL)) p = actual_parameter | id = ioption(terminated(LID, EQUAL)) p = actual
{ id, p } { id, p }
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
/* The syntax of actual parameters allows applications, whereas the syntax /* The ideal syntax of actual parameters includes:
of formal parameters does not. It also allows use of the "?", "+", and 1. a symbol, optionally applied to a list of actual parameters;
"*" shortcuts. */ 2. an actual parameter followed with a modifier;
3. an anonymous rule. (Not delimited by parentheses! Otherwise
actual_parameter: one would often end up writing two pairs of parentheses.) */
symbol = symbol actuals = plist(actual_parameter)
/* In order to avoid a few ambiguities, we restrict this ideal syntax as
follows:
a. Within a %type declaration, we use [strict_actual], which
allows 1- and 2- (this is undocumented; the documentation says we
require a symbol) but not 3-, which would not make semantic sense
anyway.
b. Within a producer, we use [actual], which allows 1- and
2- but not 3-. Case 3- is allowed by switching to [lax_actual]
within the actual arguments of an application, which are clearly
delimited by parentheses and commas.
c. In front of a modifier, we can never allow [lax_actual],
as this would create an ambiguity: basically, [A | B?] could be
interpreted either as [(A | B)?] or as [A | (B?)].
*/
%inline generic_actual(A, B):
(* 1- *)
symbol = symbol actuals = plist(A)
{ Parameters.app symbol actuals } { Parameters.app symbol actuals }
| p = actual_parameter m = modifier (* 2- *)
| p = B m = modifier
{ ParameterApp (m, [ p ]) } { ParameterApp (m, [ p ]) }
strict_actual:
p = generic_actual(strict_actual, strict_actual)
{ p }
actual:
p = generic_actual(lax_actual, actual)
{ p }
lax_actual:
p = generic_actual(lax_actual, /* cannot be lax_ */ actual)
{ p }
(* 3- *)
| /* leading bar disallowed */
branches = branches
{ let position = position (with_poss $startpos $endpos ()) in
let symbol = ParserAux.anonymous position branches in
ParameterVar (with_pos position symbol) }
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
/* Formal or actual parameter lists are delimited with parentheses and /* Formal or actual parameter lists are delimited with parentheses and
separated with commas. They are optional. */ separated with commas. They are optional. */
......
...@@ -64,3 +64,40 @@ let override pos o1 o2 = ...@@ -64,3 +64,40 @@ let override pos o1 o2 =
o2 o2
| _, None -> | _, None ->
o1 o1
(* Support for on-the-fly expansion of anonymous rules. Whenever such
a rule is encountered, we create a fresh non-terminal symbol, add
a definition of this symbol to a global variable, and return a
reference to this symbol. Quick and dirty. So, in the end, clean. *)
(* [fresh()] produces a new name of the form [__anonymous_N] for an
   anonymous rule. The counter is private to this closure, so the only
   way to advance it is to call [fresh].
   NOTE(review): [Misc.postincrement] presumably returns the current value
   of the counter and then increments it -- confirm against [Misc]. *)
let fresh : unit -> string =
  let counter = ref 0 in
  fun () ->
    let index = Misc.postincrement counter in
    Printf.sprintf "__anonymous_%d" index
(* The definitions of the anonymous rules recorded so far by [anonymous].
   New definitions are pushed at the head, so the most recently recorded
   definition comes first. *)
let rules =
ref []
(* [anonymous pos branches] turns an anonymous rule, given by its position
   [pos] and its list of branches [branches], into a named rule: it picks a
   fresh name, records a definition for that name in [rules], and returns
   the name so that the caller can refer to it. *)
let anonymous pos branches =
  (* The name under which the anonymous rule will be known. *)
  let symbol = fresh() in
  (* The definition has no parameters, is not public, and is marked
     %inline. *)
  let definition = {
    pr_nt = symbol;
    pr_parameters = [];
    pr_branches = branches;
    pr_positions = [ pos ]; (* this list is not allowed to be empty *)
    pr_public_flag = false;
    pr_inline_flag = true
  } in
  (* Push the definition onto the global list, then hand back the name. *)
  rules := definition :: !rules;
  symbol
(* [rules()] returns every definition recorded so far and empties the
   record. Within this function, [rules] still denotes the reference
   defined earlier, because this definition is not recursive. *)
let rules () =
  let recorded = !rules in
  (* Start afresh, in case several .mly files are read in a row. *)
  let () = rules := [] in
  recorded
...@@ -23,3 +23,11 @@ val check_production_group: ...@@ -23,3 +23,11 @@ val check_production_group:
val override: Positions.t -> 'a option -> 'a option -> 'a option val override: Positions.t -> 'a option -> 'a option -> 'a option
(* Support for on-the-fly expansion of anonymous rules. When such a
rule is encountered, invoke [anonymous], which creates a fresh
non-terminal symbol, records the definition of this symbol in a
global variable, and returns this symbol. In the end, invoke
[rules], so as to obtain a list of all recorded definitions. Invoking
[rules] also clears the recorded definitions. *)
val anonymous: Positions.t -> parameterized_branch list -> string
val rules: unit -> parameterized_rule list
...@@ -4,6 +4,18 @@ ...@@ -4,6 +4,18 @@
%% %%
(* ------------------------------------------------------------------------- *)
(* The identity. *)
(* [anonymous(X)] is the same as [X]. *)
(* This can be useful because it allows placing an anonymous sub-rule in
the middle of a rule, as in [foo anonymous(bar { ... } | quux { ... }) baz]. *)
%public %inline anonymous(X):
x = X
{ x }
(* ------------------------------------------------------------------------- *) (* ------------------------------------------------------------------------- *)
(* Options. *) (* Options. *)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment