diff --git a/CHANGES b/CHANGES index 33a9b68288192af30f217c2d47a48d4fdf628519..36c7ba31f4e32a2c56b0e3dee3551f9e9b718488 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +2016/05/18: +Anonymous rules now work also when used inside a parameterized rule. +(This did not work until now.) + 2016/05/04: In the Coq backend, split the largest definitions into smaller ones. This circumvenents a limitation of vm_compute on 32 bit diff --git a/TODO b/TODO index d1bdd989f4f17ca315a435a3a2ee1fd7644b520c..865220d0eedb3255121c29d33b7a2c1828d8f7f4 100644 --- a/TODO +++ b/TODO @@ -115,9 +115,6 @@ * Produce well-chosen (predictable) names for anonymous rules? -* The construction of an anonymous rule should work also when the - body has parameters (should produce a parameterized rule). - * In the standard library, possibly rename "anonymous" and "embedded" and document them. The non-inline version allows embedding an action in the middle of a rule. diff --git a/bench/good/anonymous-nested.exp b/bench/good/anonymous-nested.exp new file mode 100644 index 0000000000000000000000000000000000000000..506434c8e3e063ba9169a96a1e1504b7c2515778 --- /dev/null +++ b/bench/good/anonymous-nested.exp @@ -0,0 +1,2 @@ +Warning: you are using the standard library and/or the %inline keyword. We +recommend switching on --infer in order to avoid obscure type error messages. diff --git a/bench/good/anonymous-nested.mly b/bench/good/anonymous-nested.mly new file mode 100644 index 0000000000000000000000000000000000000000..928302f3b36179b3878d9edf6746b1027848eba4 --- /dev/null +++ b/bench/good/anonymous-nested.mly @@ -0,0 +1,22 @@ +/* Test of the new anonymous rule syntax, including + anonymous rules nested in anonymous rules. */ + +%token A B C D EOF +%start phrase + +%% + +%inline id(X): + x = X { x } + +foo: + A B { 1 } + +bar: + C D { 2 } + +phrase: + y = id(id(x = foo { x } | z = bar { z })) + t = id(x = foo { x } | id(z = bar { z }) { 2 }) + EOF + { y + t } diff --git a/bench/good/anonymous-nested.opp.exp b/bench/good/anonymous-nested.opp.exp new file mode 100644 index 0000000000000000000000000000000000000000..32e565663522ce6fc1a4271fa18dd42ed5cf7dba --- /dev/null +++ b/bench/good/anonymous-nested.opp.exp @@ -0,0 +1,130 @@ +Warning: you are using the standard library and/or the %inline keyword. We +recommend switching on --infer in order to avoid obscure type error messages. +%start phrase +%token EOF +%token D +%token C +%token B +%token A + +%type phrase +%% + +foo: + _1 = A _2 = B + { ( 1 )} + +bar: + _1 = C _2 = D + { ( 2 )} + +phrase: + x000 = foo x00 = foo _3 = EOF + {let t = + let x0 = x00 in + let x = + let x = x0 in + ( x ) + in + ( x ) +in +let y = + let x00 = x000 in + let x = + let x0 = x00 in + let x = + let x = x0 in + ( x ) + in + ( x ) + in + ( x ) +in + ( y + t )} +| x000 = foo z0000 = bar _3 = EOF + {let t = + let z000 = z0000 in + let x = + let z00 = z000 in + let _1 = + let z0 = z00 in + let x = + let z = z0 in + ( z ) + in + ( x ) + in + ( 2 ) + in + ( x ) +in +let y = + let x00 = x000 in + let x = + let x0 = x00 in + let x = + let x = x0 in + ( x ) + in + ( x ) + in + ( x ) +in + ( y + t )} +| z000 = bar x00 = foo _3 = EOF + {let t = + let x0 = x00 in + let x = + let x = x0 in + ( x ) + in + ( x ) +in +let y = + let z00 = z000 in + let x = + let z0 = z00 in + let x = + let z = z0 in + ( z ) + in + ( x ) + in + ( x ) +in + ( y + t )} +| z000 = bar z0000 = bar _3 = EOF + {let t = + let z000 = z0000 in + let x = + let z00 = z000 in + let _1 = + let z0 = z00 in + let x = + let z = z0 in + ( z ) + in + ( x ) + in + ( 2 ) + in + ( x ) +in +let y = + let z00 = z000 in + let x = + let z0 = z00 in + let x = + let z = z0 in + ( z ) + in + ( x ) + in + ( x ) +in + ( y + t )} + +%% + + + diff --git a/src/anonymous.ml b/src/anonymous.ml new file mode 100644 index 0000000000000000000000000000000000000000..07b6cdad50c802d858d6d744e295388dcc60f98f --- /dev/null +++ b/src/anonymous.ml @@ -0,0 +1,105 @@ +open Syntax + +(* For each anonymous rule, we define a fresh nonterminal symbol, and + replace the anonymous rule with a reference to this symbol. If the + anonymous rule appears inside a parameterized rule, then we must + define a parameterized nonterminal symbol. *) + +(* ------------------------------------------------------------------------ *) + +(* This functor makes it easy to share mutable internal state between + the functions that follow. *) + +module Run (X : sig end) = struct + +(* ------------------------------------------------------------------------ *) + +(* A fresh name generator. *) + +let fresh : unit -> string = + let next = ref 0 in + fun () -> + Printf.sprintf "__anonymous_%d" (Misc.postincrement next) + +(* ------------------------------------------------------------------------ *) + +(* A rule accumulator. Used to collect the fresh definitions that we + produce. *) + +let rules = + ref [] + +(* ------------------------------------------------------------------------ *) + +(* [anonymous pos parameters branches] deals with an anonymous rule, + at position [pos], which appears inside a possibly-parameterized + rule whose parameters are [parameters], and whose body is + [branches]. We assume that [branches] does not itself contain any + anonymous rules. As a side effect, we create a fresh definition, + and return its name. *) + +let var (symbol : symbol) : parameter = + ParameterVar (Positions.with_pos Positions.dummy symbol) + +let anonymous pos (parameters : symbol list) (branches : parameterized_branch list) : parameter = + (* Generate a fresh non-terminal symbol. *) + let symbol = fresh() in + (* Construct its definition. Note that it is implicitly marked %inline. *) + let rule = { + pr_public_flag = false; + pr_inline_flag = true; + pr_nt = symbol; + pr_positions = [ pos ]; (* this list is not allowed to be empty *) + pr_parameters = parameters; + pr_branches = branches + } in + (* Record this definition. *) + rules := rule :: !rules; + (* Return the symbol that stands for it. *) + Parameters.app (Positions.with_pos pos symbol) (List.map var parameters) + (* TEMPORARY should use as few parameters as possible *) + +(* ------------------------------------------------------------------------ *) + +(* Traversal code. *) + +let rec transform_parameter (parameters : symbol list) (p : parameter) : parameter = + match p with + | ParameterVar _ -> + p + | ParameterApp (x, ps) -> + ParameterApp (x, List.map (transform_parameter parameters) ps) + | ParameterAnonymous branches -> + let pos = Positions.position branches + and branches = Positions.value branches in + (* Do not forget the recursive invocation! *) + let branches = List.map (transform_parameterized_branch parameters) branches in + (* This is where the real work is done. *) + anonymous pos parameters branches + +and transform_producer parameters (x, p) = + x, transform_parameter parameters p + +and transform_parameterized_branch parameters branch = + let pr_producers = + List.map (transform_producer parameters) branch.pr_producers + in + { branch with pr_producers } + +let transform_parameterized_rule rule = + let pr_branches = + List.map (transform_parameterized_branch rule.pr_parameters) rule.pr_branches + in + { rule with pr_branches } + +end + +(* ------------------------------------------------------------------------ *) + +(* The main entry point invokes the functor and reads its result. *) + +let transform_partial_grammar g = + let module R = Run(struct end) in + let pg_rules = List.map R.transform_parameterized_rule g.pg_rules in + let pg_rules = !R.rules @ pg_rules in + { g with pg_rules } diff --git a/src/anonymous.mli b/src/anonymous.mli new file mode 100644 index 0000000000000000000000000000000000000000..f5608aba29c8c21b1c870dbe1219a50bbad2ece3 --- /dev/null +++ b/src/anonymous.mli @@ -0,0 +1,3 @@ +open Syntax + +val transform_partial_grammar: partial_grammar -> partial_grammar diff --git a/src/fancy-parser.mly b/src/fancy-parser.mly index 854c21292abc1d99247ad51dd0cdc136865990de..fa07638d696f6edf41fca12c7877df1d96e513d3 100644 --- a/src/fancy-parser.mly +++ b/src/fancy-parser.mly @@ -56,7 +56,7 @@ grammar: { pg_filename = ""; (* filled in by the caller *) pg_declarations = List.flatten ds; - pg_rules = rs @ ParserAux.rules(); + pg_rules = rs; pg_trailer = t } } @@ -309,9 +309,13 @@ lax_actual: (* 3- *) | /* leading bar disallowed */ branches = branches - { let position = position (with_poss $startpos $endpos ()) in - let symbol = ParserAux.anonymous position branches in - ParameterVar (with_pos position symbol) } + { ParameterAnonymous (with_poss $startpos $endpos branches) } + (* 2016/05/18: we used to eliminate anonymous rules on the fly during + parsing. However, when an anonymous rule appears in a parameterized + definition, the fresh nonterminal symbol that is created should be + parameterized. This was not done, and is not easy to do on the fly, + as it requires inherited attributes (or a way of simulating them). + We now use explicit abstract syntax for anonymous rules. *) /* ------------------------------------------------------------------------- */ /* Formal or actual parameter lists are delimited with parentheses and diff --git a/src/front.ml b/src/front.ml index b1de6bfd13553f56a300e46960486cf5137624d8..8b836dd079721b0fc9f16ebaa674358cc64213ea 100644 --- a/src/front.ml +++ b/src/front.ml @@ -39,6 +39,13 @@ let () = (* ------------------------------------------------------------------------- *) +(* Eliminate anonymous rules. *) + +let partial_grammars = + List.map Anonymous.transform_partial_grammar partial_grammars + +(* ------------------------------------------------------------------------- *) + (* If several grammar files were specified, merge them. *) let parameterized_grammar = diff --git a/src/parserAux.ml b/src/parserAux.ml index 54a3f1a7ad3c0939e4e03cf07609a04f97bd1227..417ff01c2c34bdbc30561ec96f7f8cbc86ebe78b 100644 --- a/src/parserAux.ml +++ b/src/parserAux.ml @@ -71,42 +71,6 @@ let override pos o1 o2 = | _, None -> o1 -(* Support for on-the-fly expansion of anonymous rules. Whenever such - a rule is encountered, we create a fresh non-terminal symbol, add - a definition of this symbol to a global variable, and return a - reference to this symbol. Quick and dirty. So, in the end, clean. *) - -let fresh : unit -> string = - let next = ref 0 in - fun () -> - Printf.sprintf "__anonymous_%d" (Misc.postincrement next) - -let rules = - ref [] - -let anonymous pos branches = - (* Generate a fresh non-terminal symbol. *) - let symbol = fresh() in - (* Construct its definition. Note that it is implicitly marked %inline. *) - let rule = { - pr_public_flag = false; - pr_inline_flag = true; - pr_nt = symbol; - pr_positions = [ pos ]; (* this list is not allowed to be empty *) - pr_parameters = []; - pr_branches = branches - } in - (* Record this definition. *) - rules := rule :: !rules; - (* Return the symbol that stands for it. *) - symbol - -let rules () = - let result = !rules in - (* Reset the global state, in case we need to read several .mly files. *) - rules := []; - result - (* Only unnamed producers can be referred to using positional identifiers. Besides, such positions must be taken in the interval [1 .. List.length producers]. The output array [p] is such that @@ -116,4 +80,3 @@ let producer_names producers = producers |> List.map (fun (_, oid, _) -> Option.map Positions.value oid) |> Array.of_list - diff --git a/src/parserAux.mli b/src/parserAux.mli index 3fb556d645540fb27f44de73a1b31f25c0e6536e..6b8326e60767c0c7a9b21397a440be5ca7f2ec23 100644 --- a/src/parserAux.mli +++ b/src/parserAux.mli @@ -46,15 +46,6 @@ val normalize_producers: val override: Positions.t -> 'a option -> 'a option -> 'a option -(* Support for on-the-fly expansion of anonymous rules. When such a - rule is encountered, invoke [anonymous], which creates a fresh - non-terminal symbol, records the definition of this symbol to a - global variable, and returns this symbol. In the end, invoke - [rules], so as to obtain a list of all recorded definitions. *) - -val anonymous: Positions.t -> parameterized_branch list -> string -val rules: unit -> parameterized_rule list - (* [producer_names producers] returns an array [names] such that [names.(idx) = None] if the (idx + 1)-th producer is unnamed and [names.(idx) = Some id] if it is called [id]. *)