Commit 9c0d16ec authored by POTTIER Francois's avatar POTTIER Francois

Introduce attributes in the abstract syntax of parameterized grammars,

that is, in [Syntax], and in the files that depend on it.
Updated the parsers to produce attributes.
[UnparameterizedSyntax] is not yet updated.
parent 68bc58d6
......@@ -9,7 +9,7 @@ open Syntax
(* Computing the free names of some syntactic categories. *)
let rec fn_parameter accu p =
let rec fn_parameter accu (p : parameter) =
(* [p] cannot be [ParameterAnonymous _]. *)
let x, ps = Parameters.unapp p in
let accu = StringSet.add (Positions.value x) accu in
......@@ -18,7 +18,7 @@ let rec fn_parameter accu p =
and fn_parameters accu ps =
List.fold_left fn_parameter accu ps
let fn_producer accu (_, p) =
let fn_producer accu ((_, p, _) : producer) =
fn_parameter accu p
let fn_branch accu branch =
......@@ -74,12 +74,15 @@ let anonymous pos (parameters : symbol list) (branches : parameterized_branch li
let parameters = List.filter (fun x -> StringSet.mem x used) parameters in
(* Generate a fresh non-terminal symbol. *)
let symbol = fresh() in
(* Construct its definition. Note that it is implicitly marked %inline. *)
(* Construct its definition. Note that it is implicitly marked %inline.
Also, it does not carry any attributes; this is consistent
with the fact that %inline symbols cannot carry attributes. *)
let rule = {
pr_public_flag = false;
pr_inline_flag = true;
pr_nt = symbol;
pr_positions = [ pos ]; (* this list is not allowed to be empty *)
pr_attributes = [];
pr_parameters = parameters;
pr_branches = branches
} in
......@@ -106,8 +109,8 @@ let rec transform_parameter (parameters : symbol list) (p : parameter) : paramet
(* This is where the real work is done. *)
anonymous pos parameters branches
and transform_producer parameters (x, p) =
x, transform_parameter parameters p
and transform_producer parameters ((x, p, attrs) : producer) =
x, transform_parameter parameters p, attrs
and transform_parameterized_branch parameters branch =
let pr_producers =
......
......@@ -29,8 +29,10 @@ open Positions
%token PERCENTATTRIBUTE
/* ------------------------------------------------------------------------- */
/* Start symbol. */
/* Type annotations and start symbol. */
%type <ParserAux.early_producer> producer
%type <ParserAux.early_production> production
%start <Syntax.partial_grammar> grammar
/* ------------------------------------------------------------------------- */
......@@ -72,8 +74,8 @@ declaration:
| h = HEADER /* lexically delimited by %{ ... %} */
{ [ with_poss $startpos $endpos (DCode h) ] }
| TOKEN t = OCAMLTYPE? ts = clist(terminal)
{ List.map (Positions.map (fun terminal -> DToken (t, terminal))) ts }
| TOKEN ty = OCAMLTYPE? ts = clist(terminal)
{ List.map (Positions.map (fun (terminal, attrs) -> DToken (ty, terminal, attrs))) ts }
| START t = OCAMLTYPE? nts = clist(nonterminal)
/* %start <ocamltype> foo is syntactic sugar for %start foo %type <ocamltype> foo */
......@@ -97,6 +99,12 @@ declaration:
| PARAMETER t = OCAMLTYPE
{ [ with_poss $startpos $endpos (DParameter t) ] }
| attr = GRAMMARATTRIBUTE
{ [ with_poss $startpos $endpos (DGrammarAttribute attr) ] }
| PERCENTATTRIBUTE actuals = clist(strict_actual) attrs = ATTRIBUTE+
{ [ with_poss $startpos $endpos (DSymbolAttributes (actuals, attrs)) ] }
| ON_ERROR_REDUCE ss = clist(strict_actual)
{ let prec = ParserAux.new_on_error_reduce_level() in
List.map (Positions.map (fun nt -> DOnErrorReduce (nt, prec)))
......@@ -154,8 +162,8 @@ symbol:
declared to be start symbols must begin with a lowercase letter. */
%inline terminal:
id = UID
{ id }
id = UID attrs = ATTRIBUTE*
{ Positions.map (fun uid -> (uid, attrs)) id }
%inline nonterminal:
id = LID
......@@ -169,6 +177,7 @@ symbol:
rule:
flags = flags /* flags */
symbol = symbol /* the symbol that is being defined */
attributes = ATTRIBUTE*
params = plist(symbol) /* formal parameters */
COLON
optional_bar
......@@ -180,6 +189,7 @@ rule:
pr_inline_flag = inline;
pr_nt = Positions.value symbol;
pr_positions = [ Positions.position symbol ];
pr_attributes = attributes;
pr_parameters = List.map Positions.value params;
pr_branches = branches
}
......@@ -253,7 +263,7 @@ production:
/* ------------------------------------------------------------------------- */
/* A producer is an actual parameter, possibly preceded by a
binding.
binding, and possibly followed with attributes.
Because both [ioption] and [terminated] are defined as inlined by
the standard library, this definition expands to two productions,
......@@ -265,8 +275,8 @@ production:
empty [option] or to shift. */
producer:
| id = ioption(terminated(LID, EQUAL)) p = actual
{ position (with_poss $startpos $endpos ()), id, p }
| id = ioption(terminated(LID, EQUAL)) p = actual attrs = ATTRIBUTE*
{ position (with_poss $startpos $endpos ()), id, p, attrs }
/* ------------------------------------------------------------------------- */
/* The ideal syntax of actual parameters includes:
......
......@@ -224,6 +224,7 @@ module Terminal = struct
tk_ocamltype = None;
tk_is_declared = true;
tk_position = Positions.dummy;
tk_attributes = [];
}
in
Array.init n (fun tok ->
......
......@@ -284,7 +284,7 @@ let check_grammar (p_grammar : Syntax.grammar) =
(* We only are interested by parameterized non terminals. *)
if parameters node <> [] then
List.fold_left (fun succs { pr_producers = symbols } ->
List.fold_left (fun succs -> function (_, p) ->
List.fold_left (fun succs -> function (_, p, _) ->
let symbol, _ = Parameters.unapp p in
try
let symbol_node = conv symbol.value in
......@@ -416,7 +416,7 @@ let check_grammar (p_grammar : Syntax.grammar) =
let check_producers () =
List.iter
(fun { pr_producers = symbols } -> List.iter
(function (_, p) ->
(function (_, p, _) ->
(* We take the use of each symbol into account. *)
check_parameter_type env p;
(* If it is in the same component, check in addition that
......@@ -562,7 +562,7 @@ let expand p_grammar =
instantiate the parameterized branch. *)
let rec expand_branch subst pbranch =
let new_producers = List.map
(function (ido, p) ->
(function (ido, p, _) ->
let sym, actual_parameters =
Parameters.unapp p in
let sym, actual_parameters =
......
open Positions
open Syntax
type early_producer =
Positions.t *
identifier located option *
parameter *
attributes
type early_producers =
early_producer list
type early_production =
early_producers *
string located option * (* optional precedence *)
branch_production_level *
Positions.t
type early_productions =
early_production list
let new_precedence_level =
let c = ref 0 in
fun pos1 pos2 ->
......@@ -24,13 +42,13 @@ module IdSet = Set.Make (struct
compare (value id1) (value id2)
end)
let defined_identifiers (_, ido, _) accu =
let defined_identifiers (_, ido, _, _) accu =
Option.fold IdSet.add ido accu
let defined_identifiers producers =
let defined_identifiers (producers : early_producers) =
List.fold_right defined_identifiers producers IdSet.empty
let check_production_group right_hand_sides =
let check_production_group (right_hand_sides : early_productions) =
match right_hand_sides with
| [] ->
(* A production group cannot be empty. *)
......@@ -56,15 +74,15 @@ let check_production_group right_hand_sides =
(* [normalize_producer i p] assigns a name of the form [_i]
to the unnamed producer [p]. *)
let normalize_producer i (pos, opt_identifier, parameter) =
let normalize_producer i (pos, opt_identifier, parameter, attrs) =
let id =
match opt_identifier with
| Some id -> id
| None -> Positions.with_pos pos ("_" ^ string_of_int (i + 1))
in
(id, parameter)
(id, parameter, attrs)
let normalize_producers producers =
let normalize_producers (producers : early_producers) : producer list =
List.mapi normalize_producer producers
let override pos o1 o2 =
......@@ -82,7 +100,7 @@ let override pos o1 o2 =
.. List.length producers]. The output array [p] is such that
[p.(idx) = Some x] if [idx] must be referred to using [x], not
[$(idx + 1)]. *)
let producer_names producers =
let producer_names (producers : early_producers) =
producers
|> List.map (fun (_, oid, _) -> Option.map Positions.value oid)
|> List.map (fun (_, oid, _, _) -> Option.map Positions.value oid)
|> Array.of_list
(* This module provides utilities that are shared by the two versions
of the parser. *)
open Positions
open Syntax
(* A few types used in the parser. *)
type early_producer =
Positions.t *
identifier located option *
parameter *
attributes
type early_producers =
early_producer list
type early_production =
early_producers *
string located option * (* optional precedence *)
branch_production_level *
Positions.t
type early_productions =
early_production list
(* [new_precedence_level pos1 pos2] creates a new precendence level, which is
stronger than any levels previously created by this function. It should be
called every time a [%left], [%right], or [%nonassoc] declaration is found.
......@@ -32,18 +53,14 @@ val new_on_error_reduce_level: unit -> on_error_reduce_level
(* [check_production_group] accepts a production group and checks that all
productions in the group define the same set of identifiers. *)
val check_production_group:
((Positions.t * identifier Positions.located option * parameter) list * 'a * 'b * 'c) list ->
unit
val check_production_group: early_productions -> unit
(* [normalize_producers] accepts a list of producers where identifiers are
optional and returns a list of producers where identifiers are mandatory.
A missing identifier in the [i]-th position receives the conventional
name [_i]. *)
val normalize_producers:
(Positions.t * identifier Positions.located option * parameter) list ->
producer list
val normalize_producers: early_producers -> producer list
(* [override pos oprec1 oprec2] decides which of the two optional
%prec declarations [oprec1] and [oprec2] applies to a
......@@ -54,6 +71,4 @@ val override: Positions.t -> 'a option -> 'a option -> 'a option
(* [producer_names producers] returns an array [names] such that
[names.(idx) = None] if the (idx + 1)-th producer is unnamed
and [names.(idx) = Some id] if it is called [id]. *)
val producer_names :
(_ * Syntax.identifier Positions.located option * _) list ->
Syntax.identifier option array
val producer_names: early_producers -> identifier option array
......@@ -24,7 +24,7 @@ let join_declaration filename (grammar : grammar) decl =
difficult by the fact that %token and %left-%right-%nonassoc
declarations are independent. *)
| DToken (ocamltype, terminal) ->
| DToken (ocamltype, terminal, attributes) ->
let token_property =
try
......@@ -40,17 +40,17 @@ let join_declaration filename (grammar : grammar) decl =
if token_property.tk_is_declared then
Error.errorp decl
"the token %s has multiple definitions." terminal
"the token %s has multiple definitions." terminal;
(* Otherwise, update the previous definition. *)
else
{ token_property with
tk_is_declared = true;
tk_ocamltype = ocamltype;
tk_filename = filename;
tk_position = decl.position;
}
{ token_property with
tk_is_declared = true;
tk_ocamltype = ocamltype;
tk_filename = filename;
tk_position = decl.position;
tk_attributes = attributes;
}
with Not_found ->
......@@ -62,6 +62,7 @@ let join_declaration filename (grammar : grammar) decl =
tk_associativity = UndefinedAssoc;
tk_precedence = UndefinedPrecedence;
tk_position = decl.position;
tk_attributes = attributes;
tk_is_declared = true
}
......@@ -105,6 +106,7 @@ let join_declaration filename (grammar : grammar) decl =
tk_associativity = UndefinedAssoc;
tk_precedence = prec;
tk_is_declared = false;
tk_attributes = [];
(* Will be updated later. *)
tk_position = decl.position;
} in
......@@ -125,6 +127,14 @@ let join_declaration filename (grammar : grammar) decl =
token_properties.tk_associativity <- assoc;
grammar
| DGrammarAttribute attr ->
{ grammar with
p_grammar_attributes = attr :: grammar.p_grammar_attributes }
| DSymbolAttributes (actuals, attrs) ->
{ grammar with
p_symbol_attributes = (actuals, attrs) :: grammar.p_symbol_attributes }
(* ------------------------------------------------------------------------- *)
(* This stores an optional postlude into a grammar.
Postludes are stored in an arbitrary order. *)
......@@ -152,11 +162,11 @@ let rewrite_nonterminal (phi : renaming) nonterminal =
let rewrite_parameter phi parameter =
Parameters.map (Positions.map (Misc.support_assoc phi)) parameter
let rewrite_element phi (ido, parameter) =
ido, rewrite_parameter phi parameter
let rewrite_producer phi ((ido, parameter, attrs) : producer) =
ido, rewrite_parameter phi parameter, attrs
let rewrite_branch phi ({ pr_producers = producers } as branch) =
{ branch with pr_producers = List.map (rewrite_element phi) producers }
{ branch with pr_producers = List.map (rewrite_producer phi) producers }
let rewrite_branches phi branches =
match phi with
......@@ -452,7 +462,7 @@ let symbols_of grammar (pgrammar : Syntax.partial_grammar) =
(* Analyse each branch. *)
let symbols = List.fold_left (fun symbols branch ->
List.fold_left (fun symbols (_, p) ->
List.fold_left (fun symbols (_, p, _) ->
store_except_rule_parameters symbols p
) symbols branch.pr_producers
) symbols prule.pr_branches
......@@ -548,7 +558,8 @@ let merge_rules symbols pgs =
let rbr = rewrite_branches phi r.pr_branches in
{ r' with
pr_positions = positions;
pr_branches = rbr @ r'.pr_branches
pr_branches = rbr @ r'.pr_branches;
pr_attributes = r.pr_attributes @ r'.pr_attributes;
}
with Not_found ->
(* We alphaconvert the rule in order to avoid the capture of
......@@ -568,6 +579,8 @@ let empty_grammar =
p_tokens = StringMap.empty;
p_rules = StringMap.empty;
p_on_error_reduce = [];
p_grammar_attributes = [];
p_symbol_attributes = [];
}
let join grammar pgrammar =
......@@ -642,7 +655,7 @@ let check_parameterized_grammar_is_well_defined grammar =
} -> ignore (List.fold_left
(* Check the producers. *)
(fun already_seen (id, p) ->
(fun already_seen (id, p, _) ->
let symbol, parameters = Parameters.unapp p in
let s = symbol.value and p = symbol.position in
let already_seen =
......
......@@ -52,6 +52,20 @@ type attribute =
type attributes =
attribute list
(* Attributes allow the user to annotate the grammar with information that is
ignored by Menhir, but can be exploited by other tools, via the SDK. *)
(* Attributes can be attached in the following places:
- with the grammar: %[@bar ...]
- with a terminal symbol: %token FOO [@bar ...]
- with a rule: foo(X) [@bar ...]: ...
- with a producer: e = foo(quux) [@bar ...]
- with an arbitrary symbol: %attribute FOO foo(quux) [@bar ...]
After expanding away parameterized nonterminal symbols, things become
a bit simpler, as %attribute declarations are desugared away. *)
(* ------------------------------------------------------------------------ *)
(* Information about tokens. (Only after joining.) *)
......@@ -76,6 +90,7 @@ type token_properties =
tk_filename : filename;
tk_ocamltype : Stretch.ocamltype option;
tk_position : Positions.t;
tk_attributes : attributes;
mutable tk_associativity : token_associativity;
mutable tk_precedence : precedence_level;
mutable tk_is_declared : bool;
......@@ -126,7 +141,7 @@ and parameters =
it could be [e = expr], for instance. *)
and producer =
identifier Positions.located * parameter
identifier Positions.located * parameter * attributes
(* ------------------------------------------------------------------------ *)
......@@ -151,6 +166,7 @@ type parameterized_rule =
pr_inline_flag : bool;
pr_nt : nonterminal;
pr_positions : Positions.t list;
pr_attributes : attributes;
pr_parameters : symbol list;
pr_branches : parameterized_branch list;
}
......@@ -171,7 +187,7 @@ type declaration =
(* Terminal symbol (token) declaration. *)
| DToken of Stretch.ocamltype option * terminal
| DToken of Stretch.ocamltype option * terminal * attributes
(* Start symbol declaration. *)
......@@ -185,6 +201,14 @@ type declaration =
| DType of Stretch.ocamltype * parameter
(* Grammar-level attribute declaration. *)
| DGrammarAttribute of attribute
(* Attributes shared among multiple symbols, i.e., [%attribute]. *)
| DSymbolAttributes of parameter list * attributes
(* On-error-reduce declaration. *)
| DOnErrorReduce of parameter * on_error_reduce_level
......@@ -208,8 +232,9 @@ type partial_grammar =
(* The differences with partial grammars (above) are as follows:
1. the file name is gone (there could be several file names, anyway).
2. there can be several postludes.
3. declarations are organized by kind: preludes, functor %parameters,
%start symbols, %types, %tokens, %on_error_reduce.
3. declarations are organized by kind: preludes, postludes,
functor %parameters, %start symbols, %types, %tokens, %on_error_reduce,
grammar attributes, %attributes.
4. rules are stored in a map, indexed by symbol names, instead of a list.
*)
......@@ -222,5 +247,7 @@ type grammar =
p_types : (parameter * Stretch.ocamltype Positions.located) list;
p_tokens : token_properties StringMap.t;
p_on_error_reduce : (parameter * on_error_reduce_level) list;
p_grammar_attributes : attributes;
p_symbol_attributes : (parameter list * attributes) list;
p_rules : parameterized_rule StringMap.t;
}
......@@ -23,6 +23,8 @@ open Positions
%token <Syntax.attribute> ATTRIBUTE GRAMMARATTRIBUTE
%token PERCENTATTRIBUTE
%start grammar
%type <ParserAux.early_producer> producer
%type <ParserAux.early_production> production
%type <Syntax.partial_grammar> grammar
/* These declarations solve a shift-reduce conflict in favor of
......@@ -73,7 +75,7 @@ declaration:
{ [ unknown_pos (DCode $1) ] }
| TOKEN optional_ocamltype terminals
{ List.map (Positions.map (fun terminal -> DToken ($2, terminal))) $3 }
{ List.map (Positions.map (fun (terminal, attrs) -> DToken ($2, terminal, attrs))) $3 }
| START nonterminals
{ List.map (Positions.map (fun nonterminal -> DStart nonterminal)) $2 }
......@@ -94,6 +96,12 @@ declaration:
| PARAMETER OCAMLTYPE
{ [ unknown_pos (DParameter $2) ] }
| GRAMMARATTRIBUTE
{ [ unknown_pos (DGrammarAttribute $1) ] }
| PERCENTATTRIBUTE actuals attributes
{ [ unknown_pos (DSymbolAttributes ($2, $3)) ] }
| ON_ERROR_REDUCE actuals
{ let prec = ParserAux.new_on_error_reduce_level() in
List.map (Positions.map (fun nt -> DOnErrorReduce (nt, prec)))
......@@ -139,6 +147,11 @@ optional_comma:
| COMMA
{ () }
attributes:
/* epsilon */
{ [] }
| ATTRIBUTE attributes { $1 :: $2 }
/* ------------------------------------------------------------------------- */
/* Terminals must begin with an uppercase letter. Nonterminals that are
declared to be start symbols must begin with a lowercase letter. */
......@@ -146,8 +159,8 @@ optional_comma:
terminals:
/* epsilon */
{ [] }
| terminals optional_comma UID
{ $3 :: $1 }
| terminals optional_comma UID attributes
{ (Positions.map (fun uid -> (uid, $4)) $3) :: $1 }
nonterminals:
/* epsilon */
......@@ -169,6 +182,7 @@ rules:
rule:
flags
symbol
attributes
optional_formal_parameters
COLON
optional_bar
......@@ -179,8 +193,9 @@ rule:
pr_inline_flag = inline;
pr_nt = Positions.value $2;
pr_positions = [ Positions.position $2 ];
pr_parameters = $3;
pr_branches = List.flatten ($6 :: List.rev $7)
pr_attributes = $3;
pr_parameters = $4;
pr_branches = List.flatten ($7 :: List.rev $8)
}
}
......@@ -329,12 +344,12 @@ producers:
/* ------------------------------------------------------------------------- */
/* A producer is an actual parameter, possibly preceded by a
binding. */
binding, and possibly followed with attributes. */
producer:
| actual
{ Positions.lex_join (symbol_start_pos()) (symbol_end_pos()), None, $1 }
| LID EQUAL actual
{ Positions.lex_join (symbol_start_pos()) (symbol_end_pos()), Some $1, $3 }
| actual attributes
{ Positions.lex_join (symbol_start_pos()) (symbol_end_pos()), None, $1, $2 }
| LID EQUAL actual attributes
{ Positions.lex_join (symbol_start_pos()) (symbol_end_pos()), Some $1, $3, $4 }
%%
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment