Commit 1dc044e0 authored by POTTIER Francois

The semicolon character is now considered as significant by the lexer,

which produces a token [SEMI]. The parser allows SEMI to appear in three places:
after a declaration; after a rule; after a producer. Only two grammars in
the test suite have required changes.
parent d9b87844
......@@ -2,7 +2,15 @@
## 2018/10/24
* Until today, the semicolon character `;` was insignificant: it was
considered as whitespace by Menhir. It is now accepted only in a
few specific places, namely: after a declaration; after a rule;
after a producer. If Menhir suddenly complains about a semicolon,
just remove it. This change is being made in preparation for
further syntactic changes.
* New flag `--no-dollars`, which disallows the use of `$i` in semantic actions.
The default behavior remains to allow the use of `$i`.
* When generating OCaml code, include all record fields in record patterns,
even when bound to a wildcard pattern. Thus, avoid triggering OCaml's
......
......@@ -324,10 +324,12 @@ number and exit.
\section{Lexical conventions}
The semicolon character (\kw{;}) is treated as insignificant, just like white
space. Thus, rules and producers (for instance) can be separated with
semicolons if it is thought that this improves readability. Semicolons can be
omitted otherwise.
A semicolon character (\kw{;}) may appear after a declaration
(\sref{sec:decls}), after a rule (\sref{sec:rules}), and after each producer
inside a production (\sref{sec:producers}). All of these semicolons are
optional. The semicolon character is not allowed to appear anywhere else. This
is in contrast with \ocamlyacc, which views semicolons as insignificant,
just like whitespace.
Identifiers (\nt{id}) coincide with \ocaml identifiers, except they are not
allowed to contain the quote (\kw{'}) character. Following
......@@ -576,6 +578,7 @@ $\donerrorreduce\;\nt{lid}_1 \ldots \nt{lid}_n$, the symbols $\nt{lid}_1, \ldots
than those listed in later \donerrorreduce declarations.
\subsection{Rules}
\label{sec:rules}
Following the mandatory \percentpercent keyword, a sequence of rules is
expected. Each rule defines a nonterminal symbol~\nt{id}.
......
......@@ -33,13 +33,13 @@ open Positions
%token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL
%token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE
%token PERCENTATTRIBUTE SEMI
%token <string Positions.located> LID UID
%token <Stretch.t> HEADER
%token <Stretch.ocamltype> OCAMLTYPE
%token <Stretch.t Lazy.t> PERCENTPERCENT
%token <Settings.dollars -> Syntax.identifier option array -> Action.t> ACTION
%token <Syntax.attribute> ATTRIBUTE GRAMMARATTRIBUTE
%token PERCENTATTRIBUTE
/* ------------------------------------------------------------------------- */
/* Type annotations and start symbol. */
......@@ -68,11 +68,14 @@ open Positions
postlude, which we do not parse. */
grammar:
ds = declaration* PERCENTPERCENT rs = rule* t = postlude
ds = flatten(declaration*)
PERCENTPERCENT
rs = flatten(rule*)
t = postlude
{
{
pg_filename = ""; (* filled in by the caller *)
pg_declarations = List.flatten ds;
pg_declarations = ds;
pg_rules = rs;
pg_postlude = t
}
......@@ -123,6 +126,9 @@ declaration:
List.map (Positions.map (fun nt -> DOnErrorReduce (nt, prec)))
(List.map Parameters.with_pos ss) }
| SEMI
{ [] }
/* This production recognizes tokens that are valid in the rules section,
but not in the declarations section. This is a hint that a %% was
forgotten. */
......@@ -197,7 +203,7 @@ rule:
branches = branches
{
let public, inline = flags in
{
let rule = {
pr_public_flag = public;
pr_inline_flag = inline;
pr_nt = Positions.value symbol;
......@@ -206,7 +212,10 @@ rule:
pr_parameters = List.map Positions.value params;
pr_branches = branches
}
in [rule]
}
| SEMI
{ [] }
%inline branches:
prods = separated_nonempty_list(BAR, production_group)
......@@ -288,7 +297,7 @@ production:
empty [option] or to shift. */
producer:
| id = ioption(terminated(LID, EQUAL)) p = actual attrs = ATTRIBUTE*
| id = ioption(terminated(LID, EQUAL)) p = actual attrs = ATTRIBUTE* SEMI*
{ position (with_loc $loc ()), id, p, attrs }
/* ------------------------------------------------------------------------- */
......
......@@ -347,7 +347,7 @@ let reserved =
let newline = ('\010' | '\013' | "\013\010")
let whitespace = [ ' ' '\t' ';' ]
let whitespace = [ ' ' '\t' ]
let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
......@@ -416,6 +416,8 @@ rule main = parse
let closingpos = finish lexbuf in
mk_stretch openingpos closingpos false []
)) }
| ";"
{ SEMI }
| ":"
{ COLON }
| ","
......
......@@ -82,6 +82,7 @@ Examples of well-formed declarations:
# ----------------------------------------------------------------------------
grammar: PERCENTPERCENT TYPE
grammar: PERCENTPERCENT SEMI TYPE
Either a rule or %% is expected at this point.
......@@ -114,10 +115,10 @@ Either a parenthesized, comma-delimited list of formal parameters
or an attribute
or a colon is expected at this point.
Examples of well-formed rules:
main: e = expr EOL { e }
expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 }
main: e = expr; EOL { e }
expr: i = INT { i } | e1 = expr; PLUS; e2 = expr { e1 + e2 }
option(X): { None } | x = X { Some x }
main [@cost 0]: e = expr EOL { e }
main [@cost 0]: e = expr; EOL { e }
# ----------------------------------------------------------------------------
......@@ -141,13 +142,14 @@ grammar: PERCENTPERCENT UID COLON UID BAR TYPE
Ill-formed rule.
A list of productions is expected at this point.
Examples of well-formed rules:
main: e = expr EOL { e }
expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 }
main: e = expr; EOL { e }
expr: i = INT { i } | e1 = expr; PLUS; e2 = expr { e1 + e2 }
symbol: s = LID | s = UID { s }
# ----------------------------------------------------------------------------
grammar: PERCENTPERCENT UID COLON UID TYPE
grammar: PERCENTPERCENT UID COLON UID SEMI TYPE
grammar: PERCENTPERCENT UID COLON UID ATTRIBUTE TYPE
grammar: PERCENTPERCENT UID COLON LID TYPE
grammar: PERCENTPERCENT UID COLON LID EQUAL TYPE
......
......@@ -28,13 +28,13 @@ open Positions
%token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL
%token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE
%token PERCENTATTRIBUTE SEMI
%token <string Positions.located> LID UID
%token <Stretch.t> HEADER
%token <Stretch.ocamltype> OCAMLTYPE
%token <Stretch.t Lazy.t> PERCENTPERCENT
%token <Settings.dollars -> Syntax.identifier option array -> Action.t> ACTION
%token <Syntax.attribute> ATTRIBUTE GRAMMARATTRIBUTE
%token PERCENTATTRIBUTE
%start grammar
%type <ParserAux.early_producer> producer
%type <ParserAux.early_production> production
......@@ -82,6 +82,8 @@ declarations:
{ [] }
| declarations declaration
{ $2 @ $1 }
| declarations SEMI
{ $1 }
declaration:
| HEADER /* lexically delimited by %{ ... %} */
......@@ -191,6 +193,8 @@ rules:
{ [] }
| rules rule
{ $2 :: $1 }
| rules SEMI
{ $1 }
rule:
flags
......@@ -361,9 +365,17 @@ producers:
binding, and possibly followed with attributes. */
producer:
| actual attributes
| actual attributes optional_semis
{ Positions.import (symbol_start_pos(), symbol_end_pos()), None, $1, $2 }
| LID EQUAL actual attributes
| LID EQUAL actual attributes optional_semis
{ Positions.import (symbol_start_pos(), symbol_end_pos()), Some $1, $3, $4 }
/* ------------------------------------------------------------------------- */
/* Semicolons used to be considered whitespace by our lexer, but are no longer.
We must allow optional semicolons in a few conventional places. */
/* Recognizes a possibly empty run of SEMI tokens. The semantic value is
   unit: the semicolons carry no information and are simply discarded. */
optional_semis:
/* empty: no semicolon at all */ { () }
/* left-recursive case: an already recognized run followed by one more SEMI */
| optional_semis SEMI { () }
%%
......@@ -5,7 +5,7 @@ Either a parenthesized, comma-delimited list of formal parameters
or an attribute
or a colon is expected at this point.
Examples of well-formed rules:
main: e = expr EOL { e }
expr: i = INT { i } | e1 = expr PLUS e2 = expr { e1 + e2 }
main: e = expr; EOL { e }
expr: i = INT { i } | e1 = expr; PLUS; e2 = expr { e1 + e2 }
option(X): { None } | x = X { Some x }
main [@cost 0]: e = expr EOL { e }
main [@cost 0]: e = expr; EOL { e }
......@@ -706,7 +706,7 @@ primary_expression :
| object_literal
{(Object_construction ((default_annotation (sndo3 $1) (trdo3 $1)), fsto3 $1),
sndo3 $1, trdo3 $1)}
| Llparen expression Lrparen {(fsto3 $2, $1, $3)};
| Llparen expression Lrparen {(fsto3 $2, $1, $3)}
/* E4X Extends */
| property_identifier
{(Property_construction ((default_annotation (sndo3 $1) (trdo3 $1)), fsto3 $1),
......@@ -952,7 +952,6 @@ call_expression :
{(Object_access ((default_annotation (sndo3 $1) (trdo3 $3)),
(fsto3 $1), ((fsto3 $3))),
sndo3 $1, trdo3 $3)}
;
/* E4X Extends */
| call_expression Ldot property_identifier
......@@ -990,7 +989,6 @@ call_init_expression :
{(Object_access ((default_annotation (sndo3 $1) (trdo3 $3)),
(fsto3 $1), ((fsto3 $3))),
sndo3 $1, trdo3 $3)}
;
/* E4X Extends */
| call_init_expression Ldot property_identifier
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment