fancy-parser.mly 13.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
/******************************************************************************/
/*                                                                            */
/*                                   Menhir                                   */
/*                                                                            */
/*                       François Pottier, Inria Paris                        */
/*              Yann Régis-Gianas, PPS, Université Paris Diderot              */
/*                                                                            */
/*  Copyright Inria. All rights reserved. This file is distributed under the  */
/*  terms of the GNU General Public License version 2, as described in the    */
/*  file LICENSE.                                                             */
/*                                                                            */
/******************************************************************************/

14 15 16
/* This is the fancy version of the parser, to be processed by menhir.
   It is kept in sync with [Parser], but exercises menhir's features. */

17 18 19 20
/* As of 2014/12/02, the $previouserror keyword and the --error-recovery
   mode no longer exists. Thus, we replace all calls to [Error.signal]
   with calls to [Error.error], and report just one error. */

21 22 23 24 25 26 27 28 29 30 31 32 33 34
/* ------------------------------------------------------------------------- */
/* Imports. */

%{

open Syntax
open Positions

%}

/* ------------------------------------------------------------------------- */
/* Tokens. */

%token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL
35
%token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE
POTTIER Francois's avatar
POTTIER Francois committed
36
%token <string Positions.located> LID UID
37 38
%token <Stretch.t> HEADER
%token <Stretch.ocamltype> OCAMLTYPE
39
%token <Stretch.t Lazy.t> PERCENTPERCENT
40
%token <Syntax.identifier option array -> Action.t> ACTION
41 42
%token <Syntax.attribute> ATTRIBUTE GRAMMARATTRIBUTE
%token PERCENTATTRIBUTE
43 44

/* ------------------------------------------------------------------------- */
45
/* Type annotations and start symbol. */
46

47 48
%type <ParserAux.early_producer> producer
%type <ParserAux.early_production> production
49
%start <Syntax.partial_grammar> grammar
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67

/* ------------------------------------------------------------------------- */
/* Priorities. */

/* These declarations solve a shift-reduce conflict in favor of
   shifting: when the declaration of a non-terminal symbol begins with
   a leading bar, it is understood as an (insignificant) leading
   optional bar, *not* as an empty right-hand side followed by a bar.
   This ambiguity arises due to the existence of a new notation for
   letting several productions share a single semantic action. */

%nonassoc no_optional_bar
%nonassoc BAR

%%

/* ------------------------------------------------------------------------- */
/* A grammar consists of declarations and rules, followed by an optional
68
   postlude, which we do not parse. */
69 70

grammar:
71
  ds = declaration* PERCENTPERCENT rs = rule* t = postlude
POTTIER Francois's avatar
POTTIER Francois committed
72 73
    {
      {
74 75
        pg_filename          = ""; (* filled in by the caller *)
        pg_declarations      = List.flatten ds;
76
        pg_rules             = rs;
77
        pg_postlude          = t
78 79 80 81
      }
    }

/* ------------------------------------------------------------------------- */
82
/* A declaration is an %{ OCaml header %}, or a %token, %start,
83 84 85 86 87
   %type, %left, %right, or %nonassoc declaration. */

declaration:

| h = HEADER /* lexically delimited by %{ ... %} */
88
    { [ with_loc $loc (DCode h) ] }
89

90 91
| TOKEN ty = OCAMLTYPE? ts = clist(terminal)
    { List.map (Positions.map (fun (terminal, attrs) -> DToken (ty, terminal, attrs))) ts }
92

93
| START t = OCAMLTYPE? nts = clist(nonterminal)
94 95 96 97
    /* %start <ocamltype> foo is syntactic sugar for %start foo %type <ocamltype> foo */
    {
      match t with
      | None ->
98
          List.map (Positions.map (fun nonterminal -> DStart nonterminal)) nts
99
      | Some t ->
100
          Misc.mapd (fun ntloc ->
101 102 103
            Positions.mapd (fun nt -> DStart nt, DType (t, ParameterVar ntloc)) ntloc) nts
    }

104
| TYPE t = OCAMLTYPE ss = clist(strict_actual)
105 106 107
    { List.map (Positions.map (fun nt -> DType (t, nt)))
        (List.map Parameters.with_pos ss) }

108
| k = priority_keyword ss = clist(symbol)
109
    { let prec = ParserAux.new_precedence_level $loc(k) in
110 111 112
      List.map (Positions.map (fun symbol -> DTokenProperties (symbol, k, prec))) ss }

| PARAMETER t = OCAMLTYPE
113
    { [ with_loc $loc (DParameter t) ] }
114

115
| attr = GRAMMARATTRIBUTE
116
    { [ with_loc $loc (DGrammarAttribute attr) ] }
117 118

| PERCENTATTRIBUTE actuals = clist(strict_actual) attrs = ATTRIBUTE+
119
    { [ with_loc $loc (DSymbolAttributes (actuals, attrs)) ] }
120

121
| ON_ERROR_REDUCE ss = clist(strict_actual)
122 123
    { let prec = ParserAux.new_on_error_reduce_level() in
      List.map (Positions.map (fun nt -> DOnErrorReduce (nt, prec)))
124 125
        (List.map Parameters.with_pos ss) }

126 127 128 129 130 131
/* This production recognizes tokens that are valid in the rules section,
   but not in the declarations section. This is a hint that a %% was
   forgotten. */

| rule_specific_token
    {
POTTIER Francois's avatar
POTTIER Francois committed
132
      Error.error [Positions.import $loc]
133
        "syntax error inside a declaration.\n\
POTTIER Francois's avatar
POTTIER Francois committed
134
         Did you perhaps forget the %%%% that separates declarations and rules?"
135 136
    }

137 138 139 140 141 142 143 144
priority_keyword:
  LEFT
    { LeftAssoc }
| RIGHT
    { RightAssoc }
| NONASSOC
    { NonAssoc }

145 146 147 148 149 150 151
%inline rule_specific_token:
| PUBLIC
| INLINE
| COLON
| EOF
    { () }

152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
/* ------------------------------------------------------------------------- */
/* Our lists of symbols are separated with optional commas. Order is
   irrelevant. */

%inline clist(X):
  xs = separated_nonempty_list(COMMA?, X)
    { xs }

/* ------------------------------------------------------------------------- */
/* A symbol is a terminal or nonterminal symbol. One would like to
   require nonterminal symbols to begin with a lowercase letter, so as
   to lexically distinguish them from terminal symbols, which must
   begin with an uppercase letter. However, for compatibility with
   ocamlyacc, this is impossible. It can be required only for
   nonterminal symbols that are also start symbols. */

symbol:
  id = LID
| id = UID
    { id }

/* ------------------------------------------------------------------------- */
/* Terminals must begin with an uppercase letter. Nonterminals that are
   declared to be start symbols must begin with a lowercase letter. */

%inline terminal:
178 179
  id = UID attrs = ATTRIBUTE*
    { Positions.map (fun uid -> (uid, attrs)) id }
180 181 182 183 184 185 186 187 188 189 190 191 192

%inline nonterminal:
  id = LID
    { id }

/* ------------------------------------------------------------------------- */
/* A rule defines a symbol. It is optionally declared %public, and optionally
   carries a number of formal parameters. The right-hand side of the definition
   consists of a list of productions. */

rule:
  flags = flags                                             /* flags */
  symbol = symbol                                           /* the symbol that is being defined */
193
  attributes = ATTRIBUTE*
194
  params = plist(symbol)                                    /* formal parameters */
195
  COLON
196
  optional_bar
197
  branches = branches
POTTIER Francois's avatar
POTTIER Francois committed
198
    {
199
      let public, inline = flags in
POTTIER Francois's avatar
POTTIER Francois committed
200
      {
POTTIER Francois's avatar
POTTIER Francois committed
201 202
        pr_public_flag = public;
        pr_inline_flag = inline;
POTTIER Francois's avatar
POTTIER Francois committed
203 204
        pr_nt          = Positions.value symbol;
        pr_positions   = [ Positions.position symbol ];
205
        pr_attributes  = attributes;
POTTIER Francois's avatar
POTTIER Francois committed
206 207 208
        pr_parameters  = List.map Positions.value params;
        pr_branches    = branches
      }
209 210
    }

211
%inline branches:
212
  prods = separated_nonempty_list(BAR, production_group)
213 214
    { List.flatten prods }

215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
flags:
  /* epsilon */
    { false, false }
| PUBLIC
    { true, false }
| INLINE
    { false, true }
| PUBLIC INLINE
| INLINE PUBLIC
    { true, true }

optional_bar:
  /* epsilon */ %prec no_optional_bar
| BAR
    { () }

/* ------------------------------------------------------------------------- */
/* A production group consists of a list of productions, followed by a
   semantic action and an optional precedence specification. */

production_group:
  productions = separated_nonempty_list(BAR, production)
  action = ACTION
238
  oprec2 = ioption(precedence)
239
    {
240 241 242 243
      (* If multiple productions share a single semantic action, check
         that all of them bind the same names. *)
      ParserAux.check_production_group productions;
      (* Then, *)
244
      List.map (fun (producers, oprec1, level, pos) ->
245 246 247 248
        (* Replace [$i] with [_i]. *)
        let pr_producers = ParserAux.normalize_producers producers in
        (* Distribute the semantic action. Also, check that every [$i]
           is within bounds. *)
249
        let action : Syntax.identifier option array -> Action.t = action in
250
        let pr_action = action (ParserAux.producer_names producers) in
251 252 253 254 255 256 257
        {
          pr_producers;
          pr_action;
          pr_branch_prec_annotation   = ParserAux.override pos oprec1 oprec2;
          pr_branch_production_level  = level;
          pr_branch_position          = pos
        })
258
      productions
259 260 261 262 263 264 265 266 267 268 269
    }

%inline precedence:
  PREC symbol = symbol
    { symbol }

/* ------------------------------------------------------------------------- */
/* A production is a list of producers, optionally followed by a
   precedence declaration. */

production:
270
  producers = producer* oprec = ioption(precedence)
271 272
    { producers,
      oprec,
273
      ParserAux.new_production_level(),
274
      Positions.import $loc
275 276 277 278
    }

/* ------------------------------------------------------------------------- */
/* A producer is an actual parameter, possibly preceded by a
279
   binding, and possibly followed with attributes.
280 281 282 283

   Because both [ioption] and [terminated] are defined as inlined by
   the standard library, this definition expands to two productions,
   one of which begins with id = LID, the other of which begins with
284
   p = actual. The token LID is in FIRST(actual),
285 286 287 288 289 290
   but the LR(1) formalism can deal with that. If [option] was used
   instead of [ioption], an LR(1) conflict would arise -- looking
   ahead at LID would not allow determining whether to reduce an
   empty [option] or to shift. */

producer:
291
| id = ioption(terminated(LID, EQUAL)) p = actual attrs = ATTRIBUTE*
292
    { position (with_loc $loc ()), id, p, attrs }
293 294

/* ------------------------------------------------------------------------- */
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
/* The ideal syntax of actual parameters includes:
   1. a symbol, optionally applied to a list of actual parameters;
   2. an actual parameter followed with a modifier;
   3. an anonymous rule. (Not delimited by parentheses! Otherwise
      one would often end up writing two pairs of parentheses.) */

/* In order to avoid a few ambiguities, we restrict this ideal syntax as
   follows:
   a. Within a %type declaration, we use [strict_actual], which
      allows 1- and 2- (this is undocumented; the documentation says we
      require a symbol) but not 3-, which would not make semantic sense
      anyway.
   b. Within a producer, we use [actual], which allows 1- and
      2- but not 3-. Case 3- is allowed by switching to [lax_actual]
      within the actual arguments of an application, which are clearly
      delimited by parentheses and commas.
   c. In front of a modifier, we can never allow [lax_actual],
      as this would create an ambiguity: basically, [A | B?] could be
      interpreted either as [(A | B)?] or as [A | (B?)].
*/

%inline generic_actual(A, B):
(* 1- *)
  symbol = symbol actuals = plist(A)
319
    { Parameters.app symbol actuals }
320
(* 2- *)
321
| p = B m = located(modifier)
322
    { ParameterApp (m, [ p ]) }
323

324 325 326 327 328 329 330 331 332 333 334 335 336
strict_actual:
  p = generic_actual(strict_actual, strict_actual)
    { p }

actual:
  p = generic_actual(lax_actual, actual)
    { p }

lax_actual:
  p = generic_actual(lax_actual, /* cannot be lax_ */ actual)
    { p }
(* 3- *)
| /* leading bar disallowed */
337 338
  branches = located(branches)
    { ParameterAnonymous branches }
339 340 341 342 343 344
    (* 2016/05/18: we used to eliminate anonymous rules on the fly during
       parsing. However, when an anonymous rule appears in a parameterized
       definition, the fresh nonterminal symbol that is created should be
       parameterized. This was not done, and is not easy to do on the fly,
       as it requires inherited attributes (or a way of simulating them).
       We now use explicit abstract syntax for anonymous rules. *)
345

346 347 348 349 350 351 352 353 354 355 356 357 358 359
/* ------------------------------------------------------------------------- */
/* Formal or actual parameter lists are delimited with parentheses and
   separated with commas. They are optional. */

%inline plist(X):
  params = loption(delimited(LPAREN, separated_nonempty_list(COMMA, X), RPAREN))
    { params }

/* ------------------------------------------------------------------------- */
/* The "?", "+", and "*" modifiers are short-hands for applications of
   certain parameterized nonterminals, defined in the standard library. */

modifier:
  QUESTION
360
    { "option" }
361
| PLUS
362
    { "nonempty_list" }
363
| STAR
364
    { "list" }
365 366

/* ------------------------------------------------------------------------- */
367
/* A postlude is announced by %%, but is optional. */
368

369
postlude:
370 371
  EOF
    { None }
372
| p = PERCENTPERCENT /* followed by actual postlude */
373 374
    { Some (Lazy.force p) }

375 376 377 378 379 380 381 382 383
/* -------------------------------------------------------------------------- */

/* [located(X)] recognizes the same language as [X] and converts the resulting
   value from type ['a] to type ['a located]. */

located(X):
  x = X
    { with_loc $loc x }

384
%%