fancy-parser.mly 11.2 KB
Newer Older
1
2
3
/* This is the fancy version of the parser, to be processed by menhir.
   It is kept in sync with [Parser], but exercises menhir's features. */

4
5
6
7
/* As of 2014/12/02, the $previouserror keyword and the --error-recovery
   mode no longer exists. Thus, we replace all calls to [Error.signal]
   with calls to [Error.error], and report just one error. */

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* ------------------------------------------------------------------------- */
/* Imports. */

%{

open ConcreteSyntax
open Syntax
open Positions

%}

/* ------------------------------------------------------------------------- */
/* Tokens. */

%token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL
23
%token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE
24
25
26
%token <string Positions.located> LID UID 
%token <Stretch.t> HEADER
%token <Stretch.ocamltype> OCAMLTYPE
27
%token <Stretch.t Lazy.t> PERCENTPERCENT
28
%token <Syntax.identifier option array -> Action.t> ACTION
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

/* ------------------------------------------------------------------------- */
/* Start symbol. */

%start <ConcreteSyntax.grammar> grammar

/* ------------------------------------------------------------------------- */
/* Priorities. */

/* These declarations solve a shift-reduce conflict in favor of
   shifting: when the declaration of a non-terminal symbol begins with
   a leading bar, it is understood as an (insignificant) leading
   optional bar, *not* as an empty right-hand side followed by a bar.
   This ambiguity arises due to the existence of a new notation for
   letting several productions share a single semantic action. */

%nonassoc no_optional_bar
%nonassoc BAR

%%

/* ------------------------------------------------------------------------- */
/* A grammar consists of declarations and rules, followed by an optional
   trailer, which we do not parse. */

grammar:
  ds = declaration* PERCENTPERCENT rs = rule* t = trailer
    { 
      { 
	pg_filename          = ""; (* filled in by the caller *)
	pg_declarations      = List.flatten ds;
60
	pg_rules	     = rs @ ParserAux.rules();
61
62
63
64
65
66
67
68
69
70
71
72
73
	pg_trailer           = t
      }
    }

/* ------------------------------------------------------------------------- */
/* A declaration is an %{ Objective Caml header %}, or a %token, %start,
   %type, %left, %right, or %nonassoc declaration. */

declaration:

| h = HEADER /* lexically delimited by %{ ... %} */
    { [ with_poss $startpos $endpos (DCode h) ] }

74
| TOKEN t = OCAMLTYPE? ts = clist(terminal)
75
76
    { List.map (Positions.map (fun terminal -> DToken (t, terminal))) ts }

77
| START t = OCAMLTYPE? nts = clist(nonterminal)
78
79
80
81
82
83
84
85
86
87
    /* %start <ocamltype> foo is syntactic sugar for %start foo %type <ocamltype> foo */
    {
      match t with
      | None ->
	  List.map (Positions.map (fun nonterminal -> DStart nonterminal)) nts
      | Some t ->
	  Misc.mapd (fun ntloc ->
            Positions.mapd (fun nt -> DStart nt, DType (t, ParameterVar ntloc)) ntloc) nts
    }

88
| TYPE t = OCAMLTYPE ss = clist(strict_actual)
89
90
91
    { List.map (Positions.map (fun nt -> DType (t, nt)))
        (List.map Parameters.with_pos ss) }

92
| k = priority_keyword ss = clist(symbol)
93
    { let prec = ParserAux.new_precedence_level $startpos(k) $endpos(k) in
94
95
96
97
98
      List.map (Positions.map (fun symbol -> DTokenProperties (symbol, k, prec))) ss }

| PARAMETER t = OCAMLTYPE
    { [ with_poss $startpos $endpos (DParameter t) ] }

99
100
101
102
| ON_ERROR_REDUCE ss = clist(strict_actual)
    { List.map (Positions.map (fun nt -> DOnErrorReduce nt))
        (List.map Parameters.with_pos ss) }

103
104
105
106
107
108
109
110
111
112
113
/* This production recognizes tokens that are valid in the rules section,
   but not in the declarations section. This is a hint that a %% was
   forgotten. */

| rule_specific_token
    {
      Error.error (Positions.two $startpos $endpos)
        "syntax error inside a declaration.\n\
         Did you perhaps forget the %% that separates declarations and rules?"
    }

114
115
116
117
118
119
120
121
priority_keyword:
  LEFT
    { LeftAssoc }
| RIGHT
    { RightAssoc }
| NONASSOC
    { NonAssoc }

122
123
124
125
126
127
128
%inline rule_specific_token:
| PUBLIC
| INLINE
| COLON
| EOF
    { () }

129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/* ------------------------------------------------------------------------- */
/* Our lists of symbols are separated with optional commas. Order is
   irrelevant. */

%inline clist(X):
  xs = separated_nonempty_list(COMMA?, X)
    { xs }

/* ------------------------------------------------------------------------- */
/* A symbol is a terminal or nonterminal symbol. One would like to
   require nonterminal symbols to begin with a lowercase letter, so as
   to lexically distinguish them from terminal symbols, which must
   begin with an uppercase letter. However, for compatibility with
   ocamlyacc, this is impossible. It can be required only for
   nonterminal symbols that are also start symbols. */

symbol:
  id = LID
| id = UID
    { id }

/* ------------------------------------------------------------------------- */
/* Terminals must begin with an uppercase letter. Nonterminals that are
   declared to be start symbols must begin with a lowercase letter. */

%inline terminal:
  id = UID
    { id }

%inline nonterminal:
  id = LID
    { id }

/* ------------------------------------------------------------------------- */
/* A rule defines a symbol. It is optionally declared %public, and optionally
   carries a number of formal parameters. The right-hand side of the definition
   consists of a list of productions. */

rule:
  flags = flags                                             /* flags */
  symbol = symbol                                           /* the symbol that is being defined */
  params = plist(symbol)                                    /* formal parameters */
171
  COLON
172
  optional_bar
173
  branches = branches
174
175
    { 
      let public, inline = flags in
POTTIER Francois's avatar
POTTIER Francois committed
176
177
178
179
180
181
182
183
      {
        pr_public_flag = public; 
        pr_inline_flag = inline; 
        pr_nt          = Positions.value symbol;
        pr_positions   = [ Positions.position symbol ];
        pr_parameters  = List.map Positions.value params;
        pr_branches    = branches
      }
184
185
    }

186
%inline branches:
187
  prods = separated_nonempty_list(BAR, production_group)
188
189
    { List.flatten prods }

190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
flags:
  /* epsilon */
    { false, false }
| PUBLIC
    { true, false }
| INLINE
    { false, true }
| PUBLIC INLINE
| INLINE PUBLIC
    { true, true }

optional_bar:
  /* epsilon */ %prec no_optional_bar
| BAR
    { () }

/* ------------------------------------------------------------------------- */
/* A production group consists of a list of productions, followed by a
   semantic action and an optional precedence specification. */

production_group:
  productions = separated_nonempty_list(BAR, production)
  action = ACTION
213
  oprec2 = ioption(precedence)
214
    {
215
216
217
218
      (* If multiple productions share a single semantic action, check
         that all of them bind the same names. *)
      ParserAux.check_production_group productions;
      (* Then, *)
219
      List.map (fun (producers, oprec1, level, pos) ->
220
221
222
223
        (* Replace [$i] with [_i]. *)
        let pr_producers = ParserAux.normalize_producers producers in
        (* Distribute the semantic action. Also, check that every [$i]
           is within bounds. *)
224
        let action : Syntax.identifier option array -> Action.t = action in
225
        let pr_action = action (ParserAux.producer_names producers) in
226
	{
227
228
	  pr_producers;
	  pr_action;
229
	  pr_branch_prec_annotation   = ParserAux.override pos oprec1 oprec2;
230
	  pr_branch_production_level  = level;
231
232
	  pr_branch_position          = pos
	})
233
      productions
234
235
236
237
238
239
240
241
242
243
244
    }

%inline precedence:
  PREC symbol = symbol
    { symbol }

/* ------------------------------------------------------------------------- */
/* A production is a list of producers, optionally followed by a
   precedence declaration. */

production:
245
  producers = producer* oprec = ioption(precedence)
246
247
    { producers,
      oprec,
248
      ParserAux.new_production_level(),
249
250
251
252
253
254
255
256
257
258
      Positions.lex_join $startpos $endpos
    }

/* ------------------------------------------------------------------------- */
/* A producer is an actual parameter, possibly preceded by a
   binding.

   Because both [ioption] and [terminated] are defined as inlined by
   the standard library, this definition expands to two productions,
   one of which begins with id = LID, the other of which begins with
259
   p = actual. The token LID is in FIRST(actual),
260
261
262
263
264
265
   but the LR(1) formalism can deal with that. If [option] was used
   instead of [ioption], an LR(1) conflict would arise -- looking
   ahead at LID would not allow determining whether to reduce an
   empty [option] or to shift. */

producer:
266
| id = ioption(terminated(LID, EQUAL)) p = actual
267
    { position (with_poss $startpos $endpos ()), id, p }
268
269

/* ------------------------------------------------------------------------- */
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
/* The ideal syntax of actual parameters includes:
   1. a symbol, optionally applied to a list of actual parameters;
   2. an actual parameter followed with a modifier;
   3. an anonymous rule. (Not delimited by parentheses! Otherwise
      one would often end up writing two pairs of parentheses.) */

/* In order to avoid a few ambiguities, we restrict this ideal syntax as
   follows:
   a. Within a %type declaration, we use [strict_actual], which
      allows 1- and 2- (this is undocumented; the documentation says we
      require a symbol) but not 3-, which would not make semantic sense
      anyway.
   b. Within a producer, we use [actual], which allows 1- and
      2- but not 3-. Case 3- is allowed by switching to [lax_actual]
      within the actual arguments of an application, which are clearly
      delimited by parentheses and commas.
   c. In front of a modifier, we can never allow [lax_actual],
      as this would create an ambiguity: basically, [A | B?] could be
      interpreted either as [(A | B)?] or as [A | (B?)].
*/

%inline generic_actual(A, B):
(* 1- *)
  symbol = symbol actuals = plist(A)
294
    { Parameters.app symbol actuals }
295
296
(* 2- *)
| p = B m = modifier
297
    { ParameterApp (m, [ p ]) }
298

299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
strict_actual:
  p = generic_actual(strict_actual, strict_actual)
    { p }

actual:
  p = generic_actual(lax_actual, actual)
    { p }

lax_actual:
  p = generic_actual(lax_actual, /* cannot be lax_ */ actual)
    { p }
(* 3- *)
| /* leading bar disallowed */
  branches = branches
    { let position = position (with_poss $startpos $endpos ()) in
      let symbol = ParserAux.anonymous position branches in
      ParameterVar (with_pos position symbol) }

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
/* ------------------------------------------------------------------------- */
/* Formal or actual parameter lists are delimited with parentheses and
   separated with commas. They are optional. */

%inline plist(X):
  params = loption(delimited(LPAREN, separated_nonempty_list(COMMA, X), RPAREN))
    { params }

/* ------------------------------------------------------------------------- */
/* The "?", "+", and "*" modifiers are short-hands for applications of
   certain parameterized nonterminals, defined in the standard library. */

modifier:
  QUESTION
    { with_poss $startpos $endpos "option" }
| PLUS
    { with_poss $startpos $endpos "nonempty_list" }
| STAR
    { with_poss $startpos $endpos "list" }

/* ------------------------------------------------------------------------- */
/* A trailer is announced by %%, but is optional. */

trailer:
  EOF
    { None }
| p = PERCENTPERCENT /* followed by actual trailer */
    { Some (Lazy.force p) }

%%