/* This is the fancy version of the parser, to be processed by menhir.
   It is kept in sync with [Parser], but exercises menhir's features. */

/* As of 2014/12/02, the $previouserror keyword and the --error-recovery
   mode no longer exist. Thus, we replace all calls to [Error.signal]
   with calls to [Error.error], and report just one error. */

/* ------------------------------------------------------------------------- */
/* Imports. */

%{

open Keyword
open ConcreteSyntax
open Syntax
open Positions

%}

/* ------------------------------------------------------------------------- */
/* Tokens. */

/* Keyword and punctuation tokens; they carry no semantic value. */
%token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL
%token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER
/* Identifiers: LID is used for names beginning with a lowercase letter,
   UID for names beginning with an uppercase letter. Each carries a
   [string Positions.located] (presumably the name with its location). */
%token <string Positions.located> LID UID 
/* An OCaml header, lexically delimited by %{ ... %}. */
%token <Stretch.t> HEADER
/* An OCaml type, written between angle brackets in declarations such as
   "%token <string> LID UID". */
%token <Stretch.ocamltype> OCAMLTYPE
/* The %% separator. Its lazy string payload is forced only in [trailer],
   where it provides the contents of the trailer. */
%token <string Lazy.t> PERCENTPERCENT
/* A semantic action, as found at the end of a production group. */
%token <Action.t> ACTION

/* ------------------------------------------------------------------------- */
/* Start symbol. */

/* The only entry point: parsing begins at [grammar] and produces a value
   of type [ConcreteSyntax.grammar]. */
%start <ConcreteSyntax.grammar> grammar

/* ------------------------------------------------------------------------- */
/* Priorities. */

/* These declarations solve a shift-reduce conflict in favor of
   shifting: when the declaration of a non-terminal symbol begins with
   a leading bar, it is understood as an (insignificant) leading
   optional bar, *not* as an empty right-hand side followed by a bar.
   This ambiguity arises due to the existence of a new notation for
   letting several productions share a single semantic action. */

/* [no_optional_bar] is a pseudo-token: it is attached, via %prec, to the
   empty production of [optional_bar]. BAR is declared after it and hence
   receives a higher priority level, which is what makes shifting BAR win
   over reducing the empty production. The order of these two lines is
   therefore significant. */
%nonassoc no_optional_bar
%nonassoc BAR

/* These declarations encourage the [error] token to be shifted if
   found at the end of what seems like a legal declaration. */

/* [decl] is a pseudo-token: it is attached, via %prec, to the well-formed
   %token, %start, %type and precedence productions of [declaration].
   [error] is declared after it, so it has the higher priority level. */
%nonassoc decl
%nonassoc error

%%

/* ------------------------------------------------------------------------- */
/* A grammar consists of declarations and rules, followed by an optional
   trailer, which we do not parse. */

grammar:
  decls = declaration*
  PERCENTPERCENT
  rules = rule*
  postlude = trailer
    {
      (* Every declaration and every rule yields a list, so both
         sequences are lists of lists and must be concatenated. *)
      {
        pg_filename     = ""; (* filled in by the caller *)
        pg_declarations = List.concat decls;
        pg_rules        = List.concat rules;
        pg_trailer      = postlude;
      }
    }

/* ------------------------------------------------------------------------- */
/* A declaration is an %{ Objective Caml header %}, or a %token, %start,
   %type, %left, %right, %nonassoc, or %parameter declaration. */

/* Every production of [declaration] yields a list of located declarations.
   The well-formed productions carry [%prec decl]; each is followed by
   error productions whose action reports a dedicated message through
   [Error.error]. */

declaration:

/* An OCaml header. */

| h = HEADER /* lexically delimited by %{ ... %} */
    { [ with_poss $startpos $endpos (DCode h) ] }

/* A %token declaration: one [DToken] per terminal, all sharing the
   optional OCaml type. */

| TOKEN t = OCAMLTYPE? ts = clist(terminal) %prec decl
    { List.map (Positions.map (fun terminal -> DToken (t, terminal))) ts }

| TOKEN OCAMLTYPE? clist(terminal) error
| TOKEN OCAMLTYPE? error
    { Error.error (Positions.two $startpos $endpos) (String.concat "\n" [
      "Syntax error in a %token declaration.";
      "Here are sample valid declarations:";
      "  %token DOT SEMICOLON";
      "  %token <string> LID UID";
      ])
    }

/* A %start declaration, with an optional OCaml type. */

| START t = OCAMLTYPE? nts = clist(nonterminal) %prec decl
    /* %start <ocamltype> foo is syntactic sugar for %start foo %type <ocamltype> foo */
    {
      match t with
      | None ->
          List.map (Positions.map (fun nonterminal -> DStart nonterminal)) nts
      | Some t ->
          (* Each start symbol gives rise to a pair of declarations:
             a [DStart] and a [DType]. *)
          Misc.mapd (fun ntloc ->
            Positions.mapd (fun nt -> DStart nt, DType (t, ParameterVar ntloc)) ntloc) nts
    }

| START OCAMLTYPE? clist(nonterminal) error
| START OCAMLTYPE? error
    { Error.error (Positions.two $startpos $endpos) (String.concat "\n" [
      "Syntax error in a %start declaration.";
      "Here are sample valid declarations:";
      "  %start expression phrase";
      "  %start <int> date time";
      ])
    }

/* A %type declaration: the OCaml type is mandatory, and the symbols may
   be applications of parameterized symbols. */

| TYPE t = OCAMLTYPE ss = clist(actual_parameter) %prec decl
    { List.map (Positions.map (fun nt -> DType (t, nt)))
        (List.map Parameters.with_pos ss) }

| TYPE OCAMLTYPE clist(actual_parameter) error
| TYPE OCAMLTYPE error
| TYPE error
    { Error.error (Positions.two $startpos $endpos) (String.concat "\n" [
      "Syntax error in a %type declaration.";
      "Here are sample valid declarations:";
      "  %type <Syntax.expression> expression";
      "  %type <int> date time";
      ])
    }

/* A %left, %right, or %nonassoc declaration. All symbols listed in one
   declaration share the precedence level computed from the position of
   the keyword. */

| k = priority_keyword ss = clist(symbol) %prec decl
    { let prec = ParserAux.current_token_precedence $startpos(k) $endpos(k) in
      List.map (Positions.map (fun symbol -> DTokenProperties (symbol, k, prec))) ss }

| priority_keyword clist(symbol) error
| priority_keyword error
    { Error.error (Positions.two $startpos $endpos) (String.concat "\n" [
      "Syntax error in a precedence declaration.";
      "Here are sample valid declarations:";
      "  %left PLUS TIMES";
      "  %nonassoc unary_minus";
      "  %right CONCAT";
      ])
    }

/* A %parameter declaration. */

| PARAMETER t = OCAMLTYPE
    { [ with_poss $startpos $endpos (DParameter t) ] }

| PARAMETER error
    { Error.error (Positions.two $startpos $endpos) (String.concat "\n" [
      "Syntax error in a %parameter declaration.";
      "Here is a sample valid declaration:";
      "  %parameter <X : sig type t end>";
      ])
    }

/* A catch-all for errors that none of the above productions covers. */

| error
    { Error.error (Positions.two $startpos $endpos) "Syntax error inside a declaration." }

/* This production recognizes tokens that are valid in the rules section,
   but not in the declarations section. This is a hint that a %% was
   forgotten. */

| rule_specific_token
    {
      Error.error (Positions.two $startpos $endpos)
        "Syntax error inside a declaration.\n\
         Did you perhaps forget the %% that separates declarations and rules?"
    }

/* Maps each precedence keyword to the corresponding associativity. */
priority_keyword:
| LEFT     { LeftAssoc }
| RIGHT    { RightAssoc }
| NONASSOC { NonAssoc }

/* Tokens that [declaration] treats as a sign that the %% separator was
   forgotten. All four alternatives share one action. */
rule_specific_token:
  PUBLIC | INLINE | COLON | EOF
    { () }

/* ------------------------------------------------------------------------- */
/* Our lists of symbols are separated with optional commas. Order is
   irrelevant. */

/* A nonempty list of X's; a comma may or may not appear between any two
   consecutive elements. */
%inline clist(X):
  items = separated_nonempty_list(COMMA?, X) { items }

/* ------------------------------------------------------------------------- */
/* A symbol is a terminal or nonterminal symbol. One would like to
   require nonterminal symbols to begin with a lowercase letter, so as
   to lexically distinguish them from terminal symbols, which must
   begin with an uppercase letter. However, for compatibility with
   ocamlyacc, this is impossible. It can be required only for
   nonterminal symbols that are also start symbols. */

/* Either kind of identifier is accepted; the located name is returned
   unchanged. */
symbol:
| name = LID
| name = UID
    { name }

/* ------------------------------------------------------------------------- */
/* Terminals must begin with an uppercase letter. Nonterminals that are
   declared to be start symbols must begin with a lowercase letter. */

/* A terminal is an uppercase identifier, returned unchanged. */
%inline terminal:
  name = UID { name }

/* A (start) nonterminal is a lowercase identifier, returned unchanged. */
%inline nonterminal:
  name = LID { name }

/* ------------------------------------------------------------------------- */
/* A rule defines a symbol. It is optionally declared %public and/or %inline,
   and optionally carries a number of formal parameters. The right-hand side
   of the definition consists of a list of productions. */

/* Yields a singleton list that holds the description of one rule; the
   enclosing [grammar] production flattens these lists. */
rule:
  flags = flags                                             /* flags */
  symbol = symbol                                           /* the symbol that is being defined */
  params = plist(symbol)                                    /* formal parameters */
  COLON optional_bar
  prods = separated_nonempty_list(BAR, production_group)    /* productions */
    {
      let public, inline = flags in
      [
        {
          pr_public_flag = public;
          pr_inline_flag = inline;
          pr_nt          = Positions.value symbol;
          pr_positions   = [ Positions.position symbol ];
          pr_parameters  = List.map Positions.value params;
          (* Each production group yields a list of branches. *)
          pr_branches    = List.flatten prods
        }
      ]
    }
| error
    /* This error production was meant to lead to resynchronization on the
       next well-formed rule. Now that the --error-recovery mode no longer
       exists, its action calls [Error.error], so that just one error is
       reported. */
    { Error.error (Positions.two $startpos $endpos) "Syntax error inside the definition of a nonterminal symbol." }

/* Yields a pair (public, inline). The two keywords may appear in either
   order. */
flags:
  /* epsilon */   { (false, false) }
| PUBLIC          { (true, false) }
| INLINE          { (false, true) }
| PUBLIC INLINE
| INLINE PUBLIC   { (true, true) }

/* An optional leading bar. The two productions share one action. The
   empty production is given the priority level [no_optional_bar], which
   is declared before (hence is lower than) that of BAR: when a BAR is
   ahead, it is shifted rather than the empty production being reduced. */
optional_bar:
  /* epsilon */ %prec no_optional_bar
| BAR
    { () }

/* ------------------------------------------------------------------------- */
/* A production group consists of a list of productions, followed by a
   semantic action and an optional precedence specification. */

/* Yields one branch per production of the group. All of the branches
   share the semantic action [action]. */
production_group:
  productions = separated_nonempty_list(BAR, production)
  action = ACTION
  oprec2 = precedence?
    {
      (* NOTE(review): presumably validates the productions against the
         shared action -- confirm in [ParserAux]. *)
      ParserAux.check_production_group
        productions
        $startpos(action) $endpos(action) action;

      List.map (fun (producers, oprec1, rprec, pos) -> {
        pr_producers                = producers;
        pr_action                   = action;
        (* [oprec1] is the %prec annotation found inside the production;
           [oprec2] is the one found after the semantic action. *)
        pr_branch_shift_precedence  = ParserAux.override pos oprec1 oprec2;
        pr_branch_reduce_precedence = rprec;
        pr_branch_position          = pos
      }) productions
    }
| error ACTION precedence?
| error EOF
    /* This error production was meant to lead to resynchronization on the
       next semantic action, unless the end of file is reached before a
       semantic action is found. Now that the --error-recovery mode no
       longer exists, its action calls [Error.error], so that just one
       error is reported. The positions are those of the [error] token. */
    { Error.error (Positions.two $startpos($1) $endpos($1)) "Syntax error inside a production." }

/* A %prec annotation; yields the symbol that follows the keyword. */
%inline precedence:
  PREC sym = symbol { sym }

/* ------------------------------------------------------------------------- */
/* A production is a list of producers, optionally followed by a
   precedence declaration. */

/* Yields a quadruple: the producers, the optional %prec annotation, a
   reduce precedence obtained from [ParserAux], and the position of the
   production. */
production:
  items = producer*
  annotation = precedence?
    {
      ( items,
        annotation,
        ParserAux.current_reduce_precedence (),
        Positions.lex_join $startpos $endpos )
    }

/* ------------------------------------------------------------------------- */
/* A producer is an actual parameter, possibly preceded by a
   binding.

   Because both [ioption] and [terminated] are defined as inlined by
   the standard library, this definition expands to two productions,
   one of which begins with id = LID, the other of which begins with
   p = actual_parameter. The token LID is in FIRST(actual_parameter),
   but the LR(1) formalism can deal with that. If [option] was used
   instead of [ioption], an LR(1) conflict would arise -- looking
   ahead at LID would not allow determining whether to reduce an
   empty [option] or to shift. */

/* Yields a pair of the optional binding and the actual parameter. */
producer:
  id = ioption(terminated(LID, EQUAL))
  p = actual_parameter
    { (id, p) }

/* ------------------------------------------------------------------------- */
/* The syntax of actual parameters allows applications, whereas the syntax
   of formal parameters does not. It also allows use of the "?", "+", and
   "*" shortcuts. */

/* A symbol, optionally applied to a list of actual parameters, and
   optionally followed by one of the "?", "+", "*" modifiers. */
actual_parameter:
  head = symbol
  args = plist(actual_parameter)
  suffix = modifier?
    {
      let application = Parameters.app head args in
      Parameters.oapp1 suffix application
    }

/* ------------------------------------------------------------------------- */
/* Formal or actual parameter lists are delimited with parentheses and
   separated with commas. They are optional. */

/* Yields the empty list when the parenthesized list is absent. */
%inline plist(X):
  xs = loption(delimited(LPAREN, separated_nonempty_list(COMMA, X), RPAREN)) { xs }

/* ------------------------------------------------------------------------- */
/* The "?", "+", and "*" modifiers are short-hands for applications of
   certain parameterized nonterminals, defined in the standard library. */

/* Each modifier yields the located name of the parameterized nonterminal
   that it stands for. */
modifier:
| QUESTION { with_poss $startpos $endpos "option" }
| PLUS     { with_poss $startpos $endpos "nonempty_list" }
| STAR     { with_poss $startpos $endpos "list" }

/* ------------------------------------------------------------------------- */
/* A trailer is announced by %%, but is optional. */

/* Yields [None] when the file ends without a second %%. Otherwise the
   lazy payload of the %% token is forced to obtain the trailer. */
trailer:
| EOF
    { None }
| contents = PERCENTPERCENT /* followed by actual trailer */
    { Some (Lazy.force contents) }

%%