lexer.mll 23.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
15
{

POTTIER Francois's avatar
POTTIER Francois committed
16
17
18
open Lexing
open Parser
open Positions
19
20
21
22
23
24
25
26
27
28
open Keyword

(* ------------------------------------------------------------------------ *)

(* Short-hands. *)

let error1 pos =
  Error.error (Positions.one pos)

let error2 lexbuf =
POTTIER Francois's avatar
POTTIER Francois committed
29
  Error.error (Positions.lexbuf lexbuf)
POTTIER Francois's avatar
POTTIER Francois committed
30

POTTIER Francois's avatar
POTTIER Francois committed
31
32
(* ------------------------------------------------------------------------ *)

33
34
35
36
37
38
39
40
41
42
43
44
(* [int_of_string] raises [Failure] if its argument is too large. This is
   not a problem in practice, but causes false positives when fuzzing
   Menhir. We hide the problem by failing gracefully. *)

let int_of_string (pos : Lexing.position) i =
  try
    int_of_string i
  with Failure _ ->
    error1 pos "unreasonably large integer."

(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
45
46
47
48
49
50
51
52
53
54
(* This wrapper saves the current lexeme start, invokes its argument,
   and restores it. This allows transmitting better positions to the
   parser. *)

let savestart lexbuf f =
  let startp = lexbuf.lex_start_p in
  let token = f lexbuf in
  lexbuf.lex_start_p <- startp;
  token

POTTIER Francois's avatar
POTTIER Francois committed
55
56
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
57
58
59
60
61
62
63
(* Overwrites an old character with a new one at a specified
   offset in a [bytes] buffer. *)

let overwrite content offset c1 c2 =
  assert (Bytes.get content offset = c1);
  Bytes.set content offset c2

POTTIER Francois's avatar
POTTIER Francois committed
64
65
66
67
(* ------------------------------------------------------------------------ *)

(* Keyword recognition and construction. *)

68
69
70
71
72
73
74
75
76
(* A monster is a spot where we have identified a keyword in concrete syntax.
   We describe a monster as an object with the following methods: *)

type monster = {

  (* The position of the monster. *)
  pos: Positions.t;

  (* This method is passed an array of (optional) names for the producers,
77
78
79
80
     that is, the elements of the production's right-hand side. It is also
     passed a flag which tells whether [$i] syntax is allowed or disallowed.
     It may perform some checks and is allowed to fail. *)
  check: check;
81
82
83
84
85
86
87
88
89
90
91
92

  (* This method transforms the keyword (in place) into a conventional
     OCaml identifier. This is done by replacing '$', '(', and ')' with
     '_'. Bloody. The arguments are [ofs1] and [content]. [ofs1] is the
     offset where [content] begins in the source file. *)
  transform: int -> bytes -> unit;

  (* This is the keyword, in abstract syntax. *)
  keyword: keyword option;

}

93
94
95
96
97
98
99
100
and check =
  Settings.dollars -> string option array -> unit

(* No check. *)

let none : check =
  fun _ _ -> ()

101
102
103
104
105
(* ------------------------------------------------------------------------ *)

(* The [$syntaxerror] monster. *)

let syntaxerror pos : monster =
106
107
  let check =
    none
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
  and transform ofs1 content =
    (* [$syntaxerror] is replaced with
       [(raise _eRR)]. Same length. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    let source = "(raise _eRR)" in
    Bytes.blit_string source 0 content ofs (String.length source)
  and keyword =
    Some SyntaxError
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* We check that every [$i] is within range. Also, we forbid using [$i]
   when a producer has been given a name; this is bad style and may be
   a mistake. (Plus, this simplies our life, as we rewrite [$i] to [_i],
   and we would have to rewrite it to a different identifier otherwise.) *)

127
128
let check_dollar pos i : check = fun dollars producers ->
  (* If [i] is out of range, say so. *)
129
  if not (0 <= i - 1 && i - 1 < Array.length producers) then
130
131
132
133
134
135
136
137
138
139
140
    Error.error [pos] "$%d refers to a nonexistent symbol." i;
  (* If [$i] could be referred to via a name, say so. *)
  producers.(i - 1) |> Option.iter (fun x ->
    Error.error [pos] "please do not say: $%d. Instead, say: %s." i x
  );
  (* If [$i] syntax is disallowed, say so. *)
  match dollars with
  | Settings.DollarsDisallowed ->
      Error.error [pos] "please do not use $%d. Instead, name this value." i
  | Settings.DollarsAllowed ->
      ()
141
142
143
144

(* We check that every reference to a producer [x] in a position keyword,
   such as [$startpos(x)], exists. *)

145
let check_producer pos x : check = fun _ producers ->
146
147
148
149
150
151
152
153
  if not (List.mem (Some x) (Array.to_list producers)) then
    Error.error [pos] "%s refers to a nonexistent symbol." x

(* ------------------------------------------------------------------------ *)

(* The [$i] monster. *)

let dollar pos i : monster =
154
  let check : check = check_dollar pos i
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
  and transform ofs1 content =
    (* [$i] is replaced with [_i]. Thus, it is no longer a keyword. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_'
  and keyword =
    None
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* The position-keyword monster. The most horrible of all. *)

let position pos
  (where : string)
  (flavor : string)
  (i : string option) (x : string option)
=
174
175
176
177
  let check_no_parameter () =
    if i <> None || x <> None then
      Error.error [pos] "$%s%s does not take a parameter." where flavor
  in
178
179
180
  let ofslpar = (* offset of the opening parenthesis, if there is one *)
    1 + (* for the initial "$" *)
    String.length where +
181
    3   (* for "pos" or "ofs" or "loc" *)
182
  in
183
  let where =
184
    match where with
185
186
    | "symbolstart"
    | "s"           -> check_no_parameter(); WhereSymbolStart
187
188
    | "start"       -> WhereStart
    | "end"         -> WhereEnd
189
    | ""            -> WhereStart
190
    | _             -> assert false
POTTIER Francois's avatar
POTTIER Francois committed
191
192
  in
  let flavor =
193
194
195
    match flavor with
    | "pos"   -> FlavorPosition
    | "ofs"   -> FlavorOffset
196
    | "loc"   -> FlavorLocation
197
198
199
200
201
    | _       -> assert false
  in
  let subject, check =
    match i, x with
    | Some i, None ->
202
        let ii = int_of_string (start_of_position pos) i in
203
        if ii = 0 && where = WhereEnd then
204
205
          (* [$endpos($0)] *)
          Before, none
POTTIER Francois's avatar
POTTIER Francois committed
206
        else
207
208
209
210
211
212
213
214
215
216
217
218
          (* [$startpos($i)] is rewritten to [$startpos(_i)]. *)
          RightNamed ("_" ^ i), check_dollar pos ii
    | None, Some x ->
        (* [$startpos(x)] *)
        RightNamed x, check_producer pos x
    | None, None ->
        (* [$startpos] *)
        Left, none
    | Some _, Some _ ->
        assert false
  in
  let transform ofs1 content =
POTTIER Francois's avatar
POTTIER Francois committed
219
220
221
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_';
222
223
224
225
226
227
    let ofslpar = ofs + ofslpar in
    match i, x with
    | None, Some x ->
        overwrite content ofslpar '(' '_';
        overwrite content (ofslpar + 1 + String.length x) ')' '_'
    | Some i, None ->
POTTIER Francois's avatar
POTTIER Francois committed
228
        overwrite content ofslpar '(' '_';
229
230
231
232
233
234
        overwrite content (ofslpar + 1) '$' '_';
        overwrite content (ofslpar + 2 + String.length i) ')' '_'
    | _, _ ->
        ()
  in
  let keyword =
235
    Some (Position (subject, where, flavor))
236
237
238
239
240
241
242
243
244
245
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* In an OCaml header, there should be no monsters. This is just a sanity
   check. *)

let no_monsters monsters =
  match monsters with
POTTIER Francois's avatar
POTTIER Francois committed
246
247
  | [] ->
      ()
248
249
250
251
252
  | monster :: _ ->
      Error.error [monster.pos]
        "a Menhir keyword cannot be used in an OCaml header."

(* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
253
254
255

(* Creates a stretch. *)

POTTIER Francois's avatar
POTTIER Francois committed
256
let mk_stretch pos1 pos2 parenthesize monsters =
POTTIER Francois's avatar
POTTIER Francois committed
257
  (* Read the specified chunk of the file. *)
258
  let raw_content : string = InputFile.chunk (pos1, pos2) in
259
  (* Transform the monsters, if there are any. (This explicit test
POTTIER Francois's avatar
POTTIER Francois committed
260
261
     allows saving one string copy and keeping just one live copy.) *)
  let content : string =
262
    match monsters with
263
    | [] ->
POTTIER Francois's avatar
POTTIER Francois committed
264
265
266
        raw_content
    | _ :: _ ->
        let content : bytes = Bytes.of_string raw_content in
267
        List.iter (fun monster -> monster.transform pos1.pos_cnum content) monsters;
POTTIER Francois's avatar
POTTIER Francois committed
268
269
270
271
272
273
274
        Bytes.unsafe_to_string content
  in
  (* Add whitespace so that the column numbers match those of the source file.
     If requested, add parentheses so that the semantic action can be inserted
     into other code without ambiguity. *)
  let content =
    if parenthesize then
POTTIER Francois's avatar
POTTIER Francois committed
275
276
277
278
      (* If [parenthesize] is true then we are at the beginning of a semantic
         action, just after the opening brace. This guarantees that we cannot
         be at the beginning of a line, so the subtraction [_ - 1] below
         cannot produce a negative result. *)
POTTIER Francois's avatar
POTTIER Francois committed
279
280
281
282
      (String.make (pos1.pos_cnum - pos1.pos_bol - 1) ' ') ^ "(" ^ content ^ ")"
    else
      (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content
  in
283
  Stretch.({
284
    stretch_filename = InputFile.get_input_file_name();
285
286
287
288
289
290
    stretch_linenum = pos1.pos_lnum;
    stretch_linecount = pos2.pos_lnum - pos1.pos_lnum;
    stretch_content = content;
    stretch_raw_content = raw_content;
    stretch_keywords = Misc.map_opt (fun monster -> monster.keyword) monsters
  })
POTTIER Francois's avatar
POTTIER Francois committed
291

POTTIER Francois's avatar
POTTIER Francois committed
292
293
(* ------------------------------------------------------------------------ *)

294
(* OCaml's reserved words. *)
POTTIER Francois's avatar
POTTIER Francois committed
295

296
let table words =
POTTIER Francois's avatar
POTTIER Francois committed
297
  let table = Hashtbl.create 149 in
298
299
300
301
302
  List.iter (fun word -> Hashtbl.add table word ()) words;
  table

let reserved =
  table [
POTTIER Francois's avatar
POTTIER Francois committed
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
    "and";
    "as";
    "assert";
    "begin";
    "class";
    "constraint";
    "do";
    "done";
    "downto";
    "else";
    "end";
    "exception";
    "external";
    "false";
    "for";
    "fun";
    "function";
    "functor";
    "if";
    "in";
    "include";
    "inherit";
    "initializer";
    "lazy";
    "let";
    "match";
    "method";
    "module";
    "mutable";
    "new";
    "object";
    "of";
    "open";
    "or";
    "parser";
    "private";
    "rec";
    "sig";
    "struct";
    "then";
    "to";
    "true";
    "try";
    "type";
    "val";
    "virtual";
    "when";
    "while";
    "with";
    "mod";
    "land";
    "lor";
    "lxor";
    "lsl";
    "lsr";
    "asr";
359
360
361
362
363
364
365
366
367
  ]

(* ------------------------------------------------------------------------ *)

(* Menhir's percent-directives. *)

let table directives =
  let table = Hashtbl.create 149 in
  List.iter (fun (word, token) -> Hashtbl.add table word token) directives;
POTTIER Francois's avatar
POTTIER Francois committed
368
369
  table

370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
let directives =
  table [
    "token", TOKEN;
    "type", TYPE;
    "left", LEFT;
    "right", RIGHT;
    "nonassoc", NONASSOC;
    "start", START;
    "prec", PREC;
    "public", PUBLIC;
    "parameter", PARAMETER;
    "inline", INLINE;
    "attribute", PERCENTATTRIBUTE;
    "on_error_reduce", ON_ERROR_REDUCE;
  ]

386
387
}

POTTIER Francois's avatar
POTTIER Francois committed
388
389
390
391
(* ------------------------------------------------------------------------ *)

(* Patterns. *)

392
393
let newline = ('\010' | '\013' | "\013\010")

394
let whitespace = [ ' ' '\t' ]
395
396
397
398
399
400
401

let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']

let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']

let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *)

402
403
let attributechar = identchar | '.'

POTTIER Francois's avatar
POTTIER Francois committed
404
405
406
407
let subject =
  '$' (['0'-'9']+ as i)
| ((lowercase identchar*) as x)

POTTIER Francois's avatar
POTTIER Francois committed
408
let poskeyword =
409
  '$'
POTTIER Francois's avatar
POTTIER Francois committed
410
411
412
413
414
  (
    (("symbolstart" | "start" | "end") as where) (("pos" | "ofs") as flavor)
  | (("s" | "") as where) ("loc" as flavor)
  )
  ( '(' subject ')' )?
415
416
417
418
419
420
421

let previouserror =
  "$previouserror"

let syntaxerror =
  "$syntaxerror"

POTTIER Francois's avatar
POTTIER Francois committed
422
423
424
425
(* ------------------------------------------------------------------------ *)

(* The lexer. *)

426
rule main = parse
427
428
429
| "%" (identchar+ as directive)
    { try Hashtbl.find directives directive
      with Not_found -> error2 lexbuf "unknown directive: %s." directive }
430
| "%%"
431
    { (* The token [PERCENTPERCENT] carries a stretch that contains
432
         everything that follows %% in the input file. This string
433
434
435
436
437
438
439
440
441
         must be created lazily. The parser decides (based on the
         context) whether this stretch is needed. If it is indeed
         needed, then constructing this stretch drives the lexer
         to the end of the file. *)
      PERCENTPERCENT (lazy (
        let openingpos = lexeme_end_p lexbuf in
        let closingpos = finish lexbuf in
        mk_stretch openingpos closingpos false []
      )) }
442
443
| ";"
    { SEMI }
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
| ":"
    { COLON }
| ","
    { COMMA }
| "="
    { EQUAL }
| "("
    { LPAREN }
| ")"
    { RPAREN }
| "|"
    { BAR }
| "?"
    { QUESTION }
| "*"
    { STAR }
| "+"
    { PLUS }
POTTIER Francois's avatar
POTTIER Francois committed
462
463
464
465
466
467
468
469
470
471
| "~"
    { TILDE }
| "_"
    { UNDERSCORE }
| ":="
    { COLONEQUAL }
| "=="
    { EQUALEQUAL }
| "let"
    { LET }
472
473
| (lowercase identchar *) as id
    { if Hashtbl.mem reserved id then
474
        error2 lexbuf "this is an OCaml reserved word."
475
      else
476
        LID (with_pos (cpos lexbuf) id)
477
478
479
    }
| (uppercase identchar *) as id
    { UID (with_pos (cpos lexbuf) id) }
480
481
482
483
484
(* Quoted strings, which are used as aliases for tokens.
   For simplicity, we just disallow double quotes and backslash outright.
   Given the use of terminal strings in grammars, this is fine. *)
| ( "\"" ( [' ' - '~'] # ['"' '\\'] + ) "\"" ) as id
    { QID (with_pos (cpos lexbuf) id) }
485
486
| "//" [^ '\010' '\013']* newline (* skip C++ style comment *)
| newline
487
    { new_line lexbuf; main lexbuf }
488
489
490
491
492
493
494
495
496
497
| whitespace+
    { main lexbuf }
| "/*"
    { comment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "<"
    { savestart lexbuf (ocamltype (lexeme_end_p lexbuf)) }
| "%{"
    { savestart lexbuf (fun lexbuf ->
498
499
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
500
501
        let closingpos, monsters = action true openingpos [] lexbuf in
        no_monsters monsters;
502
        HEADER (mk_stretch stretchpos closingpos false [])
503
504
505
      ) }
| "{"
    { savestart lexbuf (fun lexbuf ->
506
507
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
508
        let closingpos, monsters = action false openingpos [] lexbuf in
509
        ACTION (
510
511
          fun dollars producers ->
            List.iter (fun monster -> monster.check dollars producers) monsters;
512
            let stretch = mk_stretch stretchpos closingpos true monsters in
513
514
            Action.from_stretch stretch
        )
515
516
      )
    }
517
518
519
520
| ('%'? as percent) "[@" (attributechar+ as id) whitespace*
    { let openingpos = lexeme_start_p lexbuf in
      let stretchpos = lexeme_end_p lexbuf in
      let closingpos = attribute openingpos lexbuf in
521
      let pos = Positions.import (openingpos, lexeme_end_p lexbuf) in
522
523
524
525
526
527
528
529
      let attr = mk_stretch stretchpos closingpos false [] in
      if percent = "" then
        (* No [%] sign: this is a normal attribute. *)
        ATTRIBUTE (Positions.with_pos pos id, attr)
      else
        (* A [%] sign is present: this is a grammar-wide attribute. *)
        GRAMMARATTRIBUTE (Positions.with_pos pos id, attr)
    }
530
531
532
| eof
    { EOF }
| _
533
    { error2 lexbuf "unexpected character(s)." }
534

POTTIER Francois's avatar
POTTIER Francois committed
535
536
(* ------------------------------------------------------------------------ *)

537
538
539
540
(* Skip C style comments. *)

and comment openingpos = parse
| newline
541
    { new_line lexbuf; comment openingpos lexbuf }
542
543
544
545
546
547
548
| "*/"
    { () }
| eof
    { error1 openingpos "unterminated comment." }
| _
    { comment openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
549
550
(* ------------------------------------------------------------------------ *)

551
(* Collect an O'Caml type delimited by angle brackets. Angle brackets can
552
553
   appear as part of O'Caml function types and variant types, so we must
   recognize them and *not* treat them as a closing bracket. *)
554
555
556

and ocamltype openingpos = parse
| "->"
557
| "[>"
558
559
    { ocamltype openingpos lexbuf }
| '>'
560
    { OCAMLTYPE (Stretch.Declared (mk_stretch openingpos (lexeme_start_p lexbuf) true [])) }
561
562
563
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamltype openingpos lexbuf }
| newline
564
    { new_line lexbuf; ocamltype openingpos lexbuf }
565
| eof
566
    { error1 openingpos "unterminated OCaml type." }
567
568
569
| _
    { ocamltype openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
570
571
(* ------------------------------------------------------------------------ *)

572
573
574
575
(* Collect O'Caml code delimited by curly brackets. The monsters that are
   encountered along the way are accumulated in the list [monsters]. Nested
   curly brackets must be properly counted. Nested parentheses are also kept
   track of, so as to better report errors when they are not balanced. *)
576

577
and action percent openingpos monsters = parse
578
| '{'
579
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
580
      action percent openingpos monsters lexbuf }
581
582
583
584
| ("}" | "%}") as delimiter
    { match percent, delimiter with
      | true, "%}"
      | false, "}" ->
585
586
          (* This is the delimiter we were instructed to look for. *)
          lexeme_start_p lexbuf, monsters
587
      | _, _ ->
588
589
          (* This is not it. *)
          error1 openingpos "unbalanced opening brace."
590
591
    }
| '('
592
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
593
594
      action percent openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
595
596
    { let i = int_of_string (lexeme_start_p lexbuf) i in
      let monster = dollar (cpos lexbuf) i in
597
      action percent openingpos (monster :: monsters) lexbuf }
598
| poskeyword
599
600
    { let monster = position (cpos lexbuf) where flavor i x in
      action percent openingpos (monster :: monsters) lexbuf }
601
| previouserror
602
    { error2 lexbuf "$previouserror is no longer supported." }
603
| syntaxerror
604
605
    { let monster = syntaxerror (cpos lexbuf) in
      action percent openingpos (monster :: monsters) lexbuf }
606
607
| '"'
    { string (lexeme_start_p lexbuf) lexbuf;
608
      action percent openingpos monsters lexbuf }
609
610
| "'"
    { char lexbuf;
611
      action percent openingpos monsters lexbuf }
612
613
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf;
614
      action percent openingpos monsters lexbuf }
615
| newline
616
    { new_line lexbuf;
617
      action percent openingpos monsters lexbuf }
618
619
620
621
| ')'
| eof
    { error1 openingpos "unbalanced opening brace." }
| _
622
    { action percent openingpos monsters lexbuf }
623

POTTIER Francois's avatar
POTTIER Francois committed
624
625
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
626
627
628
(* Inside a semantic action, we keep track of nested parentheses, so as to
   better report errors when they are not balanced. *)

629
and parentheses openingpos monsters = parse
630
| '('
631
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
632
      parentheses openingpos monsters lexbuf }
633
| ')'
634
    { lexeme_start_p lexbuf, monsters }
635
| '{'
636
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
637
638
      parentheses openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
639
640
    { let i = int_of_string (lexeme_start_p lexbuf) i in
      let monster = dollar (cpos lexbuf) i in
641
      parentheses openingpos (monster :: monsters) lexbuf }
642
| poskeyword
643
644
    { let monster = position (cpos lexbuf) where flavor i x in
      parentheses openingpos (monster :: monsters) lexbuf }
645
| previouserror
646
    { error2 lexbuf "$previouserror is no longer supported." }
647
| syntaxerror
648
649
    { let monster = syntaxerror (cpos lexbuf) in
      parentheses openingpos (monster :: monsters) lexbuf }
650
| '"'
651
    { string (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
652
| "'"
653
    { char lexbuf; parentheses openingpos monsters lexbuf }
654
| "(*"
655
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
656
| newline
657
    { new_line lexbuf; parentheses openingpos monsters lexbuf }
658
659
660
661
| '}'
| eof
    { error1 openingpos "unbalanced opening parenthesis." }
| _
662
    { parentheses openingpos monsters lexbuf }
663

POTTIER Francois's avatar
POTTIER Francois committed
664
665
(* ------------------------------------------------------------------------ *)

666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
(* Collect an attribute payload, which is terminated by a closing square
   bracket. Nested square brackets must be properly counted. Nested curly
   brackets and nested parentheses are also kept track of, so as to better
   report errors when they are not balanced. *)

and attribute openingpos = parse
| '['
    { let _ = attribute (lexeme_start_p lexbuf) lexbuf in
      attribute openingpos lexbuf }
| ']'
    { lexeme_start_p lexbuf }
| '{'
    { let _, _ = action false (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '('
    { let _, _ = parentheses (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| "'"
    { char lexbuf; attribute openingpos lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| newline
    { new_line lexbuf; attribute openingpos lexbuf }
| '}'
| ')'
| eof
    { error1 openingpos "unbalanced opening bracket." }
| _
    { attribute openingpos lexbuf }

(* ------------------------------------------------------------------------ *)

700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
(* Skip O'Caml comments. Comments can be nested and can contain
   strings or characters, which must be correctly analyzed. (A string
   could contain begin-of-comment or end-of-comment sequences, which
   must be ignored; a character could contain a begin-of-string
   sequence.) *)

and ocamlcomment openingpos = parse
| "*)"
    { () }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| "'"
    { char lexbuf; ocamlcomment openingpos lexbuf }
| newline
716
    { new_line lexbuf; ocamlcomment openingpos lexbuf }
717
| eof
718
    { error1 openingpos "unterminated OCaml comment." }
719
720
721
| _
    { ocamlcomment openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
722
723
(* ------------------------------------------------------------------------ *)

724
725
726
(* Skip O'Caml strings. *)

and string openingpos = parse
POTTIER Francois's avatar
POTTIER Francois committed
727
| '"'
728
729
730
   { () }
| '\\' newline
| newline
731
   { new_line lexbuf; string openingpos lexbuf }
732
733
734
735
| '\\' _
   (* Upon finding a backslash, skip the character that follows,
      unless it is a newline. Pretty crude, but should work. *)
   { string openingpos lexbuf }
POTTIER Francois's avatar
POTTIER Francois committed
736
| eof
737
   { error1 openingpos "unterminated OCaml string." }
738
739
740
| _
   { string openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
741
742
(* ------------------------------------------------------------------------ *)

743
744
745
746
747
748
(* Skip O'Caml characters. A lone quote character is legal inside
   a comment, so if we don't recognize the matching closing quote,
   we simply abandon. *)

and char = parse
| '\\'? newline "'"
749
   { new_line lexbuf }
750
751
752
753
754
| [^ '\\' '\''] "'"
| '\\' _ "'"
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
| '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
| ""
POTTIER Francois's avatar
POTTIER Francois committed
755
   { () }
756

POTTIER Francois's avatar
POTTIER Francois committed
757
758
(* ------------------------------------------------------------------------ *)

759
760
761
(* Read until the end of the file. This is used after finding a %%
   that marks the end of the grammar specification. We update the
   current position as we go. This allows us to build a stretch
762
   for the postlude. *)
763
764
765

and finish = parse
| newline
766
    { new_line lexbuf; finish lexbuf }
767
768
769
770
| eof
    { lexeme_start_p lexbuf }
| _
    { finish lexbuf }