lexer.mll 22.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
15
{

POTTIER Francois's avatar
POTTIER Francois committed
16
17
18
open Lexing
open Parser
open Positions
19
20
21
22
23
24
25
26
27
28
open Keyword

(* ------------------------------------------------------------------------ *)

(* Short-hands. *)

let error1 pos =
  Error.error (Positions.one pos)

let error2 lexbuf =
POTTIER Francois's avatar
POTTIER Francois committed
29
  Error.error (Positions.lexbuf lexbuf)
POTTIER Francois's avatar
POTTIER Francois committed
30

POTTIER Francois's avatar
POTTIER Francois committed
31
32
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
33
34
35
36
37
38
39
40
41
42
(* This wrapper saves the current lexeme start, invokes its argument,
   and restores it. This allows transmitting better positions to the
   parser. *)

let savestart lexbuf f =
  let startp = lexbuf.lex_start_p in
  let token = f lexbuf in
  lexbuf.lex_start_p <- startp;
  token

POTTIER Francois's avatar
POTTIER Francois committed
43
44
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
45
46
47
48
49
50
51
(* Overwrites an old character with a new one at a specified
   offset in a [bytes] buffer. *)

let overwrite content offset c1 c2 =
  assert (Bytes.get content offset = c1);
  Bytes.set content offset c2

POTTIER Francois's avatar
POTTIER Francois committed
52
53
54
55
(* ------------------------------------------------------------------------ *)

(* Keyword recognition and construction. *)

56
57
58
59
60
61
62
63
64
(* A monster is a spot where we have identified a keyword in concrete syntax.
   We describe a monster as an object with the following methods: *)

type monster = {

  (* The position of the monster. *)
  pos: Positions.t;

  (* This method is passed an array of (optional) names for the producers,
65
66
67
68
     that is, the elements of the production's right-hand side. It is also
     passed a flag which tells whether [$i] syntax is allowed or disallowed.
     It may perform some checks and is allowed to fail. *)
  check: check;
69
70
71
72
73
74
75
76
77
78
79
80

  (* This method transforms the keyword (in place) into a conventional
     OCaml identifier. This is done by replacing '$', '(', and ')' with
     '_'. Bloody. The arguments are [ofs1] and [content]. [ofs1] is the
     offset where [content] begins in the source file. *)
  transform: int -> bytes -> unit;

  (* This is the keyword, in abstract syntax. *)
  keyword: keyword option;

}

81
82
83
84
85
86
87
88
and check =
  Settings.dollars -> string option array -> unit

(* No check. *)

let none : check =
  fun _ _ -> ()

89
90
91
92
93
(* ------------------------------------------------------------------------ *)

(* The [$syntaxerror] monster. *)

let syntaxerror pos : monster =
94
95
  let check =
    none
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
  and transform ofs1 content =
    (* [$syntaxerror] is replaced with
       [(raise _eRR)]. Same length. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    let source = "(raise _eRR)" in
    Bytes.blit_string source 0 content ofs (String.length source)
  and keyword =
    Some SyntaxError
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* We check that every [$i] is within range. Also, we forbid using [$i]
   when a producer has been given a name; this is bad style and may be
   a mistake. (Plus, this simplies our life, as we rewrite [$i] to [_i],
   and we would have to rewrite it to a different identifier otherwise.) *)

115
116
let check_dollar pos i : check = fun dollars producers ->
  (* If [i] is out of range, say so. *)
117
  if not (0 <= i - 1 && i - 1 < Array.length producers) then
118
119
120
121
122
123
124
125
126
127
128
    Error.error [pos] "$%d refers to a nonexistent symbol." i;
  (* If [$i] could be referred to via a name, say so. *)
  producers.(i - 1) |> Option.iter (fun x ->
    Error.error [pos] "please do not say: $%d. Instead, say: %s." i x
  );
  (* If [$i] syntax is disallowed, say so. *)
  match dollars with
  | Settings.DollarsDisallowed ->
      Error.error [pos] "please do not use $%d. Instead, name this value." i
  | Settings.DollarsAllowed ->
      ()
129
130
131
132

(* We check that every reference to a producer [x] in a position keyword,
   such as [$startpos(x)], exists. *)

133
let check_producer pos x : check = fun _ producers ->
134
135
136
137
138
139
140
141
  if not (List.mem (Some x) (Array.to_list producers)) then
    Error.error [pos] "%s refers to a nonexistent symbol." x

(* ------------------------------------------------------------------------ *)

(* The [$i] monster. *)

let dollar pos i : monster =
142
  let check : check = check_dollar pos i
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
  and transform ofs1 content =
    (* [$i] is replaced with [_i]. Thus, it is no longer a keyword. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_'
  and keyword =
    None
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* The position-keyword monster. The most horrible of all. *)

let position pos
  (where : string)
  (flavor : string)
  (i : string option) (x : string option)
=
162
163
164
165
  let check_no_parameter () =
    if i <> None || x <> None then
      Error.error [pos] "$%s%s does not take a parameter." where flavor
  in
166
167
168
  let ofslpar = (* offset of the opening parenthesis, if there is one *)
    1 + (* for the initial "$" *)
    String.length where +
169
    3   (* for "pos" or "ofs" or "loc" *)
170
  in
171
  let where =
172
    match where with
173
174
    | "symbolstart"
    | "s"           -> check_no_parameter(); WhereSymbolStart
175
176
    | "start"       -> WhereStart
    | "end"         -> WhereEnd
177
    | ""            -> WhereStart
178
    | _             -> assert false
POTTIER Francois's avatar
POTTIER Francois committed
179
180
  in
  let flavor =
181
182
183
    match flavor with
    | "pos"   -> FlavorPosition
    | "ofs"   -> FlavorOffset
184
    | "loc"   -> FlavorLocation
185
186
187
188
189
190
    | _       -> assert false
  in
  let subject, check =
    match i, x with
    | Some i, None ->
        let ii = int_of_string i in (* cannot fail *)
191
        if ii = 0 && where = WhereEnd then
192
193
          (* [$endpos($0)] *)
          Before, none
POTTIER Francois's avatar
POTTIER Francois committed
194
        else
195
196
197
198
199
200
201
202
203
204
205
206
          (* [$startpos($i)] is rewritten to [$startpos(_i)]. *)
          RightNamed ("_" ^ i), check_dollar pos ii
    | None, Some x ->
        (* [$startpos(x)] *)
        RightNamed x, check_producer pos x
    | None, None ->
        (* [$startpos] *)
        Left, none
    | Some _, Some _ ->
        assert false
  in
  let transform ofs1 content =
POTTIER Francois's avatar
POTTIER Francois committed
207
208
209
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_';
210
211
212
213
214
215
    let ofslpar = ofs + ofslpar in
    match i, x with
    | None, Some x ->
        overwrite content ofslpar '(' '_';
        overwrite content (ofslpar + 1 + String.length x) ')' '_'
    | Some i, None ->
POTTIER Francois's avatar
POTTIER Francois committed
216
        overwrite content ofslpar '(' '_';
217
218
219
220
221
222
        overwrite content (ofslpar + 1) '$' '_';
        overwrite content (ofslpar + 2 + String.length i) ')' '_'
    | _, _ ->
        ()
  in
  let keyword =
223
    Some (Position (subject, where, flavor))
224
225
226
227
228
229
230
231
232
233
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* In an OCaml header, there should be no monsters. This is just a sanity
   check. *)

let no_monsters monsters =
  match monsters with
POTTIER Francois's avatar
POTTIER Francois committed
234
235
  | [] ->
      ()
236
237
238
239
240
  | monster :: _ ->
      Error.error [monster.pos]
        "a Menhir keyword cannot be used in an OCaml header."

(* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
241
242
243

(* Creates a stretch. *)

POTTIER Francois's avatar
POTTIER Francois committed
244
let mk_stretch pos1 pos2 parenthesize monsters =
POTTIER Francois's avatar
POTTIER Francois committed
245
  (* Read the specified chunk of the file. *)
246
  let raw_content : string = InputFile.chunk (pos1, pos2) in
247
  (* Transform the monsters, if there are any. (This explicit test
POTTIER Francois's avatar
POTTIER Francois committed
248
249
     allows saving one string copy and keeping just one live copy.) *)
  let content : string =
250
    match monsters with
251
    | [] ->
POTTIER Francois's avatar
POTTIER Francois committed
252
253
254
        raw_content
    | _ :: _ ->
        let content : bytes = Bytes.of_string raw_content in
255
        List.iter (fun monster -> monster.transform pos1.pos_cnum content) monsters;
POTTIER Francois's avatar
POTTIER Francois committed
256
257
258
259
260
261
262
263
264
265
266
        Bytes.unsafe_to_string content
  in
  (* Add whitespace so that the column numbers match those of the source file.
     If requested, add parentheses so that the semantic action can be inserted
     into other code without ambiguity. *)
  let content =
    if parenthesize then
      (String.make (pos1.pos_cnum - pos1.pos_bol - 1) ' ') ^ "(" ^ content ^ ")"
    else
      (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content
  in
267
  Stretch.({
268
    stretch_filename = InputFile.get_input_file_name();
269
270
271
272
273
274
    stretch_linenum = pos1.pos_lnum;
    stretch_linecount = pos2.pos_lnum - pos1.pos_lnum;
    stretch_content = content;
    stretch_raw_content = raw_content;
    stretch_keywords = Misc.map_opt (fun monster -> monster.keyword) monsters
  })
POTTIER Francois's avatar
POTTIER Francois committed
275

POTTIER Francois's avatar
POTTIER Francois committed
276
277
(* ------------------------------------------------------------------------ *)

278
(* OCaml's reserved words. *)
POTTIER Francois's avatar
POTTIER Francois committed
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341

let reserved =
  let table = Hashtbl.create 149 in
  List.iter (fun word -> Hashtbl.add table word ()) [
    "and";
    "as";
    "assert";
    "begin";
    "class";
    "constraint";
    "do";
    "done";
    "downto";
    "else";
    "end";
    "exception";
    "external";
    "false";
    "for";
    "fun";
    "function";
    "functor";
    "if";
    "in";
    "include";
    "inherit";
    "initializer";
    "lazy";
    "let";
    "match";
    "method";
    "module";
    "mutable";
    "new";
    "object";
    "of";
    "open";
    "or";
    "parser";
    "private";
    "rec";
    "sig";
    "struct";
    "then";
    "to";
    "true";
    "try";
    "type";
    "val";
    "virtual";
    "when";
    "while";
    "with";
    "mod";
    "land";
    "lor";
    "lxor";
    "lsl";
    "lsr";
    "asr";
  ];
  table

342
343
}

POTTIER Francois's avatar
POTTIER Francois committed
344
345
346
347
(* ------------------------------------------------------------------------ *)

(* Patterns. *)

348
349
let newline = ('\010' | '\013' | "\013\010")

350
let whitespace = [ ' ' '\t' ]
351
352
353
354
355
356
357

let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']

let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']

let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *)

358
359
let attributechar = identchar | '.'

POTTIER Francois's avatar
POTTIER Francois committed
360
361
362
363
let subject =
  '$' (['0'-'9']+ as i)
| ((lowercase identchar*) as x)

POTTIER Francois's avatar
POTTIER Francois committed
364
let poskeyword =
365
  '$'
POTTIER Francois's avatar
POTTIER Francois committed
366
367
368
369
370
  (
    (("symbolstart" | "start" | "end") as where) (("pos" | "ofs") as flavor)
  | (("s" | "") as where) ("loc" as flavor)
  )
  ( '(' subject ')' )?
371
372
373
374
375
376
377

let previouserror =
  "$previouserror"

let syntaxerror =
  "$syntaxerror"

POTTIER Francois's avatar
POTTIER Francois committed
378
379
380
381
(* ------------------------------------------------------------------------ *)

(* The lexer. *)

382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
rule main = parse
| "%token"
    { TOKEN }
| "%type"
    { TYPE }
| "%left"
    { LEFT }
| "%right"
    { RIGHT }
| "%nonassoc"
    { NONASSOC }
| "%start"
    { START }
| "%prec"
    { PREC }
| "%public"
    { PUBLIC }
| "%parameter"
    { PARAMETER }
| "%inline"
    { INLINE }
403
404
| "%attribute"
    { PERCENTATTRIBUTE }
405
406
| "%on_error_reduce"
    { ON_ERROR_REDUCE }
407
| "%%"
408
    { (* The token [PERCENTPERCENT] carries a stretch that contains
409
         everything that follows %% in the input file. This string
410
411
412
413
414
415
416
417
418
         must be created lazily. The parser decides (based on the
         context) whether this stretch is needed. If it is indeed
         needed, then constructing this stretch drives the lexer
         to the end of the file. *)
      PERCENTPERCENT (lazy (
        let openingpos = lexeme_end_p lexbuf in
        let closingpos = finish lexbuf in
        mk_stretch openingpos closingpos false []
      )) }
419
420
| ";"
    { SEMI }
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
| ":"
    { COLON }
| ","
    { COMMA }
| "="
    { EQUAL }
| "("
    { LPAREN }
| ")"
    { RPAREN }
| "|"
    { BAR }
| "?"
    { QUESTION }
| "*"
    { STAR }
| "+"
    { PLUS }
POTTIER Francois's avatar
POTTIER Francois committed
439
440
441
442
443
444
445
446
447
448
| "~"
    { TILDE }
| "_"
    { UNDERSCORE }
| ":="
    { COLONEQUAL }
| "=="
    { EQUALEQUAL }
| "let"
    { LET }
449
450
| (lowercase identchar *) as id
    { if Hashtbl.mem reserved id then
451
        error2 lexbuf "this is an OCaml reserved word."
452
      else
453
        LID (with_pos (cpos lexbuf) id)
454
455
456
    }
| (uppercase identchar *) as id
    { UID (with_pos (cpos lexbuf) id) }
457
458
459
460
461
(* Quoted strings, which are used as aliases for tokens.
   For simplicity, we just disallow double quotes and backslash outright.
   Given the use of terminal strings in grammars, this is fine. *)
| ( "\"" ( [' ' - '~'] # ['"' '\\'] + ) "\"" ) as id
    { QID (with_pos (cpos lexbuf) id) }
462
463
| "//" [^ '\010' '\013']* newline (* skip C++ style comment *)
| newline
464
    { new_line lexbuf; main lexbuf }
465
466
467
468
469
470
471
472
473
474
| whitespace+
    { main lexbuf }
| "/*"
    { comment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "<"
    { savestart lexbuf (ocamltype (lexeme_end_p lexbuf)) }
| "%{"
    { savestart lexbuf (fun lexbuf ->
475
476
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
477
478
        let closingpos, monsters = action true openingpos [] lexbuf in
        no_monsters monsters;
479
        HEADER (mk_stretch stretchpos closingpos false [])
480
481
482
      ) }
| "{"
    { savestart lexbuf (fun lexbuf ->
483
484
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
485
        let closingpos, monsters = action false openingpos [] lexbuf in
486
        ACTION (
487
488
          fun dollars producers ->
            List.iter (fun monster -> monster.check dollars producers) monsters;
489
            let stretch = mk_stretch stretchpos closingpos true monsters in
490
491
            Action.from_stretch stretch
        )
492
493
      )
    }
494
495
496
497
| ('%'? as percent) "[@" (attributechar+ as id) whitespace*
    { let openingpos = lexeme_start_p lexbuf in
      let stretchpos = lexeme_end_p lexbuf in
      let closingpos = attribute openingpos lexbuf in
498
      let pos = Positions.import (openingpos, lexeme_end_p lexbuf) in
499
500
501
502
503
504
505
506
      let attr = mk_stretch stretchpos closingpos false [] in
      if percent = "" then
        (* No [%] sign: this is a normal attribute. *)
        ATTRIBUTE (Positions.with_pos pos id, attr)
      else
        (* A [%] sign is present: this is a grammar-wide attribute. *)
        GRAMMARATTRIBUTE (Positions.with_pos pos id, attr)
    }
507
508
509
| eof
    { EOF }
| _
510
    { error2 lexbuf "unexpected character(s)." }
511

POTTIER Francois's avatar
POTTIER Francois committed
512
513
(* ------------------------------------------------------------------------ *)

514
515
516
517
(* Skip C style comments. *)

and comment openingpos = parse
| newline
518
    { new_line lexbuf; comment openingpos lexbuf }
519
520
521
522
523
524
525
| "*/"
    { () }
| eof
    { error1 openingpos "unterminated comment." }
| _
    { comment openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
526
527
(* ------------------------------------------------------------------------ *)

528
(* Collect an O'Caml type delimited by angle brackets. Angle brackets can
529
530
   appear as part of O'Caml function types and variant types, so we must
   recognize them and *not* treat them as a closing bracket. *)
531
532
533

and ocamltype openingpos = parse
| "->"
534
| "[>"
535
536
    { ocamltype openingpos lexbuf }
| '>'
537
    { OCAMLTYPE (Stretch.Declared (mk_stretch openingpos (lexeme_start_p lexbuf) true [])) }
538
539
540
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamltype openingpos lexbuf }
| newline
541
    { new_line lexbuf; ocamltype openingpos lexbuf }
542
| eof
543
    { error1 openingpos "unterminated OCaml type." }
544
545
546
| _
    { ocamltype openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
547
548
(* ------------------------------------------------------------------------ *)

549
550
551
552
(* Collect O'Caml code delimited by curly brackets. The monsters that are
   encountered along the way are accumulated in the list [monsters]. Nested
   curly brackets must be properly counted. Nested parentheses are also kept
   track of, so as to better report errors when they are not balanced. *)
553

554
and action percent openingpos monsters = parse
555
| '{'
556
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
557
      action percent openingpos monsters lexbuf }
558
559
560
561
| ("}" | "%}") as delimiter
    { match percent, delimiter with
      | true, "%}"
      | false, "}" ->
562
563
          (* This is the delimiter we were instructed to look for. *)
          lexeme_start_p lexbuf, monsters
564
      | _, _ ->
565
566
          (* This is not it. *)
          error1 openingpos "unbalanced opening brace."
567
568
    }
| '('
569
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
570
571
572
573
      action percent openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
    { let monster = dollar (cpos lexbuf) (int_of_string i) in
      action percent openingpos (monster :: monsters) lexbuf }
574
| poskeyword
575
576
    { let monster = position (cpos lexbuf) where flavor i x in
      action percent openingpos (monster :: monsters) lexbuf }
577
| previouserror
578
    { error2 lexbuf "$previouserror is no longer supported." }
579
| syntaxerror
580
581
    { let monster = syntaxerror (cpos lexbuf) in
      action percent openingpos (monster :: monsters) lexbuf }
582
583
| '"'
    { string (lexeme_start_p lexbuf) lexbuf;
584
      action percent openingpos monsters lexbuf }
585
586
| "'"
    { char lexbuf;
587
      action percent openingpos monsters lexbuf }
588
589
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf;
590
      action percent openingpos monsters lexbuf }
591
| newline
592
    { new_line lexbuf;
593
      action percent openingpos monsters lexbuf }
594
595
596
597
| ')'
| eof
    { error1 openingpos "unbalanced opening brace." }
| _
598
    { action percent openingpos monsters lexbuf }
599

POTTIER Francois's avatar
POTTIER Francois committed
600
601
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
602
603
604
(* Inside a semantic action, we keep track of nested parentheses, so as to
   better report errors when they are not balanced. *)

605
and parentheses openingpos monsters = parse
606
| '('
607
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
608
      parentheses openingpos monsters lexbuf }
609
| ')'
610
    { lexeme_start_p lexbuf, monsters }
611
| '{'
612
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
613
614
615
616
      parentheses openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
    { let monster = dollar (cpos lexbuf) (int_of_string i) in
      parentheses openingpos (monster :: monsters) lexbuf }
617
| poskeyword
618
619
    { let monster = position (cpos lexbuf) where flavor i x in
      parentheses openingpos (monster :: monsters) lexbuf }
620
| previouserror
621
    { error2 lexbuf "$previouserror is no longer supported." }
622
| syntaxerror
623
624
    { let monster = syntaxerror (cpos lexbuf) in
      parentheses openingpos (monster :: monsters) lexbuf }
625
| '"'
626
    { string (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
627
| "'"
628
    { char lexbuf; parentheses openingpos monsters lexbuf }
629
| "(*"
630
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
631
| newline
632
    { new_line lexbuf; parentheses openingpos monsters lexbuf }
633
634
635
636
| '}'
| eof
    { error1 openingpos "unbalanced opening parenthesis." }
| _
637
    { parentheses openingpos monsters lexbuf }
638

POTTIER Francois's avatar
POTTIER Francois committed
639
640
(* ------------------------------------------------------------------------ *)

641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
(* Collect an attribute payload, which is terminated by a closing square
   bracket. Nested square brackets must be properly counted. Nested curly
   brackets and nested parentheses are also kept track of, so as to better
   report errors when they are not balanced. *)

and attribute openingpos = parse
| '['
    { let _ = attribute (lexeme_start_p lexbuf) lexbuf in
      attribute openingpos lexbuf }
| ']'
    { lexeme_start_p lexbuf }
| '{'
    { let _, _ = action false (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '('
    { let _, _ = parentheses (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| "'"
    { char lexbuf; attribute openingpos lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| newline
    { new_line lexbuf; attribute openingpos lexbuf }
| '}'
| ')'
| eof
    { error1 openingpos "unbalanced opening bracket." }
| _
    { attribute openingpos lexbuf }

(* ------------------------------------------------------------------------ *)

675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
(* Skip O'Caml comments. Comments can be nested and can contain
   strings or characters, which must be correctly analyzed. (A string
   could contain begin-of-comment or end-of-comment sequences, which
   must be ignored; a character could contain a begin-of-string
   sequence.) *)

and ocamlcomment openingpos = parse
| "*)"
    { () }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| "'"
    { char lexbuf; ocamlcomment openingpos lexbuf }
| newline
691
    { new_line lexbuf; ocamlcomment openingpos lexbuf }
692
| eof
693
    { error1 openingpos "unterminated OCaml comment." }
694
695
696
| _
    { ocamlcomment openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
697
698
(* ------------------------------------------------------------------------ *)

699
700
701
(* Skip O'Caml strings. *)

and string openingpos = parse
POTTIER Francois's avatar
POTTIER Francois committed
702
| '"'
703
704
705
   { () }
| '\\' newline
| newline
706
   { new_line lexbuf; string openingpos lexbuf }
707
708
709
710
| '\\' _
   (* Upon finding a backslash, skip the character that follows,
      unless it is a newline. Pretty crude, but should work. *)
   { string openingpos lexbuf }
POTTIER Francois's avatar
POTTIER Francois committed
711
| eof
712
   { error1 openingpos "unterminated OCaml string." }
713
714
715
| _
   { string openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
716
717
(* ------------------------------------------------------------------------ *)

718
719
720
721
722
723
(* Skip O'Caml characters. A lone quote character is legal inside
   a comment, so if we don't recognize the matching closing quote,
   we simply abandon. *)

and char = parse
| '\\'? newline "'"
724
   { new_line lexbuf }
725
726
727
728
729
| [^ '\\' '\''] "'"
| '\\' _ "'"
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
| '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
| ""
POTTIER Francois's avatar
POTTIER Francois committed
730
   { () }
731

POTTIER Francois's avatar
POTTIER Francois committed
732
733
(* ------------------------------------------------------------------------ *)

734
735
736
(* Read until the end of the file. This is used after finding a %%
   that marks the end of the grammar specification. We update the
   current position as we go. This allows us to build a stretch
737
   for the postlude. *)
738
739
740

and finish = parse
| newline
741
    { new_line lexbuf; finish lexbuf }
742
743
744
745
| eof
    { lexeme_start_p lexbuf }
| _
    { finish lexbuf }