lexer.mll 21.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
15
{

POTTIER Francois's avatar
POTTIER Francois committed
16
17
18
open Lexing
open Parser
open Positions
19
20
21
22
23
24
25
26
27
28
29
open Keyword

(* ------------------------------------------------------------------------ *)

(* Short-hands. *)

let error1 pos =
  Error.error (Positions.one pos)

let error2 lexbuf =
  Error.error (Positions.two lexbuf.lex_start_p lexbuf.lex_curr_p)
POTTIER Francois's avatar
POTTIER Francois committed
30

POTTIER Francois's avatar
POTTIER Francois committed
31
32
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
33
34
35
36
37
38
39
40
41
42
(* This wrapper saves the current lexeme start, invokes its argument,
   and restores it. This allows transmitting better positions to the
   parser. *)

let savestart lexbuf f =
  let startp = lexbuf.lex_start_p in
  let token = f lexbuf in
  lexbuf.lex_start_p <- startp;
  token

POTTIER Francois's avatar
POTTIER Francois committed
43
44
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
45
46
47
48
49
50
51
(* Overwrites an old character with a new one at a specified
   offset in a [bytes] buffer. *)

let overwrite content offset c1 c2 =
  assert (Bytes.get content offset = c1);
  Bytes.set content offset c2

POTTIER Francois's avatar
POTTIER Francois committed
52
53
54
55
(* ------------------------------------------------------------------------ *)

(* Keyword recognition and construction. *)

56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
(* A monster is a spot where we have identified a keyword in concrete syntax.
   We describe a monster as an object with the following methods: *)

type monster = {

  (* The position of the monster. *)
  pos: Positions.t;

  (* This method is passed an array of (optional) names for the producers,
     that is, the elements of the production's right-hand side. It may
     perform some checks and is allowed to fail. *)
  check: string option array -> unit;

  (* This method transforms the keyword (in place) into a conventional
     OCaml identifier. This is done by replacing '$', '(', and ')' with
     '_'. Bloody. The arguments are [ofs1] and [content]. [ofs1] is the
     offset where [content] begins in the source file. *)
  transform: int -> bytes -> unit;

  (* This is the keyword, in abstract syntax. *)
  keyword: keyword option;

}

(* ------------------------------------------------------------------------ *)

(* The [$syntaxerror] monster. *)

let syntaxerror pos : monster =
  let check _ = ()
  and transform ofs1 content =
    (* [$syntaxerror] is replaced with
       [(raise _eRR)]. Same length. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    let source = "(raise _eRR)" in
    Bytes.blit_string source 0 content ofs (String.length source)
  and keyword =
    Some SyntaxError
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* We check that every [$i] is within range. Also, we forbid using [$i]
   when a producer has been given a name; this is bad style and may be
   a mistake. (Plus, this simplies our life, as we rewrite [$i] to [_i],
   and we would have to rewrite it to a different identifier otherwise.) *)

let check_dollar pos i producers =
  if not (0 <= i - 1 && i - 1 < Array.length producers) then
    Error.error [pos] "$%d refers to a nonexistent symbol." i
  else
    producers.(i - 1) |> Option.iter (fun x ->
      Error.error [pos] "please do not say: $%d. Instead, say: %s." i x
    )

(* We check that every reference to a producer [x] in a position keyword,
   such as [$startpos(x)], exists. *)

let check_producer pos x producers =
  if not (List.mem (Some x) (Array.to_list producers)) then
    Error.error [pos] "%s refers to a nonexistent symbol." x

(* ------------------------------------------------------------------------ *)

(* The [$i] monster. *)

let dollar pos i : monster =
  let check = check_dollar pos i
  and transform ofs1 content =
    (* [$i] is replaced with [_i]. Thus, it is no longer a keyword. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_'
  and keyword =
    None
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* The position-keyword monster. The most horrible of all. *)

let position pos
  (where : string)
  (flavor : string)
  (i : string option) (x : string option)
=
  let none _ = () in
146
147
148
  let ofslpar = (* offset of the opening parenthesis, if there is one *)
    1 + (* for the initial "$" *)
    String.length where +
149
    3   (* for "pos" or "ofs" or "loc" *)
150
  in
151
  let where' =
152
    match where with
153
154
155
    | "symbolstart" -> WhereSymbolStart
    | "start"       -> WhereStart
    | "end"         -> WhereEnd
156
157
    | "s"           -> WhereSymbolStart
    | ""            -> WhereStart
158
    | _             -> assert false
POTTIER Francois's avatar
POTTIER Francois committed
159
160
  in
  let () =
161
    match where', i, x with
POTTIER Francois's avatar
POTTIER Francois committed
162
163
    | WhereSymbolStart, Some _, _
    | WhereSymbolStart, _, Some _ ->
164
        Error.error [pos] "$%s%s does not take a parameter." where flavor
POTTIER Francois's avatar
POTTIER Francois committed
165
166
167
168
    | _, _, _ ->
        ()
  in
  let flavor =
169
170
171
    match flavor with
    | "pos"   -> FlavorPosition
    | "ofs"   -> FlavorOffset
172
    | "loc"   -> FlavorLocation
173
174
175
176
177
178
    | _       -> assert false
  in
  let subject, check =
    match i, x with
    | Some i, None ->
        let ii = int_of_string i in (* cannot fail *)
179
        if ii = 0 && where' = WhereEnd then
180
181
          (* [$endpos($0)] *)
          Before, none
POTTIER Francois's avatar
POTTIER Francois committed
182
        else
183
184
185
186
187
188
189
190
191
192
193
194
          (* [$startpos($i)] is rewritten to [$startpos(_i)]. *)
          RightNamed ("_" ^ i), check_dollar pos ii
    | None, Some x ->
        (* [$startpos(x)] *)
        RightNamed x, check_producer pos x
    | None, None ->
        (* [$startpos] *)
        Left, none
    | Some _, Some _ ->
        assert false
  in
  let transform ofs1 content =
POTTIER Francois's avatar
POTTIER Francois committed
195
196
197
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_';
198
199
200
201
202
203
    let ofslpar = ofs + ofslpar in
    match i, x with
    | None, Some x ->
        overwrite content ofslpar '(' '_';
        overwrite content (ofslpar + 1 + String.length x) ')' '_'
    | Some i, None ->
POTTIER Francois's avatar
POTTIER Francois committed
204
        overwrite content ofslpar '(' '_';
205
206
207
208
209
210
        overwrite content (ofslpar + 1) '$' '_';
        overwrite content (ofslpar + 2 + String.length i) ')' '_'
    | _, _ ->
        ()
  in
  let keyword =
211
    Some (Position (subject, where', flavor))
212
213
214
215
216
217
218
219
220
221
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* In an OCaml header, there should be no monsters. This is just a sanity
   check. *)

let no_monsters monsters =
  match monsters with
POTTIER Francois's avatar
POTTIER Francois committed
222
223
  | [] ->
      ()
224
225
226
227
228
  | monster :: _ ->
      Error.error [monster.pos]
        "a Menhir keyword cannot be used in an OCaml header."

(* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
229
230
231

(* Creates a stretch. *)

POTTIER Francois's avatar
POTTIER Francois committed
232
let mk_stretch pos1 pos2 parenthesize monsters =
POTTIER Francois's avatar
POTTIER Francois committed
233
  (* Read the specified chunk of the file. *)
234
  let raw_content : string = InputFile.chunk (pos1, pos2) in
235
  (* Transform the monsters, if there are any. (This explicit test
POTTIER Francois's avatar
POTTIER Francois committed
236
237
     allows saving one string copy and keeping just one live copy.) *)
  let content : string =
238
    match monsters with
239
    | [] ->
POTTIER Francois's avatar
POTTIER Francois committed
240
241
242
        raw_content
    | _ :: _ ->
        let content : bytes = Bytes.of_string raw_content in
243
        List.iter (fun monster -> monster.transform pos1.pos_cnum content) monsters;
POTTIER Francois's avatar
POTTIER Francois committed
244
245
246
247
248
249
250
251
252
253
254
        Bytes.unsafe_to_string content
  in
  (* Add whitespace so that the column numbers match those of the source file.
     If requested, add parentheses so that the semantic action can be inserted
     into other code without ambiguity. *)
  let content =
    if parenthesize then
      (String.make (pos1.pos_cnum - pos1.pos_bol - 1) ' ') ^ "(" ^ content ^ ")"
    else
      (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content
  in
255
  Stretch.({
256
    stretch_filename = InputFile.get_input_file_name();
257
258
259
260
261
262
    stretch_linenum = pos1.pos_lnum;
    stretch_linecount = pos2.pos_lnum - pos1.pos_lnum;
    stretch_content = content;
    stretch_raw_content = raw_content;
    stretch_keywords = Misc.map_opt (fun monster -> monster.keyword) monsters
  })
POTTIER Francois's avatar
POTTIER Francois committed
263

POTTIER Francois's avatar
POTTIER Francois committed
264
265
(* ------------------------------------------------------------------------ *)

266
(* OCaml's reserved words. *)
POTTIER Francois's avatar
POTTIER Francois committed
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329

let reserved =
  let table = Hashtbl.create 149 in
  List.iter (fun word -> Hashtbl.add table word ()) [
    "and";
    "as";
    "assert";
    "begin";
    "class";
    "constraint";
    "do";
    "done";
    "downto";
    "else";
    "end";
    "exception";
    "external";
    "false";
    "for";
    "fun";
    "function";
    "functor";
    "if";
    "in";
    "include";
    "inherit";
    "initializer";
    "lazy";
    "let";
    "match";
    "method";
    "module";
    "mutable";
    "new";
    "object";
    "of";
    "open";
    "or";
    "parser";
    "private";
    "rec";
    "sig";
    "struct";
    "then";
    "to";
    "true";
    "try";
    "type";
    "val";
    "virtual";
    "when";
    "while";
    "with";
    "mod";
    "land";
    "lor";
    "lxor";
    "lsl";
    "lsr";
    "asr";
  ];
  table

330
331
}

POTTIER Francois's avatar
POTTIER Francois committed
332
333
334
335
(* ------------------------------------------------------------------------ *)

(* Patterns. *)

336
337
338
339
340
341
342
343
344
345
let newline = ('\010' | '\013' | "\013\010")

let whitespace = [ ' ' '\t' ';' ]

let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']

let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']

let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *)

346
347
let attributechar = identchar | '.'

POTTIER Francois's avatar
POTTIER Francois committed
348
let poskeyword =
349
  '$'
350
351
  (((("symbolstart" | "start" | "end") as where) (("pos" | "ofs") as flavor)) |
   ((("s" | "") as where) ("loc" as flavor)))
352
  ( '(' ( '$' (['0'-'9']+ as i) | ((lowercase identchar*) as x)) ')')?
353
354
355
356
357
358
359

let previouserror =
  "$previouserror"

let syntaxerror =
  "$syntaxerror"

POTTIER Francois's avatar
POTTIER Francois committed
360
361
362
363
(* ------------------------------------------------------------------------ *)

(* The lexer. *)

364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
rule main = parse
| "%token"
    { TOKEN }
| "%type"
    { TYPE }
| "%left"
    { LEFT }
| "%right"
    { RIGHT }
| "%nonassoc"
    { NONASSOC }
| "%start"
    { START }
| "%prec"
    { PREC }
| "%public"
    { PUBLIC }
| "%parameter"
    { PARAMETER }
| "%inline"
    { INLINE }
385
386
| "%attribute"
    { PERCENTATTRIBUTE }
387
388
| "%on_error_reduce"
    { ON_ERROR_REDUCE }
389
| "%%"
390
    { (* The token [PERCENTPERCENT] carries a stretch that contains
391
         everything that follows %% in the input file. This string
392
393
394
395
396
397
398
399
400
         must be created lazily. The parser decides (based on the
         context) whether this stretch is needed. If it is indeed
         needed, then constructing this stretch drives the lexer
         to the end of the file. *)
      PERCENTPERCENT (lazy (
        let openingpos = lexeme_end_p lexbuf in
        let closingpos = finish lexbuf in
        mk_stretch openingpos closingpos false []
      )) }
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
| ":"
    { COLON }
| ","
    { COMMA }
| "="
    { EQUAL }
| "("
    { LPAREN }
| ")"
    { RPAREN }
| "|"
    { BAR }
| "?"
    { QUESTION }
| "*"
    { STAR }
| "+"
    { PLUS }
| (lowercase identchar *) as id
    { if Hashtbl.mem reserved id then
421
        error2 lexbuf "this is an OCaml reserved word."
422
      else
423
        LID (with_pos (cpos lexbuf) id)
424
425
426
427
428
    }
| (uppercase identchar *) as id
    { UID (with_pos (cpos lexbuf) id) }
| "//" [^ '\010' '\013']* newline (* skip C++ style comment *)
| newline
429
    { new_line lexbuf; main lexbuf }
430
431
432
433
434
435
436
437
438
439
| whitespace+
    { main lexbuf }
| "/*"
    { comment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "<"
    { savestart lexbuf (ocamltype (lexeme_end_p lexbuf)) }
| "%{"
    { savestart lexbuf (fun lexbuf ->
440
441
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
442
443
        let closingpos, monsters = action true openingpos [] lexbuf in
        no_monsters monsters;
444
        HEADER (mk_stretch stretchpos closingpos false [])
445
446
447
      ) }
| "{"
    { savestart lexbuf (fun lexbuf ->
448
449
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
450
        let closingpos, monsters = action false openingpos [] lexbuf in
451
        ACTION (
452
          fun (producers : string option array) ->
453
            List.iter (fun monster -> monster.check producers) monsters;
454
            let stretch = mk_stretch stretchpos closingpos true monsters in
455
456
            Action.from_stretch stretch
        )
457
      ) }
458
459
460
461
| ('%'? as percent) "[@" (attributechar+ as id) whitespace*
    { let openingpos = lexeme_start_p lexbuf in
      let stretchpos = lexeme_end_p lexbuf in
      let closingpos = attribute openingpos lexbuf in
462
      let pos = Positions.lex_join openingpos (lexeme_end_p lexbuf) in
463
464
465
466
467
468
469
470
      let attr = mk_stretch stretchpos closingpos false [] in
      if percent = "" then
        (* No [%] sign: this is a normal attribute. *)
        ATTRIBUTE (Positions.with_pos pos id, attr)
      else
        (* A [%] sign is present: this is a grammar-wide attribute. *)
        GRAMMARATTRIBUTE (Positions.with_pos pos id, attr)
    }
471
472
473
| eof
    { EOF }
| _
474
    { error2 lexbuf "unexpected character(s)." }
475

POTTIER Francois's avatar
POTTIER Francois committed
476
477
(* ------------------------------------------------------------------------ *)

478
479
480
481
(* Skip C style comments. *)

and comment openingpos = parse
| newline
482
    { new_line lexbuf; comment openingpos lexbuf }
483
484
485
486
487
488
489
| "*/"
    { () }
| eof
    { error1 openingpos "unterminated comment." }
| _
    { comment openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
490
491
(* ------------------------------------------------------------------------ *)

492
(* Collect an O'Caml type delimited by angle brackets. Angle brackets can
493
494
   appear as part of O'Caml function types and variant types, so we must
   recognize them and *not* treat them as a closing bracket. *)
495
496
497

and ocamltype openingpos = parse
| "->"
498
| "[>"
499
500
    { ocamltype openingpos lexbuf }
| '>'
501
    { OCAMLTYPE (Stretch.Declared (mk_stretch openingpos (lexeme_start_p lexbuf) true [])) }
502
503
504
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamltype openingpos lexbuf }
| newline
505
    { new_line lexbuf; ocamltype openingpos lexbuf }
506
| eof
507
    { error1 openingpos "unterminated OCaml type." }
508
509
510
| _
    { ocamltype openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
511
512
(* ------------------------------------------------------------------------ *)

513
514
515
516
(* Collect O'Caml code delimited by curly brackets. The monsters that are
   encountered along the way are accumulated in the list [monsters]. Nested
   curly brackets must be properly counted. Nested parentheses are also kept
   track of, so as to better report errors when they are not balanced. *)
517

518
and action percent openingpos monsters = parse
519
| '{'
520
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
521
      action percent openingpos monsters lexbuf }
522
523
524
525
| ("}" | "%}") as delimiter
    { match percent, delimiter with
      | true, "%}"
      | false, "}" ->
526
527
          (* This is the delimiter we were instructed to look for. *)
          lexeme_start_p lexbuf, monsters
528
      | _, _ ->
529
530
          (* This is not it. *)
          error1 openingpos "unbalanced opening brace."
531
532
    }
| '('
533
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
534
535
536
537
      action percent openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
    { let monster = dollar (cpos lexbuf) (int_of_string i) in
      action percent openingpos (monster :: monsters) lexbuf }
538
| poskeyword
539
540
    { let monster = position (cpos lexbuf) where flavor i x in
      action percent openingpos (monster :: monsters) lexbuf }
541
| previouserror
542
    { error2 lexbuf "$previouserror is no longer supported." }
543
| syntaxerror
544
545
    { let monster = syntaxerror (cpos lexbuf) in
      action percent openingpos (monster :: monsters) lexbuf }
546
547
| '"'
    { string (lexeme_start_p lexbuf) lexbuf;
548
      action percent openingpos monsters lexbuf }
549
550
| "'"
    { char lexbuf;
551
      action percent openingpos monsters lexbuf }
552
553
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf;
554
      action percent openingpos monsters lexbuf }
555
| newline
556
    { new_line lexbuf;
557
      action percent openingpos monsters lexbuf }
558
559
560
561
| ')'
| eof
    { error1 openingpos "unbalanced opening brace." }
| _
562
    { action percent openingpos monsters lexbuf }
563

POTTIER Francois's avatar
POTTIER Francois committed
564
565
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
566
567
568
(* Inside a semantic action, we keep track of nested parentheses, so as to
   better report errors when they are not balanced. *)

569
and parentheses openingpos monsters = parse
570
| '('
571
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
572
      parentheses openingpos monsters lexbuf }
573
| ')'
574
    { lexeme_start_p lexbuf, monsters }
575
| '{'
576
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
577
578
579
580
      parentheses openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
    { let monster = dollar (cpos lexbuf) (int_of_string i) in
      parentheses openingpos (monster :: monsters) lexbuf }
581
| poskeyword
582
583
    { let monster = position (cpos lexbuf) where flavor i x in
      parentheses openingpos (monster :: monsters) lexbuf }
584
| previouserror
585
    { error2 lexbuf "$previouserror is no longer supported." }
586
| syntaxerror
587
588
    { let monster = syntaxerror (cpos lexbuf) in
      parentheses openingpos (monster :: monsters) lexbuf }
589
| '"'
590
    { string (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
591
| "'"
592
    { char lexbuf; parentheses openingpos monsters lexbuf }
593
| "(*"
594
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
595
| newline
596
    { new_line lexbuf; parentheses openingpos monsters lexbuf }
597
598
599
600
| '}'
| eof
    { error1 openingpos "unbalanced opening parenthesis." }
| _
601
    { parentheses openingpos monsters lexbuf }
602

POTTIER Francois's avatar
POTTIER Francois committed
603
604
(* ------------------------------------------------------------------------ *)

605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
(* Collect an attribute payload, which is terminated by a closing square
   bracket. Nested square brackets must be properly counted. Nested curly
   brackets and nested parentheses are also kept track of, so as to better
   report errors when they are not balanced. *)

and attribute openingpos = parse
| '['
    { let _ = attribute (lexeme_start_p lexbuf) lexbuf in
      attribute openingpos lexbuf }
| ']'
    { lexeme_start_p lexbuf }
| '{'
    { let _, _ = action false (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '('
    { let _, _ = parentheses (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| "'"
    { char lexbuf; attribute openingpos lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| newline
    { new_line lexbuf; attribute openingpos lexbuf }
| '}'
| ')'
| eof
    { error1 openingpos "unbalanced opening bracket." }
| _
    { attribute openingpos lexbuf }

(* ------------------------------------------------------------------------ *)

639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
(* Skip O'Caml comments. Comments can be nested and can contain
   strings or characters, which must be correctly analyzed. (A string
   could contain begin-of-comment or end-of-comment sequences, which
   must be ignored; a character could contain a begin-of-string
   sequence.) *)

and ocamlcomment openingpos = parse
| "*)"
    { () }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| "'"
    { char lexbuf; ocamlcomment openingpos lexbuf }
| newline
655
    { new_line lexbuf; ocamlcomment openingpos lexbuf }
656
| eof
657
    { error1 openingpos "unterminated OCaml comment." }
658
659
660
| _
    { ocamlcomment openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
661
662
(* ------------------------------------------------------------------------ *)

663
664
665
(* Skip O'Caml strings. *)

and string openingpos = parse
POTTIER Francois's avatar
POTTIER Francois committed
666
| '"'
667
668
669
   { () }
| '\\' newline
| newline
670
   { new_line lexbuf; string openingpos lexbuf }
671
672
673
674
| '\\' _
   (* Upon finding a backslash, skip the character that follows,
      unless it is a newline. Pretty crude, but should work. *)
   { string openingpos lexbuf }
POTTIER Francois's avatar
POTTIER Francois committed
675
| eof
676
   { error1 openingpos "unterminated OCaml string." }
677
678
679
| _
   { string openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
680
681
(* ------------------------------------------------------------------------ *)

682
683
684
685
686
687
(* Skip O'Caml characters. A lone quote character is legal inside
   a comment, so if we don't recognize the matching closing quote,
   we simply abandon. *)

and char = parse
| '\\'? newline "'"
688
   { new_line lexbuf }
689
690
691
692
693
| [^ '\\' '\''] "'"
| '\\' _ "'"
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
| '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
| ""
POTTIER Francois's avatar
POTTIER Francois committed
694
   { () }
695

POTTIER Francois's avatar
POTTIER Francois committed
696
697
(* ------------------------------------------------------------------------ *)

698
699
700
(* Read until the end of the file. This is used after finding a %%
   that marks the end of the grammar specification. We update the
   current position as we go. This allows us to build a stretch
701
   for the postlude. *)
702
703
704

and finish = parse
| newline
705
    { new_line lexbuf; finish lexbuf }
706
707
708
709
| eof
    { lexeme_start_p lexbuf }
| _
    { finish lexbuf }