lexer.mll 22.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15
{

POTTIER Francois's avatar
POTTIER Francois committed
16 17 18
open Lexing
open Parser
open Positions
19 20 21 22 23 24 25 26 27 28
open Keyword

(* ------------------------------------------------------------------------ *)

(* Short-hands. *)

let error1 pos =
  Error.error (Positions.one pos)

let error2 lexbuf =
29
  Error.error (Positions.lexbuf lexbuf)
POTTIER Francois's avatar
POTTIER Francois committed
30

31 32
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
33 34 35 36 37 38 39 40 41 42
(* This wrapper saves the current lexeme start, invokes its argument,
   and restores it. This allows transmitting better positions to the
   parser. *)

let savestart lexbuf f =
  let startp = lexbuf.lex_start_p in
  let token = f lexbuf in
  lexbuf.lex_start_p <- startp;
  token

43 44
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
45 46 47 48 49 50 51
(* Overwrites an old character with a new one at a specified
   offset in a [bytes] buffer. *)

let overwrite content offset c1 c2 =
  assert (Bytes.get content offset = c1);
  Bytes.set content offset c2

52 53 54 55
(* ------------------------------------------------------------------------ *)

(* Keyword recognition and construction. *)

56 57 58 59 60 61 62 63 64
(* A monster is a spot where we have identified a keyword in concrete syntax.
   We describe a monster as an object with the following methods: *)

type monster = {

  (* The position of the monster. *)
  pos: Positions.t;

  (* This method is passed an array of (optional) names for the producers,
65 66 67 68
     that is, the elements of the production's right-hand side. It is also
     passed a flag which tells whether [$i] syntax is allowed or disallowed.
     It may perform some checks and is allowed to fail. *)
  check: check;
69 70 71 72 73 74 75 76 77 78 79 80

  (* This method transforms the keyword (in place) into a conventional
     OCaml identifier. This is done by replacing '$', '(', and ')' with
     '_'. Bloody. The arguments are [ofs1] and [content]. [ofs1] is the
     offset where [content] begins in the source file. *)
  transform: int -> bytes -> unit;

  (* This is the keyword, in abstract syntax. *)
  keyword: keyword option;

}

81 82 83 84 85 86 87 88
and check =
  Settings.dollars -> string option array -> unit

(* No check. *)

let none : check =
  fun _ _ -> ()

89 90 91 92 93
(* ------------------------------------------------------------------------ *)

(* The [$syntaxerror] monster. *)

let syntaxerror pos : monster =
94 95
  let check =
    none
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
  and transform ofs1 content =
    (* [$syntaxerror] is replaced with
       [(raise _eRR)]. Same length. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    let source = "(raise _eRR)" in
    Bytes.blit_string source 0 content ofs (String.length source)
  and keyword =
    Some SyntaxError
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* We check that every [$i] is within range. Also, we forbid using [$i]
   when a producer has been given a name; this is bad style and may be
   a mistake. (Plus, this simplies our life, as we rewrite [$i] to [_i],
   and we would have to rewrite it to a different identifier otherwise.) *)

115 116
let check_dollar pos i : check = fun dollars producers ->
  (* If [i] is out of range, say so. *)
117
  if not (0 <= i - 1 && i - 1 < Array.length producers) then
118 119 120 121 122 123 124 125 126 127 128
    Error.error [pos] "$%d refers to a nonexistent symbol." i;
  (* If [$i] could be referred to via a name, say so. *)
  producers.(i - 1) |> Option.iter (fun x ->
    Error.error [pos] "please do not say: $%d. Instead, say: %s." i x
  );
  (* If [$i] syntax is disallowed, say so. *)
  match dollars with
  | Settings.DollarsDisallowed ->
      Error.error [pos] "please do not use $%d. Instead, name this value." i
  | Settings.DollarsAllowed ->
      ()
129 130 131 132

(* We check that every reference to a producer [x] in a position keyword,
   such as [$startpos(x)], exists. *)

133
let check_producer pos x : check = fun _ producers ->
134 135 136 137 138 139 140 141
  if not (List.mem (Some x) (Array.to_list producers)) then
    Error.error [pos] "%s refers to a nonexistent symbol." x

(* ------------------------------------------------------------------------ *)

(* The [$i] monster. *)

let dollar pos i : monster =
142
  let check : check = check_dollar pos i
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
  and transform ofs1 content =
    (* [$i] is replaced with [_i]. Thus, it is no longer a keyword. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_'
  and keyword =
    None
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* The position-keyword monster. The most horrible of all. *)

let position pos
  (where : string)
  (flavor : string)
  (i : string option) (x : string option)
=
162 163 164 165
  let check_no_parameter () =
    if i <> None || x <> None then
      Error.error [pos] "$%s%s does not take a parameter." where flavor
  in
166 167 168
  let ofslpar = (* offset of the opening parenthesis, if there is one *)
    1 + (* for the initial "$" *)
    String.length where +
169
    3   (* for "pos" or "ofs" or "loc" *)
170
  in
171
  let where =
172
    match where with
173 174
    | "symbolstart"
    | "s"           -> check_no_parameter(); WhereSymbolStart
175 176
    | "start"       -> WhereStart
    | "end"         -> WhereEnd
177
    | ""            -> WhereStart
178
    | _             -> assert false
179 180
  in
  let flavor =
181 182 183
    match flavor with
    | "pos"   -> FlavorPosition
    | "ofs"   -> FlavorOffset
184
    | "loc"   -> FlavorLocation
185 186 187 188 189 190
    | _       -> assert false
  in
  let subject, check =
    match i, x with
    | Some i, None ->
        let ii = int_of_string i in (* cannot fail *)
191
        if ii = 0 && where = WhereEnd then
192 193
          (* [$endpos($0)] *)
          Before, none
POTTIER Francois's avatar
POTTIER Francois committed
194
        else
195 196 197 198 199 200 201 202 203 204 205 206
          (* [$startpos($i)] is rewritten to [$startpos(_i)]. *)
          RightNamed ("_" ^ i), check_dollar pos ii
    | None, Some x ->
        (* [$startpos(x)] *)
        RightNamed x, check_producer pos x
    | None, None ->
        (* [$startpos] *)
        Left, none
    | Some _, Some _ ->
        assert false
  in
  let transform ofs1 content =
POTTIER Francois's avatar
POTTIER Francois committed
207 208 209
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_';
210 211 212 213 214 215
    let ofslpar = ofs + ofslpar in
    match i, x with
    | None, Some x ->
        overwrite content ofslpar '(' '_';
        overwrite content (ofslpar + 1 + String.length x) ')' '_'
    | Some i, None ->
POTTIER Francois's avatar
POTTIER Francois committed
216
        overwrite content ofslpar '(' '_';
217 218 219 220 221 222
        overwrite content (ofslpar + 1) '$' '_';
        overwrite content (ofslpar + 2 + String.length i) ')' '_'
    | _, _ ->
        ()
  in
  let keyword =
223
    Some (Position (subject, where, flavor))
224 225 226 227 228 229 230 231 232 233
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* In an OCaml header, there should be no monsters. This is just a sanity
   check. *)

let no_monsters monsters =
  match monsters with
POTTIER Francois's avatar
POTTIER Francois committed
234 235
  | [] ->
      ()
236 237 238 239 240
  | monster :: _ ->
      Error.error [monster.pos]
        "a Menhir keyword cannot be used in an OCaml header."

(* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
241 242 243

(* Creates a stretch. *)

244
let mk_stretch pos1 pos2 parenthesize monsters =
POTTIER Francois's avatar
POTTIER Francois committed
245
  (* Read the specified chunk of the file. *)
246
  let raw_content : string = InputFile.chunk (pos1, pos2) in
247
  (* Transform the monsters, if there are any. (This explicit test
POTTIER Francois's avatar
POTTIER Francois committed
248 249
     allows saving one string copy and keeping just one live copy.) *)
  let content : string =
250
    match monsters with
251
    | [] ->
POTTIER Francois's avatar
POTTIER Francois committed
252 253 254
        raw_content
    | _ :: _ ->
        let content : bytes = Bytes.of_string raw_content in
255
        List.iter (fun monster -> monster.transform pos1.pos_cnum content) monsters;
POTTIER Francois's avatar
POTTIER Francois committed
256 257 258 259 260 261 262 263 264 265 266
        Bytes.unsafe_to_string content
  in
  (* Add whitespace so that the column numbers match those of the source file.
     If requested, add parentheses so that the semantic action can be inserted
     into other code without ambiguity. *)
  let content =
    if parenthesize then
      (String.make (pos1.pos_cnum - pos1.pos_bol - 1) ' ') ^ "(" ^ content ^ ")"
    else
      (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content
  in
267
  Stretch.({
268
    stretch_filename = InputFile.get_input_file_name();
269 270 271 272 273 274
    stretch_linenum = pos1.pos_lnum;
    stretch_linecount = pos2.pos_lnum - pos1.pos_lnum;
    stretch_content = content;
    stretch_raw_content = raw_content;
    stretch_keywords = Misc.map_opt (fun monster -> monster.keyword) monsters
  })
POTTIER Francois's avatar
POTTIER Francois committed
275

276 277
(* ------------------------------------------------------------------------ *)

278
(* OCaml's reserved words. *)
POTTIER Francois's avatar
POTTIER Francois committed
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341

let reserved =
  let table = Hashtbl.create 149 in
  List.iter (fun word -> Hashtbl.add table word ()) [
    "and";
    "as";
    "assert";
    "begin";
    "class";
    "constraint";
    "do";
    "done";
    "downto";
    "else";
    "end";
    "exception";
    "external";
    "false";
    "for";
    "fun";
    "function";
    "functor";
    "if";
    "in";
    "include";
    "inherit";
    "initializer";
    "lazy";
    "let";
    "match";
    "method";
    "module";
    "mutable";
    "new";
    "object";
    "of";
    "open";
    "or";
    "parser";
    "private";
    "rec";
    "sig";
    "struct";
    "then";
    "to";
    "true";
    "try";
    "type";
    "val";
    "virtual";
    "when";
    "while";
    "with";
    "mod";
    "land";
    "lor";
    "lxor";
    "lsl";
    "lsr";
    "asr";
  ];
  table

342 343
}

344 345 346 347
(* ------------------------------------------------------------------------ *)

(* Patterns. *)

348 349
let newline = ('\010' | '\013' | "\013\010")

350
let whitespace = [ ' ' '\t' ]
351 352 353 354 355 356 357

let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']

let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']

let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *)

358 359
let attributechar = identchar | '.'

360 361 362 363
let subject =
  '$' (['0'-'9']+ as i)
| ((lowercase identchar*) as x)

364
let poskeyword =
365
  '$'
366 367 368 369 370
  (
    (("symbolstart" | "start" | "end") as where) (("pos" | "ofs") as flavor)
  | (("s" | "") as where) ("loc" as flavor)
  )
  ( '(' subject ')' )?
371 372 373 374 375 376 377

let previouserror =
  "$previouserror"

let syntaxerror =
  "$syntaxerror"

378 379 380 381
(* ------------------------------------------------------------------------ *)

(* The lexer. *)

382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
rule main = parse
| "%token"
    { TOKEN }
| "%type"
    { TYPE }
| "%left"
    { LEFT }
| "%right"
    { RIGHT }
| "%nonassoc"
    { NONASSOC }
| "%start"
    { START }
| "%prec"
    { PREC }
| "%public"
    { PUBLIC }
| "%parameter"
    { PARAMETER }
| "%inline"
    { INLINE }
403 404
| "%attribute"
    { PERCENTATTRIBUTE }
405 406
| "%on_error_reduce"
    { ON_ERROR_REDUCE }
407
| "%%"
408
    { (* The token [PERCENTPERCENT] carries a stretch that contains
409
         everything that follows %% in the input file. This string
410 411 412 413 414 415 416 417 418
         must be created lazily. The parser decides (based on the
         context) whether this stretch is needed. If it is indeed
         needed, then constructing this stretch drives the lexer
         to the end of the file. *)
      PERCENTPERCENT (lazy (
        let openingpos = lexeme_end_p lexbuf in
        let closingpos = finish lexbuf in
        mk_stretch openingpos closingpos false []
      )) }
419 420
| ";"
    { SEMI }
421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
| ":"
    { COLON }
| ","
    { COMMA }
| "="
    { EQUAL }
| "("
    { LPAREN }
| ")"
    { RPAREN }
| "|"
    { BAR }
| "?"
    { QUESTION }
| "*"
    { STAR }
| "+"
    { PLUS }
POTTIER Francois's avatar
POTTIER Francois committed
439 440 441 442 443 444 445 446 447 448
| "~"
    { TILDE }
| "_"
    { UNDERSCORE }
| ":="
    { COLONEQUAL }
| "=="
    { EQUALEQUAL }
| "let"
    { LET }
449 450
| (lowercase identchar *) as id
    { if Hashtbl.mem reserved id then
451
        error2 lexbuf "this is an OCaml reserved word."
452
      else
453
        LID (with_pos (cpos lexbuf) id)
454 455 456
    }
| (uppercase identchar *) as id
    { UID (with_pos (cpos lexbuf) id) }
457 458 459 460 461
(* Quoted strings, which are used as aliases for tokens.
   For simplicity, we just disallow double quotes and backslash outright.
   Given the use of terminal strings in grammars, this is fine. *)
| ( "\"" ( [' ' - '~'] # ['"' '\\'] + ) "\"" ) as id
    { QID (with_pos (cpos lexbuf) id) }
462 463
| "//" [^ '\010' '\013']* newline (* skip C++ style comment *)
| newline
464
    { new_line lexbuf; main lexbuf }
465 466 467 468 469 470 471 472 473 474
| whitespace+
    { main lexbuf }
| "/*"
    { comment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "<"
    { savestart lexbuf (ocamltype (lexeme_end_p lexbuf)) }
| "%{"
    { savestart lexbuf (fun lexbuf ->
475 476
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
477 478
        let closingpos, monsters = action true openingpos [] lexbuf in
        no_monsters monsters;
479
        HEADER (mk_stretch stretchpos closingpos false [])
480 481 482
      ) }
| "{"
    { savestart lexbuf (fun lexbuf ->
483 484
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
485
        let closingpos, monsters = action false openingpos [] lexbuf in
486
        ACTION (
487 488
          fun dollars producers ->
            List.iter (fun monster -> monster.check dollars producers) monsters;
489
            let stretch = mk_stretch stretchpos closingpos true monsters in
490 491
            Action.from_stretch stretch
        )
492 493
      )
    }
494 495 496 497
| ('%'? as percent) "[@" (attributechar+ as id) whitespace*
    { let openingpos = lexeme_start_p lexbuf in
      let stretchpos = lexeme_end_p lexbuf in
      let closingpos = attribute openingpos lexbuf in
498
      let pos = Positions.import (openingpos, lexeme_end_p lexbuf) in
499 500 501 502 503 504 505 506
      let attr = mk_stretch stretchpos closingpos false [] in
      if percent = "" then
        (* No [%] sign: this is a normal attribute. *)
        ATTRIBUTE (Positions.with_pos pos id, attr)
      else
        (* A [%] sign is present: this is a grammar-wide attribute. *)
        GRAMMARATTRIBUTE (Positions.with_pos pos id, attr)
    }
507 508 509
| eof
    { EOF }
| _
510
    { error2 lexbuf "unexpected character(s)." }
511

512 513
(* ------------------------------------------------------------------------ *)

514 515 516 517
(* Skip C style comments. *)

and comment openingpos = parse
| newline
518
    { new_line lexbuf; comment openingpos lexbuf }
519 520 521 522 523 524 525
| "*/"
    { () }
| eof
    { error1 openingpos "unterminated comment." }
| _
    { comment openingpos lexbuf }

526 527
(* ------------------------------------------------------------------------ *)

528
(* Collect an O'Caml type delimited by angle brackets. Angle brackets can
529 530
   appear as part of O'Caml function types and variant types, so we must
   recognize them and *not* treat them as a closing bracket. *)
531 532 533

and ocamltype openingpos = parse
| "->"
534
| "[>"
535 536
    { ocamltype openingpos lexbuf }
| '>'
537
    { OCAMLTYPE (Stretch.Declared (mk_stretch openingpos (lexeme_start_p lexbuf) true [])) }
538 539 540
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamltype openingpos lexbuf }
| newline
541
    { new_line lexbuf; ocamltype openingpos lexbuf }
542
| eof
543
    { error1 openingpos "unterminated OCaml type." }
544 545 546
| _
    { ocamltype openingpos lexbuf }

547 548
(* ------------------------------------------------------------------------ *)

549 550 551 552
(* Collect O'Caml code delimited by curly brackets. The monsters that are
   encountered along the way are accumulated in the list [monsters]. Nested
   curly brackets must be properly counted. Nested parentheses are also kept
   track of, so as to better report errors when they are not balanced. *)
553

554
and action percent openingpos monsters = parse
555
| '{'
556
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
557
      action percent openingpos monsters lexbuf }
558 559 560 561
| ("}" | "%}") as delimiter
    { match percent, delimiter with
      | true, "%}"
      | false, "}" ->
562 563
          (* This is the delimiter we were instructed to look for. *)
          lexeme_start_p lexbuf, monsters
564
      | _, _ ->
565 566
          (* This is not it. *)
          error1 openingpos "unbalanced opening brace."
567 568
    }
| '('
569
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
570 571 572 573
      action percent openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
    { let monster = dollar (cpos lexbuf) (int_of_string i) in
      action percent openingpos (monster :: monsters) lexbuf }
574
| poskeyword
575 576
    { let monster = position (cpos lexbuf) where flavor i x in
      action percent openingpos (monster :: monsters) lexbuf }
577
| previouserror
578
    { error2 lexbuf "$previouserror is no longer supported." }
579
| syntaxerror
580 581
    { let monster = syntaxerror (cpos lexbuf) in
      action percent openingpos (monster :: monsters) lexbuf }
582 583
| '"'
    { string (lexeme_start_p lexbuf) lexbuf;
584
      action percent openingpos monsters lexbuf }
585 586
| "'"
    { char lexbuf;
587
      action percent openingpos monsters lexbuf }
588 589
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf;
590
      action percent openingpos monsters lexbuf }
591
| newline
592
    { new_line lexbuf;
593
      action percent openingpos monsters lexbuf }
594 595 596 597
| ')'
| eof
    { error1 openingpos "unbalanced opening brace." }
| _
598
    { action percent openingpos monsters lexbuf }
599

600 601
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
602 603 604
(* Inside a semantic action, we keep track of nested parentheses, so as to
   better report errors when they are not balanced. *)

605
and parentheses openingpos monsters = parse
606
| '('
607
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
608
      parentheses openingpos monsters lexbuf }
609
| ')'
610
    { lexeme_start_p lexbuf, monsters }
611
| '{'
612
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
613 614 615 616
      parentheses openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
    { let monster = dollar (cpos lexbuf) (int_of_string i) in
      parentheses openingpos (monster :: monsters) lexbuf }
617
| poskeyword
618 619
    { let monster = position (cpos lexbuf) where flavor i x in
      parentheses openingpos (monster :: monsters) lexbuf }
620
| previouserror
621
    { error2 lexbuf "$previouserror is no longer supported." }
622
| syntaxerror
623 624
    { let monster = syntaxerror (cpos lexbuf) in
      parentheses openingpos (monster :: monsters) lexbuf }
625
| '"'
626
    { string (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
627
| "'"
628
    { char lexbuf; parentheses openingpos monsters lexbuf }
629
| "(*"
630
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
631
| newline
632
    { new_line lexbuf; parentheses openingpos monsters lexbuf }
633 634 635 636
| '}'
| eof
    { error1 openingpos "unbalanced opening parenthesis." }
| _
637
    { parentheses openingpos monsters lexbuf }
638

639 640
(* ------------------------------------------------------------------------ *)

641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674
(* Collect an attribute payload, which is terminated by a closing square
   bracket. Nested square brackets must be properly counted. Nested curly
   brackets and nested parentheses are also kept track of, so as to better
   report errors when they are not balanced. *)

and attribute openingpos = parse
| '['
    { let _ = attribute (lexeme_start_p lexbuf) lexbuf in
      attribute openingpos lexbuf }
| ']'
    { lexeme_start_p lexbuf }
| '{'
    { let _, _ = action false (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '('
    { let _, _ = parentheses (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| "'"
    { char lexbuf; attribute openingpos lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| newline
    { new_line lexbuf; attribute openingpos lexbuf }
| '}'
| ')'
| eof
    { error1 openingpos "unbalanced opening bracket." }
| _
    { attribute openingpos lexbuf }

(* ------------------------------------------------------------------------ *)

675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
(* Skip O'Caml comments. Comments can be nested and can contain
   strings or characters, which must be correctly analyzed. (A string
   could contain begin-of-comment or end-of-comment sequences, which
   must be ignored; a character could contain a begin-of-string
   sequence.) *)

and ocamlcomment openingpos = parse
| "*)"
    { () }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| "'"
    { char lexbuf; ocamlcomment openingpos lexbuf }
| newline
691
    { new_line lexbuf; ocamlcomment openingpos lexbuf }
692
| eof
693
    { error1 openingpos "unterminated OCaml comment." }
694 695 696
| _
    { ocamlcomment openingpos lexbuf }

697 698
(* ------------------------------------------------------------------------ *)

699 700 701
(* Skip O'Caml strings. *)

and string openingpos = parse
702
| '"'
703 704 705
   { () }
| '\\' newline
| newline
706
   { new_line lexbuf; string openingpos lexbuf }
707 708 709 710
| '\\' _
   (* Upon finding a backslash, skip the character that follows,
      unless it is a newline. Pretty crude, but should work. *)
   { string openingpos lexbuf }
711
| eof
712
   { error1 openingpos "unterminated OCaml string." }
713 714 715
| _
   { string openingpos lexbuf }

716 717
(* ------------------------------------------------------------------------ *)

718 719 720 721 722 723
(* Skip O'Caml characters. A lone quote character is legal inside
   a comment, so if we don't recognize the matching closing quote,
   we simply abandon. *)

and char = parse
| '\\'? newline "'"
724
   { new_line lexbuf }
725 726 727 728 729
| [^ '\\' '\''] "'"
| '\\' _ "'"
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
| '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
| ""
730
   { () }
731

732 733
(* ------------------------------------------------------------------------ *)

734 735 736
(* Read until the end of the file. This is used after finding a %%
   that marks the end of the grammar specification. We update the
   current position as we go. This allows us to build a stretch
737
   for the postlude. *)
738 739 740

and finish = parse
| newline
741
    { new_line lexbuf; finish lexbuf }
742 743 744 745
| eof
    { lexeme_start_p lexbuf }
| _
    { finish lexbuf }