lexer.mll 21.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15
{

POTTIER Francois's avatar
POTTIER Francois committed
16 17 18
open Lexing
open Parser
open Positions
19 20 21 22 23 24 25 26 27 28
open Keyword

(* ------------------------------------------------------------------------ *)

(* Short-hands. *)

let error1 pos =
  Error.error (Positions.one pos)

let error2 lexbuf =
POTTIER Francois's avatar
POTTIER Francois committed
29
  Error.error (Positions.lexbuf lexbuf)
POTTIER Francois's avatar
POTTIER Francois committed
30

POTTIER Francois's avatar
POTTIER Francois committed
31 32
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
33 34 35 36 37 38 39 40 41 42
(* This wrapper saves the current lexeme start, invokes its argument,
   and restores it. This allows transmitting better positions to the
   parser. *)

let savestart lexbuf f =
  let startp = lexbuf.lex_start_p in
  let token = f lexbuf in
  lexbuf.lex_start_p <- startp;
  token

POTTIER Francois's avatar
POTTIER Francois committed
43 44
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
45 46 47 48 49 50 51
(* Overwrites an old character with a new one at a specified
   offset in a [bytes] buffer. *)

let overwrite content offset c1 c2 =
  assert (Bytes.get content offset = c1);
  Bytes.set content offset c2

POTTIER Francois's avatar
POTTIER Francois committed
52 53 54 55
(* ------------------------------------------------------------------------ *)

(* Keyword recognition and construction. *)

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
(* A monster is a spot where we have identified a keyword in concrete syntax.
   We describe a monster as an object with the following methods: *)

type monster = {

  (* The position of the monster. *)
  pos: Positions.t;

  (* This method is passed an array of (optional) names for the producers,
     that is, the elements of the production's right-hand side. It may
     perform some checks and is allowed to fail. *)
  check: string option array -> unit;

  (* This method transforms the keyword (in place) into a conventional
     OCaml identifier. This is done by replacing '$', '(', and ')' with
     '_'. Bloody. The arguments are [ofs1] and [content]. [ofs1] is the
     offset where [content] begins in the source file. *)
  transform: int -> bytes -> unit;

  (* This is the keyword, in abstract syntax. *)
  keyword: keyword option;

}

(* ------------------------------------------------------------------------ *)

(* The [$syntaxerror] monster. *)

let syntaxerror pos : monster =
  let check _ = ()
  and transform ofs1 content =
    (* [$syntaxerror] is replaced with
       [(raise _eRR)]. Same length. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    let source = "(raise _eRR)" in
    Bytes.blit_string source 0 content ofs (String.length source)
  and keyword =
    Some SyntaxError
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* We check that every [$i] is within range. Also, we forbid using [$i]
   when a producer has been given a name; this is bad style and may be
   a mistake. (Plus, this simplies our life, as we rewrite [$i] to [_i],
   and we would have to rewrite it to a different identifier otherwise.) *)

let check_dollar pos i producers =
  if not (0 <= i - 1 && i - 1 < Array.length producers) then
    Error.error [pos] "$%d refers to a nonexistent symbol." i
  else
    producers.(i - 1) |> Option.iter (fun x ->
      Error.error [pos] "please do not say: $%d. Instead, say: %s." i x
    )

(* We check that every reference to a producer [x] in a position keyword,
   such as [$startpos(x)], exists. *)

let check_producer pos x producers =
  if not (List.mem (Some x) (Array.to_list producers)) then
    Error.error [pos] "%s refers to a nonexistent symbol." x

(* ------------------------------------------------------------------------ *)

(* The [$i] monster. *)

let dollar pos i : monster =
  let check = check_dollar pos i
  and transform ofs1 content =
    (* [$i] is replaced with [_i]. Thus, it is no longer a keyword. *)
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_'
  and keyword =
    None
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* The position-keyword monster. The most horrible of all. *)

let position pos
  (where : string)
  (flavor : string)
  (i : string option) (x : string option)
=
  let none _ = () in
146 147 148 149
  let check_no_parameter () =
    if i <> None || x <> None then
      Error.error [pos] "$%s%s does not take a parameter." where flavor
  in
150 151 152
  let ofslpar = (* offset of the opening parenthesis, if there is one *)
    1 + (* for the initial "$" *)
    String.length where +
153
    3   (* for "pos" or "ofs" or "loc" *)
154
  in
155
  let where =
156
    match where with
157 158
    | "symbolstart"
    | "s"           -> check_no_parameter(); WhereSymbolStart
159 160
    | "start"       -> WhereStart
    | "end"         -> WhereEnd
161
    | ""            -> WhereStart
162
    | _             -> assert false
POTTIER Francois's avatar
POTTIER Francois committed
163 164
  in
  let flavor =
165 166 167
    match flavor with
    | "pos"   -> FlavorPosition
    | "ofs"   -> FlavorOffset
168
    | "loc"   -> FlavorLocation
169 170 171 172 173 174
    | _       -> assert false
  in
  let subject, check =
    match i, x with
    | Some i, None ->
        let ii = int_of_string i in (* cannot fail *)
175
        if ii = 0 && where = WhereEnd then
176 177
          (* [$endpos($0)] *)
          Before, none
POTTIER Francois's avatar
POTTIER Francois committed
178
        else
179 180 181 182 183 184 185 186 187 188 189 190
          (* [$startpos($i)] is rewritten to [$startpos(_i)]. *)
          RightNamed ("_" ^ i), check_dollar pos ii
    | None, Some x ->
        (* [$startpos(x)] *)
        RightNamed x, check_producer pos x
    | None, None ->
        (* [$startpos] *)
        Left, none
    | Some _, Some _ ->
        assert false
  in
  let transform ofs1 content =
POTTIER Francois's avatar
POTTIER Francois committed
191 192 193
    let pos = start_of_position pos in
    let ofs = pos.pos_cnum - ofs1 in
    overwrite content ofs '$' '_';
194 195 196 197 198 199
    let ofslpar = ofs + ofslpar in
    match i, x with
    | None, Some x ->
        overwrite content ofslpar '(' '_';
        overwrite content (ofslpar + 1 + String.length x) ')' '_'
    | Some i, None ->
POTTIER Francois's avatar
POTTIER Francois committed
200
        overwrite content ofslpar '(' '_';
201 202 203 204 205 206
        overwrite content (ofslpar + 1) '$' '_';
        overwrite content (ofslpar + 2 + String.length i) ')' '_'
    | _, _ ->
        ()
  in
  let keyword =
207
    Some (Position (subject, where, flavor))
208 209 210 211 212 213 214 215 216 217
  in
  { pos; check; transform; keyword }

(* ------------------------------------------------------------------------ *)

(* In an OCaml header, there should be no monsters. This is just a sanity
   check. *)

let no_monsters monsters =
  match monsters with
POTTIER Francois's avatar
POTTIER Francois committed
218 219
  | [] ->
      ()
220 221 222 223 224
  | monster :: _ ->
      Error.error [monster.pos]
        "a Menhir keyword cannot be used in an OCaml header."

(* ------------------------------------------------------------------------ *)
POTTIER Francois's avatar
POTTIER Francois committed
225 226 227

(* Creates a stretch. *)

POTTIER Francois's avatar
POTTIER Francois committed
228
let mk_stretch pos1 pos2 parenthesize monsters =
POTTIER Francois's avatar
POTTIER Francois committed
229
  (* Read the specified chunk of the file. *)
230
  let raw_content : string = InputFile.chunk (pos1, pos2) in
231
  (* Transform the monsters, if there are any. (This explicit test
POTTIER Francois's avatar
POTTIER Francois committed
232 233
     allows saving one string copy and keeping just one live copy.) *)
  let content : string =
234
    match monsters with
235
    | [] ->
POTTIER Francois's avatar
POTTIER Francois committed
236 237 238
        raw_content
    | _ :: _ ->
        let content : bytes = Bytes.of_string raw_content in
239
        List.iter (fun monster -> monster.transform pos1.pos_cnum content) monsters;
POTTIER Francois's avatar
POTTIER Francois committed
240 241 242 243 244 245 246 247 248 249 250
        Bytes.unsafe_to_string content
  in
  (* Add whitespace so that the column numbers match those of the source file.
     If requested, add parentheses so that the semantic action can be inserted
     into other code without ambiguity. *)
  let content =
    if parenthesize then
      (String.make (pos1.pos_cnum - pos1.pos_bol - 1) ' ') ^ "(" ^ content ^ ")"
    else
      (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content
  in
251
  Stretch.({
252
    stretch_filename = InputFile.get_input_file_name();
253 254 255 256 257 258
    stretch_linenum = pos1.pos_lnum;
    stretch_linecount = pos2.pos_lnum - pos1.pos_lnum;
    stretch_content = content;
    stretch_raw_content = raw_content;
    stretch_keywords = Misc.map_opt (fun monster -> monster.keyword) monsters
  })
POTTIER Francois's avatar
POTTIER Francois committed
259

POTTIER Francois's avatar
POTTIER Francois committed
260 261
(* ------------------------------------------------------------------------ *)

262
(* OCaml's reserved words. *)
POTTIER Francois's avatar
POTTIER Francois committed
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325

let reserved =
  let table = Hashtbl.create 149 in
  List.iter (fun word -> Hashtbl.add table word ()) [
    "and";
    "as";
    "assert";
    "begin";
    "class";
    "constraint";
    "do";
    "done";
    "downto";
    "else";
    "end";
    "exception";
    "external";
    "false";
    "for";
    "fun";
    "function";
    "functor";
    "if";
    "in";
    "include";
    "inherit";
    "initializer";
    "lazy";
    "let";
    "match";
    "method";
    "module";
    "mutable";
    "new";
    "object";
    "of";
    "open";
    "or";
    "parser";
    "private";
    "rec";
    "sig";
    "struct";
    "then";
    "to";
    "true";
    "try";
    "type";
    "val";
    "virtual";
    "when";
    "while";
    "with";
    "mod";
    "land";
    "lor";
    "lxor";
    "lsl";
    "lsr";
    "asr";
  ];
  table

326 327
}

POTTIER Francois's avatar
POTTIER Francois committed
328 329 330 331
(* ------------------------------------------------------------------------ *)

(* Patterns. *)

332 333 334 335 336 337 338 339 340 341
let newline = ('\010' | '\013' | "\013\010")

let whitespace = [ ' ' '\t' ';' ]

let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']

let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']

let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *)

342 343
let attributechar = identchar | '.'

POTTIER Francois's avatar
POTTIER Francois committed
344 345 346 347
let subject =
  '$' (['0'-'9']+ as i)
| ((lowercase identchar*) as x)

POTTIER Francois's avatar
POTTIER Francois committed
348
let poskeyword =
349
  '$'
POTTIER Francois's avatar
POTTIER Francois committed
350 351 352 353 354
  (
    (("symbolstart" | "start" | "end") as where) (("pos" | "ofs") as flavor)
  | (("s" | "") as where) ("loc" as flavor)
  )
  ( '(' subject ')' )?
355 356 357 358 359 360 361

let previouserror =
  "$previouserror"

let syntaxerror =
  "$syntaxerror"

POTTIER Francois's avatar
POTTIER Francois committed
362 363 364 365
(* ------------------------------------------------------------------------ *)

(* The lexer. *)

366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
rule main = parse
| "%token"
    { TOKEN }
| "%type"
    { TYPE }
| "%left"
    { LEFT }
| "%right"
    { RIGHT }
| "%nonassoc"
    { NONASSOC }
| "%start"
    { START }
| "%prec"
    { PREC }
| "%public"
    { PUBLIC }
| "%parameter"
    { PARAMETER }
| "%inline"
    { INLINE }
387 388
| "%attribute"
    { PERCENTATTRIBUTE }
389 390
| "%on_error_reduce"
    { ON_ERROR_REDUCE }
391
| "%%"
392
    { (* The token [PERCENTPERCENT] carries a stretch that contains
393
         everything that follows %% in the input file. This string
394 395 396 397 398 399 400 401 402
         must be created lazily. The parser decides (based on the
         context) whether this stretch is needed. If it is indeed
         needed, then constructing this stretch drives the lexer
         to the end of the file. *)
      PERCENTPERCENT (lazy (
        let openingpos = lexeme_end_p lexbuf in
        let closingpos = finish lexbuf in
        mk_stretch openingpos closingpos false []
      )) }
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
| ":"
    { COLON }
| ","
    { COMMA }
| "="
    { EQUAL }
| "("
    { LPAREN }
| ")"
    { RPAREN }
| "|"
    { BAR }
| "?"
    { QUESTION }
| "*"
    { STAR }
| "+"
    { PLUS }
| (lowercase identchar *) as id
    { if Hashtbl.mem reserved id then
423
        error2 lexbuf "this is an OCaml reserved word."
424
      else
425
        LID (with_pos (cpos lexbuf) id)
426 427 428 429 430
    }
| (uppercase identchar *) as id
    { UID (with_pos (cpos lexbuf) id) }
| "//" [^ '\010' '\013']* newline (* skip C++ style comment *)
| newline
431
    { new_line lexbuf; main lexbuf }
432 433 434 435 436 437 438 439 440 441
| whitespace+
    { main lexbuf }
| "/*"
    { comment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| "<"
    { savestart lexbuf (ocamltype (lexeme_end_p lexbuf)) }
| "%{"
    { savestart lexbuf (fun lexbuf ->
442 443
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
444 445
        let closingpos, monsters = action true openingpos [] lexbuf in
        no_monsters monsters;
446
        HEADER (mk_stretch stretchpos closingpos false [])
447 448 449
      ) }
| "{"
    { savestart lexbuf (fun lexbuf ->
450 451
        let openingpos = lexeme_start_p lexbuf in
        let stretchpos = lexeme_end_p lexbuf in
452
        let closingpos, monsters = action false openingpos [] lexbuf in
453
        ACTION (
454
          fun (producers : string option array) ->
455
            List.iter (fun monster -> monster.check producers) monsters;
456
            let stretch = mk_stretch stretchpos closingpos true monsters in
457 458
            Action.from_stretch stretch
        )
459
      ) }
460 461 462 463
| ('%'? as percent) "[@" (attributechar+ as id) whitespace*
    { let openingpos = lexeme_start_p lexbuf in
      let stretchpos = lexeme_end_p lexbuf in
      let closingpos = attribute openingpos lexbuf in
464
      let pos = Positions.import (openingpos, lexeme_end_p lexbuf) in
465 466 467 468 469 470 471 472
      let attr = mk_stretch stretchpos closingpos false [] in
      if percent = "" then
        (* No [%] sign: this is a normal attribute. *)
        ATTRIBUTE (Positions.with_pos pos id, attr)
      else
        (* A [%] sign is present: this is a grammar-wide attribute. *)
        GRAMMARATTRIBUTE (Positions.with_pos pos id, attr)
    }
473 474 475
| eof
    { EOF }
| _
476
    { error2 lexbuf "unexpected character(s)." }
477

POTTIER Francois's avatar
POTTIER Francois committed
478 479
(* ------------------------------------------------------------------------ *)

480 481 482 483
(* Skip C style comments. *)

and comment openingpos = parse
| newline
484
    { new_line lexbuf; comment openingpos lexbuf }
485 486 487 488 489 490 491
| "*/"
    { () }
| eof
    { error1 openingpos "unterminated comment." }
| _
    { comment openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
492 493
(* ------------------------------------------------------------------------ *)

494
(* Collect an O'Caml type delimited by angle brackets. Angle brackets can
495 496
   appear as part of O'Caml function types and variant types, so we must
   recognize them and *not* treat them as a closing bracket. *)
497 498 499

and ocamltype openingpos = parse
| "->"
500
| "[>"
501 502
    { ocamltype openingpos lexbuf }
| '>'
503
    { OCAMLTYPE (Stretch.Declared (mk_stretch openingpos (lexeme_start_p lexbuf) true [])) }
504 505 506
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamltype openingpos lexbuf }
| newline
507
    { new_line lexbuf; ocamltype openingpos lexbuf }
508
| eof
509
    { error1 openingpos "unterminated OCaml type." }
510 511 512
| _
    { ocamltype openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
513 514
(* ------------------------------------------------------------------------ *)

515 516 517 518
(* Collect O'Caml code delimited by curly brackets. The monsters that are
   encountered along the way are accumulated in the list [monsters]. Nested
   curly brackets must be properly counted. Nested parentheses are also kept
   track of, so as to better report errors when they are not balanced. *)
519

520
and action percent openingpos monsters = parse
521
| '{'
522
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
523
      action percent openingpos monsters lexbuf }
524 525 526 527
| ("}" | "%}") as delimiter
    { match percent, delimiter with
      | true, "%}"
      | false, "}" ->
528 529
          (* This is the delimiter we were instructed to look for. *)
          lexeme_start_p lexbuf, monsters
530
      | _, _ ->
531 532
          (* This is not it. *)
          error1 openingpos "unbalanced opening brace."
533 534
    }
| '('
535
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
536 537 538 539
      action percent openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
    { let monster = dollar (cpos lexbuf) (int_of_string i) in
      action percent openingpos (monster :: monsters) lexbuf }
540
| poskeyword
541 542
    { let monster = position (cpos lexbuf) where flavor i x in
      action percent openingpos (monster :: monsters) lexbuf }
543
| previouserror
544
    { error2 lexbuf "$previouserror is no longer supported." }
545
| syntaxerror
546 547
    { let monster = syntaxerror (cpos lexbuf) in
      action percent openingpos (monster :: monsters) lexbuf }
548 549
| '"'
    { string (lexeme_start_p lexbuf) lexbuf;
550
      action percent openingpos monsters lexbuf }
551 552
| "'"
    { char lexbuf;
553
      action percent openingpos monsters lexbuf }
554 555
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf;
556
      action percent openingpos monsters lexbuf }
557
| newline
558
    { new_line lexbuf;
559
      action percent openingpos monsters lexbuf }
560 561 562 563
| ')'
| eof
    { error1 openingpos "unbalanced opening brace." }
| _
564
    { action percent openingpos monsters lexbuf }
565

POTTIER Francois's avatar
POTTIER Francois committed
566 567
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
568 569 570
(* Inside a semantic action, we keep track of nested parentheses, so as to
   better report errors when they are not balanced. *)

571
and parentheses openingpos monsters = parse
572
| '('
573
    { let _, monsters = parentheses (lexeme_start_p lexbuf) monsters lexbuf in
574
      parentheses openingpos monsters lexbuf }
575
| ')'
576
    { lexeme_start_p lexbuf, monsters }
577
| '{'
578
    { let _, monsters = action false (lexeme_start_p lexbuf) monsters lexbuf in
579 580 581 582
      parentheses openingpos monsters lexbuf }
| '$' (['0'-'9']+ as i)
    { let monster = dollar (cpos lexbuf) (int_of_string i) in
      parentheses openingpos (monster :: monsters) lexbuf }
583
| poskeyword
584 585
    { let monster = position (cpos lexbuf) where flavor i x in
      parentheses openingpos (monster :: monsters) lexbuf }
586
| previouserror
587
    { error2 lexbuf "$previouserror is no longer supported." }
588
| syntaxerror
589 590
    { let monster = syntaxerror (cpos lexbuf) in
      parentheses openingpos (monster :: monsters) lexbuf }
591
| '"'
592
    { string (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
593
| "'"
594
    { char lexbuf; parentheses openingpos monsters lexbuf }
595
| "(*"
596
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; parentheses openingpos monsters lexbuf }
597
| newline
598
    { new_line lexbuf; parentheses openingpos monsters lexbuf }
599 600 601 602
| '}'
| eof
    { error1 openingpos "unbalanced opening parenthesis." }
| _
603
    { parentheses openingpos monsters lexbuf }
604

POTTIER Francois's avatar
POTTIER Francois committed
605 606
(* ------------------------------------------------------------------------ *)

607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640
(* Collect an attribute payload, which is terminated by a closing square
   bracket. Nested square brackets must be properly counted. Nested curly
   brackets and nested parentheses are also kept track of, so as to better
   report errors when they are not balanced. *)

and attribute openingpos = parse
| '['
    { let _ = attribute (lexeme_start_p lexbuf) lexbuf in
      attribute openingpos lexbuf }
| ']'
    { lexeme_start_p lexbuf }
| '{'
    { let _, _ = action false (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '('
    { let _, _ = parentheses (lexeme_start_p lexbuf) [] lexbuf in
      attribute openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| "'"
    { char lexbuf; attribute openingpos lexbuf }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; attribute openingpos lexbuf }
| newline
    { new_line lexbuf; attribute openingpos lexbuf }
| '}'
| ')'
| eof
    { error1 openingpos "unbalanced opening bracket." }
| _
    { attribute openingpos lexbuf }

(* ------------------------------------------------------------------------ *)

641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656
(* Skip O'Caml comments. Comments can be nested and can contain
   strings or characters, which must be correctly analyzed. (A string
   could contain begin-of-comment or end-of-comment sequences, which
   must be ignored; a character could contain a begin-of-string
   sequence.) *)

and ocamlcomment openingpos = parse
| "*)"
    { () }
| "(*"
    { ocamlcomment (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| '"'
    { string (lexeme_start_p lexbuf) lexbuf; ocamlcomment openingpos lexbuf }
| "'"
    { char lexbuf; ocamlcomment openingpos lexbuf }
| newline
657
    { new_line lexbuf; ocamlcomment openingpos lexbuf }
658
| eof
659
    { error1 openingpos "unterminated OCaml comment." }
660 661 662
| _
    { ocamlcomment openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
663 664
(* ------------------------------------------------------------------------ *)

665 666 667
(* Skip O'Caml strings. *)

and string openingpos = parse
POTTIER Francois's avatar
POTTIER Francois committed
668
| '"'
669 670 671
   { () }
| '\\' newline
| newline
672
   { new_line lexbuf; string openingpos lexbuf }
673 674 675 676
| '\\' _
   (* Upon finding a backslash, skip the character that follows,
      unless it is a newline. Pretty crude, but should work. *)
   { string openingpos lexbuf }
POTTIER Francois's avatar
POTTIER Francois committed
677
| eof
678
   { error1 openingpos "unterminated OCaml string." }
679 680 681
| _
   { string openingpos lexbuf }

POTTIER Francois's avatar
POTTIER Francois committed
682 683
(* ------------------------------------------------------------------------ *)

684 685 686 687 688 689
(* Skip O'Caml characters. A lone quote character is legal inside
   a comment, so if we don't recognize the matching closing quote,
   we simply abandon. *)

and char = parse
| '\\'? newline "'"
690
   { new_line lexbuf }
691 692 693 694 695
| [^ '\\' '\''] "'"
| '\\' _ "'"
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
| '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
| ""
POTTIER Francois's avatar
POTTIER Francois committed
696
   { () }
697

POTTIER Francois's avatar
POTTIER Francois committed
698 699
(* ------------------------------------------------------------------------ *)

700 701 702
(* Read until the end of the file. This is used after finding a %%
   that marks the end of the grammar specification. We update the
   current position as we go. This allows us to build a stretch
703
   for the postlude. *)
704 705 706

and finish = parse
| newline
707
    { new_line lexbuf; finish lexbuf }
708 709 710 711
| eof
    { lexeme_start_p lexbuf }
| _
    { finish lexbuf }