grammarFunctor.ml 47 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
open BasicSyntax
15 16 17 18 19 20
open Syntax
open Positions

module Make (G : sig

  (* An abstract syntax tree for the grammar. *)
21
  val grammar: BasicSyntax.grammar
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57

  (* This flag indicates whether it is OK to produce warnings, verbose
     information, etc., when this functor is invoked. If it is set to
     [false], then only serious errors can be signaled. *)
  val verbose: bool

end) = struct

  open G

(* ------------------------------------------------------------------------ *)
(* Precedence levels for tokens or pseudo-tokens alike. *)

module TokPrecedence = struct

  (* This set records, on a token by token basis, whether the token's
     precedence level is ever useful. This allows emitting warnings
     about useless precedence declarations. *)

  let ever_useful : StringSet.t ref =
    ref StringSet.empty

  (* [use id] records that the precedence level of the token [id] has
     been useful. *)

  let use id =
    ever_useful := StringSet.add id !ever_useful

  (* This function is invoked when someone wants to consult a token's
     precedence level. This does not yet mean that this level is
     useful, though. Indeed, if it is subsequently compared against
     [UndefinedPrecedence], it will not allow solving a conflict. So,
     in addition to the desired precedence level, we return a delayed
     computation which, when evaluated, records that this precedence
     level was useful. *)

  let levelip id properties =
    lazy (use id), properties.tk_precedence

  (* [leveli id] looks up the properties of the token [id] in
     [grammar.tokens], then behaves like [levelip]. *)

  let leveli id =
    let properties =
      try
        StringMap.find id grammar.tokens
      with Not_found ->
        assert false (* well-formedness check has been performed earlier *)
    in
    levelip id properties

  (* This function prints warnings about useless precedence declarations
     for terminal symbols (%left, %right, %nonassoc). It should be invoked
     only after the automaton has been constructed. *)

  let diagnostics () =
    StringMap.iter (fun id properties ->
      if not (StringSet.mem id !ever_useful) then
        match properties.tk_precedence with
        | UndefinedPrecedence ->
            ()
        | PrecedenceLevel (_, _, pos1, pos2) ->
            Error.grammar_warning [Positions.import (pos1, pos2)]
              "the precedence level assigned to %s is never useful." id
    ) grammar.tokens

end

(* ------------------------------------------------------------------------ *)
(* Nonterminals. *)

module Nonterminal = struct

  (* A nonterminal symbol is represented by an integer index. *)
  type t =
    int

  (* [n2i] is the identity; it exposes the index of a nonterminal symbol. *)
  let n2i =
    fun nt -> nt

  (* The ordering on nonterminal symbols is the difference of their
     indices. *)
  let compare nt1 nt2 =
    nt1 - nt2

  (* Determine how many nonterminals we have and build mappings
     both ways between names and indices. A new nonterminal is
     created for every start symbol. *)

  (* The names of the fresh start nonterminals: one name [S'] per start
     symbol [S] of the grammar. *)
  let new_start_nonterminals =
    let primed symbol = symbol ^ "'" in
    StringSet.fold (fun symbol accu ->
      primed symbol :: accu
    ) grammar.start_symbols []

  (* The nonterminal symbols of the grammar, as listed by [nonterminals]. *)
  let original_nonterminals =
    nonterminals grammar
104

105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
  (* [start] is the number of fresh start nonterminals. *)
  let start =
    List.length new_start_nonterminals

  (* The fresh start nonterminals are placed in front of the original ones
     in the list that is handed to [Misc.index]. *)
  let (n : int), (name : string array), (map : int StringMap.t) =
    Misc.index (List.append new_start_nonterminals original_nonterminals)

  (* In verbose mode, log the number of nonterminal symbols. *)
  let () =
    if verbose then begin
      let log f =
        Printf.fprintf f
          "Grammar has %d nonterminal symbols, among which %d start symbols.\n"
          (n - start) start
      in
      Error.logG 1 log
    end

  (* [is_start nt] tests whether the index [nt] lies below [start]. *)
  let is_start nt =
    start > nt

  (* [print normalize nt] is the name of [nt], passed through
     [Misc.normalize] if [normalize] is set. *)
  let print normalize nt =
    let s = name.(nt) in
    if normalize then Misc.normalize s else s

  (* [lookup id] maps a name to an index. It raises [Not_found] if [id]
     is not in [map]. *)
  let lookup id =
    StringMap.find id map

  (* [positions nt] is the [positions] field of the rule that defines
     [nt] in [grammar.rules]. *)
  let positions nt =
    let key = print false nt in
    (StringMap.find key grammar.rules).positions

134 135 136
  (* [init f] builds an array of [n] entries by applying [f] to every
     nonterminal index. *)
  let init f =
    Array.init n f

  (* Iteration, folding, and mapping over all [n] nonterminal symbols,
     start symbols included, via the helpers in [Misc]. *)
  let iter f =
    Misc.iteri n f

  let fold f accu =
    Misc.foldi n f accu

  let map f =
    Misc.mapi n f

  (* [iterx] and [foldx] begin at index [start], so they skip the fresh
     start nonterminals. *)
  let iterx f =
    for nt = start to n - 1 do
      f nt
    done

  let foldx f accu =
    Misc.foldij start n f accu

  (* [ocamltype nt] looks up the name of [nt] in [grammar.types]. The
     result is [None] if there is no entry. [nt] must not be one of the
     fresh start nonterminals. *)
  let ocamltype nt =
    assert (not (is_start nt));
    let id = print false nt in
    try
      let typ = StringMap.find id grammar.types in
      Some typ
    with Not_found ->
      None

  (* Same as [ocamltype], but the type is required to exist. *)
  let ocamltype_of_start_symbol nt =
    match ocamltype nt with
    | None ->
        (* Every start symbol has a type. *)
        assert false
    | Some typ ->
        typ

  (* [tabulate f] evaluates [f] at every nonterminal index, stores the
     results in an array, and returns a function that reads this array. *)
  let tabulate f =
    let table = Array.init n f in
    fun nt -> Array.get table nt

172 173 174 175 176 177 178 179 180 181 182 183
  (* The attributes of every nonterminal symbol. The table is filled from
     [grammar.rules]; an entry that has no rule keeps the empty list. *)
  let attributes : Syntax.attributes array =
    let table = Array.make n [] in
    StringMap.iter (fun nonterminal { attributes = attrs } ->
      table.(lookup nonterminal) <- attrs
    ) grammar.rules;
    table

  (* [attributes nt] reads the table above. *)
  let attributes nt =
    attributes.(nt)

184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
end

(* Sets and maps over nonterminals. *)

(* Maps whose keys are nonterminal symbols. A nonterminal symbol is an
   integer ([Nonterminal.t] is [int]); [Patricia.Big] is presumably a map
   with integer keys -- to be confirmed against [Patricia]. *)
module NonterminalMap = Patricia.Big

(* Sets of nonterminal symbols, taken from the [Domain] submodule of the
   same map implementation. *)
module NonterminalSet = Patricia.Big.Domain

(* ------------------------------------------------------------------------ *)
(* Terminals. *)

module Terminal = struct

  (* A terminal symbol is represented by an integer index. *)
  type t =
    int

  (* Conversions between terminal symbols and integers. Both are the
     identity. *)
  let t2i = fun tok -> tok
  let i2t = fun i -> i

  (* The ordering on terminal symbols is the difference of their indices. *)
  let compare tok1 tok2 =
    tok1 - tok2

  (* Two terminal symbols are equal when [compare] returns zero. *)
  let equal (tok1 : t) (tok2 : t) =
    compare tok1 tok2 = 0

  (* Determine how many terminals we have and build mappings
     both ways between names and indices. A new terminal "#"
     is created. A new terminal "error" is created. The fact
     that the integer code assigned to the "#" pseudo-terminal
     is the last one is exploited in the table-based back-end.
     (The right-most row of the action table is not created.)

     Pseudo-tokens (used in %prec declarations, but never
     declared using %token) are filtered out. *)

  (* In principle, the number of the [error] token is irrelevant.
     It is currently 0. Note that [iter_real], below, does rely on that;
     this is checked by the assertion [error = 0]. *)

  (* The declared tokens are listed by [tokens grammar]. The name "error"
     is placed in front of them and the name "#" is placed behind them
     before the whole list is handed to [Misc.index]. If the list of
     declared tokens is empty and [verbose] is set, an error is signaled
     instead. *)
  let (n : int), (name : string array), (map : int StringMap.t) =
    let tokens = tokens grammar in
    match tokens with
    | [] when verbose ->
        Error.error [] "no tokens have been declared."
    | _ ->
        Misc.index ("error" :: tokens @ [ "#" ])
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245

  (* [print tok] is the name of the terminal symbol [tok]. *)
  let print tok =
    Array.get name tok

  (* [lookup id] maps a name to a terminal symbol. It raises [Not_found]
     if [id] is not in [map]. *)
  let lookup id =
    StringMap.find id map

  (* The two pseudo-tokens. *)
  let sharp =
    lookup "#"

  let error =
    lookup "error"

  (* [pseudo tok] holds if [tok] is [#] or [error]. *)
  let pseudo tok =
    tok = sharp || tok = error

  (* [real t] holds if [t] is neither [error] nor [#]. *)
  let real t =
    not (t = error || t = sharp)

246 247 248
  (* [non_error tok] holds if [tok] is not the [error] token. *)
  let non_error tok =
    not (tok = error)

249
  (* [token_properties] maps every terminal symbol to its properties, as
     found in [grammar.tokens]. The two pseudo-tokens, which have no entry
     there, receive a fixed record. *)
  let token_properties =
    let not_so_dummy_properties = (* applicable to [error] and [#] *)
      {
        tk_filename      = "__primitives__";
        tk_precedence    = UndefinedPrecedence;
        tk_associativity = UndefinedAssoc;
        tk_ocamltype     = None;
        tk_is_declared   = true;
        tk_position      = Positions.dummy;
        tk_attributes    = [];
      }
    in
    Array.init n (fun tok ->
      try
        StringMap.find name.(tok) grammar.tokens
      with Not_found ->
        (* Only the pseudo-tokens may be missing from [grammar.tokens]. *)
        assert (tok = sharp || tok = error);
        not_so_dummy_properties
    )

  (* In verbose mode, log the number of terminal symbols. The count [n - 2]
     leaves out the two pseudo-tokens. *)
  let () =
    if verbose then begin
      let log f =
        Printf.fprintf f "Grammar has %d terminal symbols.\n" (n - 2)
      in
      Error.logG 1 log
    end

275
  (* [precedence_level tok] returns a pair of a delayed computation and a
     precedence level; see [TokPrecedence.levelip]. *)
  let precedence_level tok =
    TokPrecedence.levelip (print tok) token_properties.(tok)

  (* [associativity tok] is the associativity recorded for [tok]. *)
  let associativity tok =
    token_properties.(tok).tk_associativity

  (* [ocamltype tok] is the optional OCaml type recorded for [tok]. *)
  let ocamltype tok =
    token_properties.(tok).tk_ocamltype

284 285 286
  (* [init f] builds an array of [n] entries by applying [f] to every
     terminal index, pseudo-tokens included. *)
  let init f =
    Array.init n f

  (* Iteration, folding, and mapping over all [n] terminal symbols,
     pseudo-tokens included, via the helpers in [Misc]. *)
  let iter f =
    Misc.iteri n f

  let fold f accu =
    Misc.foldi n f accu

  let map f =
    Misc.mapi n f

  (* [foldx] and [mapx] use [sharp] as their bound instead of [n]. This
     presumably leaves out [#] only; it relies on [#] being the last
     terminal symbol, which is checked here. *)
  let () =
    assert (sharp = n - 1)

  let foldx f accu =
    Misc.foldi sharp f accu

  let mapx f =
    Misc.mapi sharp f

  (* [iter_real] visits the indices [1] to [n-2]. Since [error] is [0]
     (checked here) and [#] is [n-1] (checked above), this leaves out
     both pseudo-tokens. *)
  let () =
    assert (error = 0)

  let iter_real f =
    for i = 1 to n-2 do
      f i
    done

  (* If a token named [EOF] exists, then it is assumed to represent
     ocamllex's [eof] pattern. *)

  (* [eof] is [Some tok] if a terminal symbol named "EOF" exists, and
     [None] otherwise. *)
  let eof =
    let id = "EOF" in
    try
      let tok = lookup id in
      Some tok
    with Not_found ->
      None

319 320 321
  (* [attributes tok] is the list of attributes recorded for [tok]. *)
  let attributes tok =
    let properties = token_properties.(tok) in
    properties.tk_attributes

322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
  (* The sub-module [Word] offers an implementation of words (that is,
     sequences) of terminal symbols. It is used by [LRijkstra]. We
     make it a functor, because it has internal state (a hash table)
     and a side effect (failure if there are more than 256 terminal
     symbols). *)

  module Word (X : sig end) = struct

    (* We could use lists, or perhaps the sequences offered by the module
       [Seq], which support constant time concatenation. However, we need a
       much more compact representation: [LRijkstra] stores tens of millions
       of such words. We use strings, because they are very compact (8 bits
       per symbol), and on top of that, we use a hash-consing facility. In
       practice, hash-consing allows us to save 1000x in space. *)

    (* A drawback of this approach is that it works only if the number of
       terminal symbols is at most 256. For the moment, this is good enough.
       [LRijkstra] already has difficulty at 100 terminal symbols or so. *)

    let () =
      assert (n <= 256)

    (* [encode] maps a string to an integer code and [decode] maps a code
       back to its string. *)
    let (encode : string -> int), (decode : int -> string), verbose =
      Misc.new_encode_decode 1024

    (* A word is represented by the integer code of its string. *)
    type word =
      int

    (* The empty word. *)
    let epsilon =
      encode ""

    (* [singleton t] is the one-letter word made of the terminal symbol
       [t]. The symbol is stored as the character whose code is [t]. *)
    let singleton t =
      encode (String.make 1 (Char.chr t))

    (* [append i1 i2] is the concatenation of two words. No new string is
       built when one of them is empty. *)
    let append i1 i2 =
      let w1 = decode i1
      and w2 = decode i2 in
      if String.length w1 = 0 then
        i2
      else if String.length w2 = 0 then
        i1
      else
        encode (w1 ^ w2)

    (* [length i] is the number of symbols in the word [i]. *)
    let length i =
      String.length (decode i)

    (* [first i z] is the first symbol of the word [i], or [z] if this
       word is empty. *)
    let first i z =
      let w = decode i in
      if String.length w > 0 then
        Char.code w.[0]
      else
        z

    (* [elements i] is the list of the symbols of the word [i], in order.
       The list is built from right to left, so the loop is tail-recursive. *)
    let elements i =
      let w = decode i in
      let rec loop k accu =
        if k < 0 then
          accu
        else
          loop (k - 1) (Char.code w.[k] :: accu)
      in
      loop (String.length w - 1) []

    (* [print i] prints the symbols of the word [i], separated by spaces.
       The inner [print] is the printer for terminal symbols that is
       defined in the enclosing module. *)
    let print i =
      let w = decode i in
      Misc.separated_iter_to_string
        (fun c -> print (Char.code c))
        " "
        (fun f -> String.iter f w)

    (* [String.compare] implements a lexicographic ordering on strings. It
       is used in place of [Pervasives.compare], which is deprecated and
       no longer exists in OCaml 5. *)
    let compare i1 i2 =
      String.compare (decode i1) (decode i2)

  end

end

(* Sets of terminals are used intensively in the LR(1) construction,
   so it is important that they be as efficient as possible. *)

module TerminalSet = struct

  include CompressedBitSet

  (* [print toks] prints the names of the terminal symbols in the set
     [toks], separated by spaces. *)
  let print toks =
    Misc.separated_iter_to_string Terminal.print " " (fun f -> iter f toks)

  (* [universe] is the set of all terminal symbols, minus the two
     pseudo-tokens [#] and [error]. *)
  let universe =
    remove Terminal.sharp (
      remove Terminal.error (
        Terminal.fold add empty
      )
    )

  (* The following definitions are used in the computation of FIRST sets
     below. They are not exported outside of this file. *)

  type property =
    t

  let bottom =
    empty

  let is_maximal _ =
    false

end

(* Maps over terminals. *)

(* A terminal symbol is an integer ([Terminal.t] is [int]), so the same
   map implementation as for nonterminals is used. *)
module TerminalMap = Patricia.Big

(* ------------------------------------------------------------------------ *)
(* Symbols. *)

module Symbol = struct

  (* A symbol is either a nonterminal symbol or a terminal symbol. *)
  type t =
    | N of Nonterminal.t
    | T of Terminal.t

  (* [compare] is a total order on symbols. Two symbols of the same kind
     are compared by their indices. A nonterminal symbol is greater than
     a terminal symbol. *)
  let compare sym1 sym2 =
    match sym1, sym2 with
    | N nt1, N nt2 ->
        Nonterminal.compare nt1 nt2
    | T tok1, T tok2 ->
        Terminal.compare tok1 tok2
    | N _, T _ ->
        1
    | T _, N _ ->
        -1
455 456 457 458 459 460 461

  (* Two symbols are equal when [compare] returns zero. *)
  let equal sym1 sym2 =
    compare sym1 sym2 = 0

  (* [lequal] is equality of lists of symbols: the two lists must have the
     same length and pairwise equal elements. *)
  let rec lequal syms1 syms2 =
    match syms1, syms2 with
    | [], [] ->
        true
    | sym1 :: syms1, sym2 :: syms2 ->
        equal sym1 sym2 && lequal syms1 syms2
    | _ :: _, []
    | [], _ :: _ ->
        false
468

469 470 471 472 473 474 475
  (* [non_error sym] holds unless [sym] is the terminal symbol [error]. *)
  let non_error = function
    | N _ ->
        true
    | T tok ->
        Terminal.non_error tok

476 477
  (* [print sym] is the name of the symbol [sym]. The name of a
     nonterminal symbol is not normalized. *)
  let print = function
    | N nt ->
        Nonterminal.print false nt
    | T tok ->
        Terminal.print tok

  (* [nonterminal sym] tells whether [sym] is a nonterminal symbol. *)
  let nonterminal = function
    | T _ ->
        false
    | N _ ->
        true
487 488 489 490 491 492 493 494 495 496

  (* Printing an array of symbols. [offset] is the start offset -- we
     print everything to its right. [dot] is the dot offset -- we
     print a dot at this offset, if we find it. *)

  (* [printaod offset dot symbols] prints the symbols found at index
     [offset] and beyond, each followed by a space. The text ". " is
     inserted at index [dot]. The loop runs up to [length] inclusive so
     that a dot located at the very end is printed too. *)
  let printaod offset dot symbols =
    let buffer = Buffer.create 512 in
    let length = Array.length symbols in
    for i = offset to length do
      if i = dot then
        Buffer.add_string buffer ". ";
      if i < length then begin
        Buffer.add_string buffer (print symbols.(i));
        Buffer.add_char buffer ' '
      end
    done;
    Buffer.contents buffer

  (* The dot offset [-1] is never reached by the loop in [printaod], so
     the following functions print no dot. *)

  (* [printao offset symbols] prints the array [symbols] from [offset] on. *)
  let printao offset symbols =
    let no_dot = -1 in
    printaod offset no_dot symbols

  (* [printa symbols] prints the whole array [symbols]. *)
  let printa symbols =
    printaod 0 (-1) symbols

  (* [printl symbols] prints the list [symbols]. *)
  let printl symbols =
    let symbols = Array.of_list symbols in
    printaod 0 (-1) symbols

  (* [lookup name] maps a name to a symbol. The name is first looked up
     among the terminal symbols, then among the nonterminal symbols. It is
     expected to be found in one of these two tables. *)
  let lookup name =
    try
      T (Terminal.lookup name)
    with Not_found ->
      try
        N (Nonterminal.lookup name)
      with Not_found ->
        assert false (* well-formedness check has been performed earlier *)
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576

end

(* Sets of symbols. *)

module SymbolSet = struct

  include Set.Make(Symbol)

  (* [print symbols] prints the elements of the set [symbols]. *)
  let print symbols =
    Symbol.printa (Array.of_list (elements symbols))

  (* The following definitions are used in the computation of symbolic FOLLOW
     sets below. They are not exported outside of this file. *)

  (* A property is a set of symbols. *)
  type property = t

  (* The least property is the empty set. *)
  let bottom = empty

  (* No property is considered maximal. *)
  let is_maximal = fun _ -> false

end

(* Maps over symbols. *)

module SymbolMap = struct

  include Map.Make(Symbol)

  (* [domain m] is the list of the keys of [m], in decreasing order. *)
  let domain m =
    List.rev_map fst (bindings m)

  (* [purelynonterminal m] holds if every key of [m] is a nonterminal
     symbol. *)
  let purelynonterminal m =
    for_all (fun symbol _ -> Symbol.nonterminal symbol) m

end

(* ------------------------------------------------------------------------ *)
(* Productions. *)

module Production = struct

  (* A production is represented by an integer index. *)
  type index = int

  (* The ordering on productions is the difference of their indices. *)
  let compare prod1 prod2 =
    prod1 - prod2

577 578 579 580 581
  (* A new production S' -> S is created for every start symbol S.
     It is known as a start production. *)

  (* Count how many productions we have, including the start productions.
     This is [n]. *)
582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597

  (* [n] is the number of branches found in [grammar.rules], plus one
     start production per start symbol. The logged figure counts only the
     former. *)
  let n : int =
    let regular =
      StringMap.fold (fun _ { branches = branches } accu ->
        accu + List.length branches
      ) grammar.rules 0
    in
    let () =
      if verbose then
        Error.logG 1 (fun f ->
          Printf.fprintf f "Grammar has %d productions.\n" regular)
    in
    regular + StringSet.cardinal grammar.start_symbols

  (* [p2i] is the identity; it exposes the index of a production. *)
  let p2i =
    fun prod -> prod

  (* [i2p] converts an integer to a production. The integer must lie in
     the semi-open interval from [0] to [n]. *)
  let i2p prod =
    assert (0 <= prod && prod < n);
    prod

598 599 600 601 602 603 604 605 606 607 608 609 610 611
  (* Create a number of uninitialized tables that map a production index to
     information about this production. *)

  (* [table] maps a production to the left-hand side and right-hand side of
     this production. [identifiers] maps a production to an array of the
     identifiers that are used to name the elements of the right-hand side.
     [actions] maps a production to an optional semantic action. (Only the
     start productions have none.) [positions] maps a production to an array
     of the positions (in the .mly file) of the elements of the right-hand
     side. [rhs_attributes] maps a production to an array of the attributes
     attached to the elements of the right-hand side. [prec_decl] maps a
     production to an optional [%prec] annotation. [production_level] maps
     a production to a production level (see [ParserAux]). *)

612 613 614 615 616 617 618 619 620 621 622 623
  (* Every table below has [n] entries and initially holds a dummy value
     in every entry. The tables are filled in when the start productions
     and the non-start productions are created. *)

  let table : (Nonterminal.t * Symbol.t array) array =
    Array.make n (-1, [||])

  let identifiers : identifier array array =
    Array.make n [||]

  let actions : action option array =
    Array.make n None

  let positions : Positions.t list array =
    Array.make n []

  let rhs_attributes : Syntax.attributes array array =
    Array.make n [||]

  let prec_decl : symbol located option array =
    Array.make n None

  let production_level : branch_production_level array =
    (* The start productions receive a level that pretends that they
       originate in a fictitious "builtin" file. So, a reduce/reduce
       conflict that involves a start production will not be solved. *)
    let dummy = ProductionLevel (InputFile.builtin_input_file, 0) in
    Array.make n dummy

  (* [ntprods] maps a nonterminal symbol to the interval of its productions. *)

  let ntprods : (int * int) array =
    Array.make Nonterminal.n (-1, -1)

  (* This Boolean flag records whether the grammar uses the [error] token. *)

  let grammar_uses_error_token =
    ref false

  (* Create the start productions, populating the above arrays as appropriate.
     [start] is the number of start productions, therefore also the index of the
     first non-start production. [startprods] is a mapping of the start symbols
     to the corresponding start productions. *)

652 653 654 655 656 657 658 659 660 661 662 663 664
  (* For every start symbol [S], the production [S' -> S] is stored at the
     next free index [prod], and [S] is mapped to [prod] in the
     accumulated map. The final index is the number of start
     productions. *)
  let (start : int),
      (startprods : index NonterminalMap.t) =
    StringSet.fold (fun symbol (prod, accu) ->
      let nt = Nonterminal.lookup symbol
      and nt' = Nonterminal.lookup (symbol ^ "'") in
      table.(prod) <- (nt', [| Symbol.N nt |]);
      identifiers.(prod) <- [| "_1" |];
      (* [S'] has exactly one production, namely this one. *)
      ntprods.(nt') <- (prod, prod + 1);
      positions.(prod) <- Nonterminal.positions nt;
      let accu = NonterminalMap.add nt prod accu in
      (prod + 1, accu)
    ) grammar.start_symbols (0, NonterminalMap.empty)

665
  (* Create the non-start productions, populating the above arrays. *)
666

667 668
  (* This definition shadows the [producer_symbol] that is already in
     scope: the result of the latter is further converted to a symbol by
     [Symbol.lookup]. *)
  let producer_symbol producer =
    let symbol = producer_symbol producer in
    Symbol.lookup symbol
669

670
  (* The non-start productions are numbered from [start] on. The branches
     of each nonterminal symbol receive consecutive indices, and the
     interval of these indices is recorded in [ntprods]. The final index
     is discarded. *)
  let (_ : int) = StringMap.fold (fun nonterminal { branches } k ->
    let nt = Nonterminal.lookup nonterminal in
    let k' = List.fold_left (fun k branch ->
      let producers = Array.of_list branch.producers in
      let rhs = Array.map producer_symbol producers in
      table.(k) <- (nt, rhs);
      identifiers.(k) <- Array.map producer_identifier producers;
      actions.(k) <- Some branch.action;
      rhs_attributes.(k) <- Array.map producer_attributes producers;
      production_level.(k) <- branch.branch_production_level;
      prec_decl.(k) <- branch.branch_prec_annotation;
      positions.(k) <- [ branch.branch_position ];
      (* Record whether [error] appears in this right-hand side. *)
      if not (Misc.array_for_all Symbol.non_error rhs) then
        grammar_uses_error_token := true;
      k+1
    ) k branches in
    ntprods.(nt) <- (k, k');
    k'
  ) grammar.rules start

690
  (* Iteration over the productions associated with a specific nonterminal. *)
691 692 693 694 695 696 697 698 699 700 701

  (* [iternt nt f] applies [f] to every production of [nt], in increasing
     order of indices. The interval stored in [ntprods] is semi-open. *)
  let iternt nt f =
    let k, k' = ntprods.(nt) in
    for prod = k to k' - 1 do
      f prod
    done

  (* [foldnt nt accu f] folds [f] over the productions of [nt], in
     increasing order of indices, starting with [accu]. *)
  let foldnt (nt : Nonterminal.t) (accu : 'a) (f : index -> 'a -> 'a) : 'a =
    let k, k' = ntprods.(nt) in
    let rec loop accu prod =
      if prod < k' then
        loop (f prod accu) (prod + 1)
      else
        accu
    in
    loop accu k

  (* A lazy fold over the productions of [nt]. The function [f] receives
     the rest of the fold as a suspension. If [f] does not invoke this
     suspension, the remaining productions are not visited. When no
     production is left, the result is [seed]. *)
  let foldnt_lazy (nt : Nonterminal.t) (f : index -> (unit -> 'a) -> 'a) (seed : 'a) : 'a =
    let first, limit = ntprods.(nt) in
    let rec from prod () =
      if prod >= limit then
        seed
      else
        f prod (from (prod + 1))
    in
    from first ()

  (* Accessors. *)

  (* [def prod] is the pair of the left-hand side and right-hand side of
     the production [prod]. *)
  let def prod =
    Array.get table prod

  (* [nt prod] is the left-hand side of [prod]. *)
  let nt prod =
    fst table.(prod)

  (* [rhs prod] is the right-hand side of [prod]. *)
  let rhs prod =
    snd table.(prod)

  (* [length prod] is the number of symbols in the right-hand side. *)
  let length prod =
    Array.length (snd table.(prod))

  (* [identifiers prod] is the array of identifiers stored for [prod]. *)
  let identifiers prod =
    identifiers.(prod)

  (* [is_start prod] tests whether the index [prod] lies below [start]. *)
  let is_start prod =
    start > prod

  (* [classify prod] is [Some nt] if [prod] is a start production whose
     right-hand side begins with the nonterminal symbol [nt]. It is [None]
     if [prod] is not a start production. *)
  let classify prod =
    if is_start prod then
      match (rhs prod).(0) with
      | Symbol.N nt ->
          Some nt
      | Symbol.T _ ->
          assert false
    else
      None

  (* [action prod] is the semantic action of [prod]. It must not be
     applied to a start production. *)
  let action prod =
    match actions.(prod) with
    | Some action ->
        action
    | None ->
        (* Start productions have no action. *)
        assert (is_start prod);
        assert false

  (* [positions prod] is the list of positions stored for [prod]. *)
  let positions prod =
    positions.(prod)

  (* [lhs_attributes prod] is the list of attributes of the left-hand
     side of [prod]. *)
  let lhs_attributes prod =
    Nonterminal.attributes (nt prod)

  (* [rhs_attributes prod] is the array of attributes stored for the
     right-hand side of [prod]. *)
  let rhs_attributes prod =
    rhs_attributes.(prod)

  (* [startsymbol2startprod nt] maps a start symbol to its start
     production. *)
  let startsymbol2startprod nt =
    try
      NonterminalMap.find nt startprods
    with Not_found ->
      assert false (* [nt] is not a start symbol *)

  (* Iteration. *)

778 779 780
  (* [init f] builds an array of [n] entries by applying [f] to every
     production index. *)
  let init f =
    Array.init n f

  (* Iteration, folding, and mapping over all [n] productions, start
     productions included, via the helpers in [Misc]. *)
  let iter f =
    Misc.iteri n f

  let fold f accu =
    Misc.foldi n f accu

  let map f =
    Misc.mapi n f

  (* [amap] has the same definition as [init]. *)
  let amap f =
    Array.init n f

  (* [iterx], [foldx], and [mapx] begin at index [start], so they skip
     the start productions. *)
  let iterx f =
    for prod = start to n - 1 do
      f prod
    done

  let foldx f accu =
    Misc.foldij start n f accu

  let mapx f =
    Misc.mapij start n f

  (* Printing a production. *)

  (* [print prod] shows the production [prod] under the form
     "lhs -> rhs". It must not be applied to a start production. *)
  let print prod =
    assert (not (is_start prod));
    let nt, rhs = table.(prod) in
    let lhs = Nonterminal.print false nt in
    Printf.sprintf "%s -> %s" lhs (Symbol.printao 0 rhs)

  (* Tabulation. *)

  (* Both functions hand [n], the number of productions, to the
     tabulation helper of the same name in [Misc]. *)
  let tabulate f =
    Misc.tabulate n f

  let tabulateb f =
    Misc.tabulateb n f

  (* This array allows recording, for each %prec declaration, whether it is
     ever useful. This allows us to emit a warning about useless %prec
     declarations. *)

  (* 2015/10/06: We take into account the fact that a %prec declaration can be
     duplicated by inlining or by the expansion of parameterized non-terminal
     symbols. Our table is not indexed by productions, but by positions (of
     %prec declarations in the source). Thus, if a %prec declaration is
     duplicated, at least one of its copies should be found useful for the
     warning to be suppressed. *)

  (* The positions of the %prec declarations that have been found useful.
     Only membership in this table matters. *)
  let ever_useful : (Positions.t, unit) Hashtbl.t =
    (* assuming that generic hashing and equality on positions are OK *)
    Hashtbl.create 16

  (* [consult_prec_decl prod] returns the optional %prec annotation of
     [prod], together with a delayed computation which, when evaluated,
     records that this annotation was useful. *)
  let consult_prec_decl prod =
    let osym = prec_decl.(prod) in
    lazy (
      Option.iter (fun sym ->
        (* Mark this %prec declaration as useful. Several productions can
           carry the same position, so [Hashtbl.replace] is used: it keeps
           a single binding per position, whereas [Hashtbl.add] would
           stack one more binding each time. *)
        let pos = Positions.position sym in
        Hashtbl.replace ever_useful pos ()
      ) osym
    ),
    osym

  (* This function prints warnings about useless precedence declarations for
     productions (%prec). It should be invoked only after the automaton has
     been constructed. *)

  let diagnostics () =
    iterx (fun prod ->
      let osym = prec_decl.(prod) in
      Option.iter (fun sym ->
        (* Check whether this %prec declaration was useless. *)
        let pos = Positions.position sym in
        if not (Hashtbl.mem ever_useful pos) then begin
          Error.grammar_warning [pos] "this %%prec declaration is never useful.";
          (* hack: avoid two warnings at the same position. The position
             is known to be absent here, so [Hashtbl.replace] inserts a
             single binding. *)
          Hashtbl.replace ever_useful pos ()
        end
      ) osym
    )

  (* Determining the precedence level of a production. If no %prec
     declaration was explicitly supplied, it is the precedence level
     of the rightmost terminal symbol in the production's right-hand
     side. *)

  (* The origin of the precedence level of a production: nothing, the
     rightmost terminal symbol of the right-hand side, or the symbol
     named in a %prec declaration. *)
  type production_level =
    | PNone
    | PRightmostToken of Terminal.t
    | PPrecDecl of symbol

  (* [rightmost_terminal prod] scans the right-hand side of [prod] from
     left to right and keeps the last terminal symbol that it meets. The
     result is [PNone] if there is no terminal symbol. *)
  let rightmost_terminal prod =
    Array.fold_left (fun accu symbol ->
      match symbol with
      | Symbol.T tok ->
          PRightmostToken tok
      | Symbol.N _ ->
          accu
    ) PNone (rhs prod)

  (* [combine e1 e2] is a suspension which, when forced, forces [e1], then
     forces [e2] and returns its result. *)
  let combine e1 e2 =
    let both () =
      Lazy.force e1;
      Lazy.force e2
    in
    lazy (both ())

  (* [precedence prod] returns a pair of a delayed computation and a
     precedence level. The level is that of the symbol named in the %prec
     declaration of [prod], if there is one, and otherwise that of the
     rightmost terminal symbol, if there is one. Evaluating the delayed
     computation records as useful every declaration that was consulted
     along the way. *)
  let precedence prod =
    let fact1, prec_decl = consult_prec_decl prod in
    let oterminal =
      match prec_decl with
      | None ->
          rightmost_terminal prod
      | Some { value = terminal } ->
          PPrecDecl terminal
    in
    match oterminal with
    | PNone ->
        fact1, UndefinedPrecedence
    | PRightmostToken tok ->
        let fact2, level = Terminal.precedence_level tok in
        combine fact1 fact2, level
    | PPrecDecl id ->
        let fact2, level = TokPrecedence.leveli id  in
        combine fact1 fact2, level
902 903 904

end

905 906 907
(* The flag is read once, here, after [Production] has filled its tables. *)
let grammar_uses_error_token =
  Production.(!grammar_uses_error_token)

908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941
(* ------------------------------------------------------------------------ *)
(* Maps over productions. *)

module ProductionMap = struct

  include Patricia.Big

  (* [start f] builds a map whose keys are the start productions and
     which maps every such production [prod] to [f prod]. *)

  let start f =
    let extend prod m =
      add prod (f prod) m
    in
    Misc.foldi Production.start extend empty

end

(* ------------------------------------------------------------------------ *)
(* Support for analyses of the grammar, expressed as fixed point computations.
   We exploit the generic fixed point algorithm in [Fix]. *)

(* We perform memoization only at nonterminal symbols. We assume that the
   analysis of a symbol is the analysis of its definition (as opposed to,
   say, a computation that depends on the occurrences of this symbol in
   the grammar). *)

module GenericAnalysis
  (P : Fix.PROPERTY)
  (S : sig
    open P

    (* An analysis is specified by the following functions. *)

    (* [terminal] maps a terminal symbol to a property. *)
    val terminal: Terminal.t -> property

    (* [disjunction] abstracts a binary alternative. That is, when we analyze
       an alternative between several productions, we compute a property for
       each of them independently, then we combine these properties using
       [disjunction]. *)
    val disjunction: property -> (unit -> property) -> property

    (* [P.bottom] should be a neutral element for [disjunction]. We use it in
       the analysis of an alternative with zero branches. *)

    (* [conjunction] abstracts a binary sequence. That is, when we analyze a
       sequence, we compute a property for each member independently, then we
       combine these properties using [conjunction]. In general, conjunction
       needs access to the first member of the sequence (a symbol), not just
       to its analysis (a property). *)
    val conjunction: Symbol.t -> property -> (unit -> property) -> property

    (* [epsilon] abstracts the empty sequence. It should be a neutral element
       for [conjunction]. *)
    val epsilon: property

  end)
: sig
  open P

  (* The results of the analysis take the following form. *)

  (* To every nonterminal symbol, we associate a property. *)
  val nonterminal: Nonterminal.t -> property

  (* To every symbol, we associate a property. *)
  val symbol: Symbol.t -> property

  (* To every suffix of every production, we associate a property.
     The offset [i], which determines the beginning of the suffix,
     must be contained between [0] and [n], inclusive, where [n]
     is the length of the production. *)
  val production: Production.index -> int -> property

end = struct
  open P

  (* The following analysis functions are parameterized over [get], which allows
     making a recursive call to the analysis at a nonterminal symbol. [get] maps
     a nonterminal symbol to a property. *)

  (* Analysis of a symbol. *)

  let symbol sym get : property =
    match sym with
    | Symbol.T tok ->
        S.terminal tok
    | Symbol.N nt ->
        (* Recursive call to the analysis, via [get]. *)
        get nt

  (* Analysis of (a suffix of) a production [prod], starting at index [i]. *)

  let production prod i get : property =
    let rhs = Production.rhs prod in
    let n = Array.length rhs in
    (* Conjunction over all symbols in the right-hand side. This can be viewed
       as a version of [Array.fold_right], which does not necessarily begin at
       index [0]. Note that, because [conjunction] is lazy, it is possible
       to stop early. *)
    let rec loop i =
      if i = n then
        S.epsilon
      else
        let sym = rhs.(i) in
        S.conjunction sym
          (symbol sym get)
          (fun () -> loop (i+1))
    in
    loop i

  (* The analysis is the least fixed point of the following function, which
     analyzes a nonterminal symbol by looking up and analyzing its definition
     as a disjunction of conjunctions of symbols. *)

  let nonterminal nt get : property =
    (* Disjunction over all productions for this nonterminal symbol. *)
    Production.foldnt_lazy nt (fun prod rest ->
      S.disjunction
        (production prod 0 get)
        rest
    ) P.bottom

  (* The least fixed point is taken as follows. Note that it is computed
     on demand, as [lfp] is called by the user. *)

  module F =
    Fix.Make
      (Maps.ArrayAsImperativeMaps(Nonterminal))
      (P)

  let nonterminal =
    F.lfp nonterminal

  (* The auxiliary functions can be published too. *)

  let symbol sym =
    symbol sym nonterminal

  let production prod i =
    production prod i nonterminal

end

(* ------------------------------------------------------------------------ *)
(* Compute which nonterminals are nonempty, that is, recognize a
   nonempty language. Also, compute which nonterminals are
   nullable. The two computations are almost identical. The only
   difference is in the base case: a single terminal symbol is not
   nullable, but is nonempty. *)

module NONEMPTY =
  GenericAnalysis
    (Boolean)
    (struct
      (* Every terminal symbol generates a nonempty language. *)
      let terminal _ = true
      (* A choice is nonempty as soon as one of its branches is. The second
         branch is consulted only when the first one is empty. *)
      let disjunction p q = if p then true else q ()
      (* A sequence is nonempty only if both of its members are. The second
         member is consulted only when the first one is nonempty. *)
      let conjunction _ p q = if p then q () else false
      (* The empty sequence generates the singleton language {epsilon},
         which is nonempty. *)
      let epsilon = true
     end)

module NULLABLE =
  GenericAnalysis
    (Boolean)
    (struct
      (* No terminal symbol is nullable. *)
      let terminal _ = false
      (* A choice is nullable as soon as one of its branches is. The second
         branch is consulted only when the first one is not nullable. *)
      let disjunction p q = if p then true else q ()
      (* A sequence is nullable only if both of its members are. The second
         member is consulted only when the first one is nullable. *)
      let conjunction _ p q = if p then q () else false
      (* The empty sequence is nullable. *)
      let epsilon = true
     end)

(* ------------------------------------------------------------------------ *)
(* Compute FIRST sets. *)

module FIRST =
  GenericAnalysis
    (TerminalSet)
    (struct
      (* The FIRST set of a terminal symbol [t] is the singleton set [t]. *)
      let terminal t = TerminalSet.singleton t
      (* The FIRST set of a choice is the union of the FIRST sets of its
         branches. *)
      let disjunction p q =
        let others = q () in
        TerminalSet.union p others
      (* The FIRST set of a sequence whose first member is [symbol] is the
         FIRST set [p] of that member, to which the FIRST set of the rest of
         the sequence is added only when [symbol] is nullable. When [symbol]
         is not nullable, the rest of the sequence is not examined. *)
      let conjunction symbol p q =
        if not (NULLABLE.symbol symbol) then
          p
        else
          TerminalSet.union p (q ())
      (* The empty sequence has an empty FIRST set. *)
      let epsilon = TerminalSet.empty
     end)

(* ------------------------------------------------------------------------ *)
(* For every nonterminal symbol [nt], compute a word of minimal length
   generated by [nt]. This analysis subsumes [NONEMPTY] and [NULLABLE].
   Indeed, [nt] produces a nonempty language if and only if the minimal
   length is finite; [nt] is nullable if and only if the minimal length is
   zero. *)

(* This analysis is in principle more costly than [NONEMPTY] and [NULLABLE],
   so it is performed only on demand. In practice, it seems to be very cheap:
   its cost is not measurable for any of the grammars in our benchmark
   suite. *)

module MINIMAL =
  GenericAnalysis
    (struct
      (* The property space is [CompletedNatWitness], where the witnesses
         are made of terminal symbols. *)
      include CompletedNatWitness
      type property = Terminal.t CompletedNatWitness.t
     end)
    (struct
      (* A terminal symbol is a word of length 1. *)
      let terminal = CompletedNatWitness.singleton
      (* For a choice, keep the shortest of the branches. *)
      let disjunction = CompletedNatWitness.min_lazy
      (* For a sequence, add up the lengths of the members. The leading
         symbol itself plays no role here. *)
      let conjunction _ = CompletedNatWitness.add_lazy
      (* The empty sequence has length 0. *)
      let epsilon = CompletedNatWitness.epsilon
     end)

(* ------------------------------------------------------------------------ *)

let () =
  if verbose then begin
    (* A start symbol must neither generate the empty language nor generate
       the language {epsilon}: either situation is reported as an error.
       This could in principle be a mere warning, but the function [start]
       in [Engine] finds it convenient to assume that at least one token
       must be read, which rules out both situations. *)
    let check_start_symbol name =
      let nt = Nonterminal.lookup name in
      if not (NONEMPTY.nonterminal nt) then
        Error.error
          (Nonterminal.positions nt)
          "%s generates the empty language." (Nonterminal.print false nt);
      if TerminalSet.is_empty (FIRST.nonterminal nt) then
        Error.error
          (Nonterminal.positions nt)
          "%s generates the language {epsilon}." (Nonterminal.print false nt)
    in
    StringSet.iter check_start_symbol grammar.start_symbols;
    (* For the nonterminal symbols numbered from [Nonterminal.start] up,
       generating the empty language gives rise to a warning only. *)
    for nt = Nonterminal.start to Nonterminal.n - 1 do
      let nonempty = NONEMPTY.nonterminal nt in
      if not nonempty then
        Error.grammar_warning
          (Nonterminal.positions nt)
          "%s generates the empty language." (Nonterminal.print false nt)
    done
  end

(* ------------------------------------------------------------------------ *)
(* Dump the analysis results. *)

(* At log level 2, print, for every nonterminal symbol numbered from
   [Nonterminal.start] up, whether it is nullable, then its FIRST set, then
   the result of the [MINIMAL] analysis. Nothing is done unless [verbose]
   is set. *)
let () =
  if verbose then
    Error.logG 2 (fun f ->
      for nt = Nonterminal.start to Nonterminal.n - 1 do
        Printf.fprintf f "nullable(%s) = %b\n"
          (Nonterminal.print false nt)
          (NULLABLE.nonterminal nt)
      done;
      for nt = Nonterminal.start to Nonterminal.n - 1 do
        Printf.fprintf f "first(%s) = %s\n"
          (Nonterminal.print false nt)
          (TerminalSet.print (FIRST.nonterminal nt))
      done;
      for nt = Nonterminal.start to Nonterminal.n - 1 do
        Printf.fprintf f "minimal(%s) = %s\n"
          (Nonterminal.print false nt)
          (CompletedNatWitness.print Terminal.print (MINIMAL.nonterminal nt))
      done
  )

(* Record a timing checkpoint once the above analyses have been set up. *)
let () =
  if verbose then
    Time.tick "Analysis of the grammar"

(* ------------------------------------------------------------------------ *)
(* Compute FOLLOW sets. Unnecessary for us, but requested by a user. Also,
   this is useful for the SLR(1) test. Thus, we perform this analysis only
   on demand. *)

(* The computation of the symbolic FOLLOW sets follows exactly the same
   pattern as that of the traditional FOLLOW sets. We share code and
   parameterize this computation over a module [P]. The type [P.property]
   intuitively represents a set of symbols. *)

(* [FOLLOW(P)] builds and solves the system of constraints that defines the
   FOLLOW sets of the nonterminal symbols, where sets are represented by
   values of type [P.property]. In addition to [Fix.PROPERTY], the parameter
   must provide [union], an injection [terminal] of terminal symbols, and a
   function [first] such that [first prod i] is the FIRST set (in the sense
   of [P]) of the suffix of the production [prod] that begins at index [i].
   The result is exposed as the function [follow]. *)
module FOLLOW (P : sig
  include Fix.PROPERTY
  val union: property -> property -> property
  val terminal: Terminal.t -> property
  val first: Production.index -> int -> property
end) = struct

  module S =
    FixSolver.Make
      (Maps.ArrayAsImperativeMaps(Nonterminal))
      (P)

  (* Build a system of constraints. Below, [record_ConVar p nt] is used to
     state that the constant [p] contributes to FOLLOW(nt), while
     [record_VarVar nt1 nt2] is used to state that FOLLOW(nt1) contributes
     to FOLLOW(nt2). *)

  let record_ConVar, record_VarVar, solve =
    S.create()

  (* Iterate over all start symbols. These are the nonterminal symbols whose
     number lies below [Nonterminal.start]. *)
  let () =
    let sharp = P.terminal Terminal.sharp in
    for nt = 0 to Nonterminal.start - 1 do
      assert (Nonterminal.is_start nt);
      (* Add # to FOLLOW(nt). *)
      record_ConVar sharp nt
    done
    (* We need to do this explicitly because our start productions are
       of the form S' -> S, not S' -> S #, so # will not automatically
       appear in FOLLOW(S) when the start productions are examined. *)

  (* Iterate over all productions. *)
  let () =
    Array.iteri (fun prod (nt1, rhs) ->
      (* Iterate over all nonterminal symbols [nt2] in the right-hand side. *)
      Array.iteri (fun i symbol ->
        match symbol with
        | Symbol.T _ ->
            ()
        | Symbol.N nt2 ->
            let nullable = NULLABLE.production prod (i+1)
            and first = P.first prod (i+1) in
            (* The FIRST set of the remainder of the right-hand side
               contributes to the FOLLOW set of [nt2]. *)
            record_ConVar first nt2;
            (* If the remainder of the right-hand side is nullable,
               FOLLOW(nt1) contributes to FOLLOW(nt2). *)
            if nullable then
              record_VarVar nt1 nt2
      ) rhs
    ) Production.table

  (* Second pass. Solve the equations (on demand). *)

  let follow : Nonterminal.t -> P.property =
    solve()

end

(* Use the above functor to obtain the standard (concrete) FOLLOW sets. *)

let follow : Nonterminal.t -> TerminalSet.t =
  (* The concrete instance: properties are sets of terminal symbols, a
     terminal symbol is injected as a singleton set, and the FIRST set of a
     production suffix is supplied by the [FIRST] analysis. *)
  let module Concrete = struct
    include TerminalSet
    let terminal = TerminalSet.singleton
    let first = FIRST.production
  end in
  let module F = FOLLOW(Concrete) in
  F.follow

(* At log level 2, display the FOLLOW sets. *)

let () =
  (* Print one line per nonterminal symbol, from [Nonterminal.start] up. *)
  let dump out =
    for nt = Nonterminal.start to Nonterminal.n - 1 do
      let set = TerminalSet.print (follow nt) in
      let name = Nonterminal.print false nt in
      Printf.fprintf out "follow(%s) = %s\n" name set
    done
  in
  if verbose then
    Error.logG 2 dump

(* Compute FOLLOW sets for the terminal symbols as well. Again, unnecessary
   for us, but requested by a user. This is done in a single pass over the
   grammar -- no new fixpoint computation is required. *)

(* A suspended table that maps every terminal symbol to its FOLLOW set. The
   table is built when the suspension is first forced, by a single traversal
   of [Production.table]; it relies on [NULLABLE], on [FIRST], and on the
   FOLLOW sets of the nonterminal symbols, as given by [follow]. *)
let tfollow : TerminalSet.t array Lazy.t =
  lazy (

    (* Initially, every FOLLOW set is empty. *)
    let tfollow =
      Array.make Terminal.n TerminalSet.empty
    in

    (* Iterate over all productions. *)
    Array.iteri (fun prod (nt1, rhs) ->
      (* Iterate over all terminal symbols [t2] in the right-hand side. *)
      Array.iteri (fun i symbol ->
        match symbol with
        | Symbol.N _ ->
            ()
        | Symbol.T t2 ->
            let nullable = NULLABLE.production prod (i+1)
            and first = FIRST.production prod (i+1) in
            (* The FIRST set of the remainder of the right-hand side
               contributes to the FOLLOW set of [t2]. *)
            tfollow.(t2) <- TerminalSet.union first tfollow.(t2);
            (* If the remainder of the right-hand side is nullable,
               FOLLOW(nt1) contributes to FOLLOW(t2). *)
            if nullable then
              tfollow.(t2) <- TerminalSet.union (follow nt1) tfollow.(t2)
      ) rhs
    ) Production.table;

    tfollow

  )

(* Define another accessor. *)

let tfollow t =
  (* Forcing the suspension builds the table upon the first call only. *)
  let table = Lazy.force tfollow in
  table.(t)

(* At log level 3, display the FOLLOW sets for terminal symbols. *)

let () =
  (* Print one line per terminal symbol. *)
  let dump out =
    for t = 0 to Terminal.n - 1 do
      let set = TerminalSet.print (tfollow t) in
      let name = Terminal.print t in
      Printf.fprintf out "follow(%s) = %s\n" name set
    done
  in
  if verbose then
    Error.logG 3 dump

(* ------------------------------------------------------------------------ *)
(* Compute symbolic FIRST and FOLLOW sets. *)

(* The symbolic FIRST set of the word determined by [prod/i] is defined
   (and computed) as follows. *)

let sfirst prod i =
  let rhs = Production.rhs prod in
  let len = Array.length rhs in
  let rec scan k =
    if k <> len then begin
      let head = rhs.(k) in
      (* The symbols that can come after [head]: they matter only if [head]
         is nullable; otherwise, the rest of the word is not examined. *)
      let beyond =
        if NULLABLE.symbol head then scan (k + 1) else SymbolSet.empty
      in
      (* The leading symbol [head] is always a member of the symbolic FIRST
         set of the word that it begins. *)
      SymbolSet.union (SymbolSet.singleton head) beyond
    end
    else
      (* The empty word has an empty symbolic FIRST set. *)
      SymbolSet.empty
  in
  scan i

(* The symbolic FOLLOW sets are computed just like the FOLLOW sets,
   except we use a symbolic FIRST set instead of a standard FIRST
   set. *)

let sfollow : Nonterminal.t -> SymbolSet.t =
  (* The symbolic instance: properties are sets of symbols, a terminal
     symbol [t] is injected as the singleton set of the symbol [Symbol.T t],
     and the FIRST set of a production suffix is supplied by [sfirst]. *)
  let module Symbolic = struct
    include SymbolSet
    let terminal t =
      let sym = Symbol.T t in
      SymbolSet.singleton sym
    let first = sfirst
  end in
  let module F = FOLLOW(Symbolic) in
  F.follow

(* At log level 3, display the symbolic FOLLOW sets. *)

let () =
  (* Print one line per nonterminal symbol, from [Nonterminal.start] up. *)
  let dump out =
    for nt = Nonterminal.start to Nonterminal.n - 1 do
      let set = SymbolSet.print (sfollow nt) in
      let name = Nonterminal.print false nt in
      Printf.fprintf out "sfollow(%s) = %s\n" name set
    done
  in
  if verbose then
    Error.logG 3 dump

(* ------------------------------------------------------------------------ *)
(* Provide explanations about FIRST sets. *)

(* The idea is to explain why a certain token appears in the FIRST set
   for a certain sequence of symbols. Such an explanation involves
   basic assertions of the form (i) symbol N is nullable and (ii) the
   token appears in the FIRST set for symbol N. We choose to take
   these basic facts for granted, instead of recursively explaining
   them, so as to keep explanations short. *)

(* We first produce an explanation in abstract syntax, then
   convert it to a human-readable string. *)

(* An explanation of why a certain token appears in the FIRST set of a
   certain sequence of symbols. Values of this type are built by [explain]
   and turned into text by [convert]. *)
type explanation =
  | EObvious                                 (* the sequence begins with the desired token *)
  | EFirst of Terminal.t * Nonterminal.t     (* the sequence begins with a nonterminal symbol whose FIRST set contains the desired token *)
  | ENullable of Symbol.t list * explanation (* the sequence begins with a list of nullable symbols, and what follows them is explained by the nested explanation *)

(* [explain tok rhs i] explains why the token [tok] appears in the FIRST set
   of the sequence of symbols found in [rhs] at indices [i] and beyond. The
   caller must guarantee that [tok] does appear in this FIRST set: the
   assertions below fail otherwise. *)
let explain (tok : Terminal.t) (rhs : Symbol.t array) (i : int) =
  let length = Array.length rhs in
  let rec loop i =
    (* Because [tok] is in the FIRST set, the end of the sequence cannot be
       reached. *)
    assert (i < length);
    let symbol = rhs.(i) in
    match symbol with
    | Symbol.T tok' ->
        (* A terminal symbol can only be the desired token itself. *)
        assert (Terminal.equal tok tok');
        EObvious
    | Symbol.N nt ->
        if TerminalSet.mem tok (FIRST.nonterminal nt) then
          EFirst (tok, nt)
        else begin
          (* [nt] does not produce [tok], so it must be nullable, and the
             explanation lies further on. Consecutive nullable symbols are
             gathered in a single [ENullable] node. *)
          assert (NULLABLE.nonterminal nt);
          match loop (i + 1) with
          | ENullable (symbols, e) ->
              ENullable (symbol :: symbols, e)
          | e ->
              ENullable ([ symbol ], e)
        end
  in
  loop i

(* [convert e] turns the explanation [e] into a human-readable string. An
   obvious explanation yields the empty string. *)
let rec convert = function
  | EObvious ->
      ""
  | EFirst (tok, nt) ->
      Printf.sprintf "%s can begin with %s"
        (Nonterminal.print false nt)
        (Terminal.print tok)
  | ENullable (symbols, e) ->
      let e = convert e in
      (* The connective is omitted when the nested explanation is empty. *)
      Printf.sprintf "%scan vanish%s%s"
        (Symbol.printl symbols)
        (if e = "" then "" else " and ")
        e

(* ------------------------------------------------------------------------ *)
(* Package the analysis results. *)

(* [Analysis] gathers the results of the analyses defined above under the
   names by which they are published. *)
module Analysis = struct

  let nullable = NULLABLE.nonterminal

  let nullable_symbol = NULLABLE.symbol

  let first = FIRST.nonterminal

  let first_symbol = FIRST.symbol

  (* An initial definition of [nullable_first_prod]. For the suffix of the
     production [prod] that begins at index [i], it pairs the nullability of
     this suffix with its FIRST set. *)

  let nullable_first_prod prod i =
    NULLABLE.production prod i,
    FIRST.production prod i

  (* A memoised version, so as to avoid recomputing along a production's
     right-hand side. The index [i] ranges from 0 up to and including the
     length of the production. *)

  let nullable_first_prod =
    Misc.tabulate Production.n (fun prod ->
      Misc.tabulate (Production.length prod + 1) (fun i ->
        nullable_first_prod prod i
      )
    )

  (* [first_prod_lookahead prod i z] is the FIRST set of the suffix of
     [prod] that begins at index [i], to which the terminal symbol [z] is
     added if this suffix is nullable. *)

  let first_prod_lookahead prod i z =
    let nullable, first = nullable_first_prod prod i in
    if nullable then
      TerminalSet.add z first
    else
      first

  (* [explain_first_rhs tok rhs i] is the textual form of the explanation
     computed by [explain tok rhs i]. *)

  let explain_first_rhs (tok : Terminal.t) (rhs : Symbol.t array) (i : int) =
    convert (explain tok rhs i)

  let follow = follow

  let attributes =
    grammar.gr_attributes

end

(* ------------------------------------------------------------------------ *)
(* Conflict resolution via precedences. *)

module Precedence = struct

  (* The possible outcomes of an attempt to resolve a shift/reduce
     conflict. *)

  type choice =
    | ChooseShift
    | ChooseReduce
    | ChooseNeither
    | DontKnow

  (* The result of comparing two levels: lower, greater, equal, or
     incomparable. *)

  type order = Lt | Gt | Eq | Ic

  (* [precedence_order p1 p2] compares two precedence levels. They are
     incomparable if either of them is undefined or if they do not
     originate in the same input file; otherwise, their integer levels are
     compared. *)

  let precedence_order p1 p2 =
    match p1, p2 with
      | UndefinedPrecedence, _
      | _, UndefinedPrecedence ->
          Ic
      | PrecedenceLevel (m1, l1, _, _), PrecedenceLevel (m2, l2, _, _) ->
          if not (InputFile.same_input_file m1 m2) then
            Ic
          else
            if l1 > l2 then
              Gt
            else if l1 < l2 then
              Lt
            else
              Eq

  (* [production_order p1 p2] compares two production levels. They are
     incomparable if they do not originate in the same input file;
     otherwise, their integer levels are compared. *)

  let production_order p1 p2 =
    match p1, p2 with
      | ProductionLevel (m1, l1), ProductionLevel (m2, l2) ->
          if not (InputFile.same_input_file m1 m2) then
            Ic
          else
            if l1 > l2 then
              Gt
            else if l1 < l2 then
              Lt
            else
              Eq

  (* [shift_reduce tok prod] attempts to resolve a conflict between shifting
     the token [tok] and reducing the production [prod], by comparing the
     precedence level of [tok] with that of [prod]. The suspensions [fact1]
     and [fact2] are forced only if this comparison is conclusive. *)

  let shift_reduce tok prod =
    let fact1, tokp  = Terminal.precedence_level tok
    and fact2, prodp = Production.precedence prod in
    match precedence_order tokp prodp with

      (* Our information is inconclusive. Drop [fact1] and [fact2],
         that is, do not record that this information was useful. *)

    | Ic ->
        DontKnow

      (* Our information is useful. Record that fact by evaluating
         [fact1] and [fact2]. *)

    | (Eq | Lt | Gt) as c ->
        Lazy.force fact1;
        Lazy.force fact2;
        match c with

        | Ic ->
            assert false (* already dispatched *)

        | Eq ->
            begin
              match Terminal.associativity tok with
              | LeftAssoc  -> ChooseReduce
              | RightAssoc -> ChooseShift
              | NonAssoc   -> ChooseNeither
              | _          -> assert false
                              (* If [tok]'s precedence level is defined, then
                                 its associativity must be defined as well. *)
            end

        | Lt ->
            ChooseReduce

        | Gt ->
            ChooseShift

  (* [reduce_reduce prod1 prod2] attempts to resolve a conflict between two
     reductions by comparing the production levels of [prod1] and [prod2].
     The production whose level is lower is chosen; if the levels are equal
     or incomparable, the result is [None]. *)

  let reduce_reduce prod1 prod2 =
    let pl1 = Production.production_level.(prod1)
    and pl2 = Production.production_level.(prod2) in
    match production_order pl1 pl2 with
    | Lt ->
        Some prod1
    | Gt ->
        Some prod2
    | Eq ->
        (* The order is strict except in the presence of parameterized
           non-terminals and/or inlining. Two productions can have the same
           precedence level if they originate, via macro-expansion or via
           inlining, from a single production in the source grammar. *)
        None
    | Ic ->
        None

end

(* This function prints warnings about useless precedence declarations for
   terminal symbols (%left, %right, %nonassoc) and productions (%prec). It
   should be invoked only after the automaton has been constructed. *)

let diagnostics () =
  (* Nothing is reported if [Settings.ignore_all_unused_precedence_levels]
     is set. Otherwise, the diagnostics for tokens are run first, followed
     by those for productions. *)
  if not Settings.ignore_all_unused_precedence_levels then begin
    TokPrecedence.diagnostics();
    Production.diagnostics()
  end

(* ------------------------------------------------------------------------ *)
(* %on_error_reduce declarations. *)

module OnErrorReduce = struct

  (* We keep a [StringMap] internally, and convert back and forth between
     the types [Nonterminal.t] and [string] when querying this map. This
     is not very elegant, and could be changed if desired. *)

  let declarations : Syntax.on_error_reduce_level StringMap.t =
    grammar.on_error_reduce

  (* The conversion from [Nonterminal.t] to the keys of [declarations]. *)

  let print (nt : Nonterminal.t) : string =
    Nonterminal.print false nt

  (* The conversion from the keys of [declarations] to [Nonterminal.t]. *)

  let lookup (nt : string) : Nonterminal.t =
    try
      Nonterminal.lookup nt
    with Not_found ->
      (* If this fails, then we have an [%on_error_reduce] declaration
         for an invalid symbol. *)
      assert false

  (* [reduce prod] tells whether there is a declaration for the left-hand
     side of the production [prod]. *)

  let reduce prod =
    let nt = Production.nt prod in
    StringMap.mem (print nt) declarations

  (* [iter f] applies [f] to every nonterminal symbol for which there is a
     declaration. *)

  let iter f =
    StringMap.iter (fun nt _prec ->
      f (lookup nt)
    ) declarations

  open Precedence

  (* [preferable prod1 prod2] tells whether reducing [prod1] is to be
     preferred over reducing [prod2], based on the levels declared for
     their left-hand sides. The two productions must be distinct, and there
     must exist declarations for both left-hand sides. *)

  let preferable prod1 prod2 =
    (* The two productions that we are comparing must be distinct. *)
    assert (prod1 <> prod2);
    let nt1 = Production.nt prod1
    and nt2 = Production.nt prod2 in
    (* If they have the same left-hand side (which seems rather unlikely?),
       declare them incomparable. *)
    nt1 <> nt2 &&
    (* Otherwise, look up the priority levels associated with their left-hand
       symbols. *)
    let prec1, prec2 =
      try
        StringMap.find (print nt1) declarations,
        StringMap.find (print nt2) declarations
      with Not_found ->
        (* [preferable] should be used to compare two symbols for which
           there exist [%on_error_reduce] declarations. *)
        assert false
    in
    match production_order prec1 prec2 with
    | Gt ->
        (* [prec1] is a higher integer than [prec2], therefore comes later
           in the file. By analogy with [%left] and friends, we give higher
           priority to later declarations. *)
        true
    | Lt ->
        false
    | Eq
    | Ic ->
        (* We could issue a warning or an information message in these cases. *)
        false

end

(* ------------------------------------------------------------------------ *)

end (* module Make *)