tableBackend.ml 29.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16 17 18 19
open CodeBits
open Grammar
open IL
open Interface
open Printf
open TokenType
20
open NonterminalType
21 22 23 24 25 26 27 28 29 30 31
open CodePieces

module Run (T : sig end) = struct

(* ------------------------------------------------------------------------ *)

(* Conventional names for modules, exceptions, record fields, functions. *)

let menhirlib =
  "MenhirLib"

32 33 34
let make_engine_table =
  menhirlib ^ ".TableInterpreter.MakeEngineTable"

35
let make_engine =
36
  menhirlib ^ ".Engine.Make"
37

38 39 40
let make_symbol =
  menhirlib ^ ".InspectionTableInterpreter.Symbols"

41
let make_inspection =
42
  menhirlib ^ ".InspectionTableInterpreter.Make"
43

44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
let engineTypes =
  menhirlib ^ ".EngineTypes"

let field x =
  engineTypes ^ "." ^ x

let fstate =
  field "state"

let fsemv =
  field "semv"

let fstartp =
  field "startp"

let fendp =
  field "endp"

let fnext =
  field "next"

let fstack =
  field "stack"

let fcurrent =
  field "current"

let entry =
  interpreter ^ ".entry"

74 75 76
let start =
  interpreter ^ ".start"

77 78 79
let staticVersion =
  menhirlib ^ ".StaticVersion"

80 81
(* The following are names of internal sub-modules. *)

82
let tables =
83 84 85 86 87
  "Tables"

let symbols =
  "Symbols"

88 89 90
let et =
  "ET"

91 92
let ti =
  "TI"
93

94 95
(* ------------------------------------------------------------------------ *)

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
(* Statistics. *)

(* Integer division, rounded up. *)

let div a b =
  if a mod b = 0 then a / b else a / b + 1

(* [size] provides a rough measure of the size of its argument, in words.
   The [unboxed] parameter is true if we have already counted 1 for the
   pointer to the object. *)

let rec size unboxed = function
  | EIntConst _
  | ETuple []
  | EData (_, []) ->
      if unboxed then 0 else 1
  | EStringConst s ->
      1 + div (String.length s * 8) Sys.word_size
  | ETuple es
  | EData (_, es)
  | EArray es ->
      1 + List.length es + List.fold_left (fun s e -> s + size true e) 0 es
  | _ ->
      assert false (* not implemented *)

let size =
  size false

(* Optionally, print a measure of each of the tables that we are defining. *)

let define (name, expr) = {
  valpublic = true;
  valpat = PVar name;
  valval = expr
}

let define_and_measure (x, e) =
  Error.logC 1 (fun f ->
    fprintf f
      "The %s table occupies roughly %d bytes.\n"
      x
      (size e * (Sys.word_size / 8))
  );
  define (x, e)


(* ------------------------------------------------------------------------ *)

144 145
(* Code generation for semantic actions. *)

POTTIER Francois's avatar
POTTIER Francois committed
146
(* The functions [reducecellparams] and [reducebody] are adapted from
147 148 149 150 151 152 153 154 155 156 157 158 159 160
   [CodeBackend]. *)

(* Things are slightly more regular here than in the code-based
   back-end, since there is no optimization: every stack cell has the
   same structure and holds a state, a semantic value, and a pair of
   positions. Because every semantic value is represented, we do not
   have a separate [unitbindings]. *)

(* [reducecellparams] constructs a pattern that describes the contents
   of a stack cell. If this is the bottom cell, the variable [state]
   is bound to the state found in the cell. If [ids.(i)] is used in
   the semantic action, then it is bound to the semantic value. The
   position variables are always bound. *)

161
let reducecellparams prod i _symbol (next : pattern) : pattern =
162

163
  let ids = Production.identifiers prod in
164 165 166

  PRecord [
    fstate, (if i = 0 then PVar state else PWildcard);
167
    fsemv, PVar ids.(i);
168 169 170 171 172 173 174 175 176 177 178 179
    fstartp, PVar (Printf.sprintf "_startpos_%s_" ids.(i));
    fendp, PVar (Printf.sprintf "_endpos_%s_" ids.(i));
    fnext, next;
  ]

(* The semantic values bound in [reducecellparams] have type [Obj.t].
   They should now be cast to their real type. If we had [PMagic] in
   the syntax of patterns, we could do that in one swoop; since we don't,
   we have to issue a series of casts a posteriori. *)

let reducecellcasts prod i symbol casts =

180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
  let ids = Production.identifiers prod in
  let id = ids.(i) in
  let t : typ =
    match semvtype symbol with
    | [] ->
        tunit
    | [ t ] ->
        t
    | _ ->
        assert false
  in
  (* Cast: [let id = ((Obj.magic id) : t) in ...]. *)
  (
    PVar id,
    EAnnot (EMagic (EVar id), type2scheme t)
  ) :: casts
196

197 198 199 200 201 202
(* 2015/11/04. The start and end positions of an epsilon production are obtained
   by taking the end position stored in the top stack cell (whatever it is). *)

let endpos_of_top_stack_cell =
  ERecordAccess(EVar stack, fendp)

203 204 205 206 207 208
(* This is the body of the [reduce] function associated with
   production [prod]. It assumes that the variables [env] and [stack]
   have been bound. *)

let reducebody prod =

209
  let nt, rhs = Production.def prod
210 211 212 213 214 215 216 217 218
  and ids = Production.identifiers prod
  and length = Production.length prod in

  (* Build a pattern that represents the shape of the stack. Out of
     the stack, we extract a state (except when the production is an
     epsilon production) and a number of semantic values. *)

  (* At the same time, build a series of casts. *)

219 220 221
  (* We want a [fold] that begins with the deepest cells in the stack.
     Folding from left to right on [rhs] is appropriate. *)

222
  let (_ : int), pat, casts =
223
    Array.fold_left (fun (i, pat, casts) symbol ->
224 225 226
      i + 1,
      reducecellparams prod i symbol pat,
      reducecellcasts prod i symbol casts
227
    ) (0, PVar stack, []) rhs
228 229
  in

230 231 232 233 234
  (* Determine beforeend/start/end positions for the left-hand side of the
     production, and bind them to the conventional variables [beforeendp],
     [startp], and [endp]. These variables may be unused by the semantic
     action, in which case these bindings are dead code and can be ignored
     by the OCaml compiler. *)
235 236

  let posbindings =
237 238 239
    ( PVar beforeendp,
      endpos_of_top_stack_cell
    ) ::
240 241
    ( PVar startp,
      if length > 0 then
242
        EVar (Printf.sprintf "_startpos_%s_" ids.(0))
243
      else
244
        endpos_of_top_stack_cell
245 246 247
    ) ::
    ( PVar endp,
      if length > 0 then
248
        EVar (Printf.sprintf "_endpos_%s_" ids.(length - 1))
249
      else
250
        EVar startp
251 252 253
    ) :: []
  in

254 255
  (* This cannot be one of the start productions. *)
  assert (not (Production.is_start prod));
256

257
  (* This is a regular production. Perform a reduction. *)
258

259 260 261 262 263 264
  let action =
    Production.action prod
  in
  let act =
    EAnnot (Action.to_il_expr action, type2scheme (semvtypent nt))
  in
265

266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
  EComment (
    Production.print prod,
    blet (
      (pat, EVar stack) ::                  (* destructure the stack *)
      casts @                               (* perform type casts *)
      posbindings @                         (* bind [startp] and [endp] *)
      [ PVar semv, act ],                   (* run the user's code and bind [semv] *)

      (* Return a new stack, onto which we have pushed a new stack cell. *)

      ERecord [                             (* the new stack cell *)
        fstate, EVar state;                 (* the current state after popping; it will be updated by [goto] *)
        fsemv, ERepr (EVar semv);           (* the newly computed semantic value *)
        fstartp, EVar startp;               (* the newly computed start and end positions *)
        fendp, EVar endp;
        fnext, EVar stack;                  (* this is the stack after popping *)
      ]
283

284 285
    )
  )
286 287 288 289 290

let semantic_action prod =
  EFun (
    [ PVar env ],

291
    (* Access the stack and current state via the environment. *)
292

293 294 295
    (* In fact, the current state needs be bound here only if this is
       an epsilon production. Otherwise, the variable [state] will be
       bound by the pattern produced by [reducecellparams] above. *)
296

297
    ELet (
298

299 300
      [ PVar stack, ERecordAccess (EVar env, fstack) ] @
        (if Production.length prod = 0 then [ PVar state, ERecordAccess (EVar env, fcurrent) ] else []),
301

302
      reducebody prod
303

304
    )
305 306
  )

307 308 309 310 311 312 313 314
(* Export the number of start productions. *)

let start_def =
  define (
    "start",
    EIntConst Production.start
  )

315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
(* ------------------------------------------------------------------------ *)

(* Table encodings. *)

(* Encodings of entries in the default reduction table. *)

let encode_DefRed prod =            (* 1 + prod *)
  1 + Production.p2i prod

let encode_NoDefRed =               (* 0 *)
  0

(* Encodings of entries in the action table. *)

let encode_Reduce prod =            (* prod | 01 *)
  (Production.p2i prod lsl 2) lor 1

let encode_ShiftDiscard s =         (*    s | 10 *)
  ((Lr1.number s) lsl 2) lor 0b10

let encode_ShiftNoDiscard s =       (*    s | 11 *)
  ((Lr1.number s) lsl 2) lor 0b11

let encode_Fail =                   (*        00 *)
  0

(* Encodings of entries in the goto table. *)

let encode_Goto node =              (* 1 + node *)
  1 + Lr1.number node

let encode_NoGoto =                 (* 0 *)
  0

(* Encodings of the hole in the action and goto tables. *)

let hole =
  assert (encode_Fail = 0);
  assert (encode_NoGoto = 0);
  0

(* Encodings of entries in the error bitmap. *)

let encode_Error =                  (* 0 *)
  0

let encode_NoError =                (* 1 *)
  1

364 365
(* Encodings of terminal and nonterminal symbols in the production table. *)

366 367 368
let encode_no_symbol =
  0                                          (* 0 | 0 *)

369
let encode_terminal tok =
370
  (Terminal.t2i tok + 1) lsl 1          (*  t + 1 | 0 *)
371 372

let encode_nonterminal nt =
373
  ((Nonterminal.n2i nt) lsl 1) lor 1        (* nt | 1 *)
374 375 376 377 378 379 380

let encode_symbol = function
  | Symbol.T tok ->
      encode_terminal tok
  | Symbol.N nt ->
      encode_nonterminal nt

381 382 383 384 385 386
let encode_symbol_option = function
  | None ->
      encode_no_symbol
  | Some symbol ->
      encode_symbol symbol

387 388 389 390 391
(* Encoding a Boolean as an integer value. *)

let encode_bool b =
  if b then 1 else 0

392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
(* ------------------------------------------------------------------------ *)

(* Table compression. *)

(* Our sparse, two-dimensional tables are turned into one-dimensional tables
   via [RowDisplacement]. *)

(* The error bitmap, which is two-dimensional but not sparse, is made
   one-dimensional by simple flattening. *)

(* Every one-dimensional table is then packed via [PackedIntArray]. *)

(* Optionally, we print some information about the compression ratio. *)

(* [population] counts the number of significant entries in a
   two-dimensional matrix. *)

let population (matrix : int array array) =
  Array.fold_left (fun population row ->
    Array.fold_left (fun population entry ->
      if entry = hole then population else population + 1
    ) population row
  ) 0 matrix

(* [marshal1] marshals a one-dimensional array. *)

let marshal1 (table : int array) =
  let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in
  ETuple [ EIntConst bits; EStringConst text ]

(* [marshal11] marshals a one-dimensional array whose bit width is
   statically known to be [1]. *)

let marshal11 (table : int array) =
  let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in
  assert (bits = 1);
  EStringConst text

430 431 432 433 434 435 436 437
(* List-based versions of the above functions. *)

let marshal1_list (table : int list) =
  marshal1 (Array.of_list table)

let marshal11_list (table : int list) =
  marshal11 (Array.of_list table)

POTTIER Francois's avatar
POTTIER Francois committed
438 439 440 441 442 443 444
(* [linearize_and_marshal1] marshals an array of integer arrays (of possibly
   different lengths). *)

let linearize_and_marshal1 (table : int array array) =
  let data, entry = MenhirLib.LinearizedArray.make table in
  ETuple [ marshal1 data; marshal1 entry ]

445 446 447 448 449 450 451 452 453 454 455 456
(* [flatten_and_marshal11_list] marshals a two-dimensional bitmap,
   whose width (for now) is assumed to be [Terminal.n - 1]. *)

let flatten_and_marshal11_list (table : int list list) =
  ETuple [
    (* Store the table width. *)
    EIntConst (Terminal.n - 1);
    (* View the table as a one-dimensional array, and marshal it. *)
    marshal11_list (List.flatten table)
  ]

(* [marshal2] marshals a two-dimensional table, with row displacement. *)
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490

let marshal2 name m n (matrix : int list list) =
  let matrix : int array array =
    Array.of_list (List.map Array.of_list matrix)
  in
  let (displacement : int array), (data : int array) =
    MenhirLib.RowDisplacement.compress
      (=)
      (fun x -> x = hole)
      hole
      m
      n
      matrix
  in
  Error.logC 1 (fun f ->
    fprintf f
      "The %s table is %d entries; %d non-zero; %d compressed.\n"
      name
      (m * n)
      (population matrix)
      (Array.length displacement + Array.length data)
  );
  ETuple [
    marshal1 displacement;
    marshal1 data;
  ]

(* ------------------------------------------------------------------------ *)

(* Table generation. *)

(* The action table. *)

let action node t =
491
  match Default.has_default_reduction node with
492 493 494
  | Some _ ->

      (* [node] has a default reduction; in that case, the action
495
         table is never looked up. *)
496 497 498 499 500 501

      hole

  | None ->

      try
502
        let target = SymbolMap.find (Symbol.T t) (Lr1.transitions node) in
503

504 505
        (* [node] has a transition to [target]. If [target] has a default
           reduction on [#], use [ShiftNoDiscard], otherwise [ShiftDiscard]. *)
506

507
        match Default.has_default_reduction target with
508 509 510 511 512
        | Some (_, toks) when TerminalSet.mem Terminal.sharp toks ->
            assert (TerminalSet.cardinal toks = 1);
            encode_ShiftNoDiscard target
        | _ ->
            encode_ShiftDiscard target
513 514

      with Not_found ->
515
        try
516

517
          (* [node] has a reduction. *)
518

519 520
          let prod = Misc.single (TerminalMap.find t (Lr1.reductions node)) in
          encode_Reduce prod
521

522
        with Not_found ->
523

524
          (* [node] has no action. *)
525

526
          encode_Fail
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557

(* In the error bitmap and in the action table, the row that corresponds to the
   [#] pseudo-terminal is never accessed. Thus, we do not create this row. This
   does not create a gap in the table, because this is the right-most row. For
   sanity, we check this fact here. *)

let () =
  assert (Terminal.t2i Terminal.sharp = Terminal.n - 1)

(* The goto table. *)

let goto node nt =
  try
    let target = SymbolMap.find (Symbol.N nt) (Lr1.transitions node) in
    encode_Goto target
  with Not_found ->
    encode_NoGoto

(* The error bitmap reflects which entries in the action table are
   [Fail]. Like the action table, it is not accessed when [node] has a
   default reduction. *)

let error node t =
  if action node t = encode_Fail then
    encode_Error
  else
    encode_NoError

(* The default reductions table. *)

let default_reduction node =
558
  match Default.has_default_reduction node with
559 560 561 562 563 564 565 566 567 568 569 570
  | Some (prod, _) ->
      encode_DefRed prod
  | None ->
      encode_NoDefRed

(* Generate the table definitions. *)

let action =
  define_and_measure (
    "action",
    marshal2 "action" Lr1.n (Terminal.n - 1) (
      Lr1.map (fun node ->
571 572 573
        Terminal.mapx (fun t ->
          action node t
        )
574 575 576 577 578 579 580 581 582
      )
    )
  )

let goto =
  define_and_measure (
    "goto",
    marshal2 "goto" Lr1.n Nonterminal.n (
      Lr1.map (fun node ->
583 584 585
        Nonterminal.map (fun nt ->
          goto node nt
        )
586 587 588 589 590 591 592
      )
    )
  )

let error =
  define_and_measure (
    "error",
593 594 595 596 597
    flatten_and_marshal11_list (
      Lr1.map (fun node ->
        Terminal.mapx (fun t ->
          error node t
        )
598
      )
599
    )
600 601 602 603 604
  )

let default_reduction =
  define_and_measure (
    "default_reduction",
605
    marshal1_list (
606
      Lr1.map (fun node ->
607
        default_reduction node
608 609 610 611 612 613 614 615
      )
    )
  )

let lhs =
  define_and_measure (
    "lhs",
    marshal1 (
616
      Production.amap (fun prod ->
617
        Nonterminal.n2i (Production.nt prod)
618 619 620 621 622 623 624
      )
    )
  )

let semantic_action =
  define (
    "semantic_action",
625 626
    (* Non-start productions only. *)
    EArray (Production.mapx semantic_action)
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
  )

(* ------------------------------------------------------------------------ *)

(* When [--trace] is enabled, we need tables that map terminals and
   productions to strings. *)

let stringwrap f x =
  EStringConst (f x)

let reduce_or_accept prod =
  match Production.classify prod with
  | Some _ ->
      "Accepting"
  | None ->
      "Reducing production " ^ (Production.print prod)

let trace =
  define_and_measure (
    "trace",
    if Settings.trace then
      EData ("Some", [
649 650 651 652
        ETuple [
          EArray (Terminal.map (stringwrap Terminal.print));
          EArray (Production.map (stringwrap reduce_or_accept));
        ]
653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
      ])
    else
      EData ("None", [])
  )

(* ------------------------------------------------------------------------ *)

(* Generate the two functions that map a token to its integer code and to
   its semantic value, respectively. *)

let token2terminal =
  destructuretokendef
    "token2terminal"
    tint
    false
    (fun tok -> EIntConst (Terminal.t2i tok))

let token2value =
  destructuretokendef
    "token2value"
    tobj
    true
    (fun tok ->
      ERepr (
677 678 679 680 681
        match Terminal.ocamltype tok with
        | None ->
            EUnit
        | Some _ ->
            EVar semv
682 683 684 685 686
      )
    )

(* ------------------------------------------------------------------------ *)

687 688 689
(* The client APIs invoke the interpreter with an appropriate start state.
   The monolithic API calls [entry] (see [Engine]), while the incremental
   API calls [start]. *)
690

POTTIER Francois's avatar
POTTIER Francois committed
691
(* An entry point to the monolithic API. *)
692

POTTIER Francois's avatar
POTTIER Francois committed
693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
let monolithic_entry_point state nt t =
  define (
    Nonterminal.print true nt,
    let lexer = "lexer"
    and lexbuf = "lexbuf" in
    EFun (
      [ PVar lexer; PVar lexbuf ],
      EAnnot (
        EMagic (
          EApp (
            EVar entry, [
              EIntConst (Lr1.number state);
              EVar lexer;
              EVar lexbuf
            ]
          )
        ),
        type2scheme (TypTextual t)
711
      )
POTTIER Francois's avatar
POTTIER Francois committed
712 713 714
    )
  )

715 716 717 718 719 720 721 722
(* The whole monolithic API. *)

let monolithic_api : IL.valdef list =
  Lr1.fold_entry (fun _prod state nt t api ->
    monolithic_entry_point state nt t ::
    api
  ) []

POTTIER Francois's avatar
POTTIER Francois committed
723 724 725
(* An entry point to the incremental API. *)

let incremental_entry_point state nt t =
726
  let initial = "initial_position" in
POTTIER Francois's avatar
POTTIER Francois committed
727
  define (
728
    Nonterminal.print true nt,
729 730 731 732
    (* In principle the eta-expansion [fun initial_position -> start s
       initial_position] should not be necessary, since [start] is a pure
       function. However, when [--trace] is enabled, [start] will log messages
       to the standard error channel. *)
POTTIER Francois's avatar
POTTIER Francois committed
733
    EFun (
734
      [ PVar initial ],
POTTIER Francois's avatar
POTTIER Francois committed
735 736 737 738 739
      EAnnot (
        EMagic (
          EApp (
            EVar start, [
              EIntConst (Lr1.number state);
740
              EVar initial;
POTTIER Francois's avatar
POTTIER Francois committed
741 742 743
            ]
          )
        ),
744
        type2scheme (checkpoint (TypTextual t))
745
      )
POTTIER Francois's avatar
POTTIER Francois committed
746 747 748
    )
  )

749 750 751
(* The whole incremental API. *)

let incremental_api : IL.valdef list =
POTTIER Francois's avatar
POTTIER Francois committed
752 753
  Lr1.fold_entry (fun _prod state nt t api ->
    incremental_entry_point state nt t ::
754
    api
755
  ) []
756 757 758

(* ------------------------------------------------------------------------ *)

759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778
(* Constructing representations of symbols. *)

(* [eterminal t] is a value of type ['a terminal] (for some ['a]) that
   encodes the terminal symbol [t]. It is just a data constructor of
   the terminal GADT. *)

let eterminal (t : Terminal.t) : expr =
  EData (tokengadtdata (Terminal.print t), [])

(* [enonterminal nt] is a value of type ['a nonterminal] (for some
   ['a]) that encodes the nonterminal symbol [nt]. It is just a data
   constructor of the nonterminal GADT. *)

let enonterminal (nt : Nonterminal.t) : expr =
  EData (tnonterminalgadtdata (Nonterminal.print false nt), [])

(* [esymbol symbol] is a value of type ['a symbol] (for some ['a])
   that encodes the symbol [symbol]. It is built by applying the
   injection [T] or [N] to the terminal or nonterminal encoding. *)

779 780 781 782 783 784
let dataT =
  "T"

let dataN =
  "N"

785 786 787 788 789 790 791
let esymbol (symbol : Symbol.t) : expr =
  match symbol with
  | Symbol.T t ->
      EData (dataT, [ eterminal t ])
  | Symbol.N nt ->
      EData (dataN, [ enonterminal nt ])

792 793 794 795
(* [xsymbol symbol] is a value of type [xsymbol] that encodes the
   symbol [symbol]. It is built by applying the injection [X] (an
   existential quantifier) to [esymbol symbol]. *)

796 797 798
let dataX =
  "X"

799 800 801
let xsymbol (symbol : Symbol.t) : expr =
  EData (dataX, [ esymbol symbol ])

802 803
(* ------------------------------------------------------------------------ *)

804 805 806
(* Produce a function that maps a terminal symbol (represented as an integer
   code) to its representation as an [xsymbol]. Include [error] but not [#],
   i.e., include all of the symbols which can appear in a production. *)
807

POTTIER Francois's avatar
POTTIER Francois committed
808 809 810 811 812
(* Note that, instead of generating a function, we could (a) use an array
   or (b) use an unsafe conversion of an integer to a data constructor,
   then wrap it using [X] and [T/N]. Approach (b) is unsafe and causes
   memory allocation (due to the wrapping) at each call. *)

813 814 815 816 817
let terminal () =
  assert Settings.inspection;
  let t = "t" in
  define (
    "terminal",
818 819 820 821 822 823 824 825 826 827 828 829 830
    EFun ([ PVar t ],
      EMatch (EVar t,
        Terminal.mapx (fun tok ->
          { branchpat = pint (Terminal.t2i tok);
            branchbody = xsymbol (Symbol.T tok) }
        ) @ [
          { branchpat = PWildcard;
            branchbody =
              EComment ("This terminal symbol does not exist.",
                EApp (EVar "assert", [ efalse ])
              ) }
        ]
      )
831 832 833 834 835
    )
  )

(* ------------------------------------------------------------------------ *)

836 837 838 839 840 841 842 843
(* Produce a function that maps a (non-start) nonterminal symbol (represented
   as an integer code) to its representation as an [xsymbol]. *)

let nonterminal () =
  assert Settings.inspection;
  let nt = "nt" in
  define (
    "nonterminal",
844 845 846 847 848 849 850 851 852 853 854 855 856
    EFun ([ PVar nt ],
      EMatch (EVar nt,
        Nonterminal.foldx (fun nt branches ->
          { branchpat = pint (Nonterminal.n2i nt);
            branchbody = xsymbol (Symbol.N nt) } :: branches
        ) [
          { branchpat = PWildcard;
            branchbody =
              EComment ("This nonterminal symbol does not exist.",
                EApp (EVar "assert", [ efalse ])
              ) }
        ]
      )
857 858 859 860 861
    )
  )

(* ------------------------------------------------------------------------ *)

862 863 864 865 866 867 868 869 870 871 872 873 874 875
(* Produce a mapping of every LR(0) state to its incoming symbol (encoded as
   an integer value). (Note that the initial states do not have one.) *)

let lr0_incoming () =
  assert Settings.inspection;
  define_and_measure (
    "lr0_incoming",
    marshal1 (Array.init Lr0.n (fun node ->
      encode_symbol_option (Lr0.incoming_symbol node)
    ))
  )

(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
876 877 878 879 880
(* A table that maps a production (i.e., an integer index) to the production's
   right-hand side. In principle, we use this table for ordinary productions
   only, as opposed to the start productions, whose existence is not exposed
   to the user. However, it is simpler (and not really costly) to include all
   productions in this table. *)
881

POTTIER Francois's avatar
POTTIER Francois committed
882
let rhs () =
883
  assert Settings.inspection;
884
  let productions : int array array =
POTTIER Francois's avatar
POTTIER Francois committed
885 886 887
    Production.amap (fun prod ->
      Array.map encode_symbol (Production.rhs prod)
    )
888 889
  in
  define_and_measure (
POTTIER Francois's avatar
POTTIER Francois committed
890 891
    "rhs",
    linearize_and_marshal1 productions
892 893
  )

894 895
(* ------------------------------------------------------------------------ *)

896 897 898 899 900 901
(* A table that maps an LR(1) state to its LR(0) core. *)

let lr0_core () =
  assert Settings.inspection;
  define_and_measure (
    "lr0_core",
902
    marshal1_list (Lr1.map (fun (node : Lr1.node) ->
903 904 905 906 907 908 909
      Lr0.core (Lr1.state node)
    ))
  )

(* A table that maps an LR(0) state to a set of LR(0) items. *)

let lr0_items () =
910 911 912 913 914 915 916
  assert Settings.inspection;
  let items : int array array =
    Array.init Lr0.n (fun node ->
      Array.map Item.marshal (Array.of_list (Item.Set.elements (Lr0.items node)))
    )
  in
  define_and_measure (
917
    "lr0_items",
918 919 920
    linearize_and_marshal1 items
  )

921 922
(* ------------------------------------------------------------------------ *)

923 924 925 926 927 928 929
(* A table that tells which nonterminal symbols are nullable.
   (For simplicity, this table includes the start symbols.) *)

let nullable () =
  assert Settings.inspection;
  define_and_measure (
    "nullable",
930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952
    marshal11_list (
      Nonterminal.map (fun nt ->
        encode_bool (Analysis.nullable nt)
      )
    )
  )

(* ------------------------------------------------------------------------ *)

(* A two-dimensional bitmap, indexed first by nonterminal symbols, then by
   terminal symbols, encodes the FIRST sets. *)

let first () =
  assert Settings.inspection;
  define_and_measure (
    "first",
    flatten_and_marshal11_list (
      Nonterminal.map (fun nt ->
        Terminal.mapx (fun t ->
          encode_bool (TerminalSet.mem t (Analysis.first nt))
        )
      )
    )
953 954 955 956
  )

(* ------------------------------------------------------------------------ *)

957 958 959 960 961 962 963 964 965 966 967 968 969
(* A reference to [MenhirLib.StaticVersion.require_XXXXXXXX], where [XXXXXXXX]
   is our 8-digit version number. This ensures that the generated code can be
   linked only with an appropriate version of MenhirLib. This is important
   because we use unsafe casts, and a version mismatch could cause a crash. *)

let versiondef = {
  valpublic = true;
  valpat = PUnit;
  valval = EVar (staticVersion ^ ".require_" ^ Version.version);
}

(* ------------------------------------------------------------------------ *)

970 971
(* Let's put everything together. *)

972
open BasicSyntax
973

974 975 976
let grammar =
  Front.grammar

977
let program =
978

979
  [ SIFunctor (grammar.parameters,
980

981 982 983 984 985 986 987 988 989
    (* Make a reference to [MenhirLib.StaticVersion.require_XXXXXXXX], where
       [XXXXXXXX] is our 8-digit version number. This ensures that the
       generated code can be linked only with an appropriate version of
       MenhirLib. This is important because we use unsafe casts, and a
       version mismatch could cause a crash. *)

    SIComment "This generated code requires the following version of MenhirLib:" ::
    SIValDefs (false, [ versiondef ]) ::

990 991 992 993
    (* Define the internal sub-module [basics], which contains the definitions
       of the exception [Error] and of the type [token]. Then, include this
       sub-module. This sub-module is used again below, as part of the
       application of the functor [TableInterpreter.Make]. *)
994

995
    mbasics grammar @
996

997 998
    (* In order to avoid hiding user-defined identifiers, only the
       exception [Error] and the type [token] should be defined (at
999 1000 1001
       top level, with non-mangled names) above this line. We also
       define the value [_eRR] above this line so that we do not
       have a problem if a user prelude hides the name [Error]. *)
1002

1003
    SIStretch grammar.preludes ::
1004

1005 1006 1007 1008 1009 1010 1011
    (* Define the tables. *)

    SIModuleDef (tables,
      MStruct [
        (* The internal sub-module [basics] contains the definitions of the
           exception [Error] and of the type [token]. *)
        SIInclude (MVar basics);
1012

1013 1014 1015 1016 1017 1018 1019 1020
        (* This is a non-recursive definition, so none of the names
           defined here are visible in the semantic actions. *)
        SIValDefs (false, [
          token2terminal;
          define ("error_terminal", EIntConst (Terminal.t2i Terminal.error));
          token2value;
          default_reduction;
          error;
1021
          start_def;
1022
          action;
1023
          lhs;
1024 1025 1026 1027 1028 1029 1030
          goto;
          semantic_action;
          trace;
        ])
      ]
    ) ::

1031
    SIModuleDef (interpreter, MStruct (
1032

1033 1034 1035 1036
      (* Apply the functor [TableInterpreter.MakeEngineTable] to the tables. *)
      SIModuleDef (et, MApp (MVar make_engine_table, MVar tables)) ::
      (* Apply the functor [Engine.Make] to obtain an engine. *)
      SIModuleDef (ti, MApp (MVar make_engine, MVar et)) ::
1037
      SIInclude (MVar ti) ::
1038

1039
      listiflazy Settings.inspection (fun () ->
1040

1041
        (* Define the internal sub-module [symbols], which contains type
1042 1043 1044 1045
           definitions. Then, include this sub-module. This sub-module is used
           again below, as part of the application of the functor
           [TableInterpreter.MakeInspection]. *)

1046
        SIModuleDef (symbols, MStruct (
1047 1048
          interface_to_structure (
            tokengadtdef grammar @
1049
            nonterminalgadtdef grammar
1050 1051 1052
          )
        )) ::

1053
        SIInclude (MVar symbols) ::
1054

1055
        (* Apply the functor [InspectionTableInterpreter.Make], which expects
1056
           four arguments. *)
POTTIER Francois's avatar
POTTIER Francois committed
1057
        SIInclude (mapp (MVar make_inspection) [
1058
          (* Argument 1, of type [TableFormat.TABLES]. *)
POTTIER Francois's avatar
POTTIER Francois committed
1059
          MVar tables;
1060
          (* Argument 2, of type [InspectionTableFormat.TABLES]. *)
POTTIER Francois's avatar
POTTIER Francois committed
1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080
          MStruct (
            (* [lr1state] *)
            SIInclude (MVar ti) ::
            (* [terminal], [nonterminal]. *)
            SIInclude (MVar symbols) ::
            (* This functor application builds the types [symbol] and [xsymbol]
               in terms of the types [terminal] and [nonterminal]. This saves
               us the trouble of generating these definitions. *)
            SIInclude (MApp (MVar make_symbol, MVar symbols)) ::
            SIValDefs (false,
              terminal() ::
              nonterminal() ::
              lr0_incoming() ::
              rhs() ::
              lr0_core() ::
              lr0_items() ::
              nullable() ::
              first() ::
              []
            ) ::
1081
            []
POTTIER Francois's avatar
POTTIER Francois committed
1082
          );
1083 1084
          (* Argument 3, of type [EngineTypes.TABLE]. *)
          MVar et;
1085 1086
          (* Argument 4, of type [EngineTypes.ENGINE with ...]. *)
          MVar ti;
POTTIER Francois's avatar
POTTIER Francois committed
1087
        ]) ::
1088

1089
        []
1090

1091
      )
POTTIER Francois's avatar
POTTIER Francois committed
1092

1093
    )) ::
1094

POTTIER Francois's avatar
POTTIER Francois committed
1095 1096 1097 1098 1099 1100
    SIValDefs (false, monolithic_api) ::

    SIModuleDef (incremental, MStruct [
      SIValDefs (false, incremental_api)
    ]) ::

1101 1102 1103
    SIStretch grammar.postludes ::

  [])]
1104 1105 1106 1107 1108

let () =
  Time.tick "Producing abstract syntax"

end