tableBackend.ml 27.5 KB
Newer Older
1 2 3 4 5 6
open CodeBits
open Grammar
open IL
open Interface
open Printf
open TokenType
7
open NonterminalType
8 9 10 11 12 13 14 15 16 17 18
open CodePieces

module Run (T : sig end) = struct

(* ------------------------------------------------------------------------ *)

(* Conventional names for modules, exceptions, record fields, functions. *)

let menhirlib =
  "MenhirLib"

19 20
let make_engine =
  menhirlib ^ ".TableInterpreter.Make"
21

22 23 24
let make_symbol =
  menhirlib ^ ".InspectionTableInterpreter.Symbols"

25
let make_inspection =
26
  menhirlib ^ ".InspectionTableInterpreter.Make"
27

28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
let engineTypes =
  menhirlib ^ ".EngineTypes"

let field x =
  engineTypes ^ "." ^ x

let fstate =
  field "state"

let fsemv =
  field "semv"

let fstartp =
  field "startp"

let fendp =
  field "endp"

let fnext =
  field "next"

let fstack =
  field "stack"

let fcurrent =
  field "current"

let entry =
  interpreter ^ ".entry"

58 59 60
let start =
  interpreter ^ ".start"

61 62
(* The following are names of internal sub-modules. *)

63
let basics =
64
  "Basics"
65

66
let tables =
67 68 69 70 71 72 73
  "Tables"

let symbols =
  "Symbols"

let ti =
  "TI"
74

75 76
(* ------------------------------------------------------------------------ *)

77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
(* Statistics. *)

(* Integer division, rounded up. *)

let div a b =
  if a mod b = 0 then a / b else a / b + 1

(* [size] provides a rough measure of the size of its argument, in words.
   The [unboxed] parameter is true if we have already counted 1 for the
   pointer to the object. *)

let rec size unboxed = function
  | EIntConst _
  | ETuple []
  | EData (_, []) ->
      if unboxed then 0 else 1
  | EStringConst s ->
      1 + div (String.length s * 8) Sys.word_size
  | ETuple es
  | EData (_, es)
  | EArray es ->
      1 + List.length es + List.fold_left (fun s e -> s + size true e) 0 es
  | _ ->
      assert false (* not implemented *)

let size =
  size false

(* Optionally, print a measure of each of the tables that we are defining. *)

let define (name, expr) = {
  valpublic = true;
  valpat = PVar name;
  valval = expr
}

let define_and_measure (x, e) =
  Error.logC 1 (fun f ->
    fprintf f
      "The %s table occupies roughly %d bytes.\n"
      x
      (size e * (Sys.word_size / 8))
  );
  define (x, e)


(* ------------------------------------------------------------------------ *)

125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
(* Code generation for semantic actions. *)

(* The functions [reducecellparams] and [reducebody] are adpated from
   [CodeBackend]. *)

(* Things are slightly more regular here than in the code-based
   back-end, since there is no optimization: every stack cell has the
   same structure and holds a state, a semantic value, and a pair of
   positions. Because every semantic value is represented, we do not
   have a separate [unitbindings]. *)

(* [reducecellparams] constructs a pattern that describes the contents
   of a stack cell. If this is the bottom cell, the variable [state]
   is bound to the state found in the cell. If [ids.(i)] is used in
   the semantic action, then it is bound to the semantic value. The
   position variables are always bound. *)

142
let reducecellparams prod i _symbol (next : pattern) : pattern =
143

144
  let ids = Production.identifiers prod in
145 146 147

  PRecord [
    fstate, (if i = 0 then PVar state else PWildcard);
148
    fsemv, PVar ids.(i);
149 150 151 152 153 154 155 156 157 158 159 160
    fstartp, PVar (Printf.sprintf "_startpos_%s_" ids.(i));
    fendp, PVar (Printf.sprintf "_endpos_%s_" ids.(i));
    fnext, next;
  ]

(* The semantic values bound in [reducecellparams] have type [Obj.t].
   They should now be cast to their real type. If we had [PMagic] in
   the syntax of patterns, we could do that in one swoop; since we don't,
   we have to issue a series of casts a posteriori. *)

let reducecellcasts prod i symbol casts =

161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
  let ids = Production.identifiers prod in
  let id = ids.(i) in
  let t : typ =
    match semvtype symbol with
    | [] ->
        tunit
    | [ t ] ->
        t
    | _ ->
        assert false
  in
  (* Cast: [let id = ((Obj.magic id) : t) in ...]. *)
  (
    PVar id,
    EAnnot (EMagic (EVar id), type2scheme t)
  ) :: casts
177

178 179 180 181 182 183
(* 2015/11/04. The start and end positions of an epsilon production are obtained
   by taking the end position stored in the top stack cell (whatever it is). *)

let endpos_of_top_stack_cell =
  ERecordAccess(EVar stack, fendp)

184 185 186 187 188 189
(* This is the body of the [reduce] function associated with
   production [prod]. It assumes that the variables [env] and [stack]
   have been bound. *)

let reducebody prod =

190
  let nt, _rhs = Production.def prod
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
  and ids = Production.identifiers prod
  and length = Production.length prod in

  (* Build a pattern that represents the shape of the stack. Out of
     the stack, we extract a state (except when the production is an
     epsilon production) and a number of semantic values. *)

  (* At the same time, build a series of casts. *)

  let (_ : int), pat, casts =
    Invariant.fold (fun (i, pat, casts) (_ : bool) symbol _ ->
      i + 1,
      reducecellparams prod i symbol pat,
      reducecellcasts prod i symbol casts
    ) (0, PVar stack, []) (Invariant.prodstack prod)
  in

208 209 210 211 212
  (* Determine beforeend/start/end positions for the left-hand side of the
     production, and bind them to the conventional variables [beforeendp],
     [startp], and [endp]. These variables may be unused by the semantic
     action, in which case these bindings are dead code and can be ignored
     by the OCaml compiler. *)
213 214

  let posbindings =
215 216 217
    ( PVar beforeendp,
      endpos_of_top_stack_cell
    ) ::
218 219 220 221
    ( PVar startp,
      if length > 0 then
	EVar (Printf.sprintf "_startpos_%s_" ids.(0))
      else
222
        endpos_of_top_stack_cell
223 224 225 226 227 228 229 230 231
    ) ::
    ( PVar endp,
      if length > 0 then
	EVar (Printf.sprintf "_endpos_%s_" ids.(length - 1))
      else
	EVar startp
    ) :: []
  in

232 233
  (* This cannot be one of the start productions. *)
  assert (not (Production.is_start prod));
234

235
  (* This is a regular production. Perform a reduction. *)
236

237 238 239 240 241 242
  let action =
    Production.action prod
  in
  let act =
    EAnnot (Action.to_il_expr action, type2scheme (semvtypent nt))
  in
243

244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
  EComment (
    Production.print prod,
    blet (
      (pat, EVar stack) ::                  (* destructure the stack *)
      casts @                               (* perform type casts *)
      posbindings @                         (* bind [startp] and [endp] *)
      extrabindings action @                (* add bindings for the weird keywords *)
      [ PVar semv, act ],                   (* run the user's code and bind [semv] *)

      (* Return a new stack, onto which we have pushed a new stack cell. *)

      ERecord [                             (* the new stack cell *)
        fstate, EVar state;                 (* the current state after popping; it will be updated by [goto] *)
        fsemv, ERepr (EVar semv);           (* the newly computed semantic value *)
        fstartp, EVar startp;               (* the newly computed start and end positions *)
        fendp, EVar endp;
        fnext, EVar stack;                  (* this is the stack after popping *)
      ]
262

263 264
    )
  )
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302

let semantic_action prod =
  EFun (
    [ PVar env ],

    if Invariant.ever_reduced prod then

      (* Access the stack and current state via the environment. *)

      (* In fact, the current state needs be bound here only if this is
	 an epsilon production. Otherwise, the variable [state] will be
	 bound by the pattern produced by [reducecellparams] above. *)

      ELet (

	[ PVar stack, ERecordAccess (EVar env, fstack) ] @
	  (if Production.length prod = 0 then [ PVar state, ERecordAccess (EVar env, fcurrent) ] else []),

	(* Then, *)

	reducebody prod

      )

    else

      (* For productions that are never reduced, generate no code. *)

      (* We do this mainly because [Invariant.prodstack] does not
	 support productions that are never reduced. *)
      
      EComment (
	"a production never reduced",
	EApp (EVar "assert", [ EData ("false", []) ])
      )

  )

303 304 305 306 307 308 309 310
(* Export the number of start productions. *)

let start_def =
  define (
    "start",
    EIntConst Production.start
  )

311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
(* ------------------------------------------------------------------------ *)

(* Table encodings. *)

(* Encodings of entries in the default reduction table. *)

let encode_DefRed prod =            (* 1 + prod *)
  1 + Production.p2i prod

let encode_NoDefRed =               (* 0 *)
  0

(* Encodings of entries in the action table. *)

let encode_Reduce prod =            (* prod | 01 *)
  (Production.p2i prod lsl 2) lor 1

let encode_ShiftDiscard s =         (*    s | 10 *)
  ((Lr1.number s) lsl 2) lor 0b10

let encode_ShiftNoDiscard s =       (*    s | 11 *)
  ((Lr1.number s) lsl 2) lor 0b11

let encode_Fail =                   (*        00 *)
  0

(* Encodings of entries in the goto table. *)

let encode_Goto node =              (* 1 + node *)
  1 + Lr1.number node

let encode_NoGoto =                 (* 0 *)
  0

(* Encodings of the hole in the action and goto tables. *)

let hole =
  assert (encode_Fail = 0);
  assert (encode_NoGoto = 0);
  0

(* Encodings of entries in the error bitmap. *)

let encode_Error =                  (* 0 *)
  0

let encode_NoError =                (* 1 *)
  1

360 361
(* Encodings of terminal and nonterminal symbols in the production table. *)

362 363 364
let encode_no_symbol =
  0                                          (* 0 | 0 *)

365
let encode_terminal tok =
366
  (Terminal.t2i tok + 1) lsl 1          (*  t + 1 | 0 *)
367 368

let encode_nonterminal nt =
369
  ((Nonterminal.n2i nt) lsl 1) lor 1        (* nt | 1 *)
370 371 372 373 374 375 376

let encode_symbol = function
  | Symbol.T tok ->
      encode_terminal tok
  | Symbol.N nt ->
      encode_nonterminal nt

377 378 379 380 381 382
let encode_symbol_option = function
  | None ->
      encode_no_symbol
  | Some symbol ->
      encode_symbol symbol

383 384 385 386 387
(* Encoding a Boolean as an integer value. *)

let encode_bool b =
  if b then 1 else 0

388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425
(* ------------------------------------------------------------------------ *)

(* Table compression. *)

(* Our sparse, two-dimensional tables are turned into one-dimensional tables
   via [RowDisplacement]. *)

(* The error bitmap, which is two-dimensional but not sparse, is made
   one-dimensional by simple flattening. *)

(* Every one-dimensional table is then packed via [PackedIntArray]. *)

(* Optionally, we print some information about the compression ratio. *)

(* [population] counts the number of significant entries in a
   two-dimensional matrix. *)

let population (matrix : int array array) =
  Array.fold_left (fun population row ->
    Array.fold_left (fun population entry ->
      if entry = hole then population else population + 1
    ) population row
  ) 0 matrix

(* [marshal1] marshals a one-dimensional array. *)

let marshal1 (table : int array) =
  let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in
  ETuple [ EIntConst bits; EStringConst text ]

(* [marshal11] marshals a one-dimensional array whose bit width is
   statically known to be [1]. *)

let marshal11 (table : int array) =
  let (bits : int), (text : string) = MenhirLib.PackedIntArray.pack table in
  assert (bits = 1);
  EStringConst text

426 427 428 429 430 431 432 433
(* List-based versions of the above functions. *)

let marshal1_list (table : int list) =
  marshal1 (Array.of_list table)

let marshal11_list (table : int list) =
  marshal11 (Array.of_list table)

POTTIER Francois's avatar
POTTIER Francois committed
434 435 436 437 438 439 440
(* [linearize_and_marshal1] marshals an array of integer arrays (of possibly
   different lengths). *)

let linearize_and_marshal1 (table : int array array) =
  let data, entry = MenhirLib.LinearizedArray.make table in
  ETuple [ marshal1 data; marshal1 entry ]

441 442 443 444 445 446 447 448 449 450 451 452
(* [flatten_and_marshal11_list] marshals a two-dimensional bitmap,
   whose width (for now) is assumed to be [Terminal.n - 1]. *)

let flatten_and_marshal11_list (table : int list list) =
  ETuple [
    (* Store the table width. *)
    EIntConst (Terminal.n - 1);
    (* View the table as a one-dimensional array, and marshal it. *)
    marshal11_list (List.flatten table)
  ]

(* [marshal2] marshals a two-dimensional table, with row displacement. *)
453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588

let marshal2 name m n (matrix : int list list) =
  let matrix : int array array =
    Array.of_list (List.map Array.of_list matrix)
  in
  let (displacement : int array), (data : int array) =
    MenhirLib.RowDisplacement.compress
      (=)
      (fun x -> x = hole)
      hole
      m
      n
      matrix
  in
  Error.logC 1 (fun f ->
    fprintf f
      "The %s table is %d entries; %d non-zero; %d compressed.\n"
      name
      (m * n)
      (population matrix)
      (Array.length displacement + Array.length data)
  );
  ETuple [
    marshal1 displacement;
    marshal1 data;
  ]

(* ------------------------------------------------------------------------ *)

(* Table generation. *)

(* The action table. *)

let action node t =
  match Invariant.has_default_reduction node with
  | Some _ ->

      (* [node] has a default reduction; in that case, the action
	 table is never looked up. *)

      hole

  | None ->

      try
	let target = SymbolMap.find (Symbol.T t) (Lr1.transitions node) in

	(* [node] has a transition to [target]. If [target] has a default
	   reduction on [#], use [ShiftNoDiscard], otherwise [ShiftDiscard]. *)

	match Invariant.has_default_reduction target with
	| Some (_, toks) when TerminalSet.mem Terminal.sharp toks ->
	    assert (TerminalSet.cardinal toks = 1);
	    encode_ShiftNoDiscard target
	| _ ->
	    encode_ShiftDiscard target

      with Not_found ->
	try

	  (* [node] has a reduction. *)

	  let prod = Misc.single (TerminalMap.find t (Lr1.reductions node)) in
	  encode_Reduce prod

	with Not_found ->

	  (* [node] has no action. *)

	  encode_Fail

(* In the error bitmap and in the action table, the row that corresponds to the
   [#] pseudo-terminal is never accessed. Thus, we do not create this row. This
   does not create a gap in the table, because this is the right-most row. For
   sanity, we check this fact here. *)

let () =
  assert (Terminal.t2i Terminal.sharp = Terminal.n - 1)

(* The goto table. *)

let goto node nt =
  try
    let target = SymbolMap.find (Symbol.N nt) (Lr1.transitions node) in
    encode_Goto target
  with Not_found ->
    encode_NoGoto

(* The error bitmap reflects which entries in the action table are
   [Fail]. Like the action table, it is not accessed when [node] has a
   default reduction. *)

let error node t =
  if action node t = encode_Fail then
    encode_Error
  else
    encode_NoError

(* The default reductions table. *)

let default_reduction node =
  match Invariant.has_default_reduction node with
  | Some (prod, _) ->
      encode_DefRed prod
  | None ->
      encode_NoDefRed

(* Generate the table definitions. *)

let action =
  define_and_measure (
    "action",
    marshal2 "action" Lr1.n (Terminal.n - 1) (
      Lr1.map (fun node ->
	Terminal.mapx (fun t ->
	  action node t
	)
      )
    )
  )

let goto =
  define_and_measure (
    "goto",
    marshal2 "goto" Lr1.n Nonterminal.n (
      Lr1.map (fun node ->
	Nonterminal.map (fun nt ->
	  goto node nt
	)
      )
    )
  )

let error =
  define_and_measure (
    "error",
589 590 591 592 593
    flatten_and_marshal11_list (
      Lr1.map (fun node ->
        Terminal.mapx (fun t ->
          error node t
        )
594
      )
595
    )
596 597 598 599 600
  )

let default_reduction =
  define_and_measure (
    "default_reduction",
601
    marshal1_list (
602 603 604 605 606 607 608 609 610 611
      Lr1.map (fun node ->
	default_reduction node
      )
    )
  )

let lhs =
  define_and_measure (
    "lhs",
    marshal1 (
612
      Production.amap (fun prod ->
613 614 615 616 617 618 619 620
	Nonterminal.n2i (Production.nt prod)
      )
    )
  )

let semantic_action =
  define (
    "semantic_action",
621 622
    (* Non-start productions only. *)
    EArray (Production.mapx semantic_action)
623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
  )

(* ------------------------------------------------------------------------ *)

(* When [--trace] is enabled, we need tables that map terminals and
   productions to strings. *)

let stringwrap f x =
  EStringConst (f x)

let reduce_or_accept prod =
  match Production.classify prod with
  | Some _ ->
      "Accepting"
  | None ->
      "Reducing production " ^ (Production.print prod)

let trace =
  define_and_measure (
    "trace",
    if Settings.trace then
      EData ("Some", [
	ETuple [
	  EArray (Terminal.map (stringwrap Terminal.print));
	  EArray (Production.map (stringwrap reduce_or_accept));
	]
      ])
    else
      EData ("None", [])
  )

(* ------------------------------------------------------------------------ *)

(* Generate the two functions that map a token to its integer code and to
   its semantic value, respectively. *)

let token2terminal =
  destructuretokendef
    "token2terminal"
    tint
    false
    (fun tok -> EIntConst (Terminal.t2i tok))

let token2value =
  destructuretokendef
    "token2value"
    tobj
    true
    (fun tok ->
      ERepr (
	match Terminal.ocamltype tok with
	| None ->
	    EUnit
	| Some _ ->
	    EVar semv
      )
    )

(* ------------------------------------------------------------------------ *)

683 684 685
(* The client APIs invoke the interpreter with an appropriate start state.
   The monolithic API calls [entry] (see [Engine]), while the incremental
   API calls [start]. *)
686

POTTIER Francois's avatar
POTTIER Francois committed
687
(* An entry point to the monolithic API. *)
688

POTTIER Francois's avatar
POTTIER Francois committed
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706
let monolithic_entry_point state nt t =
  define (
    Nonterminal.print true nt,
    let lexer = "lexer"
    and lexbuf = "lexbuf" in
    EFun (
      [ PVar lexer; PVar lexbuf ],
      EAnnot (
        EMagic (
          EApp (
            EVar entry, [
              EIntConst (Lr1.number state);
              EVar lexer;
              EVar lexbuf
            ]
          )
        ),
        type2scheme (TypTextual t)
707
      )
POTTIER Francois's avatar
POTTIER Francois committed
708 709 710
    )
  )

711 712 713 714 715 716 717 718
(* The whole monolithic API. *)

let monolithic_api : IL.valdef list =
  Lr1.fold_entry (fun _prod state nt t api ->
    monolithic_entry_point state nt t ::
    api
  ) []

POTTIER Francois's avatar
POTTIER Francois committed
719 720 721
(* An entry point to the incremental API. *)

let incremental_entry_point state nt t =
722
  let initial = "initial_position" in
POTTIER Francois's avatar
POTTIER Francois committed
723
  define (
724
    Nonterminal.print true nt,
725 726 727 728
    (* In principle the eta-expansion [fun initial_position -> start s
       initial_position] should not be necessary, since [start] is a pure
       function. However, when [--trace] is enabled, [start] will log messages
       to the standard error channel. *)
POTTIER Francois's avatar
POTTIER Francois committed
729
    EFun (
730
      [ PVar initial ],
POTTIER Francois's avatar
POTTIER Francois committed
731 732 733 734 735
      EAnnot (
        EMagic (
          EApp (
            EVar start, [
              EIntConst (Lr1.number state);
736
              EVar initial;
POTTIER Francois's avatar
POTTIER Francois committed
737 738 739
            ]
          )
        ),
740
        type2scheme (checkpoint (TypTextual t))
741
      )
POTTIER Francois's avatar
POTTIER Francois committed
742 743 744
    )
  )

745 746 747
(* The whole incremental API. *)

let incremental_api : IL.valdef list =
POTTIER Francois's avatar
POTTIER Francois committed
748 749
  Lr1.fold_entry (fun _prod state nt t api ->
    incremental_entry_point state nt t ::
750
    api
751
  ) []
752 753 754

(* ------------------------------------------------------------------------ *)

755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
(* Constructing representations of symbols. *)

(* [eterminal t] is a value of type ['a terminal] (for some ['a]) that
   encodes the terminal symbol [t]. It is just a data constructor of
   the terminal GADT. *)

let eterminal (t : Terminal.t) : expr =
  EData (tokengadtdata (Terminal.print t), [])

(* [enonterminal nt] is a value of type ['a nonterminal] (for some
   ['a]) that encodes the nonterminal symbol [nt]. It is just a data
   constructor of the nonterminal GADT. *)

let enonterminal (nt : Nonterminal.t) : expr =
  EData (tnonterminalgadtdata (Nonterminal.print false nt), [])

(* [esymbol symbol] is a value of type ['a symbol] (for some ['a])
   that encodes the symbol [symbol]. It is built by applying the
   injection [T] or [N] to the terminal or nonterminal encoding. *)

775 776 777 778 779 780
let dataT =
  "T"

let dataN =
  "N"

781 782 783 784 785 786 787
let esymbol (symbol : Symbol.t) : expr =
  match symbol with
  | Symbol.T t ->
      EData (dataT, [ eterminal t ])
  | Symbol.N nt ->
      EData (dataN, [ enonterminal nt ])

788 789 790 791
(* [xsymbol symbol] is a value of type [xsymbol] that encodes the
   symbol [symbol]. It is built by applying the injection [X] (an
   existential quantifier) to [esymbol symbol]. *)

792 793 794
let dataX =
  "X"

795 796 797
let xsymbol (symbol : Symbol.t) : expr =
  EData (dataX, [ esymbol symbol ])

798 799
(* ------------------------------------------------------------------------ *)

800 801 802
(* Produce a function that maps a terminal symbol (represented as an integer
   code) to its representation as an [xsymbol]. Include [error] but not [#],
   i.e., include all of the symbols which can appear in a production. *)
803

POTTIER Francois's avatar
POTTIER Francois committed
804 805 806 807 808
(* Note that, instead of generating a function, we could (a) use an array
   or (b) use an unsafe conversion of an integer to a data constructor,
   then wrap it using [X] and [T/N]. Approach (b) is unsafe and causes
   memory allocation (due to the wrapping) at each call. *)

809 810 811 812 813
let terminal () =
  assert Settings.inspection;
  let t = "t" in
  define (
    "terminal",
814 815 816 817 818 819 820 821 822 823 824 825 826
    EFun ([ PVar t ],
      EMatch (EVar t,
        Terminal.mapx (fun tok ->
          { branchpat = pint (Terminal.t2i tok);
            branchbody = xsymbol (Symbol.T tok) }
        ) @ [
          { branchpat = PWildcard;
            branchbody =
              EComment ("This terminal symbol does not exist.",
                EApp (EVar "assert", [ efalse ])
              ) }
        ]
      )
827 828 829 830 831
    )
  )

(* ------------------------------------------------------------------------ *)

832 833 834 835 836 837 838 839
(* Produce a function that maps a (non-start) nonterminal symbol (represented
   as an integer code) to its representation as an [xsymbol]. *)

let nonterminal () =
  assert Settings.inspection;
  let nt = "nt" in
  define (
    "nonterminal",
840 841 842 843 844 845 846 847 848 849 850 851 852
    EFun ([ PVar nt ],
      EMatch (EVar nt,
        Nonterminal.foldx (fun nt branches ->
          { branchpat = pint (Nonterminal.n2i nt);
            branchbody = xsymbol (Symbol.N nt) } :: branches
        ) [
          { branchpat = PWildcard;
            branchbody =
              EComment ("This nonterminal symbol does not exist.",
                EApp (EVar "assert", [ efalse ])
              ) }
        ]
      )
853 854 855 856 857
    )
  )

(* ------------------------------------------------------------------------ *)

858 859 860 861 862 863 864 865 866 867 868 869 870 871
(* Produce a mapping of every LR(0) state to its incoming symbol (encoded as
   an integer value). (Note that the initial states do not have one.) *)

let lr0_incoming () =
  assert Settings.inspection;
  define_and_measure (
    "lr0_incoming",
    marshal1 (Array.init Lr0.n (fun node ->
      encode_symbol_option (Lr0.incoming_symbol node)
    ))
  )

(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
872 873 874 875 876
(* A table that maps a production (i.e., an integer index) to the production's
   right-hand side. In principle, we use this table for ordinary productions
   only, as opposed to the start productions, whose existence is not exposed
   to the user. However, it is simpler (and not really costly) to include all
   productions in this table. *)
877

POTTIER Francois's avatar
POTTIER Francois committed
878
let rhs () =
879
  assert Settings.inspection;
880
  let productions : int array array =
POTTIER Francois's avatar
POTTIER Francois committed
881 882 883
    Production.amap (fun prod ->
      Array.map encode_symbol (Production.rhs prod)
    )
884 885
  in
  define_and_measure (
POTTIER Francois's avatar
POTTIER Francois committed
886 887
    "rhs",
    linearize_and_marshal1 productions
888 889
  )

890 891
(* ------------------------------------------------------------------------ *)

892 893 894 895 896 897
(* A table that maps an LR(1) state to its LR(0) core. *)

let lr0_core () =
  assert Settings.inspection;
  define_and_measure (
    "lr0_core",
898
    marshal1_list (Lr1.map (fun (node : Lr1.node) ->
899 900 901 902 903 904 905
      Lr0.core (Lr1.state node)
    ))
  )

(* A table that maps an LR(0) state to a set of LR(0) items. *)

let lr0_items () =
906 907 908 909 910 911 912
  assert Settings.inspection;
  let items : int array array =
    Array.init Lr0.n (fun node ->
      Array.map Item.marshal (Array.of_list (Item.Set.elements (Lr0.items node)))
    )
  in
  define_and_measure (
913
    "lr0_items",
914 915 916
    linearize_and_marshal1 items
  )

917 918
(* ------------------------------------------------------------------------ *)

919 920 921 922 923 924 925
(* A table that tells which nonterminal symbols are nullable.
   (For simplicity, this table includes the start symbols.) *)

let nullable () =
  assert Settings.inspection;
  define_and_measure (
    "nullable",
926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948
    marshal11_list (
      Nonterminal.map (fun nt ->
        encode_bool (Analysis.nullable nt)
      )
    )
  )

(* ------------------------------------------------------------------------ *)

(* A two-dimensional bitmap, indexed first by nonterminal symbols, then by
   terminal symbols, encodes the FIRST sets. *)

let first () =
  assert Settings.inspection;
  define_and_measure (
    "first",
    flatten_and_marshal11_list (
      Nonterminal.map (fun nt ->
        Terminal.mapx (fun t ->
          encode_bool (TerminalSet.mem t (Analysis.first nt))
        )
      )
    )
949 950 951 952
  )

(* ------------------------------------------------------------------------ *)

953 954
(* Let's put everything together. *)

955 956
open UnparameterizedSyntax

957 958 959
let grammar =
  Front.grammar

960 961
let program =
 
962
  [ SIFunctor (grammar.parameters,
963

964 965 966 967
    (* Define the internal sub-module [basics], which contains the definitions
       of the exception [Error] and of the type [token]. Then, include this
       sub-module. This sub-module is used again below, as part of the
       application of the functor [TableInterpreter.Make]. *)
968

969 970 971 972 973 974
    SIModuleDef (basics, MStruct (
      SIExcDefs [ excdef ] ::
      interface_to_structure (
        tokentypedef grammar
      )
    )) ::
975

976
    SIInclude (MVar basics) ::
977

978 979
    SIValDefs (false, [ excvaldef ]) ::

980 981
    (* In order to avoid hiding user-defined identifiers, only the
       exception [Error] and the type [token] should be defined (at
982 983 984
       top level, with non-mangled names) above this line. We also
       define the value [_eRR] above this line so that we do not
       have a problem if a user prelude hides the name [Error]. *)
985

986
    SIStretch grammar.preludes ::
987

988 989 990 991 992 993 994
    (* Define the tables. *)

    SIModuleDef (tables,
      MStruct [
        (* The internal sub-module [basics] contains the definitions of the
           exception [Error] and of the type [token]. *)
        SIInclude (MVar basics);
995

996 997 998 999 1000 1001 1002 1003
        (* This is a non-recursive definition, so none of the names
           defined here are visible in the semantic actions. *)
        SIValDefs (false, [
          token2terminal;
          define ("error_terminal", EIntConst (Terminal.t2i Terminal.error));
          token2value;
          default_reduction;
          error;
1004
          start_def;
1005
          action;
1006
          lhs;
1007 1008 1009 1010 1011 1012 1013
          goto;
          semantic_action;
          trace;
        ])
      ]
    ) ::

1014
    SIModuleDef (interpreter, MStruct (
1015

1016 1017 1018
      (* Apply the functor [TableInterpreter.Make] to the tables. *)
      SIModuleDef (ti, MApp (MVar make_engine, MVar tables)) ::
      SIInclude (MVar ti) ::
1019

1020
      listiflazy Settings.inspection (fun () ->
1021

1022
        (* Define the internal sub-module [symbols], which contains type
1023 1024 1025 1026
           definitions. Then, include this sub-module. This sub-module is used
           again below, as part of the application of the functor
           [TableInterpreter.MakeInspection]. *)

1027
        SIModuleDef (symbols, MStruct (
1028 1029
          interface_to_structure (
            tokengadtdef grammar @
1030
            nonterminalgadtdef grammar
1031 1032 1033
          )
        )) ::

1034
        SIInclude (MVar symbols) ::
1035

1036
        SIInclude (MApp (MApp (MVar make_inspection, MVar tables), MStruct (
1037
          (* This module must satisfy [InspectionTableFormat.TABLES]. *)
POTTIER Francois's avatar
POTTIER Francois committed
1038
          (* [lr1state] *)
1039
          SIInclude (MVar ti) ::
1040
          (* [terminal], [nonterminal]. *)
1041
          SIInclude (MVar symbols) ::
1042 1043 1044
          (* This functor application builds the types [symbol] and [xsymbol]
             in terms of the types [terminal] and [nonterminal]. This saves
             us the trouble of generating these definitions. *)
1045
          SIInclude (MApp (MVar make_symbol, MVar symbols)) ::
1046 1047
          SIValDefs (false,
            terminal() ::
1048
            nonterminal() ::
1049
            lr0_incoming() ::
POTTIER Francois's avatar
POTTIER Francois committed
1050
            rhs() ::
1051 1052
            lr0_core() ::
            lr0_items() ::
1053
            nullable() ::
1054
            first() ::
1055 1056 1057 1058
            []
          ) ::
          []
        ))) ::
1059

1060
        []
1061

1062
      )
POTTIER Francois's avatar
POTTIER Francois committed
1063

1064
    )) ::
1065

POTTIER Francois's avatar
POTTIER Francois committed
1066 1067 1068 1069 1070 1071
    SIValDefs (false, monolithic_api) ::

    SIModuleDef (incremental, MStruct [
      SIValDefs (false, incremental_api)
    ]) ::

1072 1073 1074
    SIStretch grammar.postludes ::

  [])]
1075 1076 1077 1078 1079 1080

let () =
  Time.tick "Producing abstract syntax"

end