inlining.ml 21.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
let position = Positions.position
15
open Keyword
16
type sw = Action.sw
17
open BasicSyntax
18
open ListMonad
19 20
let drop = MenhirLib.General.drop
let take = MenhirLib.General.take
21

POTTIER Francois's avatar
POTTIER Francois committed
22 23 24 25 26 27 28 29 30
(* -------------------------------------------------------------------------- *)

(* Throughout this file, branches (productions) are represented as lists of
   producers. We consider it acceptable to perform operations whose cost is
   linear in the length of a production, even when (with more complicated
   code) it would be possible to eliminate this cost. *)

(* -------------------------------------------------------------------------- *)

31
(* [search p i xs] searches the list [xs] for an element [x] that satisfies [p].
POTTIER Francois's avatar
POTTIER Francois committed
32 33
   If successful, then it returns a pair of: - [i] plus the offset of [x] in the
   list, and - the element [x]. *)
34

POTTIER Francois's avatar
POTTIER Francois committed
35
let rec search (p : 'a -> bool) (i : int) (xs : 'a list) : (int * 'a) option =
36 37 38 39 40 41
  match xs with
  | [] ->
      None
  | x :: xs ->
      if p x then Some (i, x) else search p (i+1) xs

42 43 44 45 46 47 48
(* [search_at p i xs] searches the list [xs] for an element [x] that satisfies
   [p]. The search begins at index [i] in the list. If successful, then it
   returns a pair of: - the offset of [x] in the list, and - the element [x]. *)

let search_at p i xs =
  search p i (drop i xs)

POTTIER Francois's avatar
POTTIER Francois committed
49
(* -------------------------------------------------------------------------- *)
50

51 52 53 54 55 56 57 58 59 60 61 62
(* [find grammar symbol] looks up the definition of [symbol], which must be
   a valid nonterminal symbol, in the grammar [grammar]. *)

let find grammar symbol : rule =
  try
    StringMap.find symbol grammar.rules
  with Not_found ->
    (* This cannot happen. *)
    assert false

(* -------------------------------------------------------------------------- *)

63 64 65
(* [check_no_producer_attributes] checks that a producer, which represents a
   use site of an %inline symbol, does not carry any attributes. This ensures
   that we need not worry about propagating attributes through inlining. *)
66 67 68 69 70 71 72

let check_no_producer_attributes producer =
  match producer_attributes producer with
  | [] ->
      ()
  | (id, _payload) :: _attributes ->
      Error.error
73
        [position id]
74 75 76 77
        "the nonterminal symbol %s is declared %%inline.\n\
         A use of it cannot carry an attribute."
        (producer_symbol producer)

78
(* -------------------------------------------------------------------------- *)
79

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
(* 2015/11/18. The interaction of %prec and %inline is not documented.
   It used to be the case that we would disallow marking a production
   both %inline and %prec. Now, we allow it, but we check that (1) it
   is inlined at the last position of the host production and (2) the
   host production does not already have a %prec annotation. *)

let check_prec_inline caller producer nsuffix callee =
  callee.branch_prec_annotation |> Option.iter (fun callee_prec ->
    (* The callee has a %prec annotation. *)
    (* Check condition 1. *)
    if nsuffix > 0 then begin
      let symbol = producer_symbol producer in
      Error.error [ position callee_prec; caller.branch_position ]
        "this production carries a %%prec annotation,\n\
         and the nonterminal symbol %s is marked %%inline.\n\
         For this reason, %s can be used only in tail position."
        symbol symbol
    end;
    (* Check condition 2. *)
    caller.branch_prec_annotation |> Option.iter (fun caller_prec ->
      let symbol = producer_symbol producer in
      Error.error [ position callee_prec; position caller_prec ]
        "this production carries a %%prec annotation,\n\
         and the nonterminal symbol %s is marked %%inline.\n\
         For this reason, %s cannot be used in a production\n\
         which itself carries a %%prec annotation."
        symbol symbol
    )
  )

(* -------------------------------------------------------------------------- *)

POTTIER Francois's avatar
POTTIER Francois committed
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
(* 2015/11/18. If the callee has a %prec annotation (which implies that the
   caller does not have one, and that the callee appears in tail position in
   the caller) then the annotation is inherited. This seems reasonable, but
   remains undocumented. *)

let propagate_prec_annotation caller callee =
  match callee.branch_prec_annotation with
  | (Some _) as annotation ->
      assert (caller.branch_prec_annotation = None);
      annotation
  | None ->
      caller.branch_prec_annotation

(* -------------------------------------------------------------------------- *)

127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
(* [new_candidate x] is a candidate fresh name, which is based on [x] in an
   unspecified way. A fairly arbitrary construction can be used here; we just
   need it to produce an infinite sequence of names, so that eventually we are
   certain to be able to escape any finite set of unavailable names. We also
   need this construction to produce reasonably concise names, as it can be
   iterated several times in practice; I have observed up to 9 iterations in
   real-world grammars. *)

(* Here, the idea is to add a suffix of the form _inlined['0'-'9']+ to the
   name [x], if it does not already include such a suffix. If [x] already
   carries such a suffix, then we increment the integer number. *)

let new_candidate x =
  let x, n = ChopInlined.chop (Lexing.from_string x) in
  Printf.sprintf "%s_inlined%d" x (n + 1)

143
(* [fresh names x] returns a fresh name that is not in the set [names].
144 145 146
   The new name is obtained by iterating [new_candidate] until we fall
   outside the set [names]. *)

147
let rec fresh names x =
148 149 150
  if StringSet.mem x names then fresh names (new_candidate x) else x

(* -------------------------------------------------------------------------- *)
151

POTTIER Francois's avatar
POTTIER Francois committed
152 153 154 155
(* [rename used producers] renames the producers [producers] of the inlined
   branch (the callee) if necessary to avoid a clash with the set [used] of
   the names used by the producers of the host branch (the caller). This set
   need not contain the name of the producer that is inlined away. *)
156

POTTIER Francois's avatar
POTTIER Francois committed
157 158 159
(* This function produces a pair of: 1. a substitution [phi], which represents
   the renaming that we have performed, and which must be applied to the
   semantic action of the callee; 2. the renamed [producers]. *)
160

POTTIER Francois's avatar
POTTIER Francois committed
161 162
let rename (used : StringSet.t) producers: Action.subst * producers =
  let phi, _used, producers =
163 164 165 166 167 168 169 170 171 172 173
    List.fold_left (fun (phi, used, producers) producer ->
      let x = producer_identifier producer in
      if StringSet.mem x used then
        let x' = fresh used x in
        (x, x') :: phi,
        StringSet.add x' used,
        { producer with producer_identifier = x' } :: producers
      else
        (phi, StringSet.add x used, producer :: producers)
    ) ([], used, []) producers
  in
POTTIER Francois's avatar
POTTIER Francois committed
174
  phi, List.rev producers
175

176 177
(* -------------------------------------------------------------------------- *)

178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
(* [define_positions] defines how the start and end positions of the callee
   should be computed once it is inlined into the caller. This information is
   used to transform [$startpos] and [$endpos] in the callee and to transform
   [$startpos(x)] and [$endpos(x)] in the caller. *)

(* 2015/11/04. We ensure that positions are computed in the same manner,
   regardless of whether inlining is performed. *)

(* The arguments of this function are as follows:

   [name]       an array of the names of the producers of the new branch
   [nprefix]    the length of the prefix of the caller, up to the inlining site
   [ncallee]    the length of the callee

   The results are as follows:

   [startp]     how to transform $startpos in the callee
   [endp]       how to transform $endpos in the callee
   [beforeendp] how to transform $endpos($0) in the callee

 *)

let define_positions (name : string array) nprefix ncallee : sw * sw * sw =

  let startp =
    if ncallee > 0 then
      (* If the inner production is non-epsilon, things are easy. The start
         position of the inner production is the start position of its first
         element. *)
      RightNamed name.(nprefix), WhereStart
    else if nprefix > 0 then
      (* If the inner production is epsilon, we are supposed to compute the
         end position of whatever comes in front of it. If the prefix is
         nonempty, then this is the end position of the last symbol in the
         prefix. *)
      RightNamed (name.(nprefix - 1)), WhereEnd
    else
      (* If the inner production is epsilon and the prefix is empty, then
         we need to look up the end position stored in the top stack cell.
         This is the reason why we need the keyword [$endpos($0)]. It is
         required in this case to preserve the semantics of $startpos and
         $endpos. *)
      Before, WhereEnd

    (* Note that, to contrary to intuition perhaps, we do NOT have that
       if the prefix is empty, then the start position of the inner
       production is the start production of the outer production.
       This is true only if the inner production is non-epsilon. *)

  in

  let endp =
    if ncallee > 0 then
      (* If the inner production is non-epsilon, things are easy: its end
         position is the end position of its last element. *)
      RightNamed (name.(nprefix + ncallee - 1)), WhereEnd
    else
      (* If the inner production is epsilon, then its end position is equal
         to its start position. *)
      startp

  (* We must also transform [$endpos($0)] if it used by the inner
     production. It refers to the end position of the stack cell
     that comes before the inner production. So, if the prefix is
     non-empty, then it translates to the end position of the last
     element of the prefix. Otherwise, it translates to [$endpos($0)]. *)

  and beforeendp =
    if nprefix > 0 then
      RightNamed (name.(nprefix - 1)), WhereEnd
    else
      Before, WhereEnd

  in
  startp, endp, beforeendp

(* -------------------------------------------------------------------------- *)

256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
(* [rename_sw_outer] transforms the keywords in the outer production (the
   caller) during inlining. It replaces [$startpos(x)] and [$endpos(x)], where
   [x] is the name of the callee, with [startpx] and [endpx], respectively. *)

let rename_sw_outer (x, startpx, endpx) (sw : sw) : sw option =
  match sw with
  | Before, _ ->
      None
  | RightNamed x', where ->
      if x' = x then
        match where with
        | WhereStart -> Some startpx
        | WhereEnd   -> Some endpx
        | WhereSymbolStart -> assert false (* has been expanded away *)
      else
        None
  | Left, _ ->
      (* [$startpos], [$endpos], and [$symbolstartpos] have been expanded away
         earlier; see [KeywordExpansion]. *)
      assert false

(* -------------------------------------------------------------------------- *)

(* [rename_sw_inner] transforms the keywords in the inner production (the callee)
   during inlining. It replaces [$endpos($0)] with [beforeendp]. *)

let rename_sw_inner beforeendp (sw : sw) : sw option =
  match sw with
  | Before, where ->
      assert (where = WhereEnd);
      Some beforeendp
  | RightNamed _, _ ->
      None
  | Left, _ ->
      (* [$startpos] and [$endpos] have been expanded away earlier; see
         [KeywordExpansion]. *)
      assert false

POTTIER Francois's avatar
POTTIER Francois committed
294 295 296 297 298 299 300
(* -------------------------------------------------------------------------- *)

(* [inline_branch caller site callee] inlines the branch [callee] into the
   branch [caller] at the site [site]. By convention, a site is a pair of an
   integer index -- the index [i] of the producer that must be inlined away --
   and a producer [producer] -- the producer itself. This is redundant, as
   [producer] can be recovered based on [caller] and [i], but convenient. *)
301 302

type site =
303
  int * producer
304

POTTIER Francois's avatar
POTTIER Francois committed
305
let inline_branch caller (i, producer : site) (callee : branch) : branch =
306

POTTIER Francois's avatar
POTTIER Francois committed
307 308 309 310 311
  (* The host branch (the caller) is divided into three sections: a prefix
     of length [nprefix], the producer that we wish to inline away, and a
     suffix of length [nsuffix]. *)

  (* Compute the length of the prefix and suffix. *)
312

313
  let nprefix = i in
POTTIER Francois's avatar
POTTIER Francois committed
314 315 316
  let nsuffix = List.length caller.producers - (i + 1) in

  (* Construct the prefix and suffix. *)
317

318 319
  let prefix = take nprefix caller.producers
  and suffix = drop (nprefix + 1) caller.producers in
320

321
  (* Apply the (undocumented) restrictions that concern the interaction
POTTIER Francois's avatar
POTTIER Francois committed
322 323
     between %prec and %inline. Then, (possibly) propagate a %prec
     annotation. *)
324
  check_prec_inline caller producer nsuffix callee;
POTTIER Francois's avatar
POTTIER Francois committed
325
  let branch_prec_annotation = propagate_prec_annotation caller callee in
326

POTTIER Francois's avatar
POTTIER Francois committed
327 328 329 330 331
  (* Compute the names of the producers in the host branch (the caller), minus
     the one that is being inlined away. Rename the producers of the inlined
     branch (the callee), if necessary, so as to avoid a clash with this set.
     The goal is to guarantee that, after inlining, all producers in the newly
     constructed branch have unique names. *)
332 333 334
  let used = StringSet.union (names prefix) (names suffix) in
  let phi, inlined_producers = rename used callee.producers in

POTTIER Francois's avatar
POTTIER Francois committed
335 336 337 338
  (* Construct (the producers of) the new branch. The prefix and suffix of the
     caller are preserved. In the middle, [producer] disappears and is replaced
     with [inlined_producers]. For debugging purposes, check that each producer
     in the new branch carries a unique name. *)
339 340 341
  let producers = prefix @ inlined_producers @ suffix in
  let (_ : StringSet.t) = names producers in

342 343
  (* Find out how the start and end positions of the callee should be computed
     once it is inlined into the caller. *)
344

345 346 347 348
  let startp, endp, beforeendp =
    let name = producers |> Array.of_list |> Array.map producer_identifier in
    let ncallee = List.length callee.producers in
    define_positions name nprefix ncallee
349 350
  in

POTTIER Francois's avatar
POTTIER Francois committed
351 352 353 354 355
  (* Apply appropriate renamings to the semantic actions of the caller and
     callee, then compose them using a [let] binding. If [x] is the name of
     the producer that we wish to inline away, then the variable [x] in the
     caller's semantic action should refer to the semantic value produced by
     the callee's semantic action. *)
356 357

  let x = producer_identifier producer in
POTTIER Francois's avatar
POTTIER Francois committed
358 359
  let caller_action, callee_action =
    Action.rename (rename_sw_outer (x, startp, endp)) [] caller.action,
360 361
    Action.rename (rename_sw_inner beforeendp) phi callee.action
  in
POTTIER Francois's avatar
POTTIER Francois committed
362
  let action = Action.compose x callee_action caller_action in
363

POTTIER Francois's avatar
POTTIER Francois committed
364
  (* We are done! Build a new branch. *)
365

POTTIER Francois's avatar
POTTIER Francois committed
366 367 368
  let { branch_position; branch_production_level; _ } = caller in
  {
    branch_position;
369
    producers;
POTTIER Francois's avatar
POTTIER Francois committed
370
    action;
371
    branch_prec_annotation;
POTTIER Francois's avatar
POTTIER Francois committed
372
    branch_production_level;
373 374
  }

POTTIER Francois's avatar
POTTIER Francois committed
375 376 377
(* -------------------------------------------------------------------------- *)

(* Inlining a list of branches [callees] into the branch [caller] at [site]. *)
378 379 380 381

let inline_branches caller site (callees : branches) : branches =
  List.map (inline_branch caller site) callees

POTTIER Francois's avatar
POTTIER Francois committed
382
(* -------------------------------------------------------------------------- *)
POTTIER Francois's avatar
POTTIER Francois committed
383

384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
(* For greater syntactic convenience, the main function is written as a
   functor, and re-packaged as a function at the very end. *)

(* Roughly speaking, the transformation is implemented by two mutually
   recursive functions. [expand_branches] transforms a list of branches into a
   list of (expanded) branches; [expand_symbol] maps a nonterminal symbol
   (which may or may not be marked %inline) to its definition in the
   transformed grammar, an (expanded) rule. In order to avoid duplication of
   work, we memoize [expand_symbol]. Thus, the expansion of each symbol is
   computed at most once. (Expansions are demanded top-down, but are computed
   bottom-up.) Memoization is implemented without pain by using a ready-made
   fixed point combinator, [Memoize.defensive_fix]. Furthermore, this find
   point combinator dynamically detects cycles of %inline nonterminal symbols,
   allowing us to avoid divergence and display a nice error message. *)

module Inline (G : sig val grammar: grammar end) = struct

  open G

403 404 405
  let is_inline_symbol =
    is_inline_symbol grammar

406 407 408 409 410 411
  let is_inline_producer =
    is_inline_producer grammar

  let find =
    find grammar

412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
  (* In [--coq] mode, %inline is forbidden. There are two reasons for this.
     One technical reason is that inlining requires constructing composite
     semantic actions (using [Action.compose], etc.) and this construction is
     currently OCaml-specific. (This could be rather easily changed, though.)
     A more philosophical reason is that we don't want to have a large gap
     between the grammar written by the user in the .mly file and the grammar
     written by Menhir in the .v file. The latter grammar is the reference
     grammar, the one with respect to which the generated parser is proved
     correct. *)

  let () =
    if Settings.coq then
      StringMap.iter (fun _ rule ->
        if rule.inline_flag then
          Error.error rule.positions
            "%%inline is not supported by the Coq back-end."
      ) grammar.rules

430 431 432 433 434 435
  (* This is [expand_branches], parameterized by its companion function,
     [expand_symbol]. The parameter [i], an integer, is used to perform
     a left-to-right sweep: the precondition of [expand_branches] is that
     there are no inlining sites at indices less than [i] in [branches].
     Thus, we can begin searching at index [i]. (Beginning to search at
     index 0 would work, too, but would cause redundant searches.) *)
436

437
  let rec expand_branches expand_symbol i branches : branches =
438 439
    (* For each branch [caller] in the list [branches], *)
    branches >>= fun (caller : branch) ->
440 441 442
      (* Search for an inlining site in the branch [caller]. We begin the
         search at position [i], as we know that every inlining site left
         of this position has been dealt with already. *)
443
      match search_at is_inline_producer i caller.producers with
444 445 446
      | None ->
          (* There is none; we are done. *)
          return caller
447
      | Some ((i, producer) as site) ->
448 449 450 451 452
          (* There is one. This is an occurrence of a nonterminal symbol
             [symbol] that is marked %inline. We look up its (expanded)
             definition (via a recursive call to [expand_symbol]), yielding
             a set of branches, which we inline into the branch [caller].
             Then, we continue looking for inlining sites. *)
POTTIER Francois's avatar
POTTIER Francois committed
453
          check_no_producer_attributes producer;
454 455 456 457
          let symbol = producer_symbol producer in
          expand_symbol symbol
          |> get_branches
          |> inline_branches caller site
458
          |> expand_branches expand_symbol i
459 460

  (* This is [expand_symbol], parameterized by itself. *)
461

462
  let expand_symbol expand_symbol symbol : rule =
463 464 465 466
    (* Find the rule that defines this symbol. Then, transform this rule
       by applying [expand_branches] to its branches. The left-to-right
       sweep begins at index 0. *)
    find symbol
467
    |> transform_branches (expand_branches expand_symbol 0)
468 469

  (* Apply [defensive_fix] to obtain a closed function [expand_symbol]. *)
470 471 472

  let expand_symbol : Syntax.symbol -> rule =
    Memoize.String.defensive_fix expand_symbol
473 474 475 476

  (* Wrap [expand_symbol] in an exception handler, so that, when a cycle
     of %inline nonterminal symbols is detected, a good error message is
     displayed. *)
477 478

  let expand_symbol symbol =
479
    try
480
      expand_symbol symbol
481
    with Memoize.String.Cycle (symbols, symbol) ->
482
      let rule = find symbol in
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
      let b = Buffer.create 128 in
      Printf.bprintf b "there is a cycle of %%inline nonterminal symbols:\n";
      begin match symbols with
      | [] ->
          assert false
      | head :: [] ->
          assert (head = symbol);
          Printf.bprintf b "  %s refers to itself." symbol
      | head :: next :: symbols ->
          assert (head = symbol);
          Printf.bprintf b "  %s refers to %s,\n" head next;
          List.iter (fun symbol ->
            Printf.bprintf b "  which refers to %s,\n" symbol
          ) symbols;
          Printf.bprintf b "  which refers back to %s." symbol
      end;
      Error.error rule.positions "%s" (Buffer.contents b)
500

501 502 503
  (* The rules of the transformed grammar are obtained by keeping only
     non-%inline symbols and expanding their rules. *)

504 505
  let rules =
    grammar.rules
506 507
    |> StringMap.filter (fun _ rule -> not rule.inline_flag)
    |> StringMap.mapi (fun symbol _rule -> expand_symbol symbol)
508

509 510 511 512
  (* Drop %type declarations that concern %inline symbols. *)

  let keep symbol _rule : bool =
    not (is_inline_symbol symbol)
513

514
  let types =
515 516 517 518 519 520 521 522 523 524 525 526 527
    StringMap.filter keep grammar.types

  (* Drop %on_error_reduce declarations that concern %inline symbols. At the
     same time, display a warning, as this seems strange: these declarations
     are useless. *)

  let keep_or_warn (symbol : string) _rule : bool =
    let keep = keep symbol _rule in
    if not keep then
      Error.grammar_warning []
        "the declaration %%on_error_reduce %s\n\
         has no effect: this symbol is marked %%inline and is expanded away." symbol;
    keep
528 529

  let on_error_reduce =
530 531 532
    StringMap.filter keep_or_warn grammar.on_error_reduce

  (* We are done. *)
533 534 535 536 537 538

  let grammar =
    { grammar with rules; types; on_error_reduce }

end

539 540 541 542
(* -------------------------------------------------------------------------- *)

(* Re-package the above functor as a function. *)

543 544 545
let inline grammar =
  let module I = Inline(struct let grammar = grammar end) in
  I.grammar