nonTerminalDefinitionInlining.ml 15.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
let position = Positions.position
15
open Keyword
16 17
open UnparameterizedSyntax
open ListMonad
18 19
let drop = MenhirLib.General.drop
let take = MenhirLib.General.take
20

21 22 23 24 25 26 27 28 29 30 31
(* [search p i xs] searches the list [xs] for an element [x] that satisfies [p].
   If successful, then it returns a pair of 1. [i] plus the offset of [x] in the
   list, and 2. the element [x]. *)

let rec search (p : 'a -> bool) i (xs : 'a list) : (int * 'a) option =
  match xs with
  | [] ->
      None
  | x :: xs ->
      if p x then Some (i, x) else search p (i+1) xs

32 33
(* [index2id] converts a 0-based index (into a list of producers) to
   an identifier (the name of the producer). *)
34 35 36

let index2id producers i =
  try
37
    producer_identifier (List.nth producers i)
38 39 40 41
  with Failure _ ->
    assert false (* should not happen *)

(* [rename_sw_outer] transforms the keywords in the outer production (the
42
   caller) during inlining. It replaces [$startpos(x)] and [$endpos(x)], where
43
   [x] is the name of the callee, with [startpx] and [endpx], respectively. *)
44

45
let rename_sw_outer (x, startpx, endpx) (subject, where) : (subject * where) option =
46
  match subject, where with
47
  | Before, _ ->
48
      None
49
  | RightNamed x', _ ->
50
      if x' = x then
51
        match where with
52 53
        | WhereStart -> Some startpx
        | WhereEnd   -> Some endpx
POTTIER Francois's avatar
POTTIER Francois committed
54
        | WhereSymbolStart -> assert false (* has been expanded away *)
55 56
      else
        None
57
  | Left, _ ->
POTTIER Francois's avatar
POTTIER Francois committed
58 59
      (* [$startpos], [$endpos], and [$symbolstartpos] have been expanded away
         earlier; see [KeywordExpansion]. *)
60
      assert false
61 62

(* [rename_sw_inner] transforms the keywords in the inner production (the callee)
63
   during inlining. It replaces [$endpos($0)] with [beforeendp]. *)
64

65
let rename_sw_inner beforeendp (subject, where) : (subject * where) option =
66
  match subject, where with
67 68
  | Before, _ ->
      assert (where = WhereEnd);
69
      Some beforeendp
70 71
  | RightNamed _, _ ->
      None
72 73 74 75
  | Left, _ ->
      (* [$startpos] and [$endpos] have been expanded away earlier; see
         [KeywordExpansion]. *)
      assert false
76

77
(* This auxiliary function checks that a use site of an %inline symbol does
POTTIER Francois's avatar
POTTIER Francois committed
78 79
   not carry any attributes. This condition guarantees that we need not worry
   about propagating these attributes through inlining. *)
80 81 82 83 84 85 86

let check_no_producer_attributes producer =
  match producer_attributes producer with
  | [] ->
      ()
  | (id, _payload) :: _attributes ->
      Error.error
87
        [position id]
88 89 90 91
        "the nonterminal symbol %s is declared %%inline.\n\
         A use of it cannot carry an attribute."
        (producer_symbol producer)

92 93 94 95 96 97 98 99
(* [names producers] is the set of names of the producers [producers].
   The name of a producer is the OCaml variable that is used to name the
   semantic value. *)

(* At the same time, this function checks that no two producers carry the
   same name. This check should never fail if we have performed appropriate
   renamings. It is a debugging aid. *)

100
let names (producers : producers) : StringSet.t =
101 102 103 104
  List.fold_left (fun ids producer ->
    let id = producer_identifier producer in
    assert (not (StringSet.mem id ids));
    StringSet.add id ids
105 106
  ) StringSet.empty producers

107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
(* [fresh names x] returns a fresh name that is not in the set [names].
   The new name is based on [x] in an unspecified way. *)
let rec fresh names x =
  if StringSet.mem x names then
    let x =
      (* Propose a new candidate name. A fairly arbitrary construction
         can be used here; we just need it to produce an infinite sequence
         of names, so that eventually we fall outside of [names]. We also
         need it to produce reasonably concise names, as this construction
         can be iterated several times in practice; I have observed up to
         9 iterations in real-world grammars. *)
      let x, n = ChopInlined.chop (Lexing.from_string x) in
      let n = n + 1 in
      Printf.sprintf "%s_inlined%d" x n
    in
    fresh names x
  else
    x

126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
(* We have to rename the producers [producers] of the inlined production
   if they clash with the set [used] of the names used by the producers
   of the host branch. (Note that [used] need not contain the name of the
   producer that is inlined away.)
   This function produces a pair of:
   1. a substitution [phi], which represents the renaming that we have
      performed, and which must be applied to the inner semantic action;
   2. the renamed [producers]. *)
let rename (used : StringSet.t) producers: Action.subst * producers =

  (* Compute a renaming and the new names of the inlined producers. *)
  let phi, _used, producers' =
    List.fold_left (fun (phi, used, producers) producer ->
      let x = producer_identifier producer in
      if StringSet.mem x used then
        let x' = fresh used x in
        (x, x') :: phi,
        StringSet.add x' used,
        { producer with producer_identifier = x' } :: producers
      else
        (phi, StringSet.add x used, producer :: producers)
    ) ([], used, []) producers
  in
  phi, List.rev producers'

151 152 153 154 155 156 157 158 159 160 161 162
(* [is_inline_nonterminal grammar symbol] tells whether [symbol] is a nonterminal
   symbol (as opposed to a terminal symbol) and is marked %inline. *)

let is_inline_nonterminal grammar symbol : bool =
  match StringMap.find symbol grammar.rules with
  | rule ->
      (* This is a nonterminal symbol. Test its %inline flag. *)
      rule.inline_flag
  | exception Not_found ->
      (* This is a terminal symbol. *)
      false

POTTIER Francois's avatar
POTTIER Francois committed
163 164 165
let is_inline_producer grammar producer =
  is_inline_nonterminal grammar (producer_symbol producer)

166 167 168 169 170 171 172 173 174
(* [find grammar symbol] looks up the definition of [symbol], which must be
   a valid nonterminal symbol. *)

let find grammar symbol : rule =
  try
    StringMap.find symbol grammar.rules
  with Not_found ->
    assert false

175 176
(* Inline the branch [callee] into the branch [caller] at the site
   determined by [prefix, producer, suffix]. *)
177 178

type site =
179
  int * producer
180

181
let inline_branch caller (site : site) (callee : branch) : branch =
182

183
  let (i, producer) = site in
POTTIER Francois's avatar
POTTIER Francois committed
184

185 186 187
  let nprefix = i in
  let ncaller = List.length caller.producers in
  let nsuffix = ncaller - (i + 1) in
188

189 190
  let prefix = take nprefix caller.producers
  and suffix = drop (nprefix + 1) caller.producers in
191 192 193 194 195 196 197 198 199

  (* 2015/11/18. The interaction of %prec and %inline is not documented.
     It used to be the case that we would disallow marking a production
     both %inline and %prec. Now, we allow it, but we check that (1) it
     is inlined at the last position of the host production and (2) the
     host production does not already have a %prec annotation. *)
  callee.branch_prec_annotation |> Option.iter (fun callee_prec ->
    (* The callee has a %prec annotation. *)
    (* Check condition 1. *)
200
    if nsuffix > 0 then begin
201
      let nt = producer_symbol producer in
202
      Error.error [ position callee_prec; caller.branch_position ]
203 204 205 206 207 208 209 210
        "this production carries a %%prec annotation,\n\
         and the nonterminal symbol %s is marked %%inline.\n\
         For this reason, %s can be used only in tail position."
        nt nt
    end;
    (* Check condition 2. *)
    caller.branch_prec_annotation |> Option.iter (fun caller_prec ->
      let nt = producer_symbol producer in
211
      Error.error [ position callee_prec; position caller_prec ]
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
        "this production carries a %%prec annotation,\n\
         and the nonterminal symbol %s is marked %%inline.\n\
         For this reason, %s cannot be used in a production\n\
         which itself carries a %%prec annotation."
        nt nt
    )
  );

  (* These are the names of the producers in the host branch,
     minus the producer that is being inlined away. *)
  let used = StringSet.union (names prefix) (names suffix) in
  (* Rename the producers of this branch if they conflict with
     the name of the host's producers. *)
  let phi, inlined_producers = rename used callee.producers in

  (* After inlining, the producers are as follows. *)
  let producers = prefix @ inlined_producers @ suffix in
  (* For debugging: check that each producer carries a unique name. *)
  let (_ : StringSet.t) = names producers in

  let index2id = index2id producers in

234
  let inlined_producers = List.length inlined_producers in
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249

  (* Define how the start and end positions of the inner production should
     be computed once it is inlined into the outer production. These
     definitions of [startp] and [endp] are then used to transform
     [$startpos] and [$endpos] in the inner production and to transform
     [$startpos(x)] and [$endpos(x)] in the outer production. *)

  (* 2015/11/04. We ensure that positions are computed in the same manner,
     regardless of whether inlining is performed. *)

  let startp =
    if inlined_producers > 0 then
      (* If the inner production is non-epsilon, things are easy. The start
         position of the inner production is the start position of its first
         element. *)
250 251
      RightNamed (index2id nprefix), WhereStart
    else if nprefix > 0 then
252 253 254 255
      (* If the inner production is epsilon, we are supposed to compute the
         end position of whatever comes in front of it. If the prefix is
         nonempty, then this is the end position of the last symbol in the
         prefix. *)
256
      RightNamed (index2id (nprefix - 1)), WhereEnd
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
    else
      (* If the inner production is epsilon and the prefix is empty, then
         we need to look up the end position stored in the top stack cell.
         This is the reason why we need the keyword [$endpos($0)]. It is
         required in this case to preserve the semantics of $startpos and
         $endpos. *)
      Before, WhereEnd

    (* Note that, to contrary to intuition perhaps, we do NOT have that
       if the prefix is empty, then the start position of the inner
       production is the start production of the outer production.
       This is true only if the inner production is non-epsilon. *)

  in

  let endp =
    if inlined_producers > 0 then
      (* If the inner production is non-epsilon, things are easy: its end
         position is the end position of its last element. *)
276
      RightNamed (index2id (nprefix + inlined_producers - 1)), WhereEnd
277 278 279 280 281 282 283 284 285 286 287 288 289 290
    else
      (* If the inner production is epsilon, then its end position is equal
         to its start position. *)
      startp

  in

  (* We must also transform [$endpos($0)] if it used by the inner
     production. It refers to the end position of the stack cell
     that comes before the inner production. So, if the prefix is
     non-empty, then it translates to the end position of the last
     element of the prefix. Otherwise, it translates to [$endpos($0)]. *)

  let beforeendp =
291 292
    if nprefix > 0 then
      RightNamed (index2id (nprefix - 1)), WhereEnd
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
    else
      Before, WhereEnd
  in

  (* [c] is the identifier under which the callee is known inside the caller. *)
  let c = producer_identifier producer in

  (* Rename the outer and inner semantic action. *)
  let outer_action =
    Action.rename (rename_sw_outer (c, startp, endp)) [] caller.action
  and action' =
    Action.rename (rename_sw_inner beforeendp) phi callee.action
  in

  (* 2015/11/18. If the callee has a %prec annotation (which implies
     the caller does not have one, and the callee appears in tail
     position in the caller) then the annotation is inherited. This
     seems reasonable, but remains undocumented. *)
  let branch_prec_annotation =
    match callee.branch_prec_annotation with
    | (Some _) as annotation ->
        assert (caller.branch_prec_annotation = None);
        annotation
    | None ->
        caller.branch_prec_annotation
  in

  { caller with
    producers;
    action = Action.compose c action' outer_action;
    branch_prec_annotation;
  }

(* Inline a list of branches [callees] into the branch [caller] at [site]. *)

let inline_branches caller site (callees : branches) : branches =
  List.map (inline_branch caller site) callees

POTTIER Francois's avatar
POTTIER Francois committed
331 332
(* A getter and transformer for branches. *)

POTTIER Francois's avatar
POTTIER Francois committed
333
let get_branches rule =
POTTIER Francois's avatar
POTTIER Francois committed
334 335 336 337 338
  rule.branches

let transform_branches f rule =
  { rule with branches = f rule.branches }

339 340
(* Inline a grammar. The resulting grammar does not contain any definitions
   that can be inlined. *)
POTTIER Francois's avatar
POTTIER Francois committed
341
let inline grammar =
342

343
  let rec expand_branches expand_symbol i branches : branches =
POTTIER Francois's avatar
POTTIER Francois committed
344 345
    (* For each branch [caller] in the list [branches], *)
    branches >>= fun (caller : branch) ->
346 347 348 349 350
      (* Search for an inlining site in the branch [caller]. We begin the
         search at position [i], as we know that every inlining site left
         of this position has been dealt with already. *)
      let producers = drop i caller.producers in
      match search (is_inline_producer grammar) i producers with
POTTIER Francois's avatar
POTTIER Francois committed
351 352 353
      | None ->
          (* There is none; we are done. *)
          return caller
354
      | Some ((i, producer) as site) ->
POTTIER Francois's avatar
POTTIER Francois committed
355 356 357 358 359
          (* There is one. This is an occurrence of a nonterminal symbol
             [symbol] that is marked %inline. We look up its (expanded)
             definition (via a recursive call to [expand_symbol]), yielding
             a set of branches, which we inline into the branch [caller].
             Then, we continue looking for inlining sites. *)
POTTIER Francois's avatar
POTTIER Francois committed
360
          check_no_producer_attributes producer;
POTTIER Francois's avatar
POTTIER Francois committed
361 362 363 364
          let symbol = producer_symbol producer in
          expand_symbol symbol
          |> get_branches
          |> inline_branches caller site
365
          |> expand_branches expand_symbol i
366
  in
367

368
  let expand_symbol expand_symbol symbol : rule =
POTTIER Francois's avatar
POTTIER Francois committed
369
    find grammar symbol
370
    |> transform_branches (expand_branches expand_symbol 0)
371 372 373 374 375 376 377
  in

  let expand_symbol : Syntax.symbol -> rule =
    Memoize.String.defensive_fix expand_symbol
  in

  let expand_symbol symbol =
POTTIER Francois's avatar
POTTIER Francois committed
378
    try
379 380 381 382 383 384
      expand_symbol symbol
    with Memoize.String.Cycle (_, symbol) ->
      let rule = find grammar symbol in
      Error.error rule.positions
        "there is a cycle in the definition of %s." symbol
      (* TEMPORARY we can now give a better message. *)
385 386
  in

387
  (* If we are in Coq mode, %inline is forbidden. *)
388 389
  let _ =
    if Settings.coq then
POTTIER Francois's avatar
POTTIER Francois committed
390 391
      StringMap.iter
        (fun _ r ->
392
           if r.inline_flag then
393
             Error.error r.positions
394
               "%%inline is not supported by the Coq back-end.")
395 396 397
        grammar.rules
  in

398 399
  (* To expand a grammar, we expand all its rules and remove
     the %inline rules. *)
400 401 402
  let rules =
    grammar.rules
    |> StringMap.filter (fun _ r -> not r.inline_flag)
403
    |> StringMap.mapi (fun symbol _rule -> expand_symbol symbol) (* a little wasteful, but simple *)
404 405
  in

406 407 408 409 410 411
  let useful (k : string) : bool =
    try
      not (StringMap.find k grammar.rules).inline_flag
    with Not_found ->
      true (* could be: assert false? *)
  in
POTTIER Francois's avatar
POTTIER Francois committed
412

413 414 415 416 417 418 419 420 421 422 423
  (* Remove %on_error_reduce declarations for symbols that are expanded away,
     and warn about them, at the same time. *)
  let useful_warn (k : string) : bool =
    let u = useful k in
    if not u then
      Error.grammar_warning []
        "the declaration %%on_error_reduce %s\n\
          has no effect, since this symbol is marked %%inline and is expanded away." k;
    u
  in

424
  { grammar with
425
      rules;
426
      types = StringMap.filter (fun k _ -> useful k) grammar.types;
427
      on_error_reduce = StringMap.filter (fun k _ -> useful_warn k) grammar.on_error_reduce;
428
  }