nonTerminalDefinitionInlining.ml 16 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
open Keyword
15 16 17 18
open UnparameterizedSyntax
open ListMonad

(* Color are used to detect cycles. *)
19
type 'a color =
20 21 22
  | BeingExpanded
  | Expanded of 'a

23 24
(* [index2id] converts a 0-based index (into a list of producers) to
   an identifier (the name of the producer). *)
25 26 27

let index2id producers i =
  try
28
    producer_identifier (List.nth producers i)
29 30 31 32
  with Failure _ ->
    assert false (* should not happen *)

(* [rename_sw_outer] transforms the keywords in the outer production (the
33
   caller) during inlining. It replaces [$startpos(x)] and [$endpos(x)], where
34
   [x] is the name of the callee, with [startpx] and [endpx], respectively. *)
35

36
let rename_sw_outer (x, startpx, endpx) (subject, where) : (subject * where) option =
37
  match subject, where with
38
  | Before, _ ->
39
      None
40
  | RightNamed x', _ ->
41
      if x' = x then
42
        match where with
43 44
        | WhereStart -> Some startpx
        | WhereEnd   -> Some endpx
45
        | WhereSymbolStart -> assert false (* has been expanded away *)
46 47
      else
        None
48
  | Left, _ ->
49 50
      (* [$startpos], [$endpos], and [$symbolstartpos] have been expanded away
         earlier; see [KeywordExpansion]. *)
51
      assert false
52 53

(* [rename_sw_inner] transforms the keywords in the inner production (the callee)
54
   during inlining. It replaces [$endpos($0)] with [beforeendp]. *)
55

56
let rename_sw_inner beforeendp (subject, where) : (subject * where) option =
57
  match subject, where with
58 59
  | Before, _ ->
      assert (where = WhereEnd);
60
      Some beforeendp
61 62
  | RightNamed _, _ ->
      None
63 64 65 66
  | Left, _ ->
      (* [$startpos] and [$endpos] have been expanded away earlier; see
         [KeywordExpansion]. *)
      assert false
67

68 69 70 71 72 73 74 75 76 77 78 79 80 81
(* This auxiliary function checks that a use site of an %inline symbol does
   not carry any attributes. *)

let check_no_producer_attributes producer =
  match producer_attributes producer with
  | [] ->
      ()
  | (id, _payload) :: _attributes ->
      Error.error
        [Positions.position id]
        "the nonterminal symbol %s is declared %%inline.\n\
         A use of it cannot carry an attribute."
        (producer_symbol producer)

82 83 84 85 86 87 88 89
(* [names producers] is the set of names of the producers [producers].
   The name of a producer is the OCaml variable that is used to name the
   semantic value. *)

(* At the same time, this function checks that no two producers carry the
   same name. This check should never fail if we have performed appropriate
   renamings. It is a debugging aid. *)

90
let names (producers : producers) : StringSet.t =
91 92 93 94
  List.fold_left (fun ids producer ->
    let id = producer_identifier producer in
    assert (not (StringSet.mem id ids));
    StringSet.add id ids
95 96
  ) StringSet.empty producers

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
(* [fresh names x] returns a fresh name that is not in the set [names].
   The new name is based on [x] in an unspecified way. *)
let rec fresh names x =
  if StringSet.mem x names then
    let x =
      (* Propose a new candidate name. A fairly arbitrary construction
         can be used here; we just need it to produce an infinite sequence
         of names, so that eventually we fall outside of [names]. We also
         need it to produce reasonably concise names, as this construction
         can be iterated several times in practice; I have observed up to
         9 iterations in real-world grammars. *)
      let x, n = ChopInlined.chop (Lexing.from_string x) in
      let n = n + 1 in
      Printf.sprintf "%s_inlined%d" x n
    in
    fresh names x
  else
    x

116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
(* We have to rename the producers [producers] of the inlined production
   if they clash with the set [used] of the names used by the producers
   of the host branch. (Note that [used] need not contain the name of the
   producer that is inlined away.)
   This function produces a pair of:
   1. a substitution [phi], which represents the renaming that we have
      performed, and which must be applied to the inner semantic action;
   2. the renamed [producers]. *)
let rename (used : StringSet.t) producers: Action.subst * producers =

  (* Compute a renaming and the new names of the inlined producers. *)
  let phi, _used, producers' =
    List.fold_left (fun (phi, used, producers) producer ->
      let x = producer_identifier producer in
      if StringSet.mem x used then
        let x' = fresh used x in
        (x, x') :: phi,
        StringSet.add x' used,
        { producer with producer_identifier = x' } :: producers
      else
        (phi, StringSet.add x used, producer :: producers)
    ) ([], used, []) producers
  in
  phi, List.rev producers'

141 142 143 144 145 146 147 148 149 150 151 152
(* [is_inline_nonterminal grammar symbol] tells whether [symbol] is a nonterminal
   symbol (as opposed to a terminal symbol) and is marked %inline. *)

let is_inline_nonterminal grammar symbol : bool =
  match StringMap.find symbol grammar.rules with
  | rule ->
      (* This is a nonterminal symbol. Test its %inline flag. *)
      rule.inline_flag
  | exception Not_found ->
      (* This is a terminal symbol. *)
      false

153 154 155
(* [find_inlining_site grammar (prefix, suffix)] traverses a list of producers
   that is already decomposed as [List.rev prefix @ suffix]. It looks for the
   first nonterminal symbol that can be inlined away. If it does not find one,
156 157
   it raises [NoInlining]. If it does find one, it returns a decomposition of
   the whole list of producers under the form [prefix @ producer :: suffix]. *)
158 159 160 161 162

type site =
  producers * producer * producers

let rec find_inlining_site grammar (prefix, suffix : producers * producers) : site option =
163 164
  match suffix with
  | [] ->
165
      None
166 167 168 169 170 171 172 173
  | producer :: suffix ->
      let symbol = producer_symbol producer in
      if is_inline_nonterminal grammar symbol then begin
        (* We have checked earlier than an %inline symbol does not carry
           any attributes. In addition, we now check that the use site of
           this symbol does not carry any attributes either. Thus, we need
           not worry about propagating these attributes through inlining. *)
        check_no_producer_attributes producer;
174
        Some (List.rev prefix, producer, suffix)
175 176 177
      end
      else
        find_inlining_site grammar (producer :: prefix, suffix)
178

179 180
(* Inline a grammar. The resulting grammar does not contain any definitions
   that can be inlined. *)
181
let inline grammar =
182 183

  (* This table associates a color to each non terminal that can be expanded. *)
184 185
  let expanded_non_terminals =
    Hashtbl.create 13
186 187
  in

188 189
  let expanded_state k =
    Hashtbl.find expanded_non_terminals k
190
  in
191 192

  let mark_as_being_expanded k =
193 194 195 196 197 198 199 200
    Hashtbl.add expanded_non_terminals k BeingExpanded
  in

  let mark_as_expanded k r =
    Hashtbl.replace expanded_non_terminals  k (Expanded r);
    r
  in

POTTIER Francois's avatar
POTTIER Francois committed
201
  (* Inline the non terminals that can be inlined in [caller]. We use the
202
     ListMonad to combine the results. *)
POTTIER Francois's avatar
POTTIER Francois committed
203
  let rec expand_branch (caller : branch) : branch ListMonad.m =
204 205 206 207
    match find_inlining_site grammar ([], caller.producers) with
    | None ->
        return caller
    | Some (prefix, producer, suffix) ->
208

209
      (* Inline a branch of [nt] at position [prefix] ... [suffix] in
210
         the branch [caller]. *)
POTTIER Francois's avatar
POTTIER Francois committed
211
      let inline_branch (callee : branch) : branch =
212 213 214 215 216 217

        (* 2015/11/18. The interaction of %prec and %inline is not documented.
           It used to be the case that we would disallow marking a production
           both %inline and %prec. Now, we allow it, but we check that (1) it
           is inlined at the last position of the host production and (2) the
           host production does not already have a %prec annotation. *)
POTTIER Francois's avatar
POTTIER Francois committed
218
        callee.branch_prec_annotation |> Option.iter (fun callee_prec ->
219 220
          (* The callee has a %prec annotation. *)
          (* Check condition 1. *)
221 222
          if List.length suffix > 0 then begin
            let nt = producer_symbol producer in
POTTIER Francois's avatar
POTTIER Francois committed
223
            Error.error [ Positions.position callee_prec; caller.branch_position ]
224 225 226
              "this production carries a %%prec annotation,\n\
               and the nonterminal symbol %s is marked %%inline.\n\
               For this reason, %s can be used only in tail position."
227 228
              nt nt
          end;
229
          (* Check condition 2. *)
POTTIER Francois's avatar
POTTIER Francois committed
230
          caller.branch_prec_annotation |> Option.iter (fun caller_prec ->
231
            let nt = producer_symbol producer in
232 233 234 235 236 237 238 239 240
            Error.error [ Positions.position callee_prec; Positions.position caller_prec ]
              "this production carries a %%prec annotation,\n\
               and the nonterminal symbol %s is marked %%inline.\n\
               For this reason, %s cannot be used in a production\n\
               which itself carries a %%prec annotation."
              nt nt
          )
        );

241 242 243
        (* These are the names of the producers in the host branch,
           minus the producer that is being inlined away. *)
        let used = StringSet.union (names prefix) (names suffix) in
244
        (* Rename the producers of this branch if they conflict with
245
           the name of the host's producers. *)
POTTIER Francois's avatar
POTTIER Francois committed
246
        let phi, inlined_producers = rename used callee.producers in
247

248 249
        (* After inlining, the producers are as follows. *)
        let producers = prefix @ inlined_producers @ suffix in
250
        (* For debugging: check that each producer carries a unique name. *)
251 252
        let (_ : StringSet.t) = names producers in

253 254
        let index2id = index2id producers in

255 256 257
        let prefix = List.length prefix
        and inlined_producers = List.length inlined_producers in

258 259 260 261 262 263 264 265 266 267
        (* Define how the start and end positions of the inner production should
           be computed once it is inlined into the outer production. These
           definitions of [startp] and [endp] are then used to transform
           [$startpos] and [$endpos] in the inner production and to transform
           [$startpos(x)] and [$endpos(x)] in the outer production. *)

        (* 2015/11/04. We ensure that positions are computed in the same manner,
           regardless of whether inlining is performed. *)

        let startp =
268
          if inlined_producers > 0 then
269 270 271
            (* If the inner production is non-epsilon, things are easy. The start
               position of the inner production is the start position of its first
               element. *)
272 273
            RightNamed (index2id prefix), WhereStart
          else if prefix > 0 then
274 275 276 277
            (* If the inner production is epsilon, we are supposed to compute the
               end position of whatever comes in front of it. If the prefix is
               nonempty, then this is the end position of the last symbol in the
               prefix. *)
278
            RightNamed (index2id (prefix - 1)), WhereEnd
279 280
          else
            (* If the inner production is epsilon and the prefix is empty, then
281
               we need to look up the end position stored in the top stack cell.
282
               This is the reason why we need the keyword [$endpos($0)]. It is
283 284 285
               required in this case to preserve the semantics of $startpos and
               $endpos. *)
            Before, WhereEnd
286 287 288 289 290

          (* Note that, to contrary to intuition perhaps, we do NOT have that
             if the prefix is empty, then the start position of the inner
             production is the start production of the outer production.
             This is true only if the inner production is non-epsilon. *)
291

292
        in
293

294
        let endp =
295
          if inlined_producers > 0 then
POTTIER Francois's avatar
POTTIER Francois committed
296
            (* If the inner production is non-epsilon, things are easy: its end
297 298 299 300 301 302 303 304 305
               position is the end position of its last element. *)
            RightNamed (index2id (prefix + inlined_producers - 1)), WhereEnd
          else
            (* If the inner production is epsilon, then its end position is equal
               to its start position. *)
            startp

        in

306
        (* We must also transform [$endpos($0)] if it used by the inner
307 308 309
           production. It refers to the end position of the stack cell
           that comes before the inner production. So, if the prefix is
           non-empty, then it translates to the end position of the last
310
           element of the prefix. Otherwise, it translates to [$endpos($0)]. *)
311

312
        let beforeendp =
313 314 315 316 317 318
          if prefix > 0 then
            RightNamed (index2id (prefix - 1)), WhereEnd
          else
            Before, WhereEnd
        in

319 320 321
        (* [c] is the identifier under which the callee is known inside the caller. *)
        let c = producer_identifier producer in

322 323
        (* Rename the outer and inner semantic action. *)
        let outer_action =
POTTIER Francois's avatar
POTTIER Francois committed
324
          Action.rename (rename_sw_outer (c, startp, endp)) [] caller.action
325
        and action' =
POTTIER Francois's avatar
POTTIER Francois committed
326
          Action.rename (rename_sw_inner beforeendp) phi callee.action
327
        in
328

329 330 331 332 333
        (* 2015/11/18. If the callee has a %prec annotation (which implies
           the caller does not have one, and the callee appears in tail
           position in the caller) then the annotation is inherited. This
           seems reasonable, but remains undocumented. *)
        let branch_prec_annotation =
POTTIER Francois's avatar
POTTIER Francois committed
334
          match callee.branch_prec_annotation with
335
          | (Some _) as annotation ->
POTTIER Francois's avatar
POTTIER Francois committed
336
              assert (caller.branch_prec_annotation = None);
337 338
              annotation
          | None ->
POTTIER Francois's avatar
POTTIER Francois committed
339
              caller.branch_prec_annotation
340 341
        in

POTTIER Francois's avatar
POTTIER Francois committed
342
        { caller with
343 344
          producers;
          action = Action.compose c action' outer_action;
345
          branch_prec_annotation;
346
        }
347
      in
348 349 350
      let nt = producer_symbol producer in
      let p = StringMap.find nt grammar.rules in (* cannot fail *)
      let p = expand_rule nt p in
351 352
      List.map inline_branch p.branches >>= expand_branch

353
  (* Expand a rule if necessary. *)
354 355
  and expand_rule k r =
    try
356
      (match expanded_state k with
357 358 359 360 361 362
         | BeingExpanded ->
             Error.error
               r.positions
               "there is a cycle in the definition of %s." k
         | Expanded r ->
             r)
363 364 365 366 367
    with Not_found ->
      mark_as_being_expanded k;
      mark_as_expanded k { r with branches = r.branches >>= expand_branch }
  in

368
  (* If we are in Coq mode, %inline is forbidden. *)
369 370
  let _ =
    if Settings.coq then
371 372
      StringMap.iter
        (fun _ r ->
373
           if r.inline_flag then
374
             Error.error r.positions
375
               "%%inline is not supported by the Coq back-end.")
376 377 378
        grammar.rules
  in

379 380
  (* To expand a grammar, we expand all its rules and remove
     the %inline rules. *)
381
  let expanded_rules =
382 383 384
    StringMap.mapi expand_rule grammar.rules
  in

385 386 387 388 389 390
  let useful (k : string) : bool =
    try
      not (StringMap.find k grammar.rules).inline_flag
    with Not_found ->
      true (* could be: assert false? *)
  in
391

392 393 394 395 396 397 398 399 400 401 402
  (* Remove %on_error_reduce declarations for symbols that are expanded away,
     and warn about them, at the same time. *)
  let useful_warn (k : string) : bool =
    let u = useful k in
    if not u then
      Error.grammar_warning []
        "the declaration %%on_error_reduce %s\n\
          has no effect, since this symbol is marked %%inline and is expanded away." k;
    u
  in

403 404 405
  { grammar with
      rules = StringMap.filter (fun _ r -> not r.inline_flag) expanded_rules;
      types = StringMap.filter (fun k _ -> useful k) grammar.types;
406
      on_error_reduce = StringMap.filter (fun k _ -> useful_warn k) grammar.on_error_reduce;
407
  }