nonTerminalDefinitionInlining.ml 14.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
open Keyword
15 16 17 18 19 20 21
open UnparameterizedSyntax
open ListMonad

(* This exception will be raised when a branch does not need inlining. *)
exception NoInlining

(* Color are used to detect cycles. *)
22
type 'a color =
23 24 25
  | BeingExpanded
  | Expanded of 'a

26 27
(* [index2id] converts a 0-based index (into a list of producers) to
   an identifier (the name of the producer). *)
28 29 30

let index2id producers i =
  try
31
    producer_identifier (List.nth producers i)
32 33 34 35
  with Failure _ ->
    assert false (* should not happen *)

(* [rename_sw_outer] transforms the keywords in the outer production (the
36
   caller) during inlining. It replaces [$startpos(x)] and [$endpos(x)], where
37
   [x] is the name of the callee, with [startpx] and [endpx], respectively. *)
38

39
let rename_sw_outer (x, startpx, endpx) (subject, where) : (subject * where) option =
40
  match subject, where with
41
  | Before, _ ->
42
      None
43
  | RightNamed x', _ ->
44
      if x' = x then
45
        match where with
46 47
        | WhereStart -> Some startpx
        | WhereEnd   -> Some endpx
POTTIER Francois's avatar
POTTIER Francois committed
48
        | WhereSymbolStart -> assert false (* has been expanded away *)
49 50
      else
        None
51
  | Left, _ ->
POTTIER Francois's avatar
POTTIER Francois committed
52 53
      (* [$startpos], [$endpos], and [$symbolstartpos] have been expanded away
         earlier; see [KeywordExpansion]. *)
54
      assert false
55 56

(* [rename_sw_inner] transforms the keywords in the inner production (the callee)
57
   during inlining. It replaces [$endpos($0)] with [beforeendp]. *)
58

59
let rename_sw_inner beforeendp (subject, where) : (subject * where) option =
60
  match subject, where with
61 62
  | Before, _ ->
      assert (where = WhereEnd);
63
      Some beforeendp
64 65
  | RightNamed _, _ ->
      None
66 67 68 69
  | Left, _ ->
      (* [$startpos] and [$endpos] have been expanded away earlier; see
         [KeywordExpansion]. *)
      assert false
70

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
(* This auxiliary function checks that a use site of an %inline symbol does
   not carry any attributes. *)

let check_no_producer_attributes producer =
  match producer_attributes producer with
  | [] ->
      ()
  | (id, _payload) :: _attributes ->
      Error.error
        [Positions.position id]
        "the nonterminal symbol %s is declared %%inline.\n\
         A use of it cannot carry an attribute."
        (producer_symbol producer)

let names (producers : producers) : StringSet.t =
  List.fold_left (fun s producer ->
    StringSet.add (producer_identifier producer) s
  ) StringSet.empty producers

90 91
(* Inline a grammar. The resulting grammar does not contain any definitions
   that can be inlined. *)
92
let inline grammar =
93

94 95 96 97 98 99 100 101
  (* This function returns a fresh name that begins with [prefix] (although
     this is not essential!) and that is not in the set [names]. *)
  let rec fresh names prefix =
    if StringSet.mem prefix names then
      let prefix = prefix ^ "'" in
      fresh names prefix
    else
      prefix
102 103 104 105 106
  in

  let use_inline = ref false in

  (* This table associates a color to each non terminal that can be expanded. *)
107 108
  let expanded_non_terminals =
    Hashtbl.create 13
109 110
  in

111 112
  let expanded_state k =
    Hashtbl.find expanded_non_terminals k
113
  in
114 115

  let mark_as_being_expanded k =
116 117 118 119 120 121 122 123
    Hashtbl.add expanded_non_terminals k BeingExpanded
  in

  let mark_as_expanded k r =
    Hashtbl.replace expanded_non_terminals  k (Expanded r);
    r
  in

124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
  (* [find_inline_producer b] traverses the producers of the branch [b] and
     looks for the first nonterminal symbol that can be inlined. If it finds
     one, it inlines its branches into [b], which is why this function can
     return several branches. Otherwise, it raises [NoInlining]. *)
  let rec chop_inline (prefix, suffix) =
    match suffix with
    | [] ->
        raise NoInlining
    | x :: xs ->
        let nt = producer_symbol x
        and id = producer_identifier x in
        try
          let r = StringMap.find nt grammar.rules in
          if r.inline_flag then begin
            (* We have checked earlier than an %inline symbol does not carry
               any attributes. In addition, we now check that the use site of
               this symbol does not carry any attributes either. Thus, we need
               not worry about propagating these attributes through inlining. *)
            check_no_producer_attributes x;
            (* We inline the rule [r] into [b] between [prefix] and [xs]. *)
            List.rev prefix, nt, r, id, xs
          end
          else
            chop_inline (x :: prefix, xs)
        with Not_found ->
          chop_inline (x :: prefix, xs)
  in

152
  let rec find_inline_producer b =
153 154
    let prefix, nt, p, psym, suffix = chop_inline ([], b.producers) in
    prefix, expand_rule nt p, nt, psym, suffix
155

156 157
  (* We have to rename the producers [producers] of the inlined production
     if they clash with the names of the producers of the host branch [b]. *)
158 159
  and rename_if_necessary b producers =

160 161
    (* Compute the set of the names already in use in the host branch. *)
    let used = names b.producers in
162

163 164 165
    (* Compute a renaming and the new names of the inlined producers. *)
    let phi, _used, producers' =
      List.fold_left (fun (phi, used, producers) producer ->
166
        let x = producer_identifier producer in
167 168 169 170 171
        if StringSet.mem x used then
          let x' = fresh used x in
          (x, x') :: phi,
          StringSet.add x' used,
          { producer with producer_identifier = x' } :: producers
172
        else
173 174
          (phi, used, producer :: producers)
      ) ([], used, []) producers
175
    in
176
    phi, List.rev producers'
177 178

  (* Inline the non terminals that can be inlined in [b]. We use the
179 180
     ListMonad to combine the results. *)
  and expand_branch (b : branch) : branch ListMonad.m =
181
    try
182
      (* [c] is the identifier under which the callee is known. *)
183
      let prefix, p, nt, c, suffix = find_inline_producer b in
184 185
      use_inline := true;
      (* Inline a branch of [nt] at position [prefix] ... [suffix] in
186
         the branch [b]. *)
187
      let inline_branch pb =
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213

        (* 2015/11/18. The interaction of %prec and %inline is not documented.
           It used to be the case that we would disallow marking a production
           both %inline and %prec. Now, we allow it, but we check that (1) it
           is inlined at the last position of the host production and (2) the
           host production does not already have a %prec annotation. *)
        pb.branch_prec_annotation |> Option.iter (fun callee_prec ->
          (* The callee has a %prec annotation. *)
          (* Check condition 1. *)
          if List.length suffix > 0 then
            Error.error [ Positions.position callee_prec; b.branch_position ]
              "this production carries a %%prec annotation,\n\
               and the nonterminal symbol %s is marked %%inline.\n\
               For this reason, %s can be used only in tail position."
              nt nt;
          (* Check condition 2. *)
          b.branch_prec_annotation |> Option.iter (fun caller_prec ->
            Error.error [ Positions.position callee_prec; Positions.position caller_prec ]
              "this production carries a %%prec annotation,\n\
               and the nonterminal symbol %s is marked %%inline.\n\
               For this reason, %s cannot be used in a production\n\
               which itself carries a %%prec annotation."
              nt nt
          )
        );

214
        (* Rename the producers of this branch if they conflict with
215 216
           the name of the host's producers. *)
        let phi, inlined_producers = rename_if_necessary b pb.producers in
217

218 219 220 221
        (* After inlining, the producers are as follows. *)
        let producers = prefix @ inlined_producers @ suffix in
        let index2id = index2id producers in

222 223 224
        let prefix = List.length prefix
        and inlined_producers = List.length inlined_producers in

225 226 227 228 229 230 231 232 233 234
        (* Define how the start and end positions of the inner production should
           be computed once it is inlined into the outer production. These
           definitions of [startp] and [endp] are then used to transform
           [$startpos] and [$endpos] in the inner production and to transform
           [$startpos(x)] and [$endpos(x)] in the outer production. *)

        (* 2015/11/04. We ensure that positions are computed in the same manner,
           regardless of whether inlining is performed. *)

        let startp =
235
          if inlined_producers > 0 then
236 237 238
            (* If the inner production is non-epsilon, things are easy. The start
               position of the inner production is the start position of its first
               element. *)
239 240
            RightNamed (index2id prefix), WhereStart
          else if prefix > 0 then
241 242 243 244
            (* If the inner production is epsilon, we are supposed to compute the
               end position of whatever comes in front of it. If the prefix is
               nonempty, then this is the end position of the last symbol in the
               prefix. *)
245
            RightNamed (index2id (prefix - 1)), WhereEnd
246 247
          else
            (* If the inner production is epsilon and the prefix is empty, then
248
               we need to look up the end position stored in the top stack cell.
249
               This is the reason why we need the keyword [$endpos($0)]. It is
250 251 252
               required in this case to preserve the semantics of $startpos and
               $endpos. *)
            Before, WhereEnd
253 254 255 256 257

          (* Note that, to contrary to intuition perhaps, we do NOT have that
             if the prefix is empty, then the start position of the inner
             production is the start production of the outer production.
             This is true only if the inner production is non-epsilon. *)
258

259
        in
260

261
        let endp =
262 263 264 265 266 267 268 269 270 271 272
          if inlined_producers > 0 then
            (* If the inner production is non-epsilon, things are easy, then its end
               position is the end position of its last element. *)
            RightNamed (index2id (prefix + inlined_producers - 1)), WhereEnd
          else
            (* If the inner production is epsilon, then its end position is equal
               to its start position. *)
            startp

        in

273
        (* We must also transform [$endpos($0)] if it used by the inner
274 275 276
           production. It refers to the end position of the stack cell
           that comes before the inner production. So, if the prefix is
           non-empty, then it translates to the end position of the last
277
           element of the prefix. Otherwise, it translates to [$endpos($0)]. *)
278

279
        let beforeendp =
280 281 282 283 284 285
          if prefix > 0 then
            RightNamed (index2id (prefix - 1)), WhereEnd
          else
            Before, WhereEnd
        in

286 287 288 289 290 291
        (* Rename the outer and inner semantic action. *)
        let outer_action =
          Action.rename (rename_sw_outer (c, startp, endp)) [] b.action
        and action' =
          Action.rename (rename_sw_inner beforeendp) phi pb.action
        in
292

293 294 295 296 297 298 299 300 301 302 303 304 305
        (* 2015/11/18. If the callee has a %prec annotation (which implies
           the caller does not have one, and the callee appears in tail
           position in the caller) then the annotation is inherited. This
           seems reasonable, but remains undocumented. *)
        let branch_prec_annotation =
          match pb.branch_prec_annotation with
          | (Some _) as annotation ->
              assert (b.branch_prec_annotation = None);
              annotation
          | None ->
              b.branch_prec_annotation
        in

306 307 308
        { b with
          producers;
          action = Action.compose c action' outer_action;
309
          branch_prec_annotation;
310
        }
311 312 313 314
      in
      List.map inline_branch p.branches >>= expand_branch

    with NoInlining ->
315
      return b
316

317
  (* Expand a rule if necessary. *)
318 319
  and expand_rule k r =
    try
320
      (match expanded_state k with
321 322 323 324 325 326
         | BeingExpanded ->
             Error.error
               r.positions
               "there is a cycle in the definition of %s." k
         | Expanded r ->
             r)
327 328 329 330 331
    with Not_found ->
      mark_as_being_expanded k;
      mark_as_expanded k { r with branches = r.branches >>= expand_branch }
  in

332
  (* If we are in Coq mode, %inline is forbidden. *)
333 334
  let _ =
    if Settings.coq then
335 336
      StringMap.iter
        (fun _ r ->
337
           if r.inline_flag then
338
             Error.error r.positions
339
               "%%inline is not supported by the Coq back-end.")
340 341 342
        grammar.rules
  in

343 344
  (* To expand a grammar, we expand all its rules and remove
     the %inline rules. *)
345
  let expanded_rules =
346 347 348
    StringMap.mapi expand_rule grammar.rules
  in

349 350 351 352 353 354
  let useful (k : string) : bool =
    try
      not (StringMap.find k grammar.rules).inline_flag
    with Not_found ->
      true (* could be: assert false? *)
  in
355

356 357 358 359 360 361 362 363 364 365 366
  (* Remove %on_error_reduce declarations for symbols that are expanded away,
     and warn about them, at the same time. *)
  let useful_warn (k : string) : bool =
    let u = useful k in
    if not u then
      Error.grammar_warning []
        "the declaration %%on_error_reduce %s\n\
          has no effect, since this symbol is marked %%inline and is expanded away." k;
    u
  in

367 368 369
  { grammar with
      rules = StringMap.filter (fun _ r -> not r.inline_flag) expanded_rules;
      types = StringMap.filter (fun k _ -> useful k) grammar.types;
370
      on_error_reduce = StringMap.filter (fun k _ -> useful_warn k) grammar.on_error_reduce;
371
  }, !use_inline