keywordExpansion.ml 10.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
open BasicSyntax
15 16 17 18 19 20 21 22 23 24 25 26 27
open Keyword
open IL
open CodeBits

(* [posvar_ keyword] constructs the conventional name of the variable
   that stands for the position keyword [keyword]. *)

let posvar_ = function
  | Position (subject, where, flavor) ->
      posvar subject where flavor
  | _ ->
      assert false (* [posvar_] should be applied to a position keyword *)

28 29 30 31 32 33
(* [symbolstartpos producers i n] constructs an expression which, beginning at
   index [i], looks for the first non-empty producer and returns its start
   position. If none is found, this expression returns the end position of the
   right-hand side. This computation is modeled after the function
   [Parsing.symbol_start_pos] in OCaml's standard library. *)

34
(* This cascade of [if] constructs could be quite big, and this could be a
35 36 37 38 39 40
   problem in terms of code size. Fortunately, we can optimize this code by
   computing, ahead of time, the outcome of certain comparisons. We assume that
   the lexer never produces a token whose start and end positions are the same.
   There follows that a non-nullable symbol cannot have the same start and end
   positions. Conversely, a symbol that generates (a subset of) the language
   {epsilon} must have the same start and end positions. *)
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56

(* Although this code is modeled after [Parsing.symbol_start_pos], we compare
   positions using physical equality, whereas they use structural equality. If
   for some reason a symbol has start and end positions that are structurally
   equal but physically different, then a difference will be observable.
   However, this is very unlikely. It would mean that a token has the same start
   and end positions (and furthermore, this position has been re-allocated). *)

(* The reason why we expand [$symbolstartpos] away prior to inlining is that we
   want its meaning to be preserved by inlining. If we tried to preserve this
   keyword through the inlining phase, then (I suppose) we would have to introduce
   a family of keywords [$symbolstartpos(i, j)], computing over the interval from
   [i] to [j], and the preservation would not be exact -- because a nonempty
   symbol, once inlined, can be seen to be a sequence of empty and nonempty
   symbols. *)

57 58
let rec symbolstartpos ((nullable, epsilon) as analysis) producers i n
: IL.expr * KeywordSet.t =
59 60 61 62 63
  if i = n then
    (* Return [$endpos]. *)
    let keyword = Position (Left, WhereEnd, FlavorPosition) in
    EVar (posvar_ keyword), KeywordSet.singleton keyword
  else
64 65 66 67 68 69
    (* [symbol] is the symbol that appears in the right-hand side at position i.
       [x] is the identifier that is bound to it. We generate code that compares
       [$startpos($i)] and [$endpos($i)]. If they differ, we return
       [$startpos($i)]. Otherwise, we continue. Furthermore, as noted above, if
       [symbol] is not nullable, then we know that the start and end positions
       must differ, so we optimize this case. *)
70 71 72
    let producer = List.nth producers i in
    let symbol = producer_symbol producer
    and x = producer_identifier producer in
73 74
    let startp = Position (RightNamed x, WhereStart, FlavorPosition)
    and   endp = Position (RightNamed x, WhereEnd,   FlavorPosition) in
75
    if not (nullable symbol) then
76
      (* The start and end positions must differ. *)
77
      EVar (posvar_ startp),
78 79
      KeywordSet.singleton startp
    else
80 81 82 83 84 85 86 87
      let continue, keywords = symbolstartpos analysis producers (i + 1) n in
      if epsilon symbol then
        (* The start and end positions must be the same. *)
        continue,
        keywords
      else
        (* In the general case, a runtime test is required. *)
        EIfThenElse (
88
          EApp (EVar "(!=)", [ EVar (posvar_ startp); EVar (posvar_ endp) ]),
89 90 91 92
          EVar (posvar_ startp),
          continue
        ),
        KeywordSet.add startp (KeywordSet.add endp keywords)
93

94 95 96 97 98 99 100 101 102 103 104
(* [define keyword1 f keyword2] macro-expands [keyword1] as [f(keyword2)],
   where [f] is a function of expressions to expressions. *)

let define keyword1 f keyword2 =
  Action.define
    keyword1
    (KeywordSet.singleton keyword2)
    (mlet
       [ PVar (posvar_ keyword1) ]
       [ f (EVar (posvar_ keyword2)) ])

105 106
(* A [loc] keyword is expanded away. *)

POTTIER Francois's avatar
POTTIER Francois committed
107 108 109 110
(* Since a location is represented as a pair of positions, $loc is sugar for
   the pair ($startpos, $endpos). (Similarly for $loc(x).) Furthermore, $sloc
   is sugar for the pair ($symbolstartpos, $endpos). *)

POTTIER Francois's avatar
POTTIER Francois committed
111
let define_as_tuple keyword keywords =
112 113 114 115 116 117 118 119 120 121
  Action.define
    keyword
    (List.fold_right KeywordSet.add keywords KeywordSet.empty)
    (mlet
       [ PVar (posvar_ keyword) ]
       [ ETuple (List.map (fun keyword -> EVar (posvar_ keyword)) keywords) ])

let expand_loc keyword action =
  match keyword with
  | Position (Left, WhereSymbolStart, FlavorLocation) -> (* $sloc *)
POTTIER Francois's avatar
POTTIER Francois committed
122
      define_as_tuple keyword
123 124 125 126
        [ Position (Left, WhereSymbolStart, FlavorPosition);
          Position (Left, WhereEnd, FlavorPosition) ]
        action
  | Position (subject, WhereStart, FlavorLocation) -> (* $loc, $loc(x) *)
POTTIER Francois's avatar
POTTIER Francois committed
127
      define_as_tuple keyword
128 129 130 131 132 133
        [ Position (subject, WhereStart, FlavorPosition);
          Position (subject, WhereEnd, FlavorPosition) ]
        action
  | _ ->
      action

134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
(* An [ofs] keyword is expanded away. It is defined in terms of the
   corresponding [pos] keyword. *)

let expand_ofs keyword action =
  match keyword with
  | Position (subject, where, FlavorOffset) ->
      define keyword
        (fun e -> ERecordAccess (e, "Lexing.pos_cnum"))
        (Position (subject, where, FlavorPosition))
        action
  | _ ->
      action

(* [$symbolstartpos] is expanded into a cascade of [if] constructs, modeled
   after [Parsing.symbol_start_pos]. *)

150
let expand_symbolstartpos analysis producers n keyword action =
151 152
  match keyword with
  | Position (Left, WhereSymbolStart, FlavorPosition) ->
153
      let expansion, keywords = symbolstartpos analysis producers 0 n in
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
      Action.define keyword keywords
        (mlet [ PVar (posvar_ keyword) ] [ expansion ])
        action
  | Position (RightNamed _, WhereSymbolStart, FlavorPosition) ->
      (* [$symbolstartpos(x)] does not exist. *)
      assert false
  | _ ->
      action

(* [$startpos] and [$endpos] are expanded away.  *)

let expand_startend producers n keyword action =
  match keyword with
  | Position (Left, WhereStart, flavor) ->

      (* [$startpos] is defined as [$startpos($1)] if this production has
         nonzero length and [$endpos($0)] otherwise. *)
      define keyword (fun e -> e) (
        if n > 0 then
173
          let x = producer_identifier (List.hd producers) in
174 175 176 177 178 179 180 181 182 183 184
          Position (RightNamed x, WhereStart, flavor)
        else
          Position (Before, WhereEnd, flavor)
      ) action

  | Position (Left, WhereEnd, flavor) ->

      (* [$endpos] is defined as [$endpos($n)] if this production has
         nonzero length and [$endpos($0)] otherwise. *)
      define keyword (fun e -> e) (
        if n > 0 then
185
          let x = producer_identifier (List.hd (List.rev producers)) in
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
          Position (RightNamed x, WhereEnd, flavor)
        else
          Position (Before, WhereEnd, flavor)
      ) action

  | _ ->
      action

(* [expand_round] performs one round of expansion on [action], using [f] as a
   rewriting rule. *)

let expand_round f action =
  KeywordSet.fold f (Action.keywords action) action

(* [expand_action] performs macro-expansion in [action]. We do this in several
POTTIER Francois's avatar
POTTIER Francois committed
201 202 203 204
   rounds: first, expand the [loc] keywords away; then, expand the [ofs]
   keywords away; then, expand [symbolstart] away; then, expand the rest. We
   do this in this order because each round can cause new keywords to appear,
   which must eliminated by the following rounds. *)
205

206
let expand_action analysis producers action =
207
  let n = List.length producers in
208

209 210 211 212
  (* Expand [loc] keywords away first. *)

  let action = expand_round expand_loc action in

213 214 215 216 217 218 219
  (* The [ofs] keyword family is defined in terms of the [pos] family by
     accessing the [pos_cnum] field. Expand these keywords away first. *)

  let action = expand_round expand_ofs action in

  (* Expand [$symbolstartpos] away. *)

220
  let action = expand_round (expand_symbolstartpos analysis producers n) action in
221 222 223 224 225 226

  (* Then, expand away the non-[ofs] keywords. *)

  let action = expand_round (expand_startend producers n) action in

  action
227

228 229 230
(* Silently analyze the grammar so as to find out which symbols are
   nullable and which symbols generate a subset of {epsilon}. This
   is used to optimize the expansion of $symbolstartpos. *)
231

232
let analysis grammar =
233 234 235 236
  let module G = GrammarFunctor.Make(struct
    let grammar = grammar
    let verbose = false
  end) in
237 238
  let lookup (nt : Syntax.symbol) : G.Symbol.t =
    try G.Symbol.lookup nt with Not_found -> assert false
239
  in
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
  let nullable nt : bool =
    G.Analysis.nullable_symbol (lookup nt)
  and epsilon nt : bool =
    G.TerminalSet.is_empty (G.Analysis.first_symbol (lookup nt))
  in
  nullable, epsilon

(* Put everything together. *)

let expand_branch analysis branch =
  { branch with action = expand_action analysis branch.producers branch.action }

let expand_rule analysis rule =
  { rule with branches = List.map (expand_branch analysis) rule.branches }

let expand_grammar grammar =
  let analysis = analysis grammar in
  { grammar with rules = StringMap.map (expand_rule analysis) grammar.rules }