keywordExpansion.ml 8.98 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16 17 18 19 20 21 22 23 24 25 26 27
open UnparameterizedSyntax
open Keyword
open IL
open CodeBits

(* [posvar_ keyword] constructs the conventional name of the variable
   that stands for the position keyword [keyword]. *)

let posvar_ = function
  | Position (subject, where, flavor) ->
      posvar subject where flavor
  | _ ->
      assert false (* [posvar_] should be applied to a position keyword *)

POTTIER Francois's avatar
POTTIER Francois committed
28 29 30 31 32 33
(* [symbolstartpos producers i n] constructs an expression which, beginning at
   index [i], looks for the first non-empty producer and returns its start
   position. If none is found, this expression returns the end position of the
   right-hand side. This computation is modeled after the function
   [Parsing.symbol_start_pos] in OCaml's standard library. *)

34
(* This cascade of [if] constructs could be quite big, and this could be a
35 36 37 38 39 40
   problem in terms of code size. Fortunately, we can optimize this code by
   computing, ahead of time, the outcome of certain comparisons. We assume that
   the lexer never produces a token whose start and end positions are the same.
   There follows that a non-nullable symbol cannot have the same start and end
   positions. Conversely, a symbol that generates (a subset of) the language
   {epsilon} must have the same start and end positions. *)
POTTIER Francois's avatar
POTTIER Francois committed
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56

(* Although this code is modeled after [Parsing.symbol_start_pos], we compare
   positions using physical equality, whereas they use structural equality. If
   for some reason a symbol has start and end positions that are structurally
   equal but physically different, then a difference will be observable.
   However, this is very unlikely. It would mean that a token has the same start
   and end positions (and furthermore, this position has been re-allocated). *)

(* The reason why we expand [$symbolstartpos] away prior to inlining is that we
   want its meaning to be preserved by inlining. If we tried to preserve this
   keyword through the inlining phase, then (I suppose) we would have to introduce
   a family of keywords [$symbolstartpos(i, j)], computing over the interval from
   [i] to [j], and the preservation would not be exact -- because a nonempty
   symbol, once inlined, can be seen to be a sequence of empty and nonempty
   symbols. *)

57 58
let rec symbolstartpos ((nullable, epsilon) as analysis) producers i n
: IL.expr * KeywordSet.t =
POTTIER Francois's avatar
POTTIER Francois committed
59 60 61 62 63
  if i = n then
    (* Return [$endpos]. *)
    let keyword = Position (Left, WhereEnd, FlavorPosition) in
    EVar (posvar_ keyword), KeywordSet.singleton keyword
  else
64 65 66 67 68 69
    (* [symbol] is the symbol that appears in the right-hand side at position i.
       [x] is the identifier that is bound to it. We generate code that compares
       [$startpos($i)] and [$endpos($i)]. If they differ, we return
       [$startpos($i)]. Otherwise, we continue. Furthermore, as noted above, if
       [symbol] is not nullable, then we know that the start and end positions
       must differ, so we optimize this case. *)
70 71 72
    let producer = List.nth producers i in
    let symbol = producer_symbol producer
    and x = producer_identifier producer in
POTTIER Francois's avatar
POTTIER Francois committed
73 74
    let startp = Position (RightNamed x, WhereStart, FlavorPosition)
    and   endp = Position (RightNamed x, WhereEnd,   FlavorPosition) in
75
    if not (nullable symbol) then
76
      (* The start and end positions must differ. *)
POTTIER Francois's avatar
POTTIER Francois committed
77
      EVar (posvar_ startp),
78 79
      KeywordSet.singleton startp
    else
80 81 82 83 84 85 86 87 88 89 90 91 92
      let continue, keywords = symbolstartpos analysis producers (i + 1) n in
      if epsilon symbol then
        (* The start and end positions must be the same. *)
        continue,
        keywords
      else
        (* In the general case, a runtime test is required. *)
        EIfThenElse (
          EApp (EVar "Pervasives.(!=)", [ EVar (posvar_ startp); EVar (posvar_ endp) ]),
          EVar (posvar_ startp),
          continue
        ),
        KeywordSet.add startp (KeywordSet.add endp keywords)
POTTIER Francois's avatar
POTTIER Francois committed
93

94 95 96 97 98 99 100 101 102 103 104
(* [define keyword1 f keyword2] macro-expands [keyword1] as [f(keyword2)],
   where [f] is a function of expressions to expressions. *)

let define keyword1 f keyword2 =
  Action.define
    keyword1
    (KeywordSet.singleton keyword2)
    (mlet
       [ PVar (posvar_ keyword1) ]
       [ f (EVar (posvar_ keyword2)) ])

POTTIER Francois's avatar
POTTIER Francois committed
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
(* An [ofs] keyword is expanded away. It is defined in terms of the
   corresponding [pos] keyword. *)

let expand_ofs keyword action =
  match keyword with
  | Position (subject, where, FlavorOffset) ->
      define keyword
        (fun e -> ERecordAccess (e, "Lexing.pos_cnum"))
        (Position (subject, where, FlavorPosition))
        action
  | _ ->
      action

(* [$symbolstartpos] is expanded into a cascade of [if] constructs, modeled
   after [Parsing.symbol_start_pos]. *)

121
let expand_symbolstartpos analysis producers n keyword action =
POTTIER Francois's avatar
POTTIER Francois committed
122 123
  match keyword with
  | Position (Left, WhereSymbolStart, FlavorPosition) ->
124
      let expansion, keywords = symbolstartpos analysis producers 0 n in
POTTIER Francois's avatar
POTTIER Francois committed
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
      Action.define keyword keywords
        (mlet [ PVar (posvar_ keyword) ] [ expansion ])
        action
  | Position (RightNamed _, WhereSymbolStart, FlavorPosition) ->
      (* [$symbolstartpos(x)] does not exist. *)
      assert false
  | _ ->
      action

(* [$startpos] and [$endpos] are expanded away.  *)

let expand_startend producers n keyword action =
  match keyword with
  | Position (Left, WhereStart, flavor) ->

      (* [$startpos] is defined as [$startpos($1)] if this production has
         nonzero length and [$endpos($0)] otherwise. *)
      define keyword (fun e -> e) (
        if n > 0 then
144
          let x = producer_identifier (List.hd producers) in
POTTIER Francois's avatar
POTTIER Francois committed
145 146 147 148 149 150 151 152 153 154 155
          Position (RightNamed x, WhereStart, flavor)
        else
          Position (Before, WhereEnd, flavor)
      ) action

  | Position (Left, WhereEnd, flavor) ->

      (* [$endpos] is defined as [$endpos($n)] if this production has
         nonzero length and [$endpos($0)] otherwise. *)
      define keyword (fun e -> e) (
        if n > 0 then
156
          let x = producer_identifier (List.hd (List.rev producers)) in
POTTIER Francois's avatar
POTTIER Francois committed
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
          Position (RightNamed x, WhereEnd, flavor)
        else
          Position (Before, WhereEnd, flavor)
      ) action

  | _ ->
      action

(* [expand_round] performs one round of expansion on [action], using [f] as a
   rewriting rule. *)

let expand_round f action =
  KeywordSet.fold f (Action.keywords action) action

(* [expand_action] performs macro-expansion in [action]. We do this in several
   rounds: first, expand the [ofs] keywords away; then, expand [symbolstart]
   away; then, expand the rest. We do this in this order because each round
   can cause new keywords to appear, which must eliminated by the following
   rounds. *)
176

177
let expand_action analysis producers action =
178
  let n = List.length producers in
POTTIER Francois's avatar
POTTIER Francois committed
179 180 181 182 183 184 185 186

  (* The [ofs] keyword family is defined in terms of the [pos] family by
     accessing the [pos_cnum] field. Expand these keywords away first. *)

  let action = expand_round expand_ofs action in

  (* Expand [$symbolstartpos] away. *)

187
  let action = expand_round (expand_symbolstartpos analysis producers n) action in
POTTIER Francois's avatar
POTTIER Francois committed
188 189 190 191 192 193

  (* Then, expand away the non-[ofs] keywords. *)

  let action = expand_round (expand_startend producers n) action in

  action
194

195 196 197
(* Silently analyze the grammar so as to find out which symbols are
   nullable and which symbols generate a subset of {epsilon}. This
   is used to optimize the expansion of $symbolstartpos. *)
198

199
let analysis grammar =
200 201 202 203
  let module G = GrammarFunctor.Make(struct
    let grammar = grammar
    let verbose = false
  end) in
204 205
  let lookup (nt : Syntax.symbol) : G.Symbol.t =
    try G.Symbol.lookup nt with Not_found -> assert false
206
  in
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
  let nullable nt : bool =
    G.Analysis.nullable_symbol (lookup nt)
  and epsilon nt : bool =
    G.TerminalSet.is_empty (G.Analysis.first_symbol (lookup nt))
  in
  nullable, epsilon

(* Put everything together. *)

let expand_branch analysis branch =
  { branch with action = expand_action analysis branch.producers branch.action }

let expand_rule analysis rule =
  { rule with branches = List.map (expand_branch analysis) rule.branches }

let expand_grammar grammar =
  let analysis = analysis grammar in
  { grammar with rules = StringMap.map (expand_rule analysis) grammar.rules }