basicPrinter.ml 14.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
open Printf
15 16 17
open Positions
open Syntax
open Stretch
18
open BasicSyntax
19 20
open Settings

21
(* When the original grammar is split over several files, it may be IMPOSSIBLE
POTTIER Francois's avatar
POTTIER Francois committed
22
   to print it out into a single file, as that would introduce a total ordering
23 24 25 26
   (between rules, between priority declarations, between %on_error_reduce
   declarations) that did not exist originally. We currently do not warn about
   this problem. Nobody has ever complained about it. *)

27 28
(* -------------------------------------------------------------------------- *)

29 30 31 32 33 34 35 36
(* The printing mode. *)

(* [PrintNormal] is the normal mode: the result is a Menhir grammar.

   [PrintForOCamlyacc] is close to the normal mode, but attempts to produce
   ocamlyacc-compatible output. This means, in particular, that we cannot bind
   identifiers to semantic values, but must use [$i] instead.

37
   [PrintUnitActions _] causes all OCaml code to be suppressed: the semantic
38 39 40 41
   actions are replaced with unit actions, preludes and postludes disappear,
   %parameter declarations disappear. Every %type declaration carries the
   [unit] type.

42 43
   [PrintUnitActions true] in addition declares that every token carries a
   semantic value of type [unit].
44 45 46 47 48 49 50 51

 *)

module Print (X : sig val mode : Settings.print_mode end) = struct
open X

(* -------------------------------------------------------------------------- *)

52 53 54 55 56 57 58 59 60 61 62
(* Printing an OCaml type. *)

let print_ocamltype ty : string =
  Printf.sprintf " <%s>" (
    match ty with
    | Declared stretch ->
        stretch.stretch_raw_content
    | Inferred t ->
        t
    )

63
let print_ocamltype ty : string =
64 65 66 67
  let s = print_ocamltype ty in
  match mode with
  | PrintForOCamlyacc ->
      (* ocamlyacc does not allow a %type declaration to contain
68
         a new line. Replace it with a space. *)
69 70
      String.map (function '\r' | '\n' -> ' ' | c -> c) s
  | PrintNormal
71
  | PrintUnitActions _ ->
72 73
      s

74 75 76 77 78
(* -------------------------------------------------------------------------- *)

(* Printing the type of a terminal symbol. *)

let print_token_type (prop : token_properties) =
79 80
  match mode with
  | PrintNormal
81
  | PrintForOCamlyacc
82
  | PrintUnitActions false ->
83
      Misc.o2s prop.tk_ocamltype print_ocamltype
84
  | PrintUnitActions true ->
85 86
      "" (* omitted ocamltype after %token means <unit> *)

87 88 89 90 91
(* -------------------------------------------------------------------------- *)

(* Printing the type of a nonterminal symbol. *)

let print_nonterminal_type ty =
92
  match mode with
93 94
  | PrintNormal
  | PrintForOCamlyacc ->
95
      print_ocamltype ty
96
  | PrintUnitActions _ ->
97 98
      " <unit>"

99 100 101 102 103
(* -------------------------------------------------------------------------- *)

(* Printing a binding for a semantic value. *)

let print_binding id =
104 105 106
  match mode with
  | PrintNormal ->
      id ^ " = "
107
  | PrintForOCamlyacc
108
  | PrintUnitActions _ ->
109
      (* need not, or must not, bind a semantic value *)
110 111
      ""

112 113
(* -------------------------------------------------------------------------- *)

114 115
(* Testing whether it is permitted to print OCaml code (semantic actions,
   prelude, postlude). *)
116

117
let if_ocaml_code_permitted f x =
118 119 120 121
  match mode with
  | PrintNormal
  | PrintForOCamlyacc ->
      f x
122
  | PrintUnitActions _ ->
123 124 125 126
      (* In these modes, all OCaml code is omitted: semantic actions,
         preludes, postludes, etc. *)
      ()

127 128
(* -------------------------------------------------------------------------- *)

129 130 131 132 133 134 135 136 137 138 139 140
(* Testing whether attributes should be printed. *)

let attributes_printed : bool =
  match mode with
  | PrintNormal
  | PrintUnitActions _ ->
      true
  | PrintForOCamlyacc ->
      false

(* -------------------------------------------------------------------------- *)

141 142
(* Printing a semantic action. *)

143
let print_semantic_action f g branch =
144 145
  let e = Action.to_il_expr branch.action in
  match mode with
146
  | PrintUnitActions _ ->
147 148 149 150 151 152
      (* In the unit-action modes, we print a pair of empty braces, which is fine. *)
      ()
  | PrintNormal ->
      Printer.print_expr f e
  | PrintForOCamlyacc ->
       (* In ocamlyacc-compatibility mode, the code must be wrapped in
153
          [let]-bindings whose right-hand side uses the [$i] keywords. *)
154
      let bindings =
155 156 157
        List.mapi (fun i producer ->
          let id = producer_identifier producer
          and symbol = producer_symbol producer in
158
          (* Test if [symbol] is a terminal symbol whose type is [unit]. *)
159 160 161 162 163
          let is_unit_token =
            try
              let prop = StringMap.find symbol g.tokens in
              prop.tk_ocamltype = None
            with Not_found ->
164
              symbol = "error"
165
          in
166
          (* Define the variable [id] as a synonym for [$(i+1)]. *)
167 168 169 170 171 172 173 174 175
          (* As an exception to this rule, if [symbol] is a terminal symbol
             which has been declared *not* to carry a semantic value, then
             we cannot use [$(i+1)] -- ocamlyacc does not allow it -- so we
             use the unit value instead. *)
          IL.PVar id,
          if is_unit_token then
            IL.EUnit
          else
            IL.EVar (sprintf "$%d" (i + 1))
176 177
        ) branch.producers
      in
178 179 180 181 182 183 184
      (* The identifiers that we bind are pairwise distinct. *)
      (* We must use simultaneous bindings (that is, a [let/and] form), as
          opposed to a cascade of [let] bindings. Indeed, ocamlyacc internally
          translates [$i] to [_i] (just like us!), so name captures will occur
          unless we restrict the use of [$i] to the outermost scope. (Reported
          by Kenji Maillard.) *)
      let e = CodeBits.eletand (bindings, e) in
185
      Printer.print_expr f e
186 187 188

(* -------------------------------------------------------------------------- *)

189
(* Printing preludes and postludes. *)
190

191 192
let print_preludes f g =
  List.iter (fun prelude ->
193
    fprintf f "%%{%s%%}\n" prelude.stretch_raw_content
194 195
  ) g.preludes

196 197 198 199 200
let print_postludes f g =
  List.iter (fun postlude ->
    fprintf f "%s\n" postlude.stretch_raw_content
  ) g.postludes

201 202 203 204
(* -------------------------------------------------------------------------- *)

(* Printing %start declarations. *)

205
let print_start_symbols f g =
206
  StringSet.iter (fun symbol ->
207
    fprintf f "%%start %s\n" (Misc.normalize symbol)
208
  ) g.start_symbols
209

210 211 212 213
(* -------------------------------------------------------------------------- *)

(* Printing %parameter declarations. *)

214
let print_parameter f stretch =
215
  fprintf f "%%parameter<%s>\n" stretch.stretch_raw_content
216

217
let print_parameters f g =
218 219 220 221
  match mode with
  | PrintNormal ->
      List.iter (print_parameter f) g.parameters
  | PrintForOCamlyacc
222
  | PrintUnitActions _ ->
223 224 225 226 227 228 229
       (* %parameter declarations are not supported by ocamlyacc,
          and presumably become useless when the semantic actions
          are removed. *)
      ()

(* -------------------------------------------------------------------------- *)

230 231 232 233 234 235 236 237 238 239 240 241 242
(* Printing attributes. *)

let print_attribute f ((name, payload) : attribute) =
  if attributes_printed then
    fprintf f " [@%s %s]"
      (Positions.value name)
      payload.stretch_raw_content

let print_attributes f attrs =
  List.iter (print_attribute f) attrs

(* -------------------------------------------------------------------------- *)

243
(* Printing token declarations and precedence declarations. *)
244

245 246 247 248 249 250 251 252 253 254
let print_assoc = function
  | LeftAssoc ->
      Printf.sprintf "%%left"
  | RightAssoc ->
      Printf.sprintf "%%right"
  | NonAssoc ->
      Printf.sprintf "%%nonassoc"
  | UndefinedAssoc ->
      ""

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
let compare_pairs compare1 compare2 (x1, x2) (y1, y2) =
  let c = compare1 x1 y1 in
  if c <> 0 then c
  else compare2 x2 y2

let compare_tokens (_token, prop) (_token', prop') =
  match prop.tk_precedence, prop'.tk_precedence with
  | UndefinedPrecedence, UndefinedPrecedence ->
      0
  | UndefinedPrecedence, PrecedenceLevel _ ->
      -1
  | PrecedenceLevel _, UndefinedPrecedence ->
      1
  | PrecedenceLevel (m, v, _, _), PrecedenceLevel (m', v', _, _) ->
      compare_pairs InputFile.compare_input_files Pervasives.compare (m, v) (m', v')

271
let print_tokens f g =
272 273
  (* Print the %token declarations. *)
  StringMap.iter (fun token prop ->
274
    if prop.tk_is_declared then
275 276 277 278
      fprintf f "%%token%s %s%a\n"
        (print_token_type prop)
        token
        print_attributes prop.tk_attributes
279
  ) g.tokens;
280 281 282 283 284 285 286 287 288 289 290
  (* Sort the tokens wrt. precedence, and group them into levels. *)
  let levels : (string * token_properties) list list =
    Misc.levels compare_tokens (List.sort compare_tokens (
      StringMap.bindings g.tokens
    ))
  in
  (* Print the precedence declarations: %left, %right, %nonassoc. *)
  List.iter (fun level ->
    let (_token, prop) = try List.hd level with Failure _ -> assert false in
    (* Do nothing about the tokens that have no precedence. *)
    if prop.tk_precedence <> UndefinedPrecedence then begin
291
      fprintf f "%s" (print_assoc prop.tk_associativity);
292
      List.iter (fun (token, _prop) ->
293
        fprintf f " %s" token
294
      ) level;
295
      fprintf f "\n"
296 297
    end
  ) levels
298

299 300 301 302
(* -------------------------------------------------------------------------- *)

(* Printing %type declarations. *)

303
let print_types f g =
304
  StringMap.iter (fun symbol ty ->
305
    fprintf f "%%type%s %s\n"
306
      (print_nonterminal_type ty)
307 308 309
      (Misc.normalize symbol)
  ) g.types

310 311 312 313
(* -------------------------------------------------------------------------- *)

(* Printing branches and rules. *)

314 315 316 317 318 319 320
let print_producer sep f producer =
  fprintf f "%s%s%s%a"
    (sep())
    (print_binding (producer_identifier producer))
    (Misc.normalize (producer_symbol producer))
    print_attributes (producer_attributes producer)

321
let print_branch f g branch =
322 323
  (* Print the producers. *)
  let sep = Misc.once "" " " in
324
  List.iter (print_producer sep f) branch.producers;
325 326 327 328 329 330
  (* Print the %prec annotation, if there is one. *)
  Option.iter (fun x ->
    fprintf f " %%prec %s" x.value
  ) branch.branch_prec_annotation;
  (* Newline, indentation, semantic action. *)
  fprintf f "\n    {";
331
  print_semantic_action f g branch;
332
  fprintf f "}\n"
333

334 335 336
(* Because the resolution of reduce/reduce conflicts is implicitly dictated by
   the order in which productions appear in the grammar, the printer should be
   careful to preserve this order. *)
337 338 339 340 341 342 343 344

(* 2016/08/25: As noted above, when two productions originate in different files,
   we have a problem. We MUST print them in some order, even though they should
   be incomparable. In that case, we use the order in which the source files are
   specified on the command line. However, this behavior is undocumented, and
   should not be exploited. (In previous versions of Menhir, the function passed
   to [List.sort] was not transitive, so it did not make any sense!) *)

345 346
let compare_branch_production_levels bpl bpl' =
  match bpl, bpl' with
347 348 349
  | ProductionLevel (m, l), ProductionLevel (m', l') ->
      compare_pairs InputFile.compare_input_files Pervasives.compare (m, l) (m', l')

350 351 352
let compare_branches (b : branch) (b' : branch) =
  compare_branch_production_levels b.branch_production_level b'.branch_production_level

353 354 355 356 357 358 359 360 361 362 363
let compare_rules (_nt, (r : rule)) (_nt', (r' : rule)) =
  match r.branches, r'.branches with
  | [], [] ->
      0
  | [], _ ->
      -1
  | _, [] ->
      1
  | b :: _, b' :: _ ->
      (* To compare two rules, it suffices to compare their first productions. *)
      compare_branches b b'
364

365 366 367 368 369 370 371 372 373 374
let print_rule f g (nt, r) =
  fprintf f "\n%s%a:\n" (Misc.normalize nt) print_attributes r.attributes;
  (* Menhir accepts a leading "|", but bison does not. Let's not print it.
     So, we print a bar-separated list. *)
  let sep = Misc.once ("  ") ("| ") in
  List.iter (fun br ->
    fprintf f "%s" (sep());
    print_branch f g br
  ) r.branches

375
let print_rules f g =
376
  let rules = List.sort compare_rules (StringMap.bindings g.rules) in
377
  List.iter (print_rule f g) rules
378

379 380 381 382
(* -------------------------------------------------------------------------- *)

(* Printing %on_error_reduce declarations. *)

383 384 385 386 387 388 389 390 391 392
let print_on_error_reduce_declarations f g =
  let cmp (_nt, oel) (_nt', oel') =
    compare_branch_production_levels oel oel'
  in
  let levels : (string * on_error_reduce_level) list list =
    Misc.levels cmp (List.sort cmp (
      StringMap.bindings g.on_error_reduce
    ))
  in
  List.iter (fun level ->
393
    fprintf f "%%on_error_reduce";
394
    List.iter (fun (nt, _level) ->
395
      fprintf f " %s" nt
396
    ) level;
397
    fprintf f "\n"
398 399
  ) levels

400 401 402
let print_on_error_reduce_declarations f g =
  match mode with
  | PrintNormal
403
  | PrintUnitActions _ ->
404 405 406 407 408 409 410
      print_on_error_reduce_declarations f g
  | PrintForOCamlyacc ->
      (* %on_error_reduce declarations are not supported by ocamlyacc *)
      ()

(* -------------------------------------------------------------------------- *)

411 412 413 414 415 416 417 418 419 420 421 422 423
(* Printing %attribute declarations. *)

let print_grammar_attribute f ((name, payload) : attribute) =
  if attributes_printed then
    fprintf f "%%[@%s %s]\n"
      (Positions.value name)
      payload.stretch_raw_content

let print_grammar_attributes f g =
  List.iter (print_grammar_attribute f) g.gr_attributes

(* -------------------------------------------------------------------------- *)

424 425
(* The main entry point. *)

426
let print f g =
427
  print_parameters f g;
428
  if_ocaml_code_permitted (print_preludes f) g;
429
  print_start_symbols f g;
430 431
  print_tokens f g;
  print_types f g;
432
  print_on_error_reduce_declarations f g;
433
  print_grammar_attributes f g;
434
  fprintf f "%%%%\n";
435
  print_rules f g;
436
  fprintf f "\n%%%%\n";
437 438 439 440 441 442 443
  if_ocaml_code_permitted (print_postludes f) g

end

let print mode =
  let module P = Print(struct let mode = mode end) in
  P.print