basicSyntax.ml 7.16 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15
open Syntax

16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
(* This is the abstract syntax for an unparameterized grammar, that is,
   a grammar that does not have any parameterized nonterminal symbols.
   Such a grammar is obtained as the result of an expansion phase, which
   is implemented in [ParameterizedGrammar]. *)

(* In an unparameterized grammar, %attribute declarations can be desugared
   away. This is also done during the above-mentioned expansion phase.

   Thus, in an unparameterized grammar, attributes can be attached in the
   following places:

   - with the grammar:          field [gr_attributes] of [grammar]
   - with a terminal symbol:    field [tk_attributes] of [token_properties]
   - with a nonterminal symbol: field [attributes] of [rule]
   - with a producer:           field [producer_attributes] of [producer]   *)

(* ------------------------------------------------------------------------ *)

(* A producer is a pair of identifier and a symbol. In concrete syntax, it
   could be [e = expr], for instance. It carries a number of attributes. *)

type producer = {
38 39 40 41 42 43 44 45
    producer_identifier : identifier;
    producer_symbol     : symbol;
    producer_attributes : attributes;
  }

type producers =
  producer list

46 47 48 49 50 51 52 53 54 55 56 57 58
(* ------------------------------------------------------------------------ *)

(* A branch contains a series of producers and a semantic action. It is the
   same as in the surface syntax; see [Syntax]. *)

type branch = {
    branch_position         : Positions.t;
    producers               : producers;
    action                  : action;
    branch_prec_annotation  : branch_prec_annotation;
    branch_production_level : branch_production_level
  }

59 60 61
type branches =
  branch list

62 63
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
64
(* A rule consists mainly of several branches. In contrast with the surface
65 66 67 68 69
   syntax, it has no parameters. *)

(* The [%inline] flag is no longer relevant after [NonTerminalInlining]. *)

type rule = {
70
    branches    : branches;
71 72 73 74 75 76 77 78 79
    positions   : Positions.t list;
    inline_flag : bool;
    attributes  : attributes;
  }

(* ------------------------------------------------------------------------ *)

(* A grammar is essentially the same as in the surface syntax; see [Syntax].
   The main difference is that [%attribute] declarations, represented by
POTTIER Francois's avatar
POTTIER Francois committed
80
   the field [p_symbol_attributes] in the surface syntax, have disappeared. *)
81 82 83 84 85 86 87 88 89 90 91 92 93 94

type grammar =  {
    preludes        : Stretch.t list;
    postludes       : Syntax.postlude list;
    parameters      : Stretch.t list;
    start_symbols   : StringSet.t;
    types           : Stretch.ocamltype StringMap.t;
    tokens          : Syntax.token_properties StringMap.t;
    on_error_reduce : on_error_reduce_level StringMap.t;
    gr_attributes   : attributes;
    rules           : rule StringMap.t;
  }

(* -------------------------------------------------------------------------- *)
95

96 97 98 99 100 101
(* Accessors for the type [producer]. *)

let producer_identifier { producer_identifier } = producer_identifier
let producer_symbol { producer_symbol } = producer_symbol
let producer_attributes { producer_attributes } = producer_attributes

102 103
(* -------------------------------------------------------------------------- *)

POTTIER Francois's avatar
POTTIER Francois committed
104 105 106 107 108 109 110 111 112 113
(* A getter and a transformer for the field [branches] of the type [rule]. *)

let get_branches rule =
  rule.branches

let transform_branches f rule =
  { rule with branches = f rule.branches }

(* -------------------------------------------------------------------------- *)

114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
(* [tokens grammar] is a list of all (real) tokens in the grammar
   [grammar]. The special tokens "#" and "error" are not included.
   Pseudo-tokens (used in %prec declarations, but never declared
   using %token) are filtered out. *)

let tokens grammar =
  StringMap.fold (fun token properties tokens ->
    if properties.tk_is_declared then token :: tokens else tokens
  ) grammar.tokens []

(* [typed_tokens grammar] is analogous, but includes the OCaml type
   of each token. *)

let typed_tokens grammar =
  StringMap.fold (fun token properties tokens ->
    if properties.tk_is_declared then (token, properties.tk_ocamltype) :: tokens else tokens
  ) grammar.tokens []

132 133 134 135
(* [nonterminals grammar] is a list of all nonterminal symbols in the
   grammar [grammar]. *)

let nonterminals grammar : nonterminal list =
136
  StringMap.fold (fun nt _ rules -> nt :: rules) grammar.rules []
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155

(* [ocamltype_of_symbol grammar symbol] produces the OCaml type
   of the symbol [symbol] in the grammar [grammar], if it is known. *)

let ocamltype_of_symbol grammar symbol : Stretch.ocamltype option =
  try
    Some (StringMap.find symbol grammar.types)
  with Not_found ->
    None

(* [ocamltype_of_start_symbol grammar symbol] produces the OCaml type
   of the start symbol [symbol] in the grammar [grammar]. *)

let ocamltype_of_start_symbol grammar symbol : Stretch.ocamltype =
  try
    StringMap.find symbol grammar.types
  with Not_found ->
    (* Every start symbol should have a type. *)
    assert false
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173

(* [is_inline_symbol grammar symbol] tells whether [symbol] is a nonterminal
   symbol (as opposed to a terminal symbol) and is marked %inline. *)

let is_inline_symbol grammar symbol : bool =
  match StringMap.find symbol grammar.rules with
  | rule ->
      (* This is a nonterminal symbol. Test its %inline flag. *)
      rule.inline_flag
  | exception Not_found ->
      (* This is a terminal symbol. *)
      false

(* [is_inline_symbol grammar producer] tells whether [producer] represents a
   nonterminal symbol (as opposed to a terminal) and is marked %inline. *)

let is_inline_producer grammar producer =
  is_inline_symbol grammar (producer_symbol producer)
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190

(* -------------------------------------------------------------------------- *)

(* [names producers] is the set of names of the producers [producers]. The
   name of a producer is the OCaml variable that is used to name its semantic
   value. *)

(* This function checks on the fly that no two producers carry the same name.
   This check should never fail if we have performed appropriate renamings.
   It is a debugging aid. *)

let names (producers : producers) : StringSet.t =
  List.fold_left (fun ids producer ->
    let id = producer_identifier producer in
    assert (not (StringSet.mem id ids));
    StringSet.add id ids
  ) StringSet.empty producers