syntax.ml 8.97 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16 17 18 19 20 21 22 23
(* The type [partial_grammar] describes the abstract syntax that is produced
   by the parsers (yacc-parser and fancy-parser).

   The type [grammar] describes the abstract syntax that is obtained after one
   or more partial grammars are joined (see [PartialGrammar]). It differs in
   that declarations are organized in a more useful way and a number of
   well-formedness checks have been performed. *)

(* ------------------------------------------------------------------------ *)

24 25 26 27 28 29 30 31
(* Terminals and nonterminal symbols are strings. Identifiers
   (which are used to refer to a symbol's semantic value) are
   strings. A file name is a string. *)

type terminal =
    string

type nonterminal =
32
    string
33 34

type symbol =
35
    string
36

37 38
type identifier =
    string
39

40
type filename =
41 42
    string

43 44
(* ------------------------------------------------------------------------ *)

45
(* A postlude is a source file fragment. *)
46

47
type postlude =
48
    Stretch.t
49

50 51
(* ------------------------------------------------------------------------ *)

52
(* OCaml semantic actions are represented as stretches. *)
53 54 55 56

type action =
    Action.t

57 58
(* ------------------------------------------------------------------------ *)

59 60 61 62 63 64 65 66 67
(* An attribute consists of an attribute name and an attribute payload. The
   payload is an uninterpreted stretch of source text. *)

type attribute =
    string Positions.located * Stretch.t

type attributes =
    attribute list

68 69 70 71 72 73 74 75 76 77 78 79 80 81
(* Attributes allow the user to annotate the grammar with information that is
   ignored by Menhir, but can be exploited by other tools, via the SDK. *)

(* Attributes can be attached in the following places:

   - with the grammar:         %[@bar ...]
   - with a terminal symbol:   %token FOO [@bar ...]
   - with a rule:              foo(X) [@bar ...]: ...
   - with a producer:          e = foo(quux) [@bar ...]
   - with an arbitrary symbol: %attribute FOO foo(quux) [@bar ...]

   After expanding away parameterized nonterminal symbols, things become
   a bit simpler, as %attribute declarations are desugared away. *)

82 83
(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
84
(* Information about tokens. (Only after joining.) *)
85

86 87
type token_associativity =
    LeftAssoc
88 89 90 91
  | RightAssoc
  | NonAssoc
  | UndefinedAssoc

92 93
type precedence_level =
    UndefinedPrecedence
94 95

  (* Items are incomparable when they originate in different files. A
96
     value of type [input_file] is used to record an item's origin. The
97 98
     positions allow locating certain warnings. *)

99
  | PrecedenceLevel of InputFile.input_file * int * Lexing.position * Lexing.position
100

101 102
type token_properties =
    {
103 104 105
               tk_filename      : filename;
               tk_ocamltype     : Stretch.ocamltype option;
               tk_position      : Positions.t;
106
               tk_attributes    : attributes;
107
      mutable  tk_associativity : token_associativity;
108
      mutable  tk_precedence    : precedence_level;
109
      mutable  tk_is_declared   : bool;
110 111
    }

112 113
(* ------------------------------------------------------------------------ *)

114
(* A [%prec] annotation is optional. A production can carry at most one.
115
   If there is one, it is a symbol name. See [ParserAux]. *)
116 117

type branch_prec_annotation =
118 119
    symbol Positions.located option

POTTIER Francois's avatar
POTTIER Francois committed
120 121
(* ------------------------------------------------------------------------ *)

122 123 124 125
(* A "production level" is used to solve reduce/reduce conflicts. It reflects
   which production appears first in the grammar. See [ParserAux]. *)

type branch_production_level =
126
  | ProductionLevel of InputFile.input_file * int
127

POTTIER Francois's avatar
POTTIER Francois committed
128 129
(* ------------------------------------------------------------------------ *)

130 131 132 133 134 135 136 137 138
(* A level is attached to every [%on_error_reduce] declaration. It is used
   to decide what to do when several such declarations are applicable in a
   single state. *)

type on_error_reduce_level =
  branch_production_level (* we re-use the above type, to save code *)

(* ------------------------------------------------------------------------ *)

139 140 141 142 143 144 145 146 147 148 149 150 151 152
(* A parameter is either just a symbol or an application of a symbol to a
   nonempty tuple of parameters. Before anonymous rules have been eliminated,
   it can also be an anonymous rule, represented as a list of branches. *)

type parameter =
  | ParameterVar of symbol Positions.located
  | ParameterApp of symbol Positions.located * parameters
  | ParameterAnonymous of parameterized_branch list Positions.located

and parameters =
    parameter list

(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
153
(* A producer is a pair of identifier and a parameter. In concrete syntax,
POTTIER Francois's avatar
POTTIER Francois committed
154
   it could be [e = expr], for instance. It carries a number of attributes. *)
POTTIER Francois's avatar
POTTIER Francois committed
155

156
and producer =
157
    identifier Positions.located * parameter * attributes
158

POTTIER Francois's avatar
POTTIER Francois committed
159 160 161 162
(* ------------------------------------------------------------------------ *)

(* A branch contains a series of producers and a semantic action. *)

163
and parameterized_branch =
164
    {
165 166
      pr_branch_position           : Positions.t;
      pr_producers                 : producer list;
167
      pr_action                    : action;
168
      pr_branch_prec_annotation    : branch_prec_annotation;
169
      pr_branch_production_level   : branch_production_level
170 171
    }

POTTIER Francois's avatar
POTTIER Francois committed
172 173 174 175
(* ------------------------------------------------------------------------ *)

(* A rule has a header and several branches. *)

176 177
type parameterized_rule =
    {
178 179 180 181
      pr_public_flag       : bool;
      pr_inline_flag       : bool;
      pr_nt                : nonterminal;
      pr_positions         : Positions.t list;
182
      pr_attributes        : attributes;
183 184
      pr_parameters        : symbol list;
      pr_branches          : parameterized_branch list;
185 186
    }

POTTIER Francois's avatar
POTTIER Francois committed
187 188
(* ------------------------------------------------------------------------ *)

189 190 191 192 193 194 195 196 197 198 199 200 201 202
(* A declaration. (Only before joining.) *)

type declaration =

    (* Raw OCaml code. *)

  | DCode of Stretch.t

    (* Raw OCaml functor parameter. *)

  | DParameter of Stretch.ocamltype (* really a stretch *)

    (* Terminal symbol (token) declaration. *)

203
  | DToken of Stretch.ocamltype option * terminal * attributes
204 205 206 207 208 209 210 211 212 213 214 215 216

    (* Start symbol declaration. *)

  | DStart of nonterminal

    (* Priority and associativity declaration. *)

  | DTokenProperties of terminal * token_associativity * precedence_level

    (* Type declaration. *)

  | DType of Stretch.ocamltype * parameter

217 218 219 220 221 222 223 224
    (* Grammar-level attribute declaration. *)

  | DGrammarAttribute of attribute

    (* Attributes shared among multiple symbols, i.e., [%attribute]. *)

  | DSymbolAttributes of parameter list * attributes

225 226
    (* On-error-reduce declaration. *)

227
  | DOnErrorReduce of parameter * on_error_reduce_level
228 229 230

(* ------------------------------------------------------------------------ *)

POTTIER Francois's avatar
POTTIER Francois committed
231 232
(* A partial grammar. (Only before joining.) *)

233
type partial_grammar =
234 235
    {
      pg_filename          : filename;
236
      pg_postlude           : postlude option;
237 238 239
      pg_declarations      : declaration Positions.located list;
      pg_rules             : parameterized_rule list;
    }
240

POTTIER Francois's avatar
POTTIER Francois committed
241 242 243 244 245 246
(* ------------------------------------------------------------------------ *)

(* A grammar. (Only after joining.) *)

(* The differences with partial grammars (above) are as follows:
   1. the file name is gone (there could be several file names, anyway).
247
   2. there can be several postludes.
248 249 250
   3. declarations are organized by kind: preludes, postludes,
      functor %parameters, %start symbols, %types, %tokens, %on_error_reduce,
      grammar attributes, %attributes.
POTTIER Francois's avatar
POTTIER Francois committed
251 252 253
   4. rules are stored in a map, indexed by symbol names, instead of a list.
 *)

254 255 256
type grammar =
    {
      p_preludes           : Stretch.t list;
257
      p_postludes          : postlude list;
258 259 260 261
      p_parameters         : Stretch.t list;
      p_start_symbols      : Positions.t StringMap.t;
      p_types              : (parameter * Stretch.ocamltype Positions.located) list;
      p_tokens             : token_properties StringMap.t;
262
      p_on_error_reduce    : (parameter * on_error_reduce_level) list;
263 264
      p_grammar_attributes : attributes;
      p_symbol_attributes  : (parameter list * attributes) list;
POTTIER Francois's avatar
POTTIER Francois committed
265
      p_rules              : parameterized_rule StringMap.t;
266
    }