IncrementalEngine.ml 8 KB
Newer Older
1 2
open General

3 4 5 6 7 8 9 10 11 12
(* This signature describes the incremental LR engine. *)

(* In this mode, the user controls the lexer, and the parser suspends
   itself when it needs to read a new token. *)

module type INCREMENTAL_ENGINE = sig

  type token

  (* The type ['a result] represents an intermediate or final result of the
13 14 15 16 17 18 19 20 21 22 23
     parser. An intermediate result is a suspension: it records the parser's
     current state, and allows parsing to be resumed. The parameter ['a] is
     the type of the semantic value that will eventually be produced if the
     parser succeeds. *)

  (* [Accepted] and [Rejected] are final results. [Accepted] carries a
     semantic value. *)

  (* [InputNeeded] is an intermediate result. It means that the parser wishes
     to read one token before continuing. *)

24 25 26 27 28 29
  (* [Shifting] is an intermediate result. It means that the parser is taking
     a shift transition. It exposes the state of the parser before and after
     the transition. The Boolean parameter tells whether the parser intends to
     request a new token after this transition. (It always does, except when
     it is about to accept.) *)

30
  (* [AboutToReduce] is an intermediate result. It means that the parser is
31 32
     about to perform a reduction step. It exposes the parser's current
     state as well as the production that is about to be reduced. *)
33 34

  (* [HandlingError] is an intermediate result. It means that the parser has
35
     detected an error and is currently handling it, in several steps. *)
36

37
  type env
38

39 40
  type production

41 42
  type 'a result = private
    | InputNeeded of env
43
    | Shifting of env * env * bool
44 45
    | AboutToReduce of env * production
    | HandlingError of env
46 47 48 49
    | Accepted of 'a
    | Rejected

  (* [offer] allows the user to resume the parser after it has suspended
50 51 52
     itself with a result of the form [InputNeeded env]. [offer] expects the
     old result as well as a new token and produces a new result. It does not
     raise any exception. *)
53 54

  val offer:
55
    'a result ->
56 57 58
    token * Lexing.position * Lexing.position ->
    'a result

59 60 61 62
  (* [resume] allows the user to resume the parser after it has suspended
     itself with a result of the form [AboutToReduce (env, prod)] or
     [HandlingError env]. [resume] expects the old result and produces a new
     result. It does not raise any exception. *)
63

64 65
  val resume:
    'a result ->
66 67
    'a result

68 69 70 71 72 73
  (* The abstract type ['a lr1state] describes the non-initial states of the
     LR(1) automaton. The index ['a] represents the type of the semantic value
     associated with this state's incoming symbol. *)

  type 'a lr1state

74 75 76
  (* An element is a pair of a non-initial state [s] and a semantic value [v]
     associated with the incoming symbol of this state. The idea is, the value
     [v] was pushed onto the stack just before the state [s] was entered. Thus,
77
     for some type ['a], the state [s] has type ['a lr1state] and the value [v]
78 79 80 81 82 83 84 85 86 87 88
     has type ['a]. In other words, the type [element] is an existential type. *)

  type element =
    | Element: 'a lr1state * 'a * Lexing.position * Lexing.position -> element

  (* The parser's stack is (or, more precisely, can be viewed as) a stream of
     elements. *)

  type stack =
    element stream

89 90 91 92
  (* This is the parser's stack, a stream of elements. This stream is empty if
     the parser is in an initial state; otherwise, it is non-empty.  The LR(1)
     automaton's current state is the one found in the top element of the
     stack. *)
93

94
  val stack: env -> stack
95

96 97 98 99 100 101 102
  (* These are the start and end positions of the current lookahead token. It
     is legal to invoke this function only after at least one token has been
     offered to the parser via [offer]. In other words, it is illegal to
     invoke it in an initial state. *)

  val positions: env -> Lexing.position * Lexing.position

103
end
104

105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
(* This signature is a fragment of the inspection API that is made available
   to the user when [--inspection] is used. This fragment contains type
   definitions for symbols. *)

module type SYMBOLS = sig

  (* The type ['a terminal] represents a terminal symbol. The type ['a
     nonterminal] represents a nonterminal symbol. In both cases, the index
     ['a] represents the type of the semantic values associated with this
     symbol. The concrete definitions of these types are generated. *)

  type 'a terminal
  type 'a nonterminal

  (* The type ['a symbol] represents a terminal or nonterminal symbol. It is
     the disjoint union of the types ['a terminal] and ['a nonterminal]. *)

  type 'a symbol =
    | T : 'a terminal -> 'a symbol
    | N : 'a nonterminal -> 'a symbol

  (* The type [xsymbol] is an existentially quantified version of the type
     ['a symbol]. This type is useful in situations where the index ['a]
     is not statically known. *)

  type xsymbol = 
    | X : 'a symbol -> xsymbol

end

135 136
(* This signature describes the inspection API that is made available to the
   user when [--inspection] is used. *)
137 138 139

module type INSPECTION = sig

140 141 142 143
  (* The types of symbols are described above. *)

  include SYMBOLS

144 145
  (* The type ['a lr1state] is meant to be the same as in [INCREMENTAL_ENGINE]. *)

146
  type 'a lr1state
147

148 149 150 151
  (* The type [production] is meant to be the same as in [INCREMENTAL_ENGINE].
     It represents a production of the grammar. A production can be examined
     via the functions [lhs] and [rhs] below. *)

152 153
  type production

154 155 156 157
  (* An LR(0) item is a pair of a production [prod] and a valid index [i] into
     this production. That is, if the length of [rhs prod] is [n], then [i] is
     comprised between 0 and [n], inclusive. *)

158
  type item =
159 160
      production * int

161 162 163 164 165 166 167 168
  (* Ordering functions. *)

  val compare_terminals: _ terminal -> _ terminal -> int
  val compare_nonterminals: _ nonterminal -> _ nonterminal -> int
  val compare_symbols: xsymbol -> xsymbol -> int
  val compare_productions: production -> production -> int
  val compare_items: item -> item -> int

169 170 171 172 173 174
  (* [incoming_symbol s] is the incoming symbol of the state [s], that is,
     the symbol that the parser must recognize before (has recognized when)
     it enters the state [s]. This function gives access to the semantic
     value [v] stored in a stack element [Element (s, v, _, _)]. Indeed,
     by case analysis on the symbol [incoming_symbol s], one discovers the
     type ['a] of the value [v]. *)
175

POTTIER Francois's avatar
POTTIER Francois committed
176
  val incoming_symbol: 'a lr1state -> 'a symbol
177

178 179 180 181 182 183
  (* [items s] is the set of the LR(0) items in the LR(0) core of the LR(1)
     state [s]. This set is not epsilon-closed. This set is presented as a
     list, in an arbitrary order. *)

  val items: _ lr1state -> item list

184 185 186
  (* [lhs prod] is the left-hand side of the production [prod]. This is
     always a non-terminal symbol. *)

187 188
  val lhs: production -> xsymbol

189 190 191
  (* [rhs prod] is the right-hand side of the production [prod]. This is
     a (possibly empty) sequence of (terminal or nonterminal) symbols. *)

192 193
  val rhs: production -> xsymbol list

194 195 196 197
  (* [nullable nt] tells whether the non-terminal symbol [nt] is nullable.
     That is, it is true if and only if this symbol produces the empty
     word [epsilon]. *)

198
  val nullable: _ nonterminal -> bool
199

200 201 202 203
  (* [first nt t] tells whether the FIRST set of the nonterminal symbol [nt]
     contains the terminal symbol [t]. That is, it is true if and only if
     [nt] produces a word that begins with [t]. *)

204 205 206 207 208 209
  val first: _ nonterminal -> _ terminal -> bool

  (* [xfirst] is analogous to [first], but expects a first argument of type
     [xsymbol] instead of [_ terminal]. *)

  val xfirst: xsymbol -> _ terminal -> bool
210

211 212 213 214 215 216 217
  (* [foreach_terminal] enumerates the terminal symbols, including [error].
     [foreach_terminal_but_error] enumerates the terminal symbols, excluding
     [error]. *)

  val foreach_terminal:           (xsymbol -> 'a -> 'a) -> 'a -> 'a
  val foreach_terminal_but_error: (xsymbol -> 'a -> 'a) -> 'a -> 'a

218 219
end

220 221 222 223 224 225 226 227 228 229 230 231
(* This signature combines the incremental API and the inspection API. *)

module type EVERYTHING = sig

  include INCREMENTAL_ENGINE

  include INSPECTION
    with type 'a lr1state := 'a lr1state
    with type production := production

end