SortInference.ml 8.35 KB
Newer Older
1
2
3
let value    = Positions.value
let position = Positions.position
let error    = Error.error
4
5
6
7
8
open Syntax
open SortUnification

(* -------------------------------------------------------------------------- *)

9
(* Error handling. *)
10

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
(* In [check_arity], in principle, [arity1] is the expected arity and [arity2]
   is the actual arity. This distinction does not make much sense, though, as
   we do not know which is wrong, the declaration site or the use site. So, we
   display a neutral error message. *)

let check_arity sym arity1 arity2 =
  let plural = max arity1 arity2 > 1 in
  if arity1 <> arity2 then
    error [position sym]
      "does the symbol \"%s\" expect %d or %d argument%s?"
      (value sym)
      (min arity1 arity2)
      (max arity1 arity2)
      (if plural then "s" else "")

(* This variant of [unify] is used when no unification error can arise. *)
27
28
29
30
31
32
33
34
35

let unify_cannot_fail sort1 sort2 =
  try
    unify sort1 sort2
  with
  | Unify _ | Occurs _ ->
      (* If the caller is right, this unification step cannot fail! *)
      assert false

36
37
38
(* In [unify], in principle, [sort1] is the expected sort and [sort2] is the
   actual sort. Again, this distinction does not make much sense, so we
   display a neutral error message. *)
39

40
let unify sym sort1 sort2 =
41
42
43
  try
    unify sort1 sort2
  with
44
  | Unify (v1, v2) ->
45
      let print v = print (decode v) in
46
47
      error [position sym]
       "how is the symbol \"%s\" parameterized?\n\
48
49
        It is used at sorts %s and %s.\n\
        The sort %s is not compatible with the sort %s."
50
        (value sym) (print sort1) (print sort2) (print v1) (print v2)
51
  | Occurs (v1, v2) ->
52
      let print v = print (decode v) in
53
54
      error [position sym]
       "how is the symbol \"%s\" parameterized?\n\
55
56
        It is used at sorts %s and %s.\n\
        The sort %s cannot be unified with the sort %s."
57
        (value sym) (print sort1) (print sort2) (print v1) (print v2)
58
59
60

(* -------------------------------------------------------------------------- *)

61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
(* An environment maps (terminal and nonterminal) symbols to unification
   variables. *)

type symbol =
  string

module Env =
  StringMap

type env =
  variable Env.t

let find x env : variable =
  try
    Env.find x env
  with Not_found ->
    assert false (* unbound terminal or nonterminal symbol *)

let extend env (xvs : (symbol * variable) list) =
  List.fold_left (fun env (x, v) ->
    Env.add x v env
  ) env xvs

(* -------------------------------------------------------------------------- *)

(* [allocate xs] allocates a fresh unification variable [v] for every element
   [x] of the list [xs]. It returns the lists [xvs] and [vs]. *)

let allocate (xs : 'a list) : ('a * variable) list * variable list =
  let xvs = List.map (fun x -> x, fresh()) xs in
  let  vs = List.map snd xvs in
  xvs, vs

(* -------------------------------------------------------------------------- *)

(* [check_parameter env param expected] checks that the parameter [param] has
   sort [expected]. A parameter is either a symbol or an application of a
   symbol to a number of parameters. Every application is total -- the
   language does not have partial applications. The sort of every application
   is [star], but the sort of a variable is unrestricted. *)

let rec check_parameter env (param : parameter) (expected : variable) =
  match param with
104
105
  | ParameterVar sym ->
      let x = value sym in
106
      unify sym expected (find x env)
107
  | ParameterApp (sym, actuals) ->
108
      let x = value sym in
109
      (* This application has sort [star]. *)
110
      unify sym expected star;
111
112
113
114
115
116
117
118
119
120
121
122
123
124
      (* Retrieve the expected sort of each parameter. Two cases arise: if [x]
         has already been assigned an arrow sort, then we can retrieve its
         domain, which gives us the expected sort of each actual parameter;
         otherwise, we just make up a fresh arrow sort of appropriate arity.
         We could avoid this case distinction and always use the latter
         method, but the former method, when applicable, yields better error
         messages. If [sym] is a toplevel (nonterminal or terminal) symbol,
         then we will be in the first case, as we have been careful to
         initially assign an arrow sort of appropriate arity to each such
         symbol. *)
      let v = find x env in
      let expected =
        match domain v with
        | Some expected ->
125
            check_arity sym (List.length expected) (List.length actuals);
126
127
128
129
130
131
            expected
        | None ->
            let _, expected = allocate actuals in
            unify_cannot_fail v (arrow expected);
            expected
      in
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
      (* Check the sort of each actual parameter. *)
      List.iter2 (check_parameter env) actuals expected
  | ParameterAnonymous _ ->
      (* Anonymous rules have been eliminated already. *)
      assert false

(* -------------------------------------------------------------------------- *)

(* The following functions respectively check that a producer, a branch,
   a rule, and a grammar are well-sorted under an environment [env]. *)

let check_producer env (producer : producer) =
  let (_, param, _) = producer in
  (* A producer must have sort [star]. *)
  check_parameter env param star

let check_branch env (branch : parameterized_branch) =
  List.iter (check_producer env) branch.pr_producers

let enter_rule env (nt : symbol) (rule : parameterized_rule) : env =

  (* For each formal parameter, allocate a fresh variable. *)
  (* TEMPORARY should we check that the formals have distinct names? *)
  let formals, domain = allocate rule.pr_parameters in

  (* Connect these variables with the sort of the symbol [nt]. *)
  (* Because it is performed first, this particular unification
     cannot fail. *)
160
  unify_cannot_fail (find nt env) (arrow domain);
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178

  (* Extend the environment. *)
  extend env formals

let check_rule env (nt : symbol) (rule : parameterized_rule) =

  (* Extend the environment within this rule. *)
  let env = enter_rule env nt rule in

  (* Check each branch in this extended environment. *)
  List.iter (check_branch env) rule.pr_branches

let check_grammar env g =

  (* Each rule must be well-sorted. *)
  StringMap.iter (check_rule env) g.p_rules;

  (* The start symbols must have sort [star]. *)
179
  StringMap.iter (fun nt position ->
180
181
    let sym = Positions.with_pos position nt in
    unify sym star (find nt env)
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
  ) g.p_start_symbols;

  (* Every symbol that appears in a [%type] declaration must have sort [star]. *)
  List.iter (fun (param, _) ->
    check_parameter env param star
  ) g.p_types;

  (* Same rule for [%on_error_reduce] declarations. *)
  List.iter (fun (param, _) ->
    check_parameter env param star
  ) g.p_on_error_reduce;

  (* The symbols that appear in [%attribute] declarations must be well-sorted.
     Their sort is not necessarily [star]: it is legal to attach an attribute
     with a parameterized symbol. *)
  List.iter (fun (params, _) ->
    List.iter (fun param ->
      check_parameter env param (fresh())
    ) params
  ) g.p_symbol_attributes
202
203
204

(* -------------------------------------------------------------------------- *)

205
let infer_grammar (g : grammar) : ground_sort Env.t =
206
207
208
209
210
211
212
213
214
215

  (* For each (terminal or nonterminal) symbol, allocate a unification
     variable. The terminal symbols have sort [star], so we can use
     this particular variable. *)

  let env =
    StringMap.fold (fun tok _ env ->
      Env.add tok star env
    ) g.p_tokens Env.empty
  in
216
217
218
  let env =
    Env.add "error" star env
  in
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237

  let env =
    StringMap.fold (fun nt rule env ->
      let env = Env.add nt (fresh()) env in
      (* The following line unifies the sort of [nt] with an arrow of
         appropriate arity. It cannot fail. This strategy should lead
         to slightly better unification error messages. *)
      let _ : env = enter_rule env nt rule in
      env
    ) g.p_rules env
  in

  (* Impose sort equality constraints. *)

  check_grammar env g;

  (* Decode the environment, so our user doesn't have to deal with
     unification variables. *)

238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
  let env = Env.map decode env in

  (* Ground any unassigned sort variables. (These should occur only in
     unreachable parts of the grammar.) This guarantees that the user
     does not have to deal with sort variables. *)

  let env = Env.map ground env in

  (* At log level 3, display the inferred sort of every symbol. *)

  Error.logG 3 (fun f ->
    Env.iter (fun x gsort ->
      Printf.fprintf f "%s :: %s\n" x (print (unground gsort))
    ) env
  );

  env