SortInference.ml 9.25 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16
let value    = Positions.value
let position = Positions.position
let error    = Error.error
17 18 19 20 21
open Syntax
open SortUnification

(* -------------------------------------------------------------------------- *)

22
(* Error handling. *)
23

24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
(* In [check_arity], in principle, [arity1] is the expected arity and [arity2]
   is the actual arity. This distinction does not make much sense, though, as
   we do not know which is wrong, the declaration site or the use site. So, we
   display a neutral error message. *)

let check_arity sym arity1 arity2 =
  let plural = max arity1 arity2 > 1 in
  if arity1 <> arity2 then
    error [position sym]
      "does the symbol \"%s\" expect %d or %d argument%s?"
      (value sym)
      (min arity1 arity2)
      (max arity1 arity2)
      (if plural then "s" else "")

(* This variant of [unify] is used when no unification error can arise. *)
40 41 42 43 44 45 46 47 48

let unify_cannot_fail sort1 sort2 =
  try
    unify sort1 sort2
  with
  | Unify _ | Occurs _ ->
      (* If the caller is right, this unification step cannot fail! *)
      assert false

49 50 51
(* In [unify], in principle, [sort1] is the expected sort and [sort2] is the
   actual sort. Again, this distinction does not make much sense, so we
   display a neutral error message. *)
52

53
let unify sym sort1 sort2 =
54 55 56
  try
    unify sort1 sort2
  with
57
  | Unify (v1, v2) ->
58
      let print v = print (decode v) in
59 60
      error [position sym]
       "how is the symbol \"%s\" parameterized?\n\
61 62
        It is used at sorts %s and %s.\n\
        The sort %s is not compatible with the sort %s."
63
        (value sym) (print sort1) (print sort2) (print v1) (print v2)
64
  | Occurs (v1, v2) ->
65
      let print v = print (decode v) in
66 67
      error [position sym]
       "how is the symbol \"%s\" parameterized?\n\
68 69
        It is used at sorts %s and %s.\n\
        The sort %s cannot be unified with the sort %s."
70
        (value sym) (print sort1) (print sort2) (print v1) (print v2)
71 72 73

(* -------------------------------------------------------------------------- *)

74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
(* An environment maps (terminal and nonterminal) symbols to unification
   variables. *)

type symbol =
  string

module Env =
  StringMap

type env =
  variable Env.t

let find x env : variable =
  try
    Env.find x env
  with Not_found ->
    assert false (* unbound terminal or nonterminal symbol *)

let extend env (xvs : (symbol * variable) list) =
  List.fold_left (fun env (x, v) ->
    Env.add x v env
  ) env xvs

(* -------------------------------------------------------------------------- *)

(* [allocate xs] allocates a fresh unification variable [v] for every element
   [x] of the list [xs]. It returns the lists [xvs] and [vs]. *)

let allocate (xs : 'a list) : ('a * variable) list * variable list =
  let xvs = List.map (fun x -> x, fresh()) xs in
  let  vs = List.map snd xvs in
  xvs, vs

(* -------------------------------------------------------------------------- *)

(* [check_parameter env param expected] checks that the parameter [param] has
   sort [expected]. A parameter is either a symbol or an application of a
   symbol to a number of parameters. Every application is total -- the
   language does not have partial applications. The sort of every application
   is [star], but the sort of a variable is unrestricted. *)

let rec check_parameter env (param : parameter) (expected : variable) =
  match param with
117 118
  | ParameterVar sym ->
      let x = value sym in
119
      unify sym expected (find x env)
120
  | ParameterApp (sym, actuals) ->
121
      let x = value sym in
122
      (* This application has sort [star]. *)
123
      unify sym expected star;
124 125 126 127 128 129 130 131 132 133 134 135 136 137
      (* Retrieve the expected sort of each parameter. Two cases arise: if [x]
         has already been assigned an arrow sort, then we can retrieve its
         domain, which gives us the expected sort of each actual parameter;
         otherwise, we just make up a fresh arrow sort of appropriate arity.
         We could avoid this case distinction and always use the latter
         method, but the former method, when applicable, yields better error
         messages. If [sym] is a toplevel (nonterminal or terminal) symbol,
         then we will be in the first case, as we have been careful to
         initially assign an arrow sort of appropriate arity to each such
         symbol. *)
      let v = find x env in
      let expected =
        match domain v with
        | Some expected ->
138
            check_arity sym (List.length expected) (List.length actuals);
139 140 141 142 143 144
            expected
        | None ->
            let _, expected = allocate actuals in
            unify_cannot_fail v (arrow expected);
            expected
      in
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
      (* Check the sort of each actual parameter. *)
      List.iter2 (check_parameter env) actuals expected
  | ParameterAnonymous _ ->
      (* Anonymous rules have been eliminated already. *)
      assert false

(* -------------------------------------------------------------------------- *)

(* The following functions respectively check that a producer, a branch,
   a rule, and a grammar are well-sorted under an environment [env]. *)

let check_producer env (producer : producer) =
  let (_, param, _) = producer in
  (* A producer must have sort [star]. *)
  check_parameter env param star

let check_branch env (branch : parameterized_branch) =
  List.iter (check_producer env) branch.pr_producers

let enter_rule env (nt : symbol) (rule : parameterized_rule) : env =

  (* For each formal parameter, allocate a fresh variable. *)
  let formals, domain = allocate rule.pr_parameters in

  (* Connect these variables with the sort of the symbol [nt]. *)
  (* Because it is performed first, this particular unification
     cannot fail. *)
172
  unify_cannot_fail (find nt env) (arrow domain);
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190

  (* Extend the environment. *)
  extend env formals

let check_rule env (nt : symbol) (rule : parameterized_rule) =

  (* Extend the environment within this rule. *)
  let env = enter_rule env nt rule in

  (* Check each branch in this extended environment. *)
  List.iter (check_branch env) rule.pr_branches

let check_grammar env g =

  (* Each rule must be well-sorted. *)
  StringMap.iter (check_rule env) g.p_rules;

  (* The start symbols must have sort [star]. *)
191
  StringMap.iter (fun nt position ->
192 193
    let sym = Positions.with_pos position nt in
    unify sym star (find nt env)
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
  ) g.p_start_symbols;

  (* Every symbol that appears in a [%type] declaration must have sort [star]. *)
  List.iter (fun (param, _) ->
    check_parameter env param star
  ) g.p_types;

  (* Same rule for [%on_error_reduce] declarations. *)
  List.iter (fun (param, _) ->
    check_parameter env param star
  ) g.p_on_error_reduce;

  (* The symbols that appear in [%attribute] declarations must be well-sorted.
     Their sort is not necessarily [star]: it is legal to attach an attribute
     with a parameterized symbol. *)
  List.iter (fun (params, _) ->
    List.iter (fun param ->
      check_parameter env param (fresh())
    ) params
  ) g.p_symbol_attributes
214 215 216

(* -------------------------------------------------------------------------- *)

217
type sorts =
218
  GroundSort.sort Env.t
219

220
let infer (g : grammar) : sorts =
221 222 223 224 225 226 227 228 229 230

  (* For each (terminal or nonterminal) symbol, allocate a unification
     variable. The terminal symbols have sort [star], so we can use
     this particular variable. *)

  let env =
    StringMap.fold (fun tok _ env ->
      Env.add tok star env
    ) g.p_tokens Env.empty
  in
231 232 233
  let env =
    Env.add "error" star env
  in
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252

  let env =
    StringMap.fold (fun nt rule env ->
      let env = Env.add nt (fresh()) env in
      (* The following line unifies the sort of [nt] with an arrow of
         appropriate arity. It cannot fail. This strategy should lead
         to slightly better unification error messages. *)
      let _ : env = enter_rule env nt rule in
      env
    ) g.p_rules env
  in

  (* Impose sort equality constraints. *)

  check_grammar env g;

  (* Decode the environment, so our user doesn't have to deal with
     unification variables. *)

253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
  let env = Env.map decode env in

  (* Ground any unassigned sort variables. (These should occur only in
     unreachable parts of the grammar.) This guarantees that the user
     does not have to deal with sort variables. *)

  let env = Env.map ground env in

  (* At log level 3, display the inferred sort of every symbol. *)

  Error.logG 3 (fun f ->
    Env.iter (fun x gsort ->
      Printf.fprintf f "%s :: %s\n" x (print (unground gsort))
    ) env
  );

  env