let value = Positions.value let position = Positions.position let error = Error.error open Syntax open SortUnification (* -------------------------------------------------------------------------- *) (* Error handling. *) (* In [check_arity], in principle, [arity1] is the expected arity and [arity2] is the actual arity. This distinction does not make much sense, though, as we do not know which is wrong, the declaration site or the use site. So, we display a neutral error message. *) let check_arity sym arity1 arity2 = let plural = max arity1 arity2 > 1 in if arity1 <> arity2 then error [position sym] "does the symbol \"%s\" expect %d or %d argument%s?" (value sym) (min arity1 arity2) (max arity1 arity2) (if plural then "s" else "") (* This variant of [unify] is used when no unification error can arise. *) let unify_cannot_fail sort1 sort2 = try unify sort1 sort2 with | Unify _ | Occurs _ -> (* If the caller is right, this unification step cannot fail! *) assert false (* In [unify], in principle, [sort1] is the expected sort and [sort2] is the actual sort. Again, this distinction does not make much sense, so we display a neutral error message. *) let unify sym sort1 sort2 = try unify sort1 sort2 with | Unify (v1, v2) -> let print v = print (decode v) in error [position sym] "how is the symbol \"%s\" parameterized?\n\ It is used at sorts %s and %s.\n\ The sort %s is not compatible with the sort %s." (value sym) (print sort1) (print sort2) (print v1) (print v2) | Occurs (v1, v2) -> let print v = print (decode v) in error [position sym] "how is the symbol \"%s\" parameterized?\n\ It is used at sorts %s and %s.\n\ The sort %s cannot be unified with the sort %s." (value sym) (print sort1) (print sort2) (print v1) (print v2) (* -------------------------------------------------------------------------- *) (* An environment maps (terminal and nonterminal) symbols to unification variables. *) type symbol = string module Env = StringMap type env = variable Env.t let find x env : variable = try Env.find x env with Not_found -> assert false (* unbound terminal or nonterminal symbol *) let extend env (xvs : (symbol * variable) list) = List.fold_left (fun env (x, v) -> Env.add x v env ) env xvs (* -------------------------------------------------------------------------- *) (* [allocate xs] allocates a fresh unification variable [v] for every element [x] of the list [xs]. It returns the lists [xvs] and [vs]. *) let allocate (xs : 'a list) : ('a * variable) list * variable list = let xvs = List.map (fun x -> x, fresh()) xs in let vs = List.map snd xvs in xvs, vs (* -------------------------------------------------------------------------- *) (* [check_parameter env param expected] checks that the parameter [param] has sort [expected]. A parameter is either a symbol or an application of a symbol to a number of parameters. Every application is total -- the language does not have partial applications. The sort of every application is [star], but the sort of a variable is unrestricted. *) let rec check_parameter env (param : parameter) (expected : variable) = match param with | ParameterVar sym -> let x = value sym in unify sym expected (find x env) | ParameterApp (sym, actuals) -> let x = value sym in (* This application has sort [star]. *) unify sym expected star; (* Retrieve the expected sort of each parameter. Two cases arise: if [x] has already been assigned an arrow sort, then we can retrieve its domain, which gives us the expected sort of each actual parameter; otherwise, we just make up a fresh arrow sort of appropriate arity. We could avoid this case distinction and always use the latter method, but the former method, when applicable, yields better error messages. If [sym] is a toplevel (nonterminal or terminal) symbol, then we will be in the first case, as we have been careful to initially assign an arrow sort of appropriate arity to each such symbol. *) let v = find x env in let expected = match domain v with | Some expected -> check_arity sym (List.length expected) (List.length actuals); expected | None -> let _, expected = allocate actuals in unify_cannot_fail v (arrow expected); expected in (* Check the sort of each actual parameter. *) List.iter2 (check_parameter env) actuals expected | ParameterAnonymous _ -> (* Anonymous rules have been eliminated already. *) assert false (* -------------------------------------------------------------------------- *) (* The following functions respectively check that a producer, a branch, a rule, and a grammar are well-sorted under an environment [env]. *) let check_producer env (producer : producer) = let (_, param, _) = producer in (* A producer must have sort [star]. *) check_parameter env param star let check_branch env (branch : parameterized_branch) = List.iter (check_producer env) branch.pr_producers let enter_rule env (nt : symbol) (rule : parameterized_rule) : env = (* For each formal parameter, allocate a fresh variable. *) (* TEMPORARY should we check that the formals have distinct names? *) let formals, domain = allocate rule.pr_parameters in (* Connect these variables with the sort of the symbol [nt]. *) (* Because it is performed first, this particular unification cannot fail. *) unify_cannot_fail (find nt env) (arrow domain); (* Extend the environment. *) extend env formals let check_rule env (nt : symbol) (rule : parameterized_rule) = (* Extend the environment within this rule. *) let env = enter_rule env nt rule in (* Check each branch in this extended environment. *) List.iter (check_branch env) rule.pr_branches let check_grammar env g = (* Each rule must be well-sorted. *) StringMap.iter (check_rule env) g.p_rules; (* The start symbols must have sort [star]. *) StringMap.iter (fun nt position -> let sym = Positions.with_pos position nt in unify sym star (find nt env) ) g.p_start_symbols; (* Every symbol that appears in a [%type] declaration must have sort [star]. *) List.iter (fun (param, _) -> check_parameter env param star ) g.p_types; (* Same rule for [%on_error_reduce] declarations. *) List.iter (fun (param, _) -> check_parameter env param star ) g.p_on_error_reduce; (* The symbols that appear in [%attribute] declarations must be well-sorted. Their sort is not necessarily [star]: it is legal to attach an attribute with a parameterized symbol. *) List.iter (fun (params, _) -> List.iter (fun param -> check_parameter env param (fresh()) ) params ) g.p_symbol_attributes (* -------------------------------------------------------------------------- *) let infer_grammar (g : grammar) : ground_sort Env.t = (* For each (terminal or nonterminal) symbol, allocate a unification variable. The terminal symbols have sort [star], so we can use this particular variable. *) let env = StringMap.fold (fun tok _ env -> Env.add tok star env ) g.p_tokens Env.empty in let env = Env.add "error" star env in let env = StringMap.fold (fun nt rule env -> let env = Env.add nt (fresh()) env in (* The following line unifies the sort of [nt] with an arrow of appropriate arity. It cannot fail. This strategy should lead to slightly better unification error messages. *) let _ : env = enter_rule env nt rule in env ) g.p_rules env in (* Impose sort equality constraints. *) check_grammar env g; (* Decode the environment, so our user doesn't have to deal with unification variables. *) let env = Env.map decode env in (* Ground any unassigned sort variables. (These should occur only in unreachable parts of the grammar.) This guarantees that the user does not have to deal with sort variables. *) let env = Env.map ground env in (* At log level 3, display the inferred sort of every symbol. *) Error.logG 3 (fun f -> Env.iter (fun x gsort -> Printf.fprintf f "%s :: %s\n" x (print (unground gsort)) ) env ); env