infer.ml 9.74 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
open Syntax
open Stretch
open UnparameterizedSyntax
open IL
open CodeBits
open TokenType

(* ------------------------------------------------------------------------- *)
(* Naming conventions. *)

11 12 13
(* The type variable associated with a nonterminal symbol. Its name begins
   with a prefix which ensures that it begins with a lowercase letter and
   cannot clash with Objective Caml keywords. *)
14 15 16 17

let ntvar symbol =
  Printf.sprintf "tv_%s" (Misc.normalize symbol)

18 19 20 21 22 23 24 25 26 27 28 29
(* The term variable associated with a nonterminal symbol. Its name begins
   with a prefix which ensures that it begins with a lowercase letter and
   cannot clash with Objective Caml keywords. *)

let encode symbol =
  Printf.sprintf "xv_%s" (Misc.normalize symbol)

let decode s =
  let n = String.length s in
  assert (n >= 3 && String.sub s 0 3 = "xv_");
  String.sub s 3 (n - 3)

30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
(* The name of the temporary file. *)

let base =
  Settings.base

let mlname =
  base ^ ".ml"

let mliname =
  base ^ ".mli"

(* ------------------------------------------------------------------------- *)
(* Code production. *)

(* [nttype nt] is the type of the nonterminal [nt], as currently
   known. *)

let nttype grammar nt =
   try
     TypTextual (StringMap.find nt grammar.types)
   with Not_found ->
     TypVar (ntvar nt)

(* [is_standard] determines whether a branch derives from a standard
   library definition. The method, based on a file name, is somewhat
   fragile. *)

let is_standard branch =
  List.for_all (fun x -> x = Settings.stdlib_filename) (Action.filenames branch.action)

(* [actiondef] turns a branch into a function definition. *)

let actiondef grammar symbol branch =

  (* Construct a list of the semantic action's formal parameters that
     depend on the production's right-hand side. *)

  let _, formals =
68
    List.fold_left (fun (i, formals) (symbol, id) ->
69
      let id, startp, endp, starto, endo =
70 71 72 73 74
	id,
	Printf.sprintf "_startpos_%s_" id,
	Printf.sprintf "_endpos_%s_" id,
	Printf.sprintf "_startofs_%s_" id,
	Printf.sprintf "_endofs_%s_" id
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
      in
      let t =
	try
	  let props = StringMap.find symbol grammar.tokens in
	  (* Symbol is a terminal. *)
	  match props.tk_ocamltype with
	  | None ->
	      tunit
	  | Some ocamltype ->
	      TypTextual ocamltype
	with Not_found ->
	  (* Symbol is a nonterminal. *)
	  nttype grammar symbol
      in
      i + 1,
      PAnnot (PVar id, t) ::
      PAnnot (PVar startp, tposition) ::
      PAnnot (PVar endp, tposition) ::
      PAnnot (PVar starto, tint) ::
      PAnnot (PVar endo, tint) ::
      formals
    ) (0, []) branch.producers
  in

  (* Extend the list with parameters that do not depend on the
     right-hand side. *)

  let formals =
    PAnnot (PVar "_eRR", texn) ::
    PAnnot (PVar "_startpos", tposition) ::
    PAnnot (PVar "_endpos", tposition) ::
    PAnnot (PVar "_startofs", tint) ::
    PAnnot (PVar "_endofs", tint) ::
    formals
  in

  (* Construct a function definition out of the above bindings and the
     semantic action. *)

  let body =
    EAnnot (
      Action.to_il_expr branch.action,
      type2scheme (nttype grammar symbol)
    )
  in

  match formals with
  | [] ->
      body
  | _ ->
      EFun (formals, body)

(* [program] turns an entire grammar into a test program. *)

let program grammar =

  (* Turn the grammar into a bunch of function definitions. Grammar
     productions that derive from the standard library are reflected
     first, so that type errors are not reported in them. *)

  let bindings1, bindings2 = 
    StringMap.fold (fun symbol rule (bindings1, bindings2) ->
      List.fold_left (fun (bindings1, bindings2) branch ->
	if is_standard branch then
	  (PWildcard, actiondef grammar symbol branch) :: bindings1, bindings2
	else
	  bindings1, (PWildcard, actiondef grammar symbol branch) :: bindings2
      ) (bindings1, bindings2) rule.branches
    ) grammar.rules ([], [])
  in

  (* Create entry points whose types are the unknowns that we are
     looking for. *)

  let ps, ts =
    StringMap.fold (fun symbol _ (ps, ts) ->
151
      PVar (encode (Misc.normalize symbol)) :: ps,
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
      nttype grammar symbol :: ts
    ) grammar.rules ([], [])
  in

  let def = {
    valpublic = true;
    valpat = PTuple ps;
    valval = ELet (bindings1 @ bindings2, EAnnot (bottom, type2scheme (TypTuple ts)))
  }
  in

  (* Insert markers to delimit the part of the file that we are
     interested in. These markers are recognized by [Lexmli]. This
     helps skip the values, types, exceptions, etc. that might be
     defined by the prologue or postlogue. *)

  let begindef = {
    valpublic = true;
    valpat = PVar "menhir_begin_marker";
    valval = EIntConst 0
  }
  and enddef = {
    valpublic = true;
    valpat = PVar "menhir_end_marker";
    valval = EIntConst 0
  } in

  (* Issue the test program. We include the definition of the type of
     tokens, because, in principle, the semantic actions may refer to
     it or to its data constructors. *)

183
  [ SIFunctor (grammar.parameters,
184
    interface_to_structure (tokentypedef grammar) @
185 186 187 188
    SIStretch grammar.preludes ::
    SIValDefs (false, [ begindef; def; enddef ]) ::
    SIStretch grammar.postludes ::
  [])]
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220

(* ------------------------------------------------------------------------- *)
(* Writing the program associated with a grammar to a file. *)

let write grammar () =
  let ml = open_out mlname in
  let module P = Printer.Make (struct
    let f = ml
    let locate_stretches = Some mlname
  end) in
  P.program (program grammar);
  close_out ml

(* ------------------------------------------------------------------------- *)
(* Running ocamldep on the program. *)

type entry =
    string (* basename *) * string (* filename *)

type line =
    entry (* target *) * entry list (* dependencies *)

let depend grammar =

  (* Create an [.ml] file and an [.mli] file, then invoke ocamldep to
     compute dependencies for us. *)

  (* If an old [.ml] or [.mli] file exists, we are careful to preserve
     it. We temporarily move it out of the way and restore it when we
     are done. There is no reason why dependency analysis should
     destroy existing files. *)

221 222 223
  let ocamldep_command =
    Printf.sprintf "%s %s %s"
      Settings.ocamldep (Filename.quote mlname) (Filename.quote mliname)
224 225
  in

226 227 228 229 230
  let output : string =
    Option.project (
      IO.moving_away mlname (fun () ->
      IO.moving_away mliname (fun () ->
      IO.with_file mlname (write grammar) (fun () ->
231
      IO.with_file mliname (Interface.write grammar) (fun () ->
232 233
      IO.invoke ocamldep_command
    )))))
234 235 236 237 238 239
  in

  (* Echo ocamldep's output. *)

  print_string output;

240 241 242 243
  (* If [--raw-depend] was specified on the command line, stop here.  This
     option is used by omake and by ocamlbuild, which performs their own
     postprocessing of [ocamldep]'s output. For normal [make] users, who use
     [--depend], some postprocessing is required, which is performed below. *)
244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259

  begin match Settings.depend with
  | Settings.OMNone ->
      assert false (* we wouldn't be here in the first place *)
  | Settings.OMRaw ->
      ()
  | Settings.OMPostprocess ->

      (* Make sense out of ocamldep's output. *)

      let lexbuf = Lexing.from_string output in
      let lines : line list =
	try
	  Lexdep.main lexbuf
	with Lexdep.Error msg ->
	  (* Echo the error message, followed with ocamldep's output. *)
260
	  Error.error [] "%s" (msg ^ output)
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
      in

      (* Look for the line that concerns the [.cmo] target, and echo a
	 modified version of this line, where the [.cmo] target is
	 replaced with [.ml] and [.mli] targets, and where the dependency
	 over the [.cmi] file is dropped.

	 In doing so, we assume that the user's [Makefile] supports
	 bytecode compilation, so that it makes sense to request [bar.cmo]
	 to be built, as opposed to [bar.cmx]. This is not optimal, but
	 will do. [camldep] exhibits the same behavior. *)

      (* TEMPORARY allow ocamldep to be called with flag -native. *)

      List.iter (fun ((_, target_filename), dependencies) ->
	if Filename.check_suffix target_filename ".cmo" then
	  let dependencies = List.filter (fun (basename, _) ->
	    basename <> base
	  ) dependencies in
	  if List.length dependencies > 0 then begin
	    Printf.printf "%s.ml %s.mli:" base base;
282
	    List.iter (fun (_basename, filename) ->
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
	      Printf.printf " %s" filename
	    ) dependencies;
	    Printf.printf "\n%!"
	  end
      ) lines

  end;

  (* Stop. *)

  exit 0

(* ------------------------------------------------------------------------- *)
(* Inferring types for a grammar's nonterminals. *)

let infer grammar =

  (* Invoke ocamlc to do type inference for us. *)

302 303 304 305
  let ocamlc_command =
    Printf.sprintf "%s -c -i %s" Settings.ocamlc (Filename.quote mlname)
  in

306
  let output =
307 308 309 310 311 312 313 314 315 316
    write grammar ();
    match IO.invoke ocamlc_command with
    | Some result ->
        Sys.remove mlname;
        result
    | None ->
        (* 2015/10/05: intentionally do not remove the [.ml] file if [ocamlc]
           fails. (Or if an exception is thrown.) We cannot understand why
           [ocaml] complains if we can't see the [.ml] file. *)
        exit 1
317 318 319 320 321 322 323 324 325 326
  in

  (* Make sense out of ocamlc's output. *)

  let env : (string * int * int) list =
    Lexmli.main (Lexing.from_string output)
  in

  let env : (string * ocamltype) list =
    List.map (fun (id, openingofs, closingofs) ->
327
      decode id, Inferred (String.sub output openingofs (closingofs - openingofs))
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
    ) env
  in

  (* Augment the grammar with new %type declarations. *)

  let types =
    StringMap.fold (fun symbol _ types ->
      let ocamltype =
	try
	  List.assoc (Misc.normalize symbol) env
	with Not_found ->
	  assert false
      in
      if StringMap.mem symbol grammar.types then
	(* If there was a declared type, keep it. *)
	types
      else
	(* Otherwise, insert the inferred type. *)
	StringMap.add symbol ocamltype types
    ) grammar.rules grammar.types
  in

  { grammar with types = types }