infer.ml 12.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16 17 18 19 20 21 22 23
open Syntax
open Stretch
open UnparameterizedSyntax
open IL
open CodeBits
open TokenType

(* ------------------------------------------------------------------------- *)
(* Naming conventions. *)

24 25
(* The type variable associated with a nonterminal symbol. Its name begins
   with a prefix which ensures that it begins with a lowercase letter and
26
   cannot clash with OCaml keywords. *)
27 28 29 30

let ntvar symbol =
  Printf.sprintf "tv_%s" (Misc.normalize symbol)

31 32
(* The term variable associated with a nonterminal symbol. Its name begins
   with a prefix which ensures that it begins with a lowercase letter and
33
   cannot clash with OCaml keywords. *)
34 35 36 37 38 39

let encode symbol =
  Printf.sprintf "xv_%s" (Misc.normalize symbol)

let decode s =
  let n = String.length s in
40 41
  if not (n >= 3 && String.sub s 0 3 = "xv_") then
    Lexmli.fail();
42 43
  String.sub s 3 (n - 3)

44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
(* The name of the temporary file. *)

let base =
  Settings.base

let mlname =
  base ^ ".ml"

let mliname =
  base ^ ".mli"

(* ------------------------------------------------------------------------- *)
(* Code production. *)

(* [nttype nt] is the type of the nonterminal [nt], as currently
   known. *)

let nttype grammar nt =
   try
     TypTextual (StringMap.find nt grammar.types)
   with Not_found ->
     TypVar (ntvar nt)

(* [is_standard] determines whether a branch derives from a standard
   library definition. The method, based on a file name, is somewhat
   fragile. *)

let is_standard branch =
  List.for_all (fun x -> x = Settings.stdlib_filename) (Action.filenames branch.action)

(* [actiondef] turns a branch into a function definition. *)

POTTIER Francois's avatar
POTTIER Francois committed
76 77 78
(* The names and types of the conventional internal variables that
   correspond to keywords ($startpos,etc.) are hardwired in this
   code. It would be nice if these conventions were more clearly
POTTIER Francois's avatar
POTTIER Francois committed
79
   isolated and perhaps moved to the [Action] or [Keyword] module. *)
POTTIER Francois's avatar
POTTIER Francois committed
80

81 82 83 84 85
let actiondef grammar symbol branch =

  (* Construct a list of the semantic action's formal parameters that
     depend on the production's right-hand side. *)

86
  let formals =
87 88 89
    List.fold_left (fun formals producer ->
      let symbol = producer_symbol producer
      and id = producer_identifier producer in
POTTIER Francois's avatar
POTTIER Francois committed
90
      let startp, endp, starto, endo, loc =
91 92 93
        Printf.sprintf "_startpos_%s_" id,
        Printf.sprintf "_endpos_%s_" id,
        Printf.sprintf "_startofs_%s_" id,
POTTIER Francois's avatar
POTTIER Francois committed
94 95
        Printf.sprintf "_endofs_%s_" id,
        Printf.sprintf "_loc_%s_" id
96 97
      in
      let t =
98 99 100 101 102 103 104 105 106 107 108
        try
          let props = StringMap.find symbol grammar.tokens in
          (* Symbol is a terminal. *)
          match props.tk_ocamltype with
          | None ->
              tunit
          | Some ocamltype ->
              TypTextual ocamltype
        with Not_found ->
          (* Symbol is a nonterminal. *)
          nttype grammar symbol
109 110 111 112 113 114
      in
      PAnnot (PVar id, t) ::
      PAnnot (PVar startp, tposition) ::
      PAnnot (PVar endp, tposition) ::
      PAnnot (PVar starto, tint) ::
      PAnnot (PVar endo, tint) ::
Frédéric Bour's avatar
Frédéric Bour committed
115
      PAnnot (PVar loc, tlocation ~public:false grammar) ::
116
      formals
117
    ) [] branch.producers
118 119 120 121 122 123 124 125 126
  in

  (* Extend the list with parameters that do not depend on the
     right-hand side. *)

  let formals =
    PAnnot (PVar "_eRR", texn) ::
    PAnnot (PVar "_startpos", tposition) ::
    PAnnot (PVar "_endpos", tposition) ::
127
    PAnnot (PVar "_endpos__0_", tposition) ::
POTTIER Francois's avatar
POTTIER Francois committed
128
    PAnnot (PVar "_symbolstartpos", tposition) ::
129 130
    PAnnot (PVar "_startofs", tint) ::
    PAnnot (PVar "_endofs", tint) ::
131
    PAnnot (PVar "_endofs__0_", tint) ::
POTTIER Francois's avatar
POTTIER Francois committed
132
    PAnnot (PVar "_symbolstartofs", tint) ::
Frédéric Bour's avatar
Frédéric Bour committed
133 134
    PAnnot (PVar "_sloc", tlocation ~public:false grammar) ::
    PAnnot (PVar "_loc", tlocation ~public:false grammar) ::
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
    formals
  in

  (* Construct a function definition out of the above bindings and the
     semantic action. *)

  let body =
    EAnnot (
      Action.to_il_expr branch.action,
      type2scheme (nttype grammar symbol)
    )
  in

  match formals with
  | [] ->
      body
  | _ ->
      EFun (formals, body)

Frédéric Bour's avatar
Frédéric Bour committed
154 155 156 157 158 159 160
let location_module grammar =
  match grammar.UnparameterizedSyntax.location with
  | None -> []
  | Some path ->
    let md = MApp (MVar "MenhirLib.EngineTypes.As_location", MTextual path) in
    [SIModuleDef ("Menhir__Location", md)]

161 162 163 164 165 166 167 168
(* [program] turns an entire grammar into a test program. *)

let program grammar =

  (* Turn the grammar into a bunch of function definitions. Grammar
     productions that derive from the standard library are reflected
     first, so that type errors are not reported in them. *)

POTTIER Francois's avatar
POTTIER Francois committed
169
  let bindings1, bindings2 =
170 171
    StringMap.fold (fun symbol rule (bindings1, bindings2) ->
      List.fold_left (fun (bindings1, bindings2) branch ->
172 173 174 175
        if is_standard branch then
          (PWildcard, actiondef grammar symbol branch) :: bindings1, bindings2
        else
          bindings1, (PWildcard, actiondef grammar symbol branch) :: bindings2
176 177 178 179 180 181 182 183 184
      ) (bindings1, bindings2) rule.branches
    ) grammar.rules ([], [])
  in

  (* Create entry points whose types are the unknowns that we are
     looking for. *)

  let ps, ts =
    StringMap.fold (fun symbol _ (ps, ts) ->
185
      PVar (encode (Misc.normalize symbol)) :: ps,
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
      nttype grammar symbol :: ts
    ) grammar.rules ([], [])
  in

  let def = {
    valpublic = true;
    valpat = PTuple ps;
    valval = ELet (bindings1 @ bindings2, EAnnot (bottom, type2scheme (TypTuple ts)))
  }
  in

  (* Insert markers to delimit the part of the file that we are
     interested in. These markers are recognized by [Lexmli]. This
     helps skip the values, types, exceptions, etc. that might be
     defined by the prologue or postlogue. *)

  let begindef = {
    valpublic = true;
    valpat = PVar "menhir_begin_marker";
    valval = EIntConst 0
  }
  and enddef = {
    valpublic = true;
    valpat = PVar "menhir_end_marker";
    valval = EIntConst 0
  } in

  (* Issue the test program. We include the definition of the type of
     tokens, because, in principle, the semantic actions may refer to
     it or to its data constructors. *)

217
  [ SIFunctor (grammar.parameters,
218
    interface_to_structure (tokentypedef grammar) @
Frédéric Bour's avatar
Frédéric Bour committed
219
    location_module grammar @
220 221 222 223
    SIStretch grammar.preludes ::
    SIValDefs (false, [ begindef; def; enddef ]) ::
    SIStretch grammar.postludes ::
  [])]
224 225 226 227

(* ------------------------------------------------------------------------- *)
(* Writing the program associated with a grammar to a file. *)

228 229
let write grammar filename () =
  let ml = open_out filename in
230 231
  let module P = Printer.Make (struct
    let f = ml
232
    let locate_stretches = Some filename
233 234 235 236 237 238 239 240 241 242 243 244 245
  end) in
  P.program (program grammar);
  close_out ml

(* ------------------------------------------------------------------------- *)
(* Running ocamldep on the program. *)

type entry =
    string (* basename *) * string (* filename *)

type line =
    entry (* target *) * entry list (* dependencies *)

246
let depend postprocess grammar =
247 248 249 250 251 252 253 254 255

  (* Create an [.ml] file and an [.mli] file, then invoke ocamldep to
     compute dependencies for us. *)

  (* If an old [.ml] or [.mli] file exists, we are careful to preserve
     it. We temporarily move it out of the way and restore it when we
     are done. There is no reason why dependency analysis should
     destroy existing files. *)

256 257 258
  let ocamldep_command =
    Printf.sprintf "%s %s %s"
      Settings.ocamldep (Filename.quote mlname) (Filename.quote mliname)
259 260
  in

261 262 263 264
  let output : string =
    Option.project (
      IO.moving_away mlname (fun () ->
      IO.moving_away mliname (fun () ->
265
      IO.with_file mlname (write grammar mlname) (fun () ->
266
      IO.with_file mliname (Interface.write grammar) (fun () ->
267 268
      IO.invoke ocamldep_command
    )))))
269 270 271 272 273 274
  in

  (* Echo ocamldep's output. *)

  print_string output;

275 276 277 278
  (* If [--raw-depend] was specified on the command line, stop here.  This
     option is used by omake and by ocamlbuild, which performs their own
     postprocessing of [ocamldep]'s output. For normal [make] users, who use
     [--depend], some postprocessing is required, which is performed below. *)
279

280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
  if postprocess then begin

    (* Make sense out of ocamldep's output. *)

    let lexbuf = Lexing.from_string output in
    let lines : line list =
      try
        Lexdep.main lexbuf
      with Lexdep.Error msg ->
        (* Echo the error message, followed with ocamldep's output. *)
        Error.error [] "%s" (msg ^ output)
    in

    (* Look for the line that concerns the [.cmo] target, and echo a
       modified version of this line, where the [.cmo] target is
       replaced with [.ml] and [.mli] targets, and where the dependency
       over the [.cmi] file is dropped.

       In doing so, we assume that the user's [Makefile] supports
       bytecode compilation, so that it makes sense to request [bar.cmo]
       to be built, as opposed to [bar.cmx]. This is not optimal, but
       will do. [camldep] exhibits the same behavior. *)

    List.iter (fun ((_, target_filename), dependencies) ->
      if Filename.check_suffix target_filename ".cmo" then
        let dependencies = List.filter (fun (basename, _) ->
          basename <> base
        ) dependencies in
        if List.length dependencies > 0 then begin
          Printf.printf "%s.ml %s.mli:" base base;
          List.iter (fun (_basename, filename) ->
            Printf.printf " %s" filename
          ) dependencies;
          Printf.printf "\n%!"
        end
    ) lines
316 317 318 319 320 321 322 323

  end;

  (* Stop. *)

  exit 0

(* ------------------------------------------------------------------------- *)
324
(* Augmenting a grammar with inferred type information. *)
325

326
(* The parameter [output] is supposed to contain the output of [ocamlc -i]. *)
327

328
let read_reply (output : string) grammar =
329

330 331 332
  (* See comment in module [Error]. *)
  Error.enable();

333 334 335 336 337 338
  let env : (string * int * int) list =
    Lexmli.main (Lexing.from_string output)
  in

  let env : (string * ocamltype) list =
    List.map (fun (id, openingofs, closingofs) ->
339
      decode id, Inferred (String.sub output openingofs (closingofs - openingofs))
340 341 342 343 344 345 346 347
    ) env
  in

  (* Augment the grammar with new %type declarations. *)

  let types =
    StringMap.fold (fun symbol _ types ->
      let ocamltype =
348 349 350
        try
          List.assoc (Misc.normalize symbol) env
        with Not_found ->
351 352 353 354
          (* No type information was inferred for this symbol.
             Perhaps the mock [.ml] file or the inferred [.mli] file
             are out of date. Fail gracefully. *)
          Error.error [] "found no inferred type for %s." symbol
355 356
      in
      if StringMap.mem symbol grammar.types then
357 358
        (* If there was a declared type, keep it. *)
        types
359
      else
360 361
        (* Otherwise, insert the inferred type. *)
        StringMap.add symbol ocamltype types
362 363 364 365
    ) grammar.rules grammar.types
  in

  { grammar with types = types }
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404


(* ------------------------------------------------------------------------- *)
(* Inferring types for a grammar's nonterminals. *)

let infer grammar =

  (* Invoke ocamlc to do type inference for us. *)

  let ocamlc_command =
    Printf.sprintf "%s -c -i %s" Settings.ocamlc (Filename.quote mlname)
  in

  let output =
    write grammar mlname ();
    match IO.invoke ocamlc_command with
    | Some result ->
        Sys.remove mlname;
        result
    | None ->
        (* 2015/10/05: intentionally do not remove the [.ml] file if [ocamlc]
           fails. (Or if an exception is thrown.) *)
        exit 1
  in

  (* Make sense out of ocamlc's output. *)

  read_reply output grammar

(* ------------------------------------------------------------------------- *)

let write_query filename grammar =
  write grammar filename ();
  exit 0

(* ------------------------------------------------------------------------- *)

let read_reply filename grammar =
  read_reply (IO.read_whole_file filename) grammar