Commit 19ff31e4 authored by POTTIER Francois's avatar POTTIER Francois

Publish the project.

parent 0ee19236
S kremlin
S alphalib
B _build
B _build/kremlin
B _build/alphalib
PKG unix
PKG process
PKG pprint
PKG ppx_deriving.std
(* The source calculus. *)
module S = Lambda
(* The target calculus. *)
module T = Tail
let cps_term (t : S.term) : T.term =
assert false
(* Through a CPS transformation, the surface language [Lambda] is translated
down to the intermediate language [Tail]. *)
val cps_term: Lambda.term -> Tail.term
open Error
(* The source calculus. *)
module S = RawLambda
(* The target calculus. *)
module T = Lambda
(* Environments map strings to atoms. *)
module Env =
Map.Make(String)
(* [bind env x] creates a fresh atom [a] and extends the environment [env]
with a mapping of [x] to [a]. *)
let bind env x =
let a = Atom.fresh x in
Env.add x a env, a
let rec cook_term env { S.place; S.value } =
match value with
| S.Var x ->
begin try
T.Var (Env.find x env)
with Not_found ->
error place "Unbound variable: %s" x
end
| S.Lam (x, t) ->
let env, x = bind env x in
T.Lam (T.NoSelf, x, cook_term env t)
| S.App (t1, t2) ->
T.App (cook_term env t1, cook_term env t2)
| S.Lit i ->
T.Lit i
| S.BinOp (t1, op, t2) ->
T.BinOp (cook_term env t1, op, cook_term env t2)
| S.Print t ->
T.Print (cook_term env t)
| S.Let (S.NonRecursive, x, t1, t2) ->
let t1 = cook_term env t1 in
let env, x = bind env x in
let t2 = cook_term env t2 in
T.Let (x, t1, t2)
| S.Let (S.Recursive, f, { S.value = S.Lam (x, t1); _ }, t2) ->
let env, f = bind env f in
let x, t1 =
let env, x = bind env x in
x, cook_term env t1
in
let t2 = cook_term env t2 in
T.Let (f, T.Lam (T.Self f, x, t1), t2)
| S.Let (S.Recursive, _, { S.place; _ }, _) ->
error place "the right-hand side of 'let rec' must be a lambda-abstraction"
let cook_term t =
cook_term Env.empty t
(* This module translates [RawLambda] into [Lambda]. *)
(* This involves ensuring that every name is properly bound (otherwise, an
error is reported) and switching from a representation of names as strings
to a representation of names as atoms. *)
(* This also involves checking that the right-hand side of every [let]
construct is a function (otherwise, an error is reported) and switching
from a representation where [let] constructs can carry a [rec] annotation
to a representation where functions can carry such an annotation. *)
(* This also involves dropping places (that is, source code locations), since
they are no longer used after this phase. *)
val cook_term: RawLambda.term -> Lambda.term
(* The source calculus. *)
module S = Tail
(* The target calculus. *)
module T = Top
let defun_term (t : S.term) : T.program =
assert false
(* Through defunctionalization, the intermediate language [Tail] is translated
down to the next intermediate language, [Top]. *)
val defun_term: Tail.term -> Top.program
open Lexing
type place =
position * position
let place lexbuf : place =
lexbuf.lex_start_p, lexbuf.lex_curr_p
let line p : int =
p.pos_lnum
let column p : int =
p.pos_cnum - p.pos_bol
let show place : string =
let startp, endp = place in
Printf.sprintf "File \"%s\", line %d, characters %d-%d"
startp.pos_fname
(line startp)
(column startp)
(endp.pos_cnum - startp.pos_bol) (* intentionally [startp.pos_bol] *)
let display continuation header place format =
Printf.fprintf stderr "%s:\n" (show place);
Printf.kfprintf
continuation
stderr
(header ^^ format ^^ "\n%!")
let error place format =
display
(fun _ -> exit 1)
"Error: "
place format
let set_filename lexbuf filename =
lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename }
let pp_place formatter _place =
Format.fprintf formatter "<>"
open Lexing
(* A place is a pair of a start position and an end position. *)
type place =
position * position
(* [set_filename lexbuf filename] updates [lexbuf] to record the
fact that the current file name is [filename]. This file name
is later used in error messages. *)
val set_filename: lexbuf -> string -> unit
(* [place lexbuf] produces a pair of the current token's start and
end positions. This function is useful when reporting an error
during lexing. *)
val place: lexbuf -> place
(* [error place format ...] displays an error message and exits.
The error message is located at [place]. The error message
is composed based on [format] and the extra arguments [...]. *)
val error: place -> ('a, out_channel, unit, 'b) format4 -> 'a
(* [pp_place formatter place] prints a place. It is used by
[@@deriving show] for data structures that contain places.
As of now, it prints nothing. *)
val pp_place: Format.formatter -> place -> unit
(* The source calculus. *)
module S = Top
(* The target calculus. *)
module T = C
(* -------------------------------------------------------------------------- *)
(* [interval i j f] constructs the list [[f i; f (i + 1); ...; f (j - 1)]]. *)
let rec interval i j (f : int -> 'a) : 'a list =
if i < j then
f i :: interval (i + 1) j f
else
[]
(* -------------------------------------------------------------------------- *)
(* [index xs] constructs a list of pairs, where each element of [xs] is paired
with its index. Indices are 0-based. *)
let index (xs : 'a list) : (int * 'a) list =
let n = List.length xs in
let indices = interval 0 n (fun i -> i) in
List.combine indices xs
(* -------------------------------------------------------------------------- *)
(* The number of fields of a block, not counting its tag. *)
let block_num_fields b =
match b with
| S.Con (_, vs) ->
List.length vs
(* -------------------------------------------------------------------------- *)
(* A simple-minded way of ensuring that every atom is printed as a
distinct string is to concatenate the atom's hint and identity,
with an underscore in between. This is guaranteed to rule out
collisions. *)
let var (x : S.variable) : T.ident =
Printf.sprintf "%s_%d" (Atom.hint x) (Atom.identity x)
let evar (x : S.variable) : T.expr =
T.Name (var x)
(* -------------------------------------------------------------------------- *)
(* Predefined C types and functions. *)
(* A universal type: every value is translated to a C value of type [univ].
This is a union type (i.e., an untagged sum) of integers and pointers to
memory blocks. *)
let univ : T.type_spec =
T.Named "univ"
(* The type of integers. *)
let int : T.type_spec =
T.Named "int"
(* The type [char] appears in the type of [main]. *)
let char : T.type_spec =
T.Named "char"
let answer : T.type_spec =
int
(* Our functions never actually return, since they are tail recursive.
We use [int] as their return type, since this is the return type of
[main]. *)
let exit : T.expr =
T.Name "exit"
let printf : T.expr =
T.Name "printf"
(* -------------------------------------------------------------------------- *)
(* [declare x init] constructs a local variable declaration for a variable [x]
of type [univ]. [x] is optionally initialized according to [init]. *)
let declare (x : S.variable) (init : T.init option) : T.declaration =
univ, None, [ T.Ident (var x), init ]
(* -------------------------------------------------------------------------- *)
(* Macro invocations. *)
let macro m es : T.expr =
(* We disguise a macro invocation as a procedure call. *)
T.Call (T.Name m, es)
(* -------------------------------------------------------------------------- *)
(* Integer literals; conversions between [univ] and [int]. *)
let iconst i : T.expr =
T.Constant (Constant.Int64, string_of_int i)
let to_int v : T.expr =
macro "TO_INT" [ v ]
(* This is an unsafe conversion, of course. *)
let from_int v : T.expr =
macro "FROM_INT" [ v ]
(* -------------------------------------------------------------------------- *)
(* The translation of values. *)
let finish_op = function
| S.OpAdd ->
T.K.Add
| S.OpSub ->
T.K.Sub
| S.OpMul ->
T.K.Mult
| S.OpDiv ->
T.K.Div
let rec finish_value (v : S.value) : T.expr =
match v with
| S.VVar x ->
evar x
| S.VLit i ->
from_int (iconst i)
| S.VBinOp (v1, op, v2) ->
from_int (
T.Op2 (
finish_op op,
to_int (finish_value v1),
to_int (finish_value v2)
)
)
let finish_values vs =
List.map finish_value vs
(* -------------------------------------------------------------------------- *)
(* A macro for allocating a memory block. *)
let alloc b : T.expr =
T.Call (T.Name "ALLOC", [ iconst (block_num_fields b) ])
(* -------------------------------------------------------------------------- *)
(* Macros for reading and initializing the tag of a memory block. *)
let read_tag (v : S.value) : T.expr =
macro "GET_TAG" [ finish_value v ]
let set_tag (x : S.variable) (tag : S.tag) : T.stmt =
T.Expr (macro "SET_TAG" [ evar x; iconst tag ])
(* -------------------------------------------------------------------------- *)
(* Macros for reading and setting a field in a memory block. *)
let read_field (v : S.value) (i : int) : T.expr =
(* [i] is a 0-based field index. *)
macro "GET_FIELD" [ finish_value v; iconst i ]
let read_field (v : S.value) (i, x) (t : T.stmt list) : T.stmt list =
(* [x] is a variable, which is declared and initialized with
the content of the [i]th field of the block [v]. *)
T.DeclStmt (declare x (Some (T.InitExpr (read_field v i)))) ::
t
let read_fields (v : S.value) xs (t : T.stmt list) : T.stmt list =
(* [xs] are variables, which are declared and initialized with
the contents of the fields of the block [v]. *)
List.fold_right (read_field v) (index xs) t
let set_field x i (v : S.value) : T.stmt =
T.Expr (macro "SET_FIELD" [ evar x; iconst i; finish_value v ])
(* -------------------------------------------------------------------------- *)
(* A sequence of instructions for initializing a memory block. *)
let init_block (x : S.variable) (b : S.block) : T.stmt list =
match b with
| S.Con (tag, vs) ->
T.Comment "Initializing a memory block:" ::
set_tag x tag ::
List.mapi (set_field x) vs
(* -------------------------------------------------------------------------- *)
(* Function calls, as expressions and as statements. *)
let ecall f args : T.expr =
T.Call (f, args)
let scall f args : T.stmt =
T.Expr (ecall f args)
(* -------------------------------------------------------------------------- *)
(* The translation of terms. *)
let rec finish_term (t : S.term) : C.stmt =
match t with
| S.Exit ->
T.Compound [
scall exit [ iconst 0 ]
]
| S.TailCall (f, vs) ->
T.Return (Some (ecall (evar f) (finish_values vs)))
| S.Print (v, t) ->
T.Compound [
scall printf [ T.Literal "%d\\n"; to_int (finish_value v) ];
finish_term t
]
| S.LetVal (x, v1, t2) ->
T.Compound [
T.DeclStmt (declare x (Some (T.InitExpr (finish_value v1))));
finish_term t2
]
| S.LetBlo (x, b1, t2) ->
T.Compound (
T.DeclStmt (declare x (Some (T.InitExpr (alloc b1)))) ::
init_block x b1 @
[ finish_term t2 ]
)
| S.Swi (v, bs) ->
T.Switch (
read_tag v,
finish_branches v bs,
default
)
and default : T.stmt =
(* This default [switch] branch should never be taken. *)
T.Compound [
scall printf [ T.Literal "Oops! A nonexistent case has been taken.\\n" ];
scall exit [ iconst 42 ];
]
and finish_branches v bs =
List.map (finish_branch v) bs
and finish_branch v (S.Branch (tag, xs, t)) : T.expr * T.stmt =
iconst tag,
T.Compound (read_fields v xs [finish_term t])
(* -------------------------------------------------------------------------- *)
(* Function declarations. *)
(* We distinguish the function [main], whose type is imposed by the C standard,
and ordinary functions, whose parameters have type [univ]. *)
(* A parameter of an ordinary function has type [univ]. *)
let param (x : S.variable) : T.param =
univ, T.Ident (var x)
(* A declaration of an ordinary function. *)
let declare_ordinary_function f xs : T.declaration =
answer, None, [ T.Function (None, T.Ident (var f), List.map param xs), None ]
(* The declaration of the main function. *)
let declare_main_function : T.declaration =
let params = [
int, T.Ident "argc";
char, T.Pointer (T.Pointer (T.Ident "argv"))
] in
int, None, [ T.Function (None, T.Ident "main", params), None ]
(* -------------------------------------------------------------------------- *)
(* A function definition. *)
type decl_or_fun =
T.declaration_or_function
let define (decl : T.declaration) (t : S.term) : decl_or_fun =
T.Function (
[], (* no comments *)
false, (* not inlined *)
decl,
T.Compound [finish_term t]
)
let define_ordinary_function (S.Fun (f, xs, t)) : decl_or_fun =
define (declare_ordinary_function f xs) t
let define_main_function (t : S.term) : decl_or_fun =
define declare_main_function t
(* -------------------------------------------------------------------------- *)
(* Because all functions are mutually recursive, their definitions must be
preceded with their prototypes. *)
let prototype (f : decl_or_fun) : decl_or_fun =
match f with
| T.Function (_, _, declaration, _) ->
T.Decl ([], declaration)
| T.Decl _ ->
assert false
let prototypes (fs : decl_or_fun list) : decl_or_fun list =
List.map prototype fs @
fs
(* -------------------------------------------------------------------------- *)
(* The translation of a complete program. *)
let finish_program (S.Prog (decls, main) : S.program) : T.program =
prototypes (
define_main_function main ::
List.map define_ordinary_function decls
)
(* This function implements a translation of the intermediate language [Top]
down to [C]. This transformation is mostly a matter of choosing appropriate
C constructs to reflect the concepts of the language [Top]. *)
val finish_program: Top.program -> C.program
(* This language is the untyped lambda-calculus, extended with recursive
lambda-abstractions, nonrecursive [let] bindings, integer literals and
integer arithmetic operations, and the primitive operation [print]. *)
(* This language is really the same language as [RawLambda], with the
following internal differences:
1. Instead of recursive [let] bindings, the language has recursive
lambda-abstractions. A [let rec] definition whose right-hand side is not
a lambda-abstraction is rejected during the translation of [RawLambda]
to [Lambda].
2. Variables are represented by atoms (instead of strings). A term with an
unbound variable is rejected during the translation of [RawLambda] to
[Lambda].
3. Terms are no longer annotated with places. *)
(* Variables are atoms. *)
type variable =
Atom.atom
(* Every lambda-abstraction is marked recursive or nonrecursive. Whereas a
nonrecursive lambda-abstraction [fun x -> t] binds one variable [x], a
recursive lambda-abstraction [fix f. fun x -> t] binds two variables [f]
and [x]. The variable [f] is a self-reference. *)
and self =
| Self of variable
| NoSelf
and binop = RawLambda.binop =
| OpAdd
| OpSub
| OpMul
| OpDiv
and term =
| Var of variable
| Lam of self * variable * term
| App of term * term
| Lit of int
| BinOp of term * binop * term
| Print of term
| Let of variable * term * term
[@@deriving show { with_path = false }]
{
open Lexing
open Error
open Parser
open RawLambda
}
(* -------------------------------------------------------------------------- *)
(* Regular expressions. *)
let newline =
('\010' | '\013' | "\013\010")
let whitespace =
[ ' ' '\t' ]
let lowercase =
['a'-'z' '\223'-'\246' '\248'-'\255' '_']
let uppercase =
['A'-'Z' '\192'-'\214' '\216'-'\222']
let identchar =
['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9']
let digit =
['0'-'9']
(* -------------------------------------------------------------------------- *)
(* The lexer. *)
rule entry = parse
| "fun"
{ FUN }
| "in"
{ IN }
| "let"
{ LET }
| "print"
{ PRINT }
| "rec"
{ REC }
| "->"
{ ARROW }
| "="
{ EQ }
| "("
{ LPAREN }
| ")"
{ RPAREN }
| "+"
{ ADDOP OpAdd }
| "-"
{ ADDOP OpSub }
| "*"
{ MULOP OpMul }
| "/"
{ MULOP OpDiv }
| (lowercase identchar *) as x
{ IDENT x }
| digit+ as i
{ try
INTLITERAL (int_of_string i)
with Failure _ ->
error (place lexbuf) "invalid integer literal." }
| "(*"
{ ocamlcomment (place lexbuf) lexbuf; entry lexbuf }
| newline
{ new_line lexbuf; entry lexbuf }
| whitespace+
{ entry lexbuf }
| eof
{ EOF }
| _ as c
{ error (place lexbuf) "unexpected character: '%c'." c }
(* ------------------------------------------------------------------------ *)
(* Skip OCaml-style comments. Comments can be nested. This sub-lexer is
parameterized with the place of the opening comment, so if an unterminated
comment is detected, we can show where it was opened. *)
and ocamlcomment p = parse
| "*)"
{ () }
| "(*"
{ ocamlcomment (place lexbuf) lexbuf; ocamlcomment p lexbuf }
| newline
{ new_line lexbuf; ocamlcomment p lexbuf }
| eof
{ error p "unterminated comment." }
| _
{ ocamlcomment p lexbuf }
(* -------------------------------------------------------------------------- *)
(* Parse the command line. *)
let debug =
ref false
let filenames =
ref []
let record filename =
filenames := filename :: !filenames
let options =
Arg.align [
"--debug", Arg.Set debug, " Enable debugging output";
]
let usage =
Printf.sprintf "Usage: %s <options> <filename>" Sys.argv.(0)
let () =
Arg.parse options record usage
let debug =
!debug
let filenames =
List.rev !filenames
(* -------------------------------------------------------------------------- *)
(* Printing a syntax tree in an intermediate language (for debugging). *)
let print_delimiter () =
Printf.eprintf "----------------------------------------";
Printf.eprintf "----------------------------------------\n"
let dump (phase : string) (show : 'term -> string) (t : 'term) =
if debug then begin
print_delimiter();
Printf.eprintf "%s:\n\n%s\n\n%!" phase (show t)
end;
t
(* -------------------------------------------------------------------------- *)
(* Reading and parsing a file. *)
let read filename : RawLambda.term =
try
let contents = Utils.file_get_contents filename in
let lexbuf = Lexing.from_string contents in
Error.set_filename lexbuf filename;
try
Parser.entry Lexer.entry lexbuf
with
| Parser.Error ->
Error.error (Error.place lexbuf) "Syntax error."
with
| Sys_error msg ->
prerr_endline msg;
exit 1
(* -------------------------------------------------------------------------- *)
(* Printing the final C program on the standard output channel. *)