Initial import

parent bfb8cf54
_build/
joujou
main.native
\ No newline at end of file
OCAMLBUILD := ocamlbuild -classic-display -use-ocamlfind -use-menhir -cflags "-g" -lflags "-g"
INCLUDE := -Is pllib
MAIN := main
EXEC := joujou
SUFFIX := native
.PHONY: all clean test
all:
$(OCAMLBUILD) $(INCLUDE) $(MAIN).$(SUFFIX)
ln -sf $(MAIN).$(SUFFIX) $(EXEC)
clean:
rm -f *~ $(EXEC)
$(OCAMLBUILD) -clean
test: all
$(MAKE) -C ../test $@
true: \
debug, \
strict_sequence, \
warn(A-3-4-30-44-42-45-50), \
package(unix), \
package(pprint), \
package(ppx_deriving.std)
\ No newline at end of file
open Atom
open Syntax
open Types
open Terms
type identifier =
Identifier.identifier
type atom =
Atom.atom
type env =
Import.env
(* ------------------------------------------------------------------------- *)
(* Term variables, type variables, type constructors, and data constructors are
explicitly bound. *)
let bind env id : atom * env =
let env = Import.bind env id in
Import.resolve env id, env
(* ------------------------------------------------------------------------- *)
(* [mktc] builds a [TyCon] type and checks that its arity is respected. *)
let mktc tctable env id tys =
let tc = Import.resolve env id in
let got = List.length tys in
let expected =
try
AtomMap.find tc tctable
with Not_found ->
(* if this fails, then [tc] is unbound and [resolve] has failed silently;
in that case, do not report an arity error *)
got
in
if expected <> got then
Error.signal
[ Identifier.location id ]
(Printf.sprintf "The type constructor %s expects %d arguments, but is here applied to %d arguments.\n"
(Identifier.name id) expected got);
TyCon (tc, tys)
(* ------------------------------------------------------------------------- *)
(* [itype] converts a type from external syntax into internal syntax. *)
let ityvar env a =
TyFreeVar (Import.resolve env a)
let ityvars env =
List.map (ityvar env)
let rec itype tctable env : Syntax.ftype -> Types.ftype = function
| SynTyArrow (domain, codomain) ->
TyArrow (itype tctable env domain, itype tctable env codomain)
| SynTyForall (a, body) ->
let a, env = bind env a in
TyForall (abstract a (itype tctable env body))
| SynTyVarOrTyCon (id, []) when Import.resolves env type_sort id ->
(* if there are no type arguments and if [id] resolves as a type variable [a],
then consider it is a type variable *)
let a = Identifier.mak type_sort id in
ityvar env a
| SynTyVarOrTyCon (id, tys) ->
(* otherwise, consider [id] must be a type constructor *)
let tc = Identifier.mak typecon_sort id in
mktc tctable env tc (itypes tctable env tys)
and itypes tctable env tys =
List.map (itype tctable env) tys
(* ------------------------------------------------------------------------- *)
(* [ischeme] converts a data constructor type scheme in external syntax into
a type in internal syntax. *)
let ischeme tctable env : Syntax.scheme -> Types.ftype = function
| SynScheme (quantifiers, fields, tc, params) ->
(* Deal with the universal quantifiers. *)
let env = Import.bind_simultaneously env quantifiers in
List.fold_right (fun quantifier ty ->
let quantifier = Import.resolve env quantifier in
TyForall (abstract quantifier ty)
) quantifiers (
(* Build a function type whose domain is a tuple of the fields and
whose codomain is an application of the type constructor [tc] to
the type variables [params]. *)
TyArrow (
TyTuple (itypes tctable env fields),
mktc tctable env tc (itypes tctable env params)
)
)
(* ------------------------------------------------------------------------- *)
(* [iterm] converts a term from external syntax into internal syntax. *)
let rec iterm tctable env = function
| SynTeVar id ->
TeVar (Import.resolve env id, ref None)
| SynTeAbs (x, ftype, term) ->
let x, env = bind env x in
TeAbs (x, itype tctable env ftype, iterm tctable env term)
| SynTeApp (term1, term2) ->
TeApp (iterm tctable env term1, iterm tctable env term2, ref None)
| SynTeLet (x, term1, term2) ->
let x, env' = bind env x in
TeLet (x, iterm tctable env term1, iterm tctable env' term2)
| SynTeTyAbs (a, term) ->
let a, env = bind env a in
TeTyAbs (a, iterm tctable env term)
| SynTeTyApp (term, ftype) ->
TeTyApp (iterm tctable env term, itype tctable env ftype, ref None)
| SynTeData (id, tys, fields) ->
TeData (Import.resolve env id, itypes tctable env tys, iterms tctable env fields, ref None)
| SynTeTyAnnot (t, ty) ->
TeTyAnnot (iterm tctable env t, itype tctable env ty)
| SynTeMatch (t, ty, clauses) ->
TeMatch (iterm tctable env t, itype tctable env ty, iclauses tctable env clauses, ref None)
| SynTeLoc (loc, t) ->
TeLoc (loc, iterm tctable env t)
and iterms tctable env terms =
List.map (iterm tctable env) terms
and iclauses tctable env clauses =
List.map (iclause tctable env) clauses
and iclause tctable env = function
| SynClause (p, t) ->
let p, env = ipattern env p in
Clause (p, iterm tctable env t)
and ipattern env = function
| SynPatData (loc, d, tyvars, fields) ->
let env, tyvars = Import.bind_sequentially env tyvars in
let env, fields = Import.bind_sequentially env fields in
PatData (loc, Import.resolve env d, tyvars, fields, ref None), env
(* ------------------------------------------------------------------------- *)
(* [build] builds the initial toplevel environment, which binds all
type constructors and all data constructors. It also builds the
type constructor and data constructor tables. *)
let build ds : type_table * datacon_table * env =
let env = Import.empty in
(* Gather all type constructors. *)
let tcs : identifier list =
List.fold_left (fun tcs -> function SynType (tc, _) -> tc :: tcs | _ -> tcs) [] ds
in
(* Build an import environment for type constructors. *)
let env = Import.bind_simultaneously env tcs in
Error.signaled();
(* Build a table that maps a type constructor to its arity. *)
let tctable : int AtomMap.t =
List.fold_left (fun tctable -> function
| SynType (tc, params) -> AtomMap.add (Import.resolve env tc) (List.length params) tctable
| _ -> tctable
) AtomMap.empty ds
in
(* Gather all data constructors. *)
let dcs : identifier list =
List.fold_left (fun dcs -> function SynDatacon (d, _) -> d :: dcs | _ -> dcs) [] ds
in
(* Extend the import environment with the data constructors. *)
let env = Import.bind_simultaneously env dcs in
(* Build a table that maps a data constructor to its type scheme. *)
let dctable : ftype AtomMap.t =
List.fold_left (fun dctable -> function
| SynDatacon (d, s) -> AtomMap.add (Import.resolve env d) (ischeme tctable env s) dctable
| _ -> dctable
) AtomMap.empty ds
in
tctable, dctable, env
(* ------------------------------------------------------------------------- *)
(* [program] converts a complete program. *)
let program = function
| SynProg (ds, t) ->
let tctable, dctable, env = build ds in
let t = iterm tctable env t in
Error.signaled();
Prog (tctable, dctable, t)
(* This module turns external syntax ito internal syntax. In particular,
this involves replacing identifiers with atoms, and making sure that
every identifier is properly bound. *)
val program: Syntax.program -> Terms.pre_program
{
open Lexing
open Parser
let pp_location _ _ = ()
(* A table of keywords. *)
let string2keyword, keyword2string =
LexerUtil.setup [
"fun", FUN;
"let", LET;
"join", JOIN;
"in", IN;
"jump", JUMP;
"match", MATCH;
"with", WITH;
"end", END;
"forall", FORALL;
"return", RETURN;
"data", DATA;
"constructor", CONSTRUCTOR;
"program", PROGRAM;
"type", TYPE;
]
}
let newline = ('\010' | '\013' | "\013\010")
let whitespace = [ ' ' '\t' ]
let digit = [ '0'-'9' ]
let integer = ( "0x" | "0o" | "0b" )? digit+
let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
let identifier = lowercase identchar*
let tag = uppercase identchar*
rule main = parse
| newline
{ LexerUtil.newline lexbuf; main lexbuf }
| whitespace+
{ main lexbuf }
| identifier as id
{ try
string2keyword id
with Not_found ->
IDENTIFIER (id, lexeme_start_p lexbuf, lexeme_end_p lexbuf)
}
| tag as id
{ TAG (id, lexeme_start_p lexbuf, lexeme_end_p lexbuf) }
| ";"
{ SEMI }
| ":"
{ COLON }
| "="
{ EQ }
| "."
{ DOT }
| "|"
{ BAR }
| "->"
{ ARROW }
| "("
{ LPAR }
| ")"
{ RPAR }
| "{"
{ LBRACE }
| "}"
{ RBRACE }
| "["
{ LBRACKET }
| "]"
{ RBRACKET }
| "(*"
{ comment (lexeme_start_p lexbuf) lexbuf; main lexbuf }
| eof
{ EOF }
| _
{ Error.errorb lexbuf "Unexpected character." }
and comment openingp = parse
| "(*"
{ comment (lexeme_start_p lexbuf) lexbuf; comment openingp lexbuf }
| "*)"
{ () }
| newline
{ LexerUtil.newline lexbuf; comment openingp lexbuf }
| eof
{ Error.error [ (openingp, lexeme_start_p lexbuf) ] "Unterminated comment." }
| _
{ comment openingp lexbuf }
open Printf
open Print
(* ------------------------------------------------------------------------- *)
(* Parse the command line. *)
let filename =
ref None
let please_echo =
ref false
let please_optimize =
ref false
let usage =
sprintf "Usage: %s <options> <filename>\n" Sys.argv.(0)
let () =
Arg.parse (Arg.align [
"--echo", Arg.Set please_echo, " Prior to typechecking, echo intermediary steps";
"--optimize", Arg.Set please_optimize, " Typecheck, optimize, and display the optimized program";
"--case-of-case", Arg.Set Simplifier.optimize_caseofcase, " Enable case-of-case optimization";
]) (fun name -> filename := Some name) usage
let filename =
match !filename with
| Some filename ->
filename
| None ->
fprintf stderr "%s%!" usage;
exit 1
(* -------------------------------------------------------------------------- *)
(* Printing a syntax tree in an intermediate language (for debugging). *)
let print_delimiter () =
Printf.eprintf "----------------------------------------";
Printf.eprintf "----------------------------------------\n"
let dump (phase : string) (show : 'term -> string) (t : 'term) =
if !please_echo then begin
print_delimiter();
Printf.eprintf "%s:\n\n%s\n\n%!" phase (show t)
end;
t
(* ------------------------------------------------------------------------- *)
(* Read the file; lex; parse; internalize; typecheck; simplify; print. *)
let read filename : Syntax.program =
let lexbuf = LexerUtil.open_in filename in
try
Parser.program Lexer.main lexbuf
with Parser.Error ->
Error.errorb lexbuf "Syntax error.\n"
let simplify filename prog =
ignore (Typecheck.run prog);
let prog = Typecheck.petrify prog in
if !please_optimize then
Simplifier.program filename prog
else
prog
let output prog =
Printf.printf "%s" (print_program prog)
let process filename =
filename
|> read
|> dump "AST" (Format.asprintf "%a" Syntax.pp_program)
|> Internalize.program
|> dump "Internalized" print_program
|> simplify filename
|> dump "Simplified" print_program
|> output
(* -------------------------------------------------------------------------- *)
(* The main program. *)
let () =
(* If we ask for the case-of-case optimization,
then we are asking for the simplifier to be run *)
if ! Simplifier.optimize_caseofcase then
please_optimize := true;
(* Go! *)
process filename
open Ocamlbuild_plugin
let () =
flag ["ocaml"; "parser"; "menhir" ] (A"--explain")
%{
open List
open Syntax
let distribute (xs : 'x list) (y : 'y) : ('x * 'y) list =
map (fun x -> (x, y)) xs
let abs te_args t =
fold_right (fun (x, ty) t -> SynTeAbs (x, ty, t)) te_args t
let tyabs ty_args t =
fold_right (fun a t -> SynTeTyAbs (a, t)) ty_args t
let tyapp t tys =
fold_left (fun t ty -> SynTeTyApp (t, ty)) t tys
let otyannot t = function
| None ->
t
| Some ty ->
SynTeTyAnnot (t, ty)
let forall tvars ty =
fold_right (fun tv ty -> SynTyForall (tv, ty)) tvars ty
let make_non_recursive_definition ty_args te_args ocodomain t =
(* if there are no arguments at all, then this is not a function *)
(* this special case is required to avoid ambiguity between the
primitive form [let x = t1 in t2] and the syntactic sugar
[let f (...) = t1 in t2], in the particular case where there
are no arguments. *)
tyabs ty_args (
abs te_args (
otyannot t ocodomain
)
)
%}
%token <string * Lexing.position * Lexing.position> IDENTIFIER TAG
%token SEMI COLON EQ DOT BAR ARROW
%token LPAR RPAR LBRACE RBRACE LBRACKET RBRACKET
%token FUN LET JOIN IN JUMP MATCH WITH END FORALL RETURN TYPE DATA CONSTRUCTOR PROGRAM
%token EOF
%start <Syntax.program> program
%%
(* ------------------------------------------------------------------------- *)
(* In ocaml syntax, SEMI can be used either as a delimiter or as a
terminator. That is, the last SEMI is optional. Here is a way of
saying so. *)
semi(X):
| (* nothing *)
{ [] }
| xs = terminated(X, SEMI)+
| xs = separated_nonempty_list(SEMI, X)
{ xs }
(* ------------------------------------------------------------------------- *)
(* Similarly, BAR can be used either as a delimiter or as an opening symbol.
That is, the first BAR is optional. *)
bar(X):
| (* nothing *)
{ [] }
| xs = preceded(BAR, X)+
| xs = separated_nonempty_list(BAR, X)
{ xs }
(* ------------------------------------------------------------------------- *)
(* Identifiers. *)
(* Each identifier is tagged with its sort, which is known thanks
to the context where the identifier appears. *)
(* Data constructors must begin with an uppercase letter, so the lexer
distinguishes them. Not drawing this distinction would take us beyond
LR(1). *)
term_variable:
| id = IDENTIFIER
{ Identifier.mak term_sort id }
type_variable:
| id = IDENTIFIER
{ Identifier.mak type_sort id }
data_constructor:
| id = TAG
{ Identifier.mak data_sort id }
type_constructor:
| id = IDENTIFIER
{ Identifier.mak typecon_sort id }
(* ------------------------------------------------------------------------- *)
(* Types. *)
typ0:
| id = IDENTIFIER
(* this could be a type variable or type constructor *)
{ SynTyVarOrTyCon (id, []) }
| LPAR ty = typ RPAR
{ ty }
typ1:
| id = IDENTIFIER params = typ0+
(* this should be a type constructor, since it has parameters,
but we leave the ambiguity for the sake of uniformity *)
{ SynTyVarOrTyCon (id, params) }
| ty = typ0
{ ty }
typ:
| ty = typ1
{ ty }
| ty1 = typ1 ARROW ty2 = typ
{ SynTyArrow (ty1, ty2) }
| FORALL tvars = type_variable* DOT ty = typ
{ forall tvars ty }
(* ------------------------------------------------------------------------- *)
(* Type schemes for data constructors. *)
quantifiers:
| FORALL tvars = type_variable* DOT
{ tvars }
scheme:
| tvars = loption(quantifiers)
LBRACE fields = semi(typ) RBRACE
ARROW tc = type_constructor params = typ0*
{ SynScheme (tvars, fields, tc, params) }
datacon_signature:
| DATA CONSTRUCTOR d = data_constructor COLON s = scheme
{ SynDatacon (d, s) }
(* ------------------------------------------------------------------------- *)
(* Type declarations. *)
type_signature:
| TYPE tc = type_constructor params = type_variable*
{ SynType (tc, params) }
(* ------------------------------------------------------------------------- *)
(* Function arguments. *)
(* Type variables are surrounded with square brackets. Term variables are
surrounded with parentheses and must carry a type annotation. For simplicity,
we require that type arguments come first. *)
(* Formal type arguments must be type variables. Actual type arguments may be
arbitrary types. *)
(* In order to avoid ambiguity, we restrict actual type arguments to only one
argument per bracket pair. For the sake of uniformity, we impose the same
restriction upon formal type arguments. *)
formal_type_arguments:
| LBRACKET tvar = type_variable RBRACKET (* could allow type_variable+ *)
{ [ tvar ] }
actual_type_arguments:
| LBRACKET tys = typ RBRACKET (* could instead allow typ0+ *)
{ [ tys ] }
term_arguments:
| LPAR xs = term_variable+ COLON domain = typ RPAR
{ distribute xs domain }
%inline multiple(X):
| xs = X*
{ concat xs }
(* ------------------------------------------------------------------------- *)
(* Function definitions. *)
(* The function definition follows either [FUN] or [LET f]. *)
(* If the definition begins with a function name, then the function is considered
recursive, and the result type annotation is mandatory. Otherwise, the
function is considered non-recursive, and the result type annotation is
optional. *)
non_recursive_def:
| ty_args = multiple(formal_type_arguments)
te_args = multiple(term_arguments)
codomain = preceded(COLON,typ)?
EQ t = loc(term)
{ make_non_recursive_definition ty_args te_args codomain t }
def:
| def = non_recursive_def
{ def }
(* ------------------------------------------------------------------------- *)
(* Terms. *)
term0:
| x = term_variable
{ SynTeVar x }