Commit db02e6b3 authored by POGODALLA Sylvain's avatar POGODALLA Sylvain

First steps in replacing dypgen by menhir to generate the ACG data file parsers

parent 320be489
......@@ -34,7 +34,7 @@ type return_status = Failure | Success of ((string option) * Actual_env.t)
(** Tries a reference to {!Logic.Lambda.Lambda.stype}. *)
module Actual_parser = Grammars.Data_parser.Parser
module Actual_parser = Grammars.Data_parser
let resize_terminal () =
let () = Utils.sterm_set_size () in
......
;; -*-lisp-*-
(rule
(targets data_parser.ml)
(deps (:input-file data_parser.dyp))
(action (chdir %{project_root} (run %{bin:dypgen} --noemit-token-type --no-pp --no-obj-type --no-mli %{input-file})))
)
;(rule
; (targets data_parser.ml)
; (deps (:input-file data_parser.dyp))
; (action (chdir %{project_root} (run %{bin:dypgen} --noemit-token-type --no-pp --no-obj-type --no-mli %{input-file})))
; )
(ocamllex data_lexer term_lexer)
(menhir
; (flags (--explain --table --compile-errors /home/pogodall/work/dev/ACGtk/src/grammars/term_parser.messages))
(flags (--explain --table))
(modules term_parser))
(menhir
; (flags (--explain --table --compile-errors /home/pogodall/work/dev/ACGtk/src/grammars/term_parser.messages))
(merge_into data_parser)
(flags (--explain --table))
(modules file_parser sig_parser lex_parser type_parser term2_parser))
(ocamllex data_lexer)
;; This stanza declares the Grammar library
(library
(name grammars)
(flags (:standard -w -58))
(modules (:standard \ acgc interactive))
(libraries
(modules (:standard \ acgc interactive term_test))
(libraries
menhirLib
logic
acgData
)
......@@ -26,7 +39,6 @@
(package acgtk)
(modules acgc)
(libraries
threads
logs
fmt.tty
logic
......@@ -34,5 +46,15 @@
grammars
))
(test
(name term_test)
(modules term_test)
(libraries
menhirLib
utilsLib
grammars)
)
(documentation (package acgtkLib))
%{
open UtilsLib
open Logic.Abstract_syntax
open AcgData
open Environment
module Typing_env = Utils.StringSet
type context =
| Signature of Environment.Signature1.t
| Abs_and_obj of (Environment.Signature1.t * Environment.Signature1.t)
| Env of Environment.t
type type_or_cst =
| Type of (string*Abstract_syntax.location)
| Term of (string*Abstract_syntax.location)
let type_or_cst_to_string = function
| Type _ -> "Type"
| Term _ -> "Term"
exception Is_type
exception Is_cst
let emit_parse_error e loc = raise (Error.Error (Error.Parse_error (e,loc)))
let new_loc (s,_) (_,e) = (s,e)
let get_term_location = function
| Abstract_syntax.Var (_,l) -> l
| Abstract_syntax.Const (_,l) -> l
| Abstract_syntax.Abs (_,_,_,l) -> l
| Abstract_syntax.LAbs (_,_,_,l) -> l
| Abstract_syntax.App (_,_,l) -> l
let abs (x,l1,t) = function
| Abstract_syntax.Linear -> Abstract_syntax.LAbs (x,l1,t,new_loc l1 (get_term_location t))
| Abstract_syntax.Non_linear -> Abstract_syntax.Abs (x,l1,t,new_loc l1 (get_term_location t))
let is_signature s e =
try
let _ = Environment.get_signature s e in
true
with
| Environment.Signature_not_found _ -> false
let is_lexicon l e =
try
let _ = Environment.get_lexicon l e in
true
with
| Environment.Lexicon_not_found _ -> false
let get_sig (name,loc) e =
try
Environment.get_signature name e
with
| Environment.Signature_not_found _ -> emit_parse_error (Error.No_such_signature name) loc
let get_lex (name,loc) e =
try
Environment.get_lexicon name e
with
| Environment.Lexicon_not_found _ ->
emit_parse_error (Error.No_such_lexicon name) loc
%}
%token EOI
%token <Logic.Abstract_syntax.Abstract_syntax.location> LPAREN
%token <Logic.Abstract_syntax.Abstract_syntax.location> RPAREN
%token <Logic.Abstract_syntax.Abstract_syntax.location> SIG_OPEN
%token <Logic.Abstract_syntax.Abstract_syntax.location> LEX_OPEN
%token <Logic.Abstract_syntax.Abstract_syntax.location> NL_LEX_OPEN
%token <Logic.Abstract_syntax.Abstract_syntax.location> END_OF_DEC
%token <(string*Logic.Abstract_syntax.Abstract_syntax.location)> IDENT
%token <Logic.Abstract_syntax.Abstract_syntax.location> COLON
%token <Logic.Abstract_syntax.Abstract_syntax.location> EQUAL
%token <Logic.Abstract_syntax.Abstract_syntax.location> SEMICOLON
%token <Logic.Abstract_syntax.Abstract_syntax.location> COMPOSE
%right COMPOSE
%start <Environment.Environment.t -> Environment.Environment.t> main
%%
main:
| s=signature EOI { fun e -> s e }
| l=lexicon EOI { fun e -> l e }
signature :
| SIG_OPEN id=IDENT EQUAL entries=separated_list(SEMICOLON,sig_entry) END_OF_DEC
{
fun e ->
let s,loc = id in
if is_signature s e then
raise (Error.(Error (Env_error (Duplicated_signature s,loc))))
else
List.fold_left
(fun acc entry ->
try
Environnement.Signature1.add_entry (entry acc e) acc
with
| Environment.Signature1.Duplicate_type_definition ->
emit_parse_error (Error.Duplicated_type s) loc)
Environnement.Signature1.empty
entries
}
lexicon :
| LEX_OPEN lex=lex_declaration
{fun e -> lex ~non_linear:false e }
| NL_LEX_OPEN lex=lex_declaration
{fun e -> lex ~non_linear:true e }
| LEX_OPEN lex=IDENT EQUAL exp=lexicon_exp {fun e -> exp lex e}
%inline lex_declaration :
| lex=IDENT LPAREN abs=IDENT RPAREN COLON obj=IDENT EQUAL entries=separated_list(SEMICOLON,lex_entry) END_OF_DEC
{fun ~non_linear e ->
let lex_name,lex_loc = lex in
(* let abs_name,abs_loc = abs in
let obj_name,obj_loc = obj in *)
let abs',obj'= get_sig abs e,get_sig obj e in
if is_lexicon lex_name e then
raise (Error.(Error (Env_error (Duplicated_lexicon lex_name,lex_loc))))
else
let lex' = List.fold_left
(fun acc entry ->
try
Environnement.Lexicon.insert (entry acc abs obj e) acc
with
| Environment.Signature1.Duplicate_type_definition ->
emit_parse_error (Error.Duplicated_type s) loc)
Environnement.Lexicon.empty lex_name ~abs:abs' ~obj:obj' ~non_linear
entries in
let () = Environnement.Lexicon.check lex' in
lex'
}
lexicon_exp0 :
| lex = IDENT { fun e -> get_lex lex e }
| LPAREN lex = lexicon_exp RPAREN { lex }
lexicon_exp :
| lex = lexicon_exp0 { lex }
| lex1 = lexicon_exp COMPOSE lex2 = lexicon_exp
{
let l1_name,(l1_start,_) = Environment.Lexicon.name lex1 in
let l2_name,(_,l2_end) = Environment.Lexicon.name lex2 in
let new_name = l1_name^" << "^l2_name in
let new_loc = l1_loc,l2_loc in
Environment.Lexicon.compose l1 l2 (new_name,new_loc) }
%token <Logic.Abstract_syntax.Abstract_syntax.location> COLON_EQUAL
%%
id_or_sym :
| id = IDENT { id }
| sym = SYMBOL { sym }
%public lex_entry :
| cst = separated_nonempty_list(COMMA,id_or_sym) COLON_EQUAL t=atomic_type_or_term
{
fun lex e ->
let abs,obj = Environment.Lexicon.get_sig lex in
let id,loc = t in
let k,term_or_type =
if fst (Environment.Signature1.is_constant id obj) then
(Term t),Abstract_syntax.Const t
else if Environment.Signature1.is_type id obj then
Type t,Abstract_syntax.Type_atom (id,loc,[])
else raise (Error.(Error (Parse_error ((Unknown_constant_nor_variable id),loc)))) in
List.fold_left
(fun acc (id,l) ->
match k,Environment.Signature1.is_constant id abs,Environment.Signature1.is_type id abs with
| Term _,(true,_),false -> Environment.Lexicon.insert (Abstract_syntax.Constant (id,loc,term_or_type)) acc
| Type _,(false,_),true -> Environment.Lexicon.insert (Abstract_syntax.Type (id,loc,term_or_type)) acc
| Term _,(false,_), _ -> emit_parse_error (Error.Unknown_constant id) l
| Type _,_,false -> emit_parse_error (Error.Unknown_type id) l
| _ , (true,_), true -> failwith (Printf.sprintf "Bug: should not happen. \"%s\" should not be both a type and a term" id))
lex
cst
}
| cst = separated_nonempty_list(COMMA,id_or_sym) COLON_EQUAL t=not_atomic_term
{
fun lex e ->
let abs,obj = Environment.Lexicon.get_sig lex in
let term,loc,ws = t obj e in
List.fold_left
(fun (acc,k) (id,l) ->
match Environment.Signature1.is_constant id abs,Environment.Signature1.is_type id abs with
| (true,_),false -> Environment.Lexicon.insert (Abstract_syntax.Constant (id,loc,term)) acc
| (false,_), _ -> emit_parse_error (Error.Unknown_constant id) l
| (true,_),true -> failwith (Printf.sprintf "Bug: should not happen. \"%s\" should not be both a type and a term" id))
lex
cst
}
| cst = separated_nonempty_list(COMMA,id_or_sym) COLON_EQUAL t=not_atomic_type_expression
{
fun lex e ->
let abs,obj = Environment.Lexicon.get_sig lex in
let term,loc,ws = t obj e in
List.fold_left
(fun (acc,k) (id,l) ->
match Environment.Signature1.is_constant id abs,Environment.Signature1.is_type id abs with
| (false,_),true -> Environment.Lexicon.insert (Abstract_syntax.Type (id,loc,term)) acc
| _,false -> emit_parse_error (Error.Unknown_type id) l
| (true,_),true -> failwith (Printf.sprintf "Bug: should not happen. \"%s\" should not be both a type and a term" id))
lex
cst
}
%token <(string*Logic.Abstract_syntax.Abstract_syntax.location)> SYMBOL
%token <Logic.Abstract_syntax.Abstract_syntax.location> COMMA
%token <Logic.Abstract_syntax.Abstract_syntax.location> TYPE
%token <Logic.Abstract_syntax.Abstract_syntax.location> PREFIX
%token <Logic.Abstract_syntax.Abstract_syntax.location> INFIX
%token <Logic.Abstract_syntax.Abstract_syntax.location> BINDER
%%
%public sig_entry :
| decl = type_declaration { decl }
| def = type_definition { def }
| decl = term_declaration { decl }
| def = term_definition { def }
type_declaration :
| ids = separated_nonempty_list(COMMA,IDENT) COLON TYPE
{
fun s _ ->
List.fold_left
(fun acc id ->
let id_name,id_loc = id in
try
Environment.Signature1.add_entry (Abstract_syntax.Type_decl (id_name,id_loc,(Abstract_syntax.K []))) acc
with
| Environment.Signature1.Duplicate_type_definition ->
emit_parse_error (Error.Duplicated_type id_name) id_loc)
s
ids
}
type_or_term_definition_prefix :
| id = IDENT EQUAL type_or_cst = atomic_type_or_term COLON { id,type_or_cst }
type_definition :
| definition = type_or_term_definition_prefix TYPE
{
fun sg _ ->
let (id_name,id_loc),(type_expr,type_expr_loc) = definition in
if Environment.Signature1.is_type type_expr sg then
try
Environment.Signature1.add_entry (Abstract_syntax.Type_def (id_name,id_loc,type_expr,Abstract_syntax.K [])) s
with
| Environment.Signature1.Duplicate_type_definition ->
emit_parse_error (Error.Duplicated_type id_name) id_loc
else
emit_parse_error (Error.Unknown_type type_expr) type_expr_loc
}
| id = IDENT EQUAL type_expr = not_atomic_type_expression COLON TYPE
{
fun s _ ->
let id_name,id_loc = id in
let type_expr',_ = type_expr s in
try
Environment.Signature1.add_entry (Abstract_syntax.Type_def (id_name,id_loc,type_expr',Abstract_syntax.K [])) s
with
| Environment.Signature1.Duplicate_type_definition ->
emit_parse_error (Error.Duplicated_type id_name) id_loc
}
term_declaration :
| dec = term_dec_start COLON type_exp = type_expression
{
fun s e ->
List.fold_left
(fun acc ((id,loc),kind) ->
try
let ty = fst (type_exp s e) in
Environment.Signature1.add_entry (Abstract_syntax.Term_decl (id,kind,loc,ty)) acc
with
| Environment.Signature1.Duplicate_term_definition ->
emit_parse_error (Error.Duplicated_term id) loc)
s
dec
}
%inline term_dec_start :
| ids = separated_nonempty_list(COMMA,IDENT) { List.map (fun id -> (id,Abstract_syntax.Default)) ids }
| PREFIX sym = SYMBOL { [sym,Abstract_syntax.Prefix] }
| INFIX sym = SYMBOL { [sym,Abstract_syntax.Infix] }
| BINDER id = IDENT { [id,Abstract_syntax.Binder] }
term_definition :
| definition = type_or_term_definition_prefix ty = type_expression
{
fun sg _ ->
let (id,l),(term,term_loc) = definition in
if Environment.Signature1.is_constant term sg then
try
let term = Abstract_syntax.Const (term,term_loc) in
let ty',_ = ty s in
Environment.Signature1.add_entry (Abstract_syntax.Term_def (id,Abstract_syntax.Default,l,term,ty')) s
with
| Environment.Signature1.Duplicate_term_definition ->
emit_parse_error (Error.Duplicated_term id) l
else
emit_parse_error (Error.Unknown_constant type_expr) type_expr_loc
}
| id = IDENT EQUAL t = not_atomic_term COLON ty = type_expression
{
fun s _ ->
let id',l = id in
try
let term = t s in
let ty',_ = ty s in
Environment.Signature1.add_entry (Abstract_syntax.Term_def (id',Abstract_syntax.Default,l,term,ty')) s
with
| Environment.Signature1.Duplicate_term_definition ->
emit_parse_error (Error.Duplicated_term id') l}
| def = term_def_start EQUAL t = term COLON ty = type_expression
{
fun s _ ->
let (id,l),k = def in
try
let term = t s in
let ty',_ = ty s in
Environment.Signature1.add_entry (Abstract_syntax.Term_def (id,k,l,term,ty')) s
with
| Environment.Signature1.Duplicate_term_definition ->
emit_parse_error (Error.Duplicated_term id) l}
%inline term_def_start :
| PREFIX sym = SYMBOL {sym,Abstract_syntax.Prefix}
| INFIX sym = SYMBOL {sym,Abstract_syntax.Infix}
| BINDER id = IDENT {id,Abstract_syntax.Binder}
%token <Logic.Abstract_syntax.Abstract_syntax.location> LAMBDA
%token <Logic.Abstract_syntax.Abstract_syntax.location> LAMBDA0
%token <Logic.Abstract_syntax.Abstract_syntax.location> DOT
%%
%public term:
| t = atomic_type_or_term {
fun type_env sg warnings ->
let id,loc = t in
match Environment.Signature1.is_constant id sg,Typing_env.mem id type_env with
| (true,_),false -> Abstract_syntax.Const (id,loc),loc,warnings
| (false,_),true -> Abstract_syntax.Var (id,loc),loc,warnings
| (true,_),true -> Abstract_syntax.Var (id,l),loc,(Error.Variable_or_constant (id,loc))::warnings
| false,false -> emit_parse_error (Error.Unknown_constant_nor_variable id) (fst l,snd l)
}
| t = not_atomic_term { t }
%public atomic_type_or_term:
| id = IDENT { id }
| LPAREN t = atomic_type_or_term RPAREN { t }
%public not_atomic_term:
| t1 = term0 terms = term0+
{ fun type_env sg warning ->
let t1,l1, ws = t1 type_env sg warnings in
let terms,l,warnings =
List.fold_left
(fun (lst,_,ws) t ->
let t',l',ws'= t type_env sg ws in
t'::lst,l',ws')
([],l1,ws)
terms in
Term_sequence.parse_sequence [] (Some t1) (List.rev terms),new_loc l1 l, warnings }
| t = bound_term { t }
%inline bound_term :
| binder = LAMBDA vars = IDENT+ DOT t = term
{
fun type_env sg warnings ->
let type_env = List.fold_left
(fun acc (var,_) -> Typing_env.add var acc)
type_env
vars in
let t',loc',ws' = t type_env sg warnings in
let n_loc = new_loc binder t' in
((List.fold_left
(fun acc (var,loc) ->
(fun t -> acc (abs (var,loc,t) Abstract_syntax.Non_linear)))
(fun x -> x)
vars)
t'),
n_loc,
ws'}
| binder = LAMBDA0 vars = IDENT+ DOT t = term
{
fun type_env sg warnings ->
let type_env = List.fold_left
(fun acc (var,_) -> Typing_env.add var acc)
type_env
vars in
let t',loc',ws' = t type_env sg warnings in
let n_loc = new_loc binder t' in
((List.fold_left
(fun acc (var,loc) ->
(fun t -> acc (abs (var,loc,t) Abstract_syntax.Linear)))
(fun x -> x)
vars)
t'),
n_loc,
ws' }
| binder = IDENT vars = IDENT+ DOT t = term
{
fun type_env sg warnings ->
let binder,loc = binder in
let linearity =
match Environment.Signature1.is_constant binder sg with
| true,Some Abstract_syntax.Binder ->
(match Environment.Signature1.get_binder_argument_functional_type binder sg with
| None -> failwith "Bug: Binder of non functional type"
| Some k -> k)
| _ -> emit_parse_error (Error.Binder_expected binder) (p1,p2) in
let type_env = List.fold_left
(fun acc (var,_) -> Typing_env.add var acc)
type_env
vars in
let t',loc',ws' = t type_env sg warnings in
let n_loc = new_loc binder t' in
Abstract_syntax.App
(Const binder,
((List.fold_left
(fun acc (var,loc) ->
(fun t -> acc (abs (var,loc,t) k )))
(fun x -> x)
vars)
t')),
n_loc,
ws' }
term0:
| id = atomic_type_or_term
{
fun type_env sg warnings ->
let id,loc = t in
let t,ws =
match Environment.Signature1.is_constant id sg,Typing_env.mem id type_env with
| (true,_),false -> Abstract_syntax.Const (id,loc),warnings
| (false,_),true -> Abstract_syntax.Var (id,loc),warnings
| (true,_),true -> Abstract_syntax.Var (id,l),(Error.Variable_or_constant (id,loc))::warnings
| false,false -> emit_parse_error (Error.Unknown_constant_nor_variable id) (fst l,snd l) in
Term_sequence.Term t,ws }
| id = SYMBOL
{
fun type_env sg warnings ->
let name,loc = id in
match Environment.Signature1.is_constant name sg with
| true,Some fix -> Op (name,fix,loc),warnings
| true,None -> failwith "Bug: Should no happen"
| false,_ -> emit_parse_error (Error.Unknown_constant id) loc
}
| LPAREN t = not_atomic_term RPAREN
{ Term_sequence.Term t }
{
open Term_parser
exception Error of string
let loc lexbuf = Lexing.lexeme_start_p lexbuf,Lexing.lexeme_end_p lexbuf
}
let letter = ['a'-'z' 'A'-'Z' 'µ' 'À'-'Ö' 'Ø'-'Ý' 'ß'-'ö' 'ø'-'ÿ']
let digit = ['0'-'9']
let string = (letter|digit|'_')*'\''*
let symbol = ['|' '!' '"' '#' '$' '%' '&' '\'' '*' '+' '-' '/' '<' '>' '?' '@' '[' '\\' ']' '^' '`' '{' '}' '~' ]
(* This rule looks for a single line, terminated with '\n' or eof.
It returns a pair of an optional string (the line that was found)
and a Boolean flag (false if eof was reached). *)
rule line = parse
| ([^'\n']* '\n') as line
(* Normal case: one line, no eof. *)
{ Some line, true }
| eof
(* Normal case: no data, eof. *)
{ None, false }
| ([^'\n']+ as line) eof
(* Special case: some data but missing '\n', then eof.
Consider this as the last line, and add the missing '\n'. *)
{ Some (line ^ "\n"), false }
(* This rule analyzes a single line and turns it into a stream of
tokens. *)
and token = parse
| [' ' '\t']
{ token lexbuf }
| '\n'
{ EOI }
| '('
{ LPAREN }
| ')'
{ RPAREN }
| '.'
{ DOT }
| "lambda "
{ LAMBDA }
| letter string as id
{ IDENT (id,loc lexbuf) }
| symbol as s
{ SYMBOL (Printf.sprintf "%c" s,loc lexbuf) }
| _
{ raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) }
%{ open Term_sequence_parser
%}
%token <string*(Lexing.position*Lexing.position)> IDENT SYMBOL
%token LAMBDA DOT LPAREN RPAREN
%token EOI
%start <Term_sequence_parser.term> main
%%
main:
| t = term1 EOI
{ t }
term0:
| id = IDENT
{ Term (Var (fst id)) }
| id = SYMBOL
{ let name = fst id in
Op (name,Term_sequence_parser.(get_fixity name test_sig)) }
| LPAREN t = term1 RPAREN
{ Term t }
term1:
| terms = term0+
{ let token,stream=next terms in
parse_sequence [] token stream}
| LAMBDA vars = IDENT+ DOT t = term1
{ (List.fold_left
(fun acc (var,_) ->
(fun t -> acc (Abs (var,t))))
(fun x -> x)
vars)
t }
open Logic.Abstract_syntax
open Environment
open AcgData
type token =
| Term of Abstract_syntax.term * Abstract_syntax.location
| Op of ( Abstract_syntax.term * Abstract_syntax.syntactic_behavior * Abstract_syntax.location)
let rec abs_term_to_string _ = function
| Abstract_syntax.Var (x,_)
| Abstract_syntax.Const (x,_) -> x
| Abstract_syntax.Abs (x,_,t,_)-> Printf.sprintf "(Lambda %s. %a)" x abs_term_to_string t
| Abstract_syntax.LAbs (x,_,t,_)-> Printf.sprintf "(lambda %s. %a)" x abs_term_to_string t
| Abstract_syntax.App (t1,t2,_) -> Printf.sprintf "(%a %a)" abs_term_to_string t1 abs_term_to_string t2
let to_string = abs_term_to_string ()
let tok_to_string = function
| Term (t,_)
| Op (t,_,_) -> to_string t
let is_infix = function
| Abstract_syntax.Infix _ -> true
| _ -> false
let is_prefix = function
| Abstract_syntax.Prefix -> true
| _ -> false
let lower_than f1 f2 =
match f1,f2 with
| Abstract_syntax.Infix (p1,_), Abstract_syntax.Infix (p2,_) -> p1 < p2
| _ -> false
let next = function
| [] -> None,[]
| a::tl -> Some a,tl
let new_loc (s,_) (_,e) = (s,e)
let rec parse_sequence stack token stream sg =
let () =
Printf.