From dfda5336daca15440e11edd9d4aaa3ce834e99d1 Mon Sep 17 00:00:00 2001 From: Andrei Paskevich Date: Fri, 8 Aug 2014 15:45:37 +0200 Subject: [PATCH] make driver/ independent of parser/ - move common lexing functions to util/lexlib.mll - move and rename Typing.create_user_tv to Ty.tv_of_string --- .gitignore | 1 + Makefile.in | 12 ++-- src/core/ty.ml | 7 +++ src/core/ty.mli | 2 + src/driver/driver.ml | 2 +- src/driver/driver_lexer.mll | 10 ++-- src/parser/lexer.mli | 12 ---- src/parser/lexer.mll | 106 +++++------------------------------- src/parser/typing.ml | 15 ++--- src/parser/typing.mli | 2 - src/util/lexlib.mli | 24 ++++++++ src/util/lexlib.mll | 106 ++++++++++++++++++++++++++++++++++++ src/whyml/mlw_driver.ml | 2 +- src/whyml/mlw_typing.ml | 10 ++-- 14 files changed, 174 insertions(+), 137 deletions(-) create mode 100644 src/util/lexlib.mli create mode 100644 src/util/lexlib.mll diff --git a/.gitignore b/.gitignore index c9d3e96e4..4c11cf070 100644 --- a/.gitignore +++ b/.gitignore @@ -175,6 +175,7 @@ pvsbin/ # /src/util/ /src/util/config.ml +/src/util/lexlib.ml /src/util/rc.ml # /src/session diff --git a/Makefile.in b/Makefile.in index 02b0d91ae..faff3fd52 100644 --- a/Makefile.in +++ b/Makefile.in @@ -105,7 +105,7 @@ endif ############## LIBGENERATED = src/util/config.ml \ - src/util/rc.ml src/parser/lexer.ml \ + src/util/rc.ml src/util/lexlib.ml src/parser/lexer.ml \ src/parser/parser.mli src/parser/parser.ml \ src/driver/driver_parser.mli src/driver/driver_parser.ml \ src/driver/driver_lexer.ml \ @@ -114,17 +114,17 @@ LIBGENERATED = src/util/config.ml \ LIB_UTIL = config bigInt util opt lists strings \ extmap extset exthtbl weakhtbl \ - hashcons stdlib exn_printer pp debug loc print_tree \ + hashcons stdlib exn_printer pp debug loc lexlib print_tree \ cmdline warning sysutil rc plugin bigInt number pqueue LIB_CORE = ident ty term pattern decl theory \ task pretty dterm env trans printer -LIB_PARSER = ptree glob parser typing lexer - LIB_DRIVER = call_provers driver_ast driver_parser driver_lexer driver \ whyconf autodetection +LIB_PARSER = ptree glob parser typing lexer + LIB_TRANSFORM = simplify_formula inlining split_goal induction \ eliminate_definition eliminate_algebraic \ eliminate_inductive eliminate_let eliminate_if \ @@ -145,8 +145,8 @@ LIB_WHYML = mlw_ty mlw_expr mlw_decl mlw_pretty mlw_wp mlw_module \ LIBMODULES = $(addprefix src/util/, $(LIB_UTIL)) \ $(addprefix src/core/, $(LIB_CORE)) \ - $(addprefix src/parser/, $(LIB_PARSER)) \ $(addprefix src/driver/, $(LIB_DRIVER)) \ + $(addprefix src/parser/, $(LIB_PARSER)) \ $(addprefix src/transform/, $(LIB_TRANSFORM)) \ $(addprefix src/printer/, $(LIB_PRINTER)) \ $(addprefix src/whyml/, $(LIB_WHYML)) @@ -155,7 +155,7 @@ LIB_SESSION = compress xml termcode session session_tools session_scheduler LIBSESSIONMODULES = $(addprefix src/session/, $(LIB_SESSION)) -LIBDIRS = util core parser driver transform printer whyml +LIBDIRS = util core driver parser transform printer whyml LIBINCLUDES = $(addprefix -I src/, $(LIBDIRS)) LIBSESSIONDIRS = session diff --git a/src/core/ty.ml b/src/core/ty.ml index 42fc0b2e4..7e731a853 100644 --- a/src/core/ty.ml +++ b/src/core/ty.ml @@ -33,6 +33,13 @@ let tv_compare tv1 tv2 = id_compare tv1.tv_name tv2.tv_name let create_tvsymbol n = { tv_name = id_register n } +let tv_of_string = + let hs = Hstr.create 17 in fun s -> + try Hstr.find hs s with Not_found -> + let tv = create_tvsymbol (id_fresh s) in + Hstr.add hs s tv; + tv + (* type symbols and types *) type tysymbol = { diff --git a/src/core/ty.mli 
b/src/core/ty.mli index 82015c070..dd14f33d6 100644 --- a/src/core/ty.mli +++ b/src/core/ty.mli @@ -27,6 +27,8 @@ val tv_hash : tvsymbol -> int val create_tvsymbol : preid -> tvsymbol +val tv_of_string : string -> tvsymbol + (* type symbols and types *) type tysymbol = private { diff --git a/src/driver/driver.ml b/src/driver/driver.ml index d7291124d..bb2804906 100644 --- a/src/driver/driver.ml +++ b/src/driver/driver.ml @@ -150,7 +150,7 @@ let load_driver = let driver_tag = ref (-1) in fun env file extra_files -> | Rmeta (s,al) -> let rec ty_of_pty = function | PTyvar x -> - Ty.ty_var (Typing.create_user_tv x) + Ty.ty_var (Ty.tv_of_string x) | PTyapp ((loc,_) as q,tyl) -> let ts = find_ts th q in let tyl = List.map ty_of_pty tyl in diff --git a/src/driver/driver_lexer.mll b/src/driver/driver_lexer.mll index 7fe364af4..d6e4d1225 100644 --- a/src/driver/driver_lexer.mll +++ b/src/driver/driver_lexer.mll @@ -13,7 +13,6 @@ open Format open Lexing open Driver_parser - open Lexer exception IllegalCharacter of char @@ -67,13 +66,13 @@ let op_char = ['=' '<' '>' '~' '+' '-' '*' '/' '%' rule token = parse | '\n' - { newline lexbuf; token lexbuf } + { Lexlib.newline lexbuf; token lexbuf } | space+ { token lexbuf } | "(*)" { LEFTPAR_STAR_RIGHTPAR } | "(*" - { comment lexbuf; token lexbuf } + { Lexlib.comment lexbuf; token lexbuf } | '_' { UNDERSCORE } | ident as id @@ -99,16 +98,15 @@ rule token = parse | op_char+ as op { OPERATOR op } | "\"" - { STRING (string lexbuf) } + { STRING (Lexlib.string lexbuf) } | "import" space* "\"" - { INPUT (string lexbuf) } + { INPUT (Lexlib.string lexbuf) } | eof { EOF } | _ as c { raise (IllegalCharacter c) } { - let parse_file_gen parse input_lexbuf lexbuf = let s = Stack.create () in Stack.push lexbuf s; diff --git a/src/parser/lexer.mli b/src/parser/lexer.mli index 4c8b47df8..178b68528 100644 --- a/src/parser/lexer.mli +++ b/src/parser/lexer.mli @@ -9,8 +9,6 @@ (* *) (********************************************************************) -(** parsing entry points *) - val library_of_env : Env.env -> unit Env.library val parse_logic_file : @@ -20,13 +18,3 @@ val parse_program_file : Ptree.incremental -> Lexing.lexbuf -> unit val token_counter : Lexing.lexbuf -> int * int - -(** other functions to be re-used in other lexers/parsers *) - -val newline : Lexing.lexbuf -> unit - -val comment : Lexing.lexbuf -> unit - -val string : Lexing.lexbuf -> string - -val remove_leading_plus : string -> string diff --git a/src/parser/lexer.mll b/src/parser/lexer.mll index c8dc94d1c..565ad034f 100644 --- a/src/parser/lexer.mll +++ b/src/parser/lexer.mll @@ -11,19 +11,12 @@ { open Format - open Lexing open Parser - (* lexical errors *) - exception IllegalCharacter of char - exception UnterminatedComment - exception UnterminatedString let () = Exn_printer.register (fun fmt e -> match e with | IllegalCharacter c -> fprintf fmt "illegal character %c" c - | UnterminatedComment -> fprintf fmt "unterminated comment" - | UnterminatedString -> fprintf fmt "unterminated string" | _ -> raise e) let keywords = Hashtbl.create 97 @@ -99,52 +92,8 @@ "while", WHILE; "writes", WRITES; ] - - let newline lexbuf = - let pos = lexbuf.lex_curr_p in - lexbuf.lex_curr_p <- - { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum } - - let string_start_loc = ref Loc.dummy_position - let string_buf = Buffer.create 1024 - - let comment_start_loc = ref Loc.dummy_position - - let char_for_backslash = function - | 'n' -> '\n' - | 't' -> '\t' - | c -> c - - let update_loc lexbuf file line chars = - let 
pos = lexbuf.lex_curr_p in - let new_file = match file with None -> pos.pos_fname | Some s -> s in - lexbuf.lex_curr_p <- - { pos with - pos_fname = new_file; - pos_lnum = int_of_string line; - pos_bol = pos.pos_cnum - int_of_string chars; - } - - let remove_leading_plus s = - let n = String.length s in - if n > 0 && s.[0] = '+' then String.sub s 1 (n-1) else s - - let loc lb = Loc.extract (lexeme_start_p lb, lexeme_end_p lb) - - let remove_underscores s = - if String.contains s '_' then begin - let count = - let nb = ref 0 in - String.iter (fun c -> if c = '_' then incr nb) s; - !nb in - let t = String.create (String.length s - count) in - let i = ref 0 in - String.iter (fun c -> if c <> '_' then (t.[!i] <-c; incr i)) s; - t - end else s } -let newline = '\n' let space = [' ' '\t' '\r'] let lalpha = ['a'-'z' '_'] let ualpha = ['A'-'Z'] @@ -167,14 +116,15 @@ let op_char_pref = ['!' '?'] rule token = parse | "##" space* ("\"" ([^ '\010' '\013' '"' ]* as file) "\"")? space* (digit+ as line) space* (digit+ as char) space* "##" - { update_loc lexbuf file line char; token lexbuf } + { Lexlib.update_loc lexbuf file (int_of_string line) (int_of_string char); + token lexbuf } | "#" space* "\"" ([^ '\010' '\013' '"' ]* as file) "\"" space* (digit+ as line) space* (digit+ as bchar) space* (digit+ as echar) space* "#" { POSITION (Loc.user_position file (int_of_string line) (int_of_string bchar) (int_of_string echar)) } - | newline - { newline lexbuf; token lexbuf } + | '\n' + { Lexlib.newline lexbuf; token lexbuf } | space+ { token lexbuf } | '_' @@ -184,27 +134,29 @@ rule token = parse | uident as id { UIDENT id } | ['0'-'9'] ['0'-'9' '_']* as s - { INTEGER (Number.int_const_dec (remove_underscores s)) } + { INTEGER (Number.int_const_dec (Lexlib.remove_underscores s)) } | '0' ['x' 'X'] (['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']* as s) - { INTEGER (Number.int_const_hex (remove_underscores s)) } + { INTEGER (Number.int_const_hex (Lexlib.remove_underscores s)) } | '0' ['o' 'O'] (['0'-'7'] ['0'-'7' '_']* as s) - { INTEGER (Number.int_const_oct (remove_underscores s)) } + { INTEGER (Number.int_const_oct (Lexlib.remove_underscores s)) } | '0' ['b' 'B'] (['0'-'1'] ['0'-'1' '_']* as s) - { INTEGER (Number.int_const_bin (remove_underscores s)) } + { INTEGER (Number.int_const_bin (Lexlib.remove_underscores s)) } | (digit+ as i) ("" as f) ['e' 'E'] (['-' '+']? digit+ as e) | (digit+ as i) '.' (digit* as f) (['e' 'E'] (['-' '+']? digit+ as e))? | (digit* as i) '.' (digit+ as f) (['e' 'E'] (['-' '+']? digit+ as e))? - { FLOAT (Number.real_const_dec i f (Opt.map remove_leading_plus e)) } + { FLOAT (Number.real_const_dec i f + (Opt.map Lexlib.remove_leading_plus e)) } | '0' ['x' 'X'] (hexadigit+ as i) ("" as f) ['p' 'P'] (['-' '+']? digit+ as e) | '0' ['x' 'X'] (hexadigit+ as i) '.' (hexadigit* as f) (['p' 'P'] (['-' '+']? digit+ as e))? | '0' ['x' 'X'] (hexadigit* as i) '.' (hexadigit+ as f) (['p' 'P'] (['-' '+']? digit+ as e))? 
- { FLOAT (Number.real_const_hex i f (Opt.map remove_leading_plus e)) } + { FLOAT (Number.real_const_hex i f + (Opt.map Lexlib.remove_leading_plus e)) } | "(*)" { LEFTPAR_STAR_RIGHTPAR } | "(*" - { comment_start_loc := loc lexbuf; comment lexbuf; token lexbuf } + { Lexlib.comment lexbuf; token lexbuf } | "~'" (lident as id) { OPAQUE_QUOTE_LIDENT id } | "'" (lident as id) @@ -264,43 +216,13 @@ rule token = parse | op_char_4+ as s { OP4 s } | "\"" - { string_start_loc := loc lexbuf; STRING (string lexbuf) } + { STRING (Lexlib.string lexbuf) } | eof { EOF } | _ as c { raise (IllegalCharacter c) } -and comment = parse - | "(*)" - { comment lexbuf } - | "*)" - { () } - | "(*" - { comment lexbuf; comment lexbuf } - | newline - { newline lexbuf; comment lexbuf } - | eof - { raise (Loc.Located (!comment_start_loc, UnterminatedComment)) } - | _ - { comment lexbuf } - -and string = parse - | "\"" - { let s = Buffer.contents string_buf in - Buffer.clear string_buf; - s } - | "\\" (_ as c) - { if c = '\n' then newline lexbuf; - Buffer.add_char string_buf (char_for_backslash c); string lexbuf } - | newline - { newline lexbuf; Buffer.add_char string_buf '\n'; string lexbuf } - | eof - { raise (Loc.Located (!string_start_loc, UnterminatedString)) } - | _ as c - { Buffer.add_char string_buf c; string lexbuf } - { - let parse_logic_file env path lb = open_file token (Lexing.from_string "") (Typing.open_file env path); Loc.with_location (logic_file token) lb; diff --git a/src/parser/typing.ml b/src/parser/typing.ml index 3bac386b0..020ce2802 100644 --- a/src/parser/typing.ml +++ b/src/parser/typing.ml @@ -99,20 +99,13 @@ let find_namespace_ns ns q = (** Parsing types *) -let create_user_tv = - let hs = Hstr.create 17 in fun s -> - try Hstr.find hs s with Not_found -> - let tv = create_tvsymbol (id_fresh s) in - Hstr.add hs s tv; - tv - let ty_of_pty ?(noop=true) uc pty = let rec get_ty = function | PPTtyvar ({id_loc = loc}, true) when noop -> Loc.errorm ~loc "Opaqueness@ annotations@ are@ only@ \ allowed@ in@ function@ and@ predicate@ prototypes" | PPTtyvar ({id = x}, _) -> - ty_var (create_user_tv x) + ty_var (tv_of_string x) | PPTtyapp (q, tyl) -> let ts = find_tysymbol uc q in let tyl = List.map get_ty tyl in @@ -129,7 +122,7 @@ let ty_of_pty ?(noop=true) uc pty = let opaque_tvs args = let rec opaque_tvs acc = function - | PPTtyvar (id, true) -> Stv.add (create_user_tv id.id) acc + | PPTtyvar (id, true) -> Stv.add (tv_of_string id.id) acc | PPTtyvar (_, false) -> acc | PPTtyapp (_, pl) | PPTtuple pl -> List.fold_left opaque_tvs acc pl @@ -409,7 +402,7 @@ let add_types dl th = let vl = List.map (fun id -> if Hstr.mem vars id.id then Loc.error ~loc:id.id_loc (DuplicateTypeVar id.id); - let i = create_user_tv id.id in + let i = tv_of_string id.id in Hstr.add vars id.id i; i) d.td_params in @@ -772,7 +765,7 @@ let type_inst th t s = | CStsym (loc,p,tvl,pty) -> let ts1 = find_tysymbol_ns t.th_export p in let id = id_user (ts1.ts_name.id_string ^ "_subst") loc in - let tvl = List.map (fun id -> create_user_tv id.id) tvl in + let tvl = List.map (fun id -> tv_of_string id.id) tvl in let def = Some (ty_of_pty th pty) in let ts2 = Loc.try3 ~loc create_tysymbol id tvl def in if Mts.mem ts1 s.inst_ts diff --git a/src/parser/typing.mli b/src/parser/typing.mli index 490536a0d..d07aeb184 100644 --- a/src/parser/typing.mli +++ b/src/parser/typing.mli @@ -12,7 +12,6 @@ (** Typing environments *) open Stdlib -open Ty open Term open Theory @@ -39,7 +38,6 @@ val close_file : unit -> theory Mstr.t (** The following is 
exported for program typing (src/whyml/mlw_typing.ml) *) (***************************************************************************) -val create_user_tv : string -> tvsymbol val create_user_id : Ptree.ident -> Ident.preid val qloc : Ptree.qualid -> Loc.position diff --git a/src/util/lexlib.mli b/src/util/lexlib.mli new file mode 100644 index 000000000..797f85ba5 --- /dev/null +++ b/src/util/lexlib.mli @@ -0,0 +1,24 @@ +(********************************************************************) +(* *) +(* The Why3 Verification Platform / The Why3 Development Team *) +(* Copyright 2010-2014 -- INRIA - CNRS - Paris-Sud University *) +(* *) +(* This software is distributed under the terms of the GNU Lesser *) +(* General Public License version 2.1, with the special exception *) +(* on linking described in file LICENSE. *) +(* *) +(********************************************************************) + +(** common functions to be used in lexers/parsers *) + +val newline : Lexing.lexbuf -> unit + +val comment : Lexing.lexbuf -> unit + +val string : Lexing.lexbuf -> string + +val update_loc : Lexing.lexbuf -> string option -> int -> int -> unit + +val remove_leading_plus : string -> string + +val remove_underscores : string -> string diff --git a/src/util/lexlib.mll b/src/util/lexlib.mll new file mode 100644 index 000000000..32225ceeb --- /dev/null +++ b/src/util/lexlib.mll @@ -0,0 +1,106 @@ +(********************************************************************) +(* *) +(* The Why3 Verification Platform / The Why3 Development Team *) +(* Copyright 2010-2014 -- INRIA - CNRS - Paris-Sud University *) +(* *) +(* This software is distributed under the terms of the GNU Lesser *) +(* General Public License version 2.1, with the special exception *) +(* on linking described in file LICENSE. 
*) +(* *) +(********************************************************************) + +{ + open Format + open Lexing + + (* lexical errors *) + + exception UnterminatedComment + exception UnterminatedString + + let () = Exn_printer.register (fun fmt e -> match e with + | UnterminatedComment -> fprintf fmt "unterminated comment" + | UnterminatedString -> fprintf fmt "unterminated string" + | _ -> raise e) + + let newline lexbuf = + let pos = lexbuf.lex_curr_p in + lexbuf.lex_curr_p <- + { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum } + + let string_start_loc = ref Loc.dummy_position + let string_buf = Buffer.create 1024 + + let comment_start_loc = ref Loc.dummy_position + + let char_for_backslash = function + | 'n' -> '\n' + | 't' -> '\t' + | c -> c + +} + +let newline = '\n' + +rule comment = parse + | "(*)" + { comment lexbuf } + | "*)" + { () } + | "(*" + { comment lexbuf; comment lexbuf } + | newline + { newline lexbuf; comment lexbuf } + | eof + { raise (Loc.Located (!comment_start_loc, UnterminatedComment)) } + | _ + { comment lexbuf } + +and string = parse + | "\"" + { let s = Buffer.contents string_buf in + Buffer.clear string_buf; + s } + | "\\" (_ as c) + { if c = '\n' then newline lexbuf; + Buffer.add_char string_buf (char_for_backslash c); string lexbuf } + | newline + { newline lexbuf; Buffer.add_char string_buf '\n'; string lexbuf } + | eof + { raise (Loc.Located (!string_start_loc, UnterminatedString)) } + | _ as c + { Buffer.add_char string_buf c; string lexbuf } + +{ + let loc lb = Loc.extract (lexeme_start_p lb, lexeme_end_p lb) + + let comment lexbuf = comment_start_loc := loc lexbuf; comment lexbuf + + let string lexbuf = string_start_loc := loc lexbuf; string lexbuf + + let update_loc lexbuf file line chars = + let pos = lexbuf.lex_curr_p in + let new_file = match file with None -> pos.pos_fname | Some s -> s in + lexbuf.lex_curr_p <- + { pos with + pos_fname = new_file; + pos_lnum = line; + pos_bol = pos.pos_cnum - chars; + } + + let remove_leading_plus s = + let n = String.length s in + if n > 0 && s.[0] = '+' then String.sub s 1 (n-1) else s + + let remove_underscores s = + if String.contains s '_' then begin + let count = + let nb = ref 0 in + String.iter (fun c -> if c = '_' then incr nb) s; + !nb in + let t = String.create (String.length s - count) in + let i = ref 0 in + String.iter (fun c -> if c <> '_' then (t.[!i] <-c; incr i)) s; + t + end else s +} diff --git a/src/whyml/mlw_driver.ml b/src/whyml/mlw_driver.ml index 60dfbb172..2a5b4d613 100644 --- a/src/whyml/mlw_driver.ml +++ b/src/whyml/mlw_driver.ml @@ -120,7 +120,7 @@ let load_driver lib file extra_files = | Rmeta (s,al) -> let rec ty_of_pty = function | PTyvar x -> - Ty.ty_var (Typing.create_user_tv x) + Ty.ty_var (Ty.tv_of_string x) | PTyapp ((loc,_) as q,tyl) -> let ts = find_ts th q in let tyl = List.map ty_of_pty tyl in diff --git a/src/whyml/mlw_typing.ml b/src/whyml/mlw_typing.ml index 3e5cc4565..71d7a24ad 100644 --- a/src/whyml/mlw_typing.ml +++ b/src/whyml/mlw_typing.ml @@ -91,15 +91,13 @@ let uc_find_ls uc p = (** parsing types *) -let create_user_tv = Typing.create_user_tv - let ity_of_pty ?(noop=true) uc pty = let rec get_ty = function | PPTtyvar ({id_loc = loc}, true) when noop -> Loc.errorm ~loc "Opaqueness@ annotations@ are@ only@ \ allowed@ in@ the@ types@ of@ formal@ arguments" | PPTtyvar ({id = x}, _) -> - ity_var (create_user_tv x) + ity_var (tv_of_string x) | PPTtyapp (q, tyl) -> let tyl = List.map get_ty tyl in begin match uc_find_ts uc q with @@ -117,7 +115,7 @@ let 
ity_of_pty ?(noop=true) uc pty = get_ty pty let rec opaque_tvs acc = function - | PPTtyvar (id, true) -> Stv.add (create_user_tv id.id) acc + | PPTtyvar (id, true) -> Stv.add (tv_of_string id.id) acc | PPTtyvar (_, false) -> acc | PPTtyapp (_, pl) | PPTtuple pl -> List.fold_left opaque_tvs acc pl @@ -656,7 +654,7 @@ let add_type_invariant loc uc id params inv = | _ -> Loc.errorm ~loc "type %s does not have an invariant" id.id in let add_tv acc { id = id; id_loc = loc } = let e = Loc.Located (loc, DuplicateTypeVar id) in - Sstr.add_new e id acc, Typing.create_user_tv id in + Sstr.add_new e id acc, tv_of_string id in let _, tvl = Lists.map_fold_left add_tv Sstr.empty params in let ty = ty_app its.its_ts (List.map ty_var tvl) in let res = create_vsymbol (id_fresh x) ty in @@ -815,7 +813,7 @@ let add_types ~wp uc tdl = let d = Mstr.find x def in let add_tv acc id = let e = Loc.Located (id.Ptree.id_loc, DuplicateTypeVar id.id) in - let tv = Typing.create_user_tv id.id in + let tv = tv_of_string id.id in Mstr.add_new e id.id tv acc in let vars = List.fold_left add_tv Mstr.empty d.td_params in let vl = List.map (fun id -> Mstr.find id.id vars) d.td_params in -- GitLab
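
Note appended for readers of this patch (not part of the commit): the new Ty.tv_of_string interns type variables by name behind a private Hstr table, so every occurrence of a given type-variable name — whether it comes from a driver file, a Why source, or a WhyML source — resolves to the same tvsymbol. Below is a minimal, standalone OCaml sketch of that memoization pattern under stated assumptions: the Symbol module is hypothetical and stands in for Why3's tvsymbol/create_tvsymbol, and plain Hashtbl stands in for Why3's Hstr.

(* Sketch of the interning pattern used by the new Ty.tv_of_string:
   the first lookup of a name creates a fresh symbol and caches it;
   every later lookup of the same name returns the same symbol. *)

module Symbol = struct
  type t = { name : string; tag : int }
  let counter = ref 0
  let fresh name = incr counter; { name; tag = !counter }
end

let symbol_of_string : string -> Symbol.t =
  let table : (string, Symbol.t) Hashtbl.t = Hashtbl.create 17 in
  fun s ->
    try Hashtbl.find table s
    with Not_found ->
      let sym = Symbol.fresh s in   (* first occurrence: create and cache *)
      Hashtbl.add table s sym;
      sym

(* Usage: both lookups of "a" return the same physical symbol, so type
   variables written with the same name are shared by construction. *)
let () =
  let a1 = symbol_of_string "a" in
  let a2 = symbol_of_string "a" in
  assert (a1 == a2);
  assert (a1 != symbol_of_string "b")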