Commit d57ad4af authored by Andrei Paskevich

TPTP lexer

parent bb133d8a
@@ -72,14 +72,12 @@ type formula_node =
| LFqnt of quant * tyvar list * formula
| LFbin of binop * formula * formula
| LFnot of formula
| LFlet of binding list * formula
| LFite of formula * formula * formula
| LFapp of atomic_word * term list
| LFdef of defined_pred * term list
| LFvar of variable
and term_node =
| LTlet of binding list * term
| LTite of formula * term * term
| LTapp of atomic_word * term list
| LTdef of defined_func * term list
......
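The constructors above follow the shape of TPTP formulas and terms: quantification, binary connectives, negation, let-bindings, if-then-else, and applications of plain or defined symbols. As an illustration only, here is a self-contained sketch of how a formula such as ![X]: (p(X) => q(X)) maps onto constructors of this shape; the auxiliary types below are simplified stand-ins, not the actual definitions from Tptp_ast.

type quant = Qforall | Qexists
type binop = Band | Bor | Bimplies | Biff
type term = Tvar of string | Tapp of string * term list
type formula =
  | Fqnt of quant * string list * formula
  | Fbin of binop * formula * formula
  | Fnot of formula
  | Fapp of string * term list

(* ![X]: (p(X) => q(X)) *)
let example =
  Fqnt (Qforall, ["X"],
        Fbin (Bimplies, Fapp ("p", [Tvar "X"]), Fapp ("q", [Tvar "X"])))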
@@ -22,6 +22,7 @@
open Lexing
open Tptp_ast
open Tptp_parser
open Tptp_typing
(* lexical errors *)
@@ -33,6 +34,7 @@
| IllegalCharacter c -> fprintf fmt "illegal character %c" c
| UnterminatedComment -> fprintf fmt "unterminated comment"
| UnterminatedString -> fprintf fmt "unterminated string"
| DoubleDollarWord -> fprintf fmt "system-specifics are not supported"
| _ -> raise e)
let defwords = Hashtbl.create 97
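(* defwords above (like the keywords table used at the lword rule below)
   follows the standard ocamllex idiom: match a generic identifier, then look
   it up in a hash table.  A self-contained sketch of that idiom, with made-up
   token names rather than the actual Tptp_parser tokens: *)
type tok = KEYWORD of string | IDENT of string
let kw_table : (string, tok) Hashtbl.t = Hashtbl.create 17
let () =
  List.iter (fun k -> Hashtbl.add kw_table k (KEYWORD k))
    ["fof"; "cnf"; "tff"; "include"]
let ident_or_keyword s =
  try Hashtbl.find kw_table s with Not_found -> IDENT s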
@@ -98,25 +100,8 @@
lexbuf.lex_curr_p <-
{ pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum }
let string_start_loc = ref Loc.dummy_position
let string_buf = Buffer.create 1024
let comment_start_loc = ref Loc.dummy_position
let char_for_backslash = function
| 'n' -> '\n'
| 't' -> '\t'
| c -> c
let update_loc lexbuf file line chars =
let pos = lexbuf.lex_curr_p in
let new_file = match file with None -> pos.pos_fname | Some s -> s in
lexbuf.lex_curr_p <- { pos with
pos_fname = new_file;
pos_lnum = int_of_string line;
pos_bol = pos.pos_cnum - int_of_string chars;
}
let loc lb = Loc.extract (lexeme_start_p lb, lexeme_end_p lb)
}
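The newline helper in the header above advances lex_curr_p so that error locations carry correct line numbers. A self-contained sketch of the same pattern, written against the standard Lexing module only (hence the explicit qualification):

let newline lb =
  let pos = lb.Lexing.lex_curr_p in
  lb.Lexing.lex_curr_p <-
    { pos with Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
               Lexing.pos_bol  = pos.Lexing.pos_cnum }

let () =
  let lb = Lexing.from_string "a\nb\nc" in
  let start = lb.Lexing.lex_curr_p.Lexing.pos_lnum in
  newline lb; newline lb;
  (* two recorded newlines advance the line counter by two *)
  assert (lb.Lexing.lex_curr_p.Lexing.pos_lnum = start + 2)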
@@ -129,12 +114,14 @@ let digit = ['0'-'9']
let nzero = ['1'-'9']
let alnum = lalpha | ualpha | digit | '_'
let lword = lalpha alnum*
let uword = ualpha alnum*
let positive = nzero digit*
let natural = '0' | positive
let negative = '-' natural
let sq_char = [' '-'&' '('-'[' ']'-'~'] | '\\' ['\\' '\'']
let do_char = [' '-'!' '#'-'[' ']'-'~'] | '\\' ['\\' '"']
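(* sq_char and do_char above encode the TPTP escaping rules: inside '...' a
   backslash may only be followed by \ or ', inside "..." only by \ or ".
   A self-contained sketch of the single-quoted rule as a plain predicate,
   for illustration only (not part of this lexer): *)
let is_single_quoted_body s =
  let n = String.length s in
  let rec ok i =
    if i >= n then true
    else if s.[i] = '\\' then
      i + 1 < n && (s.[i+1] = '\\' || s.[i+1] = '\'') && ok (i + 2)
    else
      s.[i] >= ' ' && s.[i] <= '~' && s.[i] <> '\'' && ok (i + 1)
  in
  n > 0 && ok 0

(* the raw characters of the test string are:  i t \ ' s   f i n e *)
let () = assert (is_single_quoted_body "it\\'s fine")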
rule token = parse
| newline
@@ -145,115 +132,83 @@ rule token = parse
{ try Hashtbl.find keywords id with Not_found -> LWORD id }
| uword as id
{ UWORD id }
| '\'' (sq_char+ as sq) '\''
{ SINGLE_QUOTED sq }
| '"' (do_char* as dob) '"'
{ DISTINCT_OBJECT dob }
| '$' (lword as id)
{ DWORD id }
| "$$" lword
{ raise DoubleDollarWord }
| '+'? (natural as s)
| '-' natural as s
{ INTEGER s }
| negative as s
{ INTEGER s }
| '+'? (natural as n) '/' (natural as d)
{ RATIONAL (n,d) }
| (negative as n) '/' (natural as d)
| '+'? (natural as n) '/' (positive as d)
| ('-' natural as n) '/' (positive as d)
{ RATIONAL (n,d) }
| nzero
| ['0'-'9'] ['0'-'9' '_']* as s
{ INTEGER (IConstDecimal (remove_underscores s)) }
| '0' ['x' 'X'] (['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']* as s)
{ INTEGER (IConstHexa (remove_underscores s)) }
| '0' ['o' 'O'] (['0'-'7'] ['0'-'7' '_']* as s)
{ INTEGER (IConstOctal (remove_underscores s)) }
| '0' ['b' 'B'] (['0'-'1'] ['0'-'1' '_']* as s)
{ INTEGER (IConstBinary (remove_underscores s)) }
| (digit+ as i) ("" as f) ['e' 'E'] (['-' '+']? digit+ as e)
| (digit+ as i) '.' (digit* as f) (['e' 'E'] (['-' '+']? digit+ as e))?
| (digit* as i) '.' (digit+ as f) (['e' 'E'] (['-' '+']? digit+ as e))?
{ FLOAT (RConstDecimal (i, f, Util.option_map remove_leading_plus e)) }
| '0' ['x' 'X'] ((hexadigit* as i) '.' (hexadigit+ as f)
|(hexadigit+ as i) '.' (hexadigit* as f)
|(hexadigit+ as i) ("" as f))
['p' 'P'] (['-' '+']? digit+ as e)
{ FLOAT (RConstHexa (i, f, remove_leading_plus e)) }
| "(*)"
{ LEFTPAR_STAR_RIGHTPAR }
| "(*"
{ comment_start_loc := loc lexbuf; comment lexbuf; token lexbuf }
| "'"
{ QUOTE }
| '+'? (natural as i) ('.' (digit+ as f))? (['e' 'E'] ('+'? (natural as e)))?
| ('-' natural as i) ('.' (digit+ as f))? (['e' 'E'] ('+'? (natural as e)))?
| '+'? (natural as i) ('.' (digit+ as f))? (['e' 'E'] ('-' natural as e))?
| ('-' natural as i) ('.' (digit+ as f))? (['e' 'E'] ('-' natural as e))?
{ REAL (i,f,e) }
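(* The REAL token above carries the integer part, the optional fraction, and
   the optional (possibly signed) exponent as raw strings.  A sketch of how a
   client might turn such a triple into an OCaml float -- an assumption for
   illustration, not code from this commit: *)
let real_to_float i f e =
  let f = match f with None -> "" | Some f -> "." ^ f in
  let e = match e with None -> "" | Some e -> "e" ^ e in
  float_of_string (i ^ f ^ e)

(* "-2.5e1" decomposes into i = "-2", f = Some "5", e = Some "1" *)
let () = assert (real_to_float "-2" (Some "5") (Some "1") = -25.0)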
| "/*/"
{ SLASH_STAR_SLASH }
| "/*"
{ comment_start_loc := loc lexbuf; comment_block lexbuf; token lexbuf }
| "%"
{ comment_start_loc := loc lexbuf; comment_line lexbuf; token lexbuf }
| ","
{ COMMA }
| "("
{ LEFTPAR }
| ")"
{ RIGHTPAR }
| "{"
{ LEFTBRC }
| "}"
{ RIGHTBRC }
| "{|"
{ LEFTREC }
| "|}"
{ RIGHTREC }
| "["
{ LEFTSQ }
| "]"
{ RIGHTSQ }
| ":"
{ COLON }
| ";"
{ SEMICOLON }
| "->"
{ ARROW }
| "<-"
| "-->"
{ LONGARROW }
| "<="
{ LARROW }
| "<->"
| "=>"
{ RARROW }
| "<=>"
{ LRARROW }
| "&&"
{ AMPAMP }
| "||"
{ BARBAR }
| "/\\"
{ AND }
| "\\/"
{ OR }
| "\\"
{ LAMBDA }
| "\\?"
{ PRED }
| "\\!"
{ FUNC }
| "<~>"
{ NLRARROW }
| "~&"
{ NAMP }
| "~|"
{ NBAR }
| "~"
{ TILDE }
| "."
{ DOT }
| "|"
{ BAR }
| "="
{ EQUAL }
| "<>"
{ LTGT }
| "["
{ LEFTSQ }
| "]"
{ RIGHTSQ }
| op_char_pref op_char_4* as s
{ OPPREF s }
| op_char_1234* op_char_1 op_char_1234* as s
{ OP1 s }
| op_char_234* op_char_2 op_char_234* as s
{ OP2 s }
| op_char_34* op_char_3 op_char_34* as s
{ OP3 s }
| op_char_4+ as s
{ OP4 s }
| "\""
{ string_start_loc := loc lexbuf; STRING (string lexbuf) }
| "!="
{ NEQUAL }
| "*"
{ STAR }
| ">"
{ GT }
| "!>"
{ PI }
| "!"
{ BANG }
| "?"
{ QUES }
| eof
{ EOF }
| _ as c
{ raise (IllegalCharacter c) }
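(* A hedged sketch of how a client could drain an entry point like the rule
   above: [token] is any lexbuf -> 'a function and [is_eof] recognizes its
   end-of-input value; neither name refers to code in this commit. *)
let fold_tokens token is_eof f acc lb =
  let rec loop acc =
    let t = token lb in
    if is_eof t then acc else loop (f acc t)
  in
  loop acc

(* e.g. counting the tokens produced before end of input *)
let count_tokens token is_eof lb =
  fold_tokens token is_eof (fun n _ -> n + 1) 0 lb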
and comment = parse
| "(*)"
{ comment lexbuf }
| "*)"
and comment_block = parse
| "*/"
{ () }
| "(*"
{ comment lexbuf; comment lexbuf }
| newline
{ newline lexbuf; comment lexbuf }
| eof
@@ -261,19 +216,13 @@ and comment = parse
| _
{ comment lexbuf }
and string = parse
| "\""
{ let s = Buffer.contents string_buf in
Buffer.clear string_buf;
s }
| "\\" (_ as c)
{ Buffer.add_char string_buf (char_for_backslash c); string lexbuf }
and comment_line = parse
| newline
{ newline lexbuf; Buffer.add_char string_buf '\n'; string lexbuf }
{ newline lexbuf; () }
| eof
{ raise (Loc.Located (!string_start_loc, UnterminatedString)) }
| _ as c
{ Buffer.add_char string_buf c; string lexbuf }
{ () }
| _
{ comment lexbuf }
{
let with_location f lb =
@@ -282,32 +231,13 @@ and string = parse
| Loc.Located _ as e -> raise e
| e -> raise (Loc.Located (loc lb, e))
let parse_logic_file env path lb =
pre_logic_file token (Lexing.from_string "") env path;
with_location (logic_file token) lb
let parse_program_file = with_location (program_file token)
let token_counter lb =
let rec loop in_annot a p =
match token lb with
| LEFTBRC -> assert (not in_annot); loop true a p
| RIGHTBRC -> assert in_annot; loop false a p
| EOF -> assert (not in_annot); (a,p)
| _ ->
if in_annot
then loop in_annot (a+1) p
else loop in_annot a (p+1)
in
loop false 0 0
let read_channel env path file c =
let lb = Lexing.from_channel c in
Loc.set_file file lb;
parse_logic_file env path lb
let ast = with_location (tptp_file token) lb in
tptp_typing env path ast
let () = Env.register_format "why" ["why"] read_channel
}
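For context, a hedged sketch of how a caller might hand a file to a lexbuf-based entry point like those defined in the trailer above; parse_entry is a placeholder name, not something introduced by this commit:

let parse_file parse_entry file =
  let c = open_in file in
  let lb = Lexing.from_channel c in
  (* record the file name so that reported locations mention it *)
  lb.Lexing.lex_curr_p <-
    { lb.Lexing.lex_curr_p with Lexing.pos_fname = file };
  let result = try parse_entry lb with e -> close_in c; raise e in
  close_in c;
  result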