lexer.mll 9.48 KB
Newer Older
Andrei Paskevich's avatar
Andrei Paskevich committed
1 2 3 4 5 6 7 8 9 10
(********************************************************************)
(*                                                                  *)
(*  The Why3 Verification Platform   /   The Why3 Development Team  *)
(*  Copyright 2010-2012   --   INRIA - CNRS - Paris-Sud University  *)
(*                                                                  *)
(*  This software is distributed under the terms of the GNU Lesser  *)
(*  General Public License version 2.1, with the special exception  *)
(*  on linking described in file LICENSE.                           *)
(*                                                                  *)
(********************************************************************)
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
11 12

{
13
  open Format
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
14
  open Lexing
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
15
  open Term
16
   open Parser
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
17

18 19
  (* lexical errors *)

20 21 22
  exception IllegalCharacter of char
  exception UnterminatedComment
  exception UnterminatedString
23

24
  let () = Exn_printer.register (fun fmt e -> match e with
25 26 27
    | IllegalCharacter c -> fprintf fmt "illegal character %c" c
    | UnterminatedComment -> fprintf fmt "unterminated comment"
    | UnterminatedString -> fprintf fmt "unterminated string"
28
    | _ -> raise e)
29

Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
30
  let keywords = Hashtbl.create 97
31 32
  let () =
    List.iter
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
33
      (fun (x,y) -> Hashtbl.add keywords x y)
34
      [
35 36 37
        "as", AS;
        "axiom", AXIOM;
        "clone", CLONE;
38
        "coinductive", COINDUCTIVE;
39
        "constant", CONSTANT;
40 41 42 43 44
        "else", ELSE;
        "end", END;
        "epsilon", EPSILON;
        "exists", EXISTS;
        "export", EXPORT;
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
45
        "false", FALSE;
46
        "forall", FORALL;
Andrei Paskevich's avatar
Andrei Paskevich committed
47
        "function", FUNCTION;
48 49 50 51 52 53 54 55 56 57 58
        "goal", GOAL;
        "if", IF;
        "import", IMPORT;
        "in", IN;
        "inductive", INDUCTIVE;
        "lemma", LEMMA;
        "let", LET;
        "match", MATCH;
        "meta", META;
        "namespace", NAMESPACE;
        "not", NOT;
Andrei Paskevich's avatar
Andrei Paskevich committed
59
        "predicate", PREDICATE;
60
        "prop", PROP;
61 62 63 64 65 66 67 68 69 70 71 72 73
        "then", THEN;
        "theory", THEORY;
        "true", TRUE;
        "type", TYPE;
        "use", USE;
        "with", WITH;
        (* programs *)
        "abstract", ABSTRACT;
        "absurd", ABSURD;
        "any", ANY;
        "assert", ASSERT;
        "assume", ASSUME;
        "begin", BEGIN;
74
        "check", CHECK;
75 76
        "do", DO;
        "done", DONE;
Andrei Paskevich's avatar
Andrei Paskevich committed
77
        "downto", DOWNTO;
78
        "ensures", ENSURES;
79 80 81
        "exception", EXCEPTION;
        "for", FOR;
        "fun", FUN;
82
        "ghost", GHOST;
83 84 85 86 87
        "invariant", INVARIANT;
        "loop", LOOP;
        "model", MODEL;
        "module", MODULE;
        "mutable", MUTABLE;
88
        "private", PRIVATE;
89 90 91 92
        "raise", RAISE;
        "raises", RAISES;
        "reads", READS;
        "rec", REC;
93 94
        "requires", REQUIRES;
        "returns", RETURNS;
95 96
        "to", TO;
        "try", TRY;
Andrei Paskevich's avatar
Andrei Paskevich committed
97
        "val", VAL;
98 99
        "variant", VARIANT;
        "while", WHILE;
100
        "writes", WRITES;
101
      ]
102

Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
103 104
  let newline lexbuf =
    let pos = lexbuf.lex_curr_p in
105
    lexbuf.lex_curr_p <-
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
106 107
      { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum }

108
  let string_start_loc = ref Loc.dummy_position
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
109 110
  let string_buf = Buffer.create 1024

111 112
  let comment_start_loc = ref Loc.dummy_position

Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
113 114 115 116 117 118 119 120
  let char_for_backslash = function
    | 'n' -> '\n'
    | 't' -> '\t'
    | c -> c

  let update_loc lexbuf file line chars =
    let pos = lexbuf.lex_curr_p in
    let new_file = match file with None -> pos.pos_fname | Some s -> s in
121
    lexbuf.lex_curr_p <-
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
122
      { pos with
123 124 125
          pos_fname = new_file;
          pos_lnum = int_of_string line;
          pos_bol = pos.pos_cnum - int_of_string chars;
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
126 127 128
      }

  let remove_leading_plus s =
129
    let n = String.length s in
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
130 131
    if n > 0 && s.[0] = '+' then String.sub s 1 (n-1) else s

132
  let loc lb = Loc.extract (lexeme_start_p lb, lexeme_end_p lb)
133

134 135 136 137 138 139 140 141 142 143 144
  let remove_underscores s =
    if String.contains s '_' then begin
      let count =
        let nb = ref 0 in
        String.iter (fun c -> if c = '_' then incr nb) s;
        !nb in
      let t = String.create (String.length s - count) in
      let i = ref 0 in
      String.iter (fun c -> if c <> '_' then (t.[!i] <-c; incr i)) s;
      t
    end else s
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
145 146 147 148
}

let newline = '\n'
let space = [' ' '\t' '\r']
149 150 151
let lalpha = ['a'-'z' '_']
let ualpha = ['A'-'Z']
let alpha = lalpha | ualpha
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
152
let digit = ['0'-'9']
153 154
let lident = lalpha (alpha | digit | '\'')*
let uident = ualpha (alpha | digit | '\'')*
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
155 156
let hexadigit = ['0'-'9' 'a'-'f' 'A'-'F']

157
let op_char_1 = ['=' '<' '>' '~']
158 159
let op_char_2 = ['+' '-']
let op_char_3 = ['*' '/' '%']
160
let op_char_4 = ['!' '$' '&' '?' '@' '^' '.' ':' '|' '#']
161 162 163 164
let op_char_34 = op_char_3 | op_char_4
let op_char_234 = op_char_2 | op_char_34
let op_char_1234 = op_char_1 | op_char_234

165 166
let op_char_pref = ['!' '?']

Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
167
rule token = parse
168 169
  | "##" space* ("\"" ([^ '\010' '\013' '"' ]* as file) "\"")?
    space* (digit+ as line) space* (digit+ as char) space* "##"
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
170
      { update_loc lexbuf file line char; token lexbuf }
171 172 173 174 175
  | "#" space* "\"" ([^ '\010' '\013' '"' ]* as file) "\""
    space* (digit+ as line) space* (digit+ as bchar) space*
    (digit+ as echar) space* "#"
      { POSITION (Loc.user_position file (int_of_string line)
                 (int_of_string bchar) (int_of_string echar)) }
176
  | newline
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
177
      { newline lexbuf; token lexbuf }
178
  | space+
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
179
      { token lexbuf }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
180 181
  | '_'
      { UNDERSCORE }
182
  | lident as id
183
      { try Hashtbl.find keywords id with Not_found -> LIDENT id }
184
  | uident as id
185
      { UIDENT id }
186
  | ['0'-'9'] ['0'-'9' '_']* as s
187
      { INTEGER (Number.int_const_dec (remove_underscores s)) }
188
  | '0' ['x' 'X'] (['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']* as s)
189
      { INTEGER (Number.int_const_hex (remove_underscores s)) }
190
  | '0' ['o' 'O'] (['0'-'7'] ['0'-'7' '_']* as s)
191
      { INTEGER (Number.int_const_oct (remove_underscores s)) }
192
  | '0' ['b' 'B'] (['0'-'1'] ['0'-'1' '_']* as s)
193
      { INTEGER (Number.int_const_bin (remove_underscores s)) }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
194 195 196
  | (digit+ as i) ("" as f) ['e' 'E'] (['-' '+']? digit+ as e)
  | (digit+ as i) '.' (digit* as f) (['e' 'E'] (['-' '+']? digit+ as e))?
  | (digit* as i) '.' (digit+ as f) (['e' 'E'] (['-' '+']? digit+ as e))?
197 198 199 200 201 202 203
      { FLOAT (Number.real_const_dec i f (Opt.map remove_leading_plus e)) }
  | '0' ['x' 'X'] (digit+ as i) ("" as f) ['p' 'P'] (['-' '+']? digit+ as e)
  | '0' ['x' 'X'] (digit+ as i) '.' (digit* as f)
        (['p' 'P'] (['-' '+']? digit+ as e))?
  | '0' ['x' 'X'] (digit* as i) '.' (digit+ as f)
        (['p' 'P'] (['-' '+']? digit+ as e))?
      { FLOAT (Number.real_const_hex i f (Opt.map remove_leading_plus e)) }
204 205
  | "(*)"
      { LEFTPAR_STAR_RIGHTPAR }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
206
  | "(*"
207
      { comment_start_loc := loc lexbuf; comment lexbuf; token lexbuf }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
208 209 210 211 212 213 214 215
  | "'"
      { QUOTE }
  | ","
      { COMMA }
  | "("
      { LEFTPAR }
  | ")"
      { RIGHTPAR }
216 217 218 219
  | "{"
      { LEFTBRC }
  | "}"
      { RIGHTBRC }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
220 221
  | ":"
      { COLON }
222 223
  | ";"
      { SEMICOLON }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
224 225
  | "->"
      { ARROW }
226 227
  | "<-"
      { LARROW }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
228 229
  | "<->"
      { LRARROW }
230 231 232 233
  | "&&"
      { AMPAMP }
  | "||"
      { BARBAR }
234 235 236 237
  | "/\\"
      { AND }
  | "\\/"
      { OR }
Andrei Paskevich's avatar
Andrei Paskevich committed
238
  | "\\"
Andrei Paskevich's avatar
Andrei Paskevich committed
239
      { LAMBDA }
240 241 242 243
  | "\\?"
      { PRED }
  | "\\!"
      { FUNC }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
244 245
  | "."
      { DOT }
246 247 248 249
  | "|"
      { BAR }
  | "="
      { EQUAL }
250 251
  | "<>"
      { LTGT }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
252 253 254 255
  | "["
      { LEFTSQ }
  | "]"
      { RIGHTSQ }
256 257
  | op_char_pref op_char_4* as s
      { OPPREF s }
258 259 260 261 262 263 264 265
  | op_char_1234* op_char_1 op_char_1234* as s
      { OP1 s }
  | op_char_234*  op_char_2 op_char_234*  as s
      { OP2 s }
  | op_char_34*   op_char_3 op_char_34*  as s
      { OP3 s }
  | op_char_4+ as s
      { OP4 s }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
266
  | "\""
267
      { string_start_loc := loc lexbuf; STRING (string lexbuf) }
268
  | eof
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
269 270
      { EOF }
  | _ as c
271
      { raise (IllegalCharacter c) }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
272 273

and comment = parse
274 275
  | "(*)"
      { comment lexbuf }
276
  | "*)"
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
277
      { () }
278
  | "(*"
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
279
      { comment lexbuf; comment lexbuf }
280
  | newline
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
281 282
      { newline lexbuf; comment lexbuf }
  | eof
283
      { raise (Loc.Located (!comment_start_loc, UnterminatedComment)) }
284
  | _
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
285 286 287 288
      { comment lexbuf }

and string = parse
  | "\""
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
289
      { let s = Buffer.contents string_buf in
290 291
        Buffer.clear string_buf;
        s }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
292 293
  | "\\" (_ as c)
      { Buffer.add_char string_buf (char_for_backslash c); string lexbuf }
294
  | newline
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
295 296
      { newline lexbuf; Buffer.add_char string_buf '\n'; string lexbuf }
  | eof
297
      { raise (Loc.Located (!string_start_loc, UnterminatedString)) }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
298 299 300 301 302
  | _ as c
      { Buffer.add_char string_buf c; string lexbuf }

{
  let with_location f lb =
303 304
    if Debug.test_flag Debug.stack_trace then f lb else
    try f lb with
305 306
      | Loc.Located _ as e -> raise e
      | e -> raise (Loc.Located (loc lb, e))
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
307

308
  let parse_logic_file env path lb =
309 310 311
    open_file token (Lexing.from_string "") (Typing.open_file env path);
    with_location (logic_file token) lb;
    Typing.close_file ()
312

313 314 315
  let parse_program_file inc lb =
    open_file token (Lexing.from_string "") inc;
    with_location (program_file token) lb
316

317 318 319 320 321 322 323 324 325 326 327
  let token_counter lb =
    let rec loop in_annot a p =
      match token lb with
        | LEFTBRC -> assert (not in_annot); loop true a p
        | RIGHTBRC -> assert in_annot; loop false a p
        | EOF -> assert (not in_annot); (a,p)
        | _ ->
            if in_annot
            then loop in_annot (a+1) p
            else loop in_annot a (p+1)
    in
Andrei Paskevich's avatar
Andrei Paskevich committed
328 329
    loop false 0 0

330
  let read_channel env path file c =
331 332
    let lb = Lexing.from_channel c in
    Loc.set_file file lb;
Andrei Paskevich's avatar
Andrei Paskevich committed
333
    (), parse_logic_file env path lb
334

Andrei Paskevich's avatar
Andrei Paskevich committed
335
  let library_of_env = Env.register_format "why" ["why"] read_channel
Andrei Paskevich's avatar
Andrei Paskevich committed
336
    ~desc:"Why@ logical@ language"
337

Andrei Paskevich's avatar
Andrei Paskevich committed
338
  let parse_logic_file env = parse_logic_file (library_of_env env)
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
339 340 341
}

(*
342
Local Variables:
343
compile-command: "unset LANG; make -C ../.. test"
344
End:
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
345 346
*)