lexer.mll 9.48 KB
Newer Older
1 2 3
(********************************************************************)
(*                                                                  *)
(*  The Why3 Verification Platform   /   The Why3 Development Team  *)
Guillaume Melquiond's avatar
Guillaume Melquiond committed
4
(*  Copyright 2010-2018   --   Inria - CNRS - Paris-Sud University  *)
5 6 7 8
(*                                                                  *)
(*  This software is distributed under the terms of the GNU Lesser  *)
(*  General Public License version 2.1, with the special exception  *)
(*  on linking described in file LICENSE.                           *)
9
(*                                                                  *)
10
(********************************************************************)
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
11 12

{
13
  open Parser
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
14 15

  let keywords = Hashtbl.create 97
16 17
  let () =
    List.iter
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
18
      (fun (x,y) -> Hashtbl.add keywords x y)
19
      [
20
        "alias", ALIAS;
21 22
        "as", AS;
        "axiom", AXIOM;
Martin Clochard's avatar
Martin Clochard committed
23
        "by", BY;
24
        "clone", CLONE;
25
        "coinductive", COINDUCTIVE;
26
        "constant", CONSTANT;
27 28 29 30 31
        "else", ELSE;
        "end", END;
        "epsilon", EPSILON;
        "exists", EXISTS;
        "export", EXPORT;
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
32
        "false", FALSE;
Clément Fumex's avatar
Clément Fumex committed
33
        "float", FLOAT;
34
        "forall", FORALL;
Andrei Paskevich's avatar
Andrei Paskevich committed
35
        "function", FUNCTION;
36 37 38 39 40 41 42 43 44 45
        "goal", GOAL;
        "if", IF;
        "import", IMPORT;
        "in", IN;
        "inductive", INDUCTIVE;
        "lemma", LEMMA;
        "let", LET;
        "match", MATCH;
        "meta", META;
        "not", NOT;
Andrei Paskevich's avatar
Andrei Paskevich committed
46
        "predicate", PREDICATE;
Clément Fumex's avatar
Clément Fumex committed
47
        "range", RANGE;
48
        "scope", SCOPE;
Martin Clochard's avatar
Martin Clochard committed
49
        "so", SO;
50 51 52 53 54 55 56 57 58 59 60 61
        "then", THEN;
        "theory", THEORY;
        "true", TRUE;
        "type", TYPE;
        "use", USE;
        "with", WITH;
        (* programs *)
        "abstract", ABSTRACT;
        "absurd", ABSURD;
        "any", ANY;
        "assert", ASSERT;
        "assume", ASSUME;
62
        "at", AT;
63
        "begin", BEGIN;
Guillaume Melquiond's avatar
Guillaume Melquiond committed
64
        "break", BREAK;
65
        "check", CHECK;
Guillaume Melquiond's avatar
Guillaume Melquiond committed
66
        "continue", CONTINUE;
67
        "diverges", DIVERGES;
68 69
        "do", DO;
        "done", DONE;
Andrei Paskevich's avatar
Andrei Paskevich committed
70
        "downto", DOWNTO;
71
        "ensures", ENSURES;
72 73 74
        "exception", EXCEPTION;
        "for", FOR;
        "fun", FUN;
75
        "ghost", GHOST;
76
        "invariant", INVARIANT;
77
        "label", LABEL;
78 79
        "module", MODULE;
        "mutable", MUTABLE;
80
        "old", OLD;
81
        "private", PRIVATE;
82
        "pure", PURE;
83 84 85 86
        "raise", RAISE;
        "raises", RAISES;
        "reads", READS;
        "rec", REC;
87
        "requires", REQUIRES;
88
        "return", RETURN;
89
        "returns", RETURNS;
90 91
        "to", TO;
        "try", TRY;
92
        "val", VAL;
93 94
        "variant", VARIANT;
        "while", WHILE;
95
        "writes", WRITES;
96
      ]
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
97 98 99
}

let space = [' ' '\t' '\r']
100 101 102 103 104 105 106 107 108 109 110 111 112
let quote = '\''

let bin     = ['0' '1']
let oct     = ['0'-'7']
let dec     = ['0'-'9']
let hex     = ['0'-'9' 'a'-'f' 'A'-'F']

let bin_sep = ['0' '1' '_']
let oct_sep = ['0'-'7' '_']
let dec_sep = ['0'-'9' '_']
let hex_sep = ['0'-'9' 'a'-'f' 'A'-'F' '_']

let lalpha = ['a'-'z']
113
let ualpha = ['A'-'Z']
114 115 116 117 118 119 120 121 122
let alpha  = ['a'-'z' 'A'-'Z']

let suffix = (alpha | quote* dec_sep)* quote*
let lident = ['a'-'z' '_'] suffix
let uident = ['A'-'Z'] suffix

let core_suffix = quote alpha suffix
let core_lident = lident core_suffix+
let core_uident = uident core_suffix+
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
123

124
let op_char_1 = ['=' '<' '>' '~']
125
let op_char_2 = ['+' '-']
126
let op_char_3 = ['*' '/' '\\' '%']
127
let op_char_4 = ['!' '$' '&' '?' '@' '^' '.' ':' '|' '#']
128 129 130 131
let op_char_34 = op_char_3 | op_char_4
let op_char_234 = op_char_2 | op_char_34
let op_char_1234 = op_char_1 | op_char_234

132 133
let op_char_pref = ['!' '?']

Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
134
rule token = parse
135 136
  | "[##" space* ("\"" ([^ '\010' '\013' '"' ]* as file) "\"")?
    space* (dec+ as line) space* (dec+ as char) space* "]"
137 138
      { Lexlib.update_loc lexbuf file (int_of_string line) (int_of_string char);
        token lexbuf }
139
  | "[#" space* "\"" ([^ '\010' '\013' '"' ]* as file) "\""
140
    space* (dec+ as line) space* (dec+ as bchar) space*
141
    (dec+ as echar) space* "]"
142 143
      { POSITION (Loc.user_position file (int_of_string line)
                 (int_of_string bchar) (int_of_string echar)) }
144 145
  | "[@" space* ([^ ' ' '\n' ']']+ (' '+ [^ ' ' '\n' ']']+)* as lbl) space* ']'
      { ATTRIBUTE lbl }
146
  | '\n'
147
      { Lexing.new_line lexbuf; token lexbuf }
148
  | space+
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
149
      { token lexbuf }
150 151
  | '_'
      { UNDERSCORE }
152
  | lident as id
153
      { try Hashtbl.find keywords id with Not_found -> LIDENT id }
154 155
  | core_lident as id
      { CORE_LIDENT id }
156
  | uident as id
157
      { UIDENT id }
158 159 160
  | core_uident as id
      { CORE_UIDENT id }
  | dec dec_sep* as s
161
      { INTEGER (Number.int_literal_dec (Lexlib.remove_underscores s)) }
162
  | '0' ['x' 'X'] (hex hex_sep* as s)
163
      { INTEGER (Number.int_literal_hex (Lexlib.remove_underscores s)) }
164
  | '0' ['o' 'O'] (oct oct_sep* as s)
165
      { INTEGER (Number.int_literal_oct (Lexlib.remove_underscores s)) }
166
  | '0' ['b' 'B'] (bin bin_sep* as s)
167
      { INTEGER (Number.int_literal_bin (Lexlib.remove_underscores s)) }
168 169 170 171
  | (dec+ as i) ".."
      { Lexlib.backjump lexbuf 2; INTEGER (Number.int_literal_dec i) }
  | '0' ['x' 'X'] (hex+ as i) ".."
      { Lexlib.backjump lexbuf 2; INTEGER (Number.int_literal_hex i) }
172 173 174
  | (dec+ as i)     ("" as f)    ['e' 'E'] (['-' '+']? dec+ as e)
  | (dec+ as i) '.' (dec* as f) (['e' 'E'] (['-' '+']? dec+ as e))?
  | (dec* as i) '.' (dec+ as f) (['e' 'E'] (['-' '+']? dec+ as e))?
Clément Fumex's avatar
Clément Fumex committed
175
      { REAL (Number.real_const_dec i f
176
          (Opt.map Lexlib.remove_leading_plus e)) }
177 178 179 180 181
  | '0' ['x' 'X'] (hex+ as i) ("" as f) ['p' 'P'] (['-' '+']? dec+ as e)
  | '0' ['x' 'X'] (hex+ as i) '.' (hex* as f)
        (['p' 'P'] (['-' '+']? dec+ as e))?
  | '0' ['x' 'X'] (hex* as i) '.' (hex+ as f)
        (['p' 'P'] (['-' '+']? dec+ as e))?
Clément Fumex's avatar
Clément Fumex committed
182
      { REAL (Number.real_const_hex i f
183
          (Opt.map Lexlib.remove_leading_plus e)) }
184 185 186
  | "(*)"
      { Lexlib.backjump lexbuf 2; LEFTPAR }
  | "(*"
187
      { Lexlib.comment lexbuf; token lexbuf }
188 189
  | "'" (lident as id)
      { QUOTE_LIDENT id }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
190 191 192 193 194 195
  | ","
      { COMMA }
  | "("
      { LEFTPAR }
  | ")"
      { RIGHTPAR }
196 197 198 199
  | "{"
      { LEFTBRC }
  | "}"
      { RIGHTBRC }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
200 201
  | ":"
      { COLON }
202 203
  | ";"
      { SEMICOLON }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
204 205
  | "->"
      { ARROW }
206 207
  | "<-"
      { LARROW }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
208 209
  | "<->"
      { LRARROW }
210 211 212 213
  | "&&"
      { AMPAMP }
  | "||"
      { BARBAR }
214 215 216 217
  | "/\\"
      { AND }
  | "\\/"
      { OR }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
218 219
  | "."
      { DOT }
220 221
  | ".."
      { DOTDOT }
222 223
  | "|"
      { BAR }
Clément Fumex's avatar
Clément Fumex committed
224 225 226 227
  | "<"
      { LT }
  | ">"
      { GT }
228 229
  | "<>"
      { LTGT }
Clément Fumex's avatar
Clément Fumex committed
230 231
  | "="
      { EQUAL }
232 233
  | "-"
      { MINUS }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
234 235 236 237
  | "["
      { LEFTSQ }
  | "]"
      { RIGHTSQ }
238 239
  | op_char_pref op_char_4* as s
      { OPPREF s }
240 241 242 243 244 245 246 247
  | op_char_1234* op_char_1 op_char_1234* as s
      { OP1 s }
  | op_char_234*  op_char_2 op_char_234*  as s
      { OP2 s }
  | op_char_34*   op_char_3 op_char_34*  as s
      { OP3 s }
  | op_char_4+ as s
      { OP4 s }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
248
  | "\""
249
      { STRING (Lexlib.string lexbuf) }
250
  | eof
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
251 252
      { EOF }
  | _ as c
253
      { Lexlib.illegal_character c lexbuf }
Jean-Christophe Filliâtre's avatar
Jean-Christophe Filliâtre committed
254 255

{
256

257 258
  let debug = Debug.register_info_flag "print_modules"
    ~desc:"Print@ program@ modules@ after@ typechecking."
259

260
  exception Error of string
261

262 263 264 265 266 267 268
  let () = Exn_printer.register (fun fmt exn -> match exn with
  (* This ad hoc switch allows to not edit the automatically generated
     handcrafted.messages in ad hoc ways. *)
  | Error s when s = "<YOUR SYNTAX ERROR MESSAGE HERE>\n" ->
      Format.fprintf fmt "syntax error"
  | Error s -> Format.fprintf fmt "syntax error:\n %s" s
  | _ -> raise exn)
269

270 271 272 273 274 275
  (* Associate each token to a text representing it *)
  let match_tokens t =
    (* TODO generate this automatically *)
    match t with
    | None -> assert false
    | Some _t -> "NOT IMPLEMENTED"
276

277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
  let build_parsing_function (parser_entry: Lexing.position -> 'a) lb =
    (* This records the last token which was read (for error messages) *)
    let last = ref None in
    let module I = Parser.MenhirInterpreter in
    let checkpoint = parser_entry lb.Lexing.lex_curr_p
    and supplier =
      I.lexer_lexbuf_to_supplier (fun x -> let t = token x in last := Some t; t) lb
    and succeed t = t
    and fail checkpoint =
      let t = Lexing.lexeme lb in
      let token = match_tokens !last in
      let s = Report.report (t, token) checkpoint in
      (* Typing.close_file is supposedly done at the end of the file in
         parsing.mly. If there is a syntax error, we still need to close it (to
         be able to reload). *)
      Loc.with_location (fun _x ->
        (try ignore(Typing.close_file ()) with
        | _e -> ());
        raise (Error s)) lb
    in
    I.loop_handle succeed fail supplier checkpoint
298

Guillaume Melquiond's avatar
Guillaume Melquiond committed
299
  open Wstdlib
300 301 302
  open Ident
  open Theory
  open Pmodule
303

304 305 306 307 308 309 310 311 312 313 314
  let parse_term lb =
    build_parsing_function Parser.Incremental.term_eof lb

  let parse_term_list lb = build_parsing_function Parser.Incremental.term_comma_list_eof lb

  let parse_qualid lb = build_parsing_function Parser.Incremental.qualid_eof lb

  let parse_list_ident lb = build_parsing_function Parser.Incremental.ident_comma_list_eof lb

  let parse_list_qualid lb = build_parsing_function Parser.Incremental.qualid_comma_list_eof lb

315
  let read_channel env path file c =
316 317
    let lb = Lexing.from_channel c in
    Loc.set_file file lb;
318
    Typing.open_file env path;
319
    let mm = build_parsing_function Parser.Incremental.mlw_file lb in
320
    if path = [] && Debug.test_flag debug then begin
Andrei Paskevich's avatar
Andrei Paskevich committed
321
      let print_m _ m = Format.eprintf "%a@\n@." print_module m in
322
      let add_m _ m mm = Mid.add m.mod_theory.th_name m mm in
Andrei Paskevich's avatar
Andrei Paskevich committed
323
      Mid.iter print_m (Mstr.fold add_m mm Mid.empty)
324 325
    end;
    mm
326

327
  let () = Env.register_format mlw_language "whyml" ["mlw";"why"] read_channel
328
    ~desc:"WhyML@ programming@ and@ specification@ language"
329

330
}