lexer.mll 8.53 KB
Newer Older
1 2 3
(********************************************************************)
(*                                                                  *)
(*  The Why3 Verification Platform   /   The Why3 Development Team  *)
Guillaume Melquiond's avatar
Guillaume Melquiond committed
4
(*  Copyright 2010-2019   --   Inria - CNRS - Paris-Sud University  *)
5 6 7 8
(*                                                                  *)
(*  This software is distributed under the terms of the GNU Lesser  *)
(*  General Public License version 2.1, with the special exception  *)
(*  on linking described in file LICENSE.                           *)
9
(*                                                                  *)
10
(********************************************************************)
11 12

{
13
  open Parser
14 15

  let keywords = Hashtbl.create 97
16 17
  let () =
    List.iter
18
      (fun (x,y) -> Hashtbl.add keywords x y)
19
      [
20
        "alias", ALIAS;
21 22
        "as", AS;
        "axiom", AXIOM;
23
        "by", BY;
24
        "clone", CLONE;
25
        "coinductive", COINDUCTIVE;
26
        "constant", CONSTANT;
27 28 29 30 31
        "else", ELSE;
        "end", END;
        "epsilon", EPSILON;
        "exists", EXISTS;
        "export", EXPORT;
32
        "false", FALSE;
Clément Fumex's avatar
Clément Fumex committed
33
        "float", FLOAT;
34
        "forall", FORALL;
35
        "function", FUNCTION;
36 37 38 39 40 41 42 43 44 45
        "goal", GOAL;
        "if", IF;
        "import", IMPORT;
        "in", IN;
        "inductive", INDUCTIVE;
        "lemma", LEMMA;
        "let", LET;
        "match", MATCH;
        "meta", META;
        "not", NOT;
46
        "partial", PARTIAL;
47
        "predicate", PREDICATE;
Clément Fumex's avatar
Clément Fumex committed
48
        "range", RANGE;
49
        "scope", SCOPE;
50
        "so", SO;
51 52 53 54 55 56 57 58 59 60 61 62
        "then", THEN;
        "theory", THEORY;
        "true", TRUE;
        "type", TYPE;
        "use", USE;
        "with", WITH;
        (* programs *)
        "abstract", ABSTRACT;
        "absurd", ABSURD;
        "any", ANY;
        "assert", ASSERT;
        "assume", ASSUME;
63
        "at", AT;
64
        "begin", BEGIN;
65
        "break", BREAK;
66
        "check", CHECK;
67
        "continue", CONTINUE;
68
        "diverges", DIVERGES;
69 70
        "do", DO;
        "done", DONE;
71
        "downto", DOWNTO;
72
        "ensures", ENSURES;
73 74 75
        "exception", EXCEPTION;
        "for", FOR;
        "fun", FUN;
76
        "ghost", GHOST;
77
        "invariant", INVARIANT;
78
        "label", LABEL;
79 80
        "module", MODULE;
        "mutable", MUTABLE;
81
        "old", OLD;
82
        "private", PRIVATE;
83
        "pure", PURE;
84 85 86 87
        "raise", RAISE;
        "raises", RAISES;
        "reads", READS;
        "rec", REC;
88
        "ref", REF;
89
        "requires", REQUIRES;
90
        "return", RETURN;
91
        "returns", RETURNS;
92 93
        "to", TO;
        "try", TRY;
94
        "val", VAL;
95 96
        "variant", VARIANT;
        "while", WHILE;
97
        "writes", WRITES;
98
      ]
99 100 101
}

let space = [' ' '\t' '\r']
102 103 104 105 106 107 108 109 110 111 112 113 114
let quote = '\''

let bin     = ['0' '1']
let oct     = ['0'-'7']
let dec     = ['0'-'9']
let hex     = ['0'-'9' 'a'-'f' 'A'-'F']

let bin_sep = ['0' '1' '_']
let oct_sep = ['0'-'7' '_']
let dec_sep = ['0'-'9' '_']
let hex_sep = ['0'-'9' 'a'-'f' 'A'-'F' '_']

let lalpha = ['a'-'z']
115
let ualpha = ['A'-'Z']
116 117 118 119 120 121 122 123 124
let alpha  = ['a'-'z' 'A'-'Z']

let suffix = (alpha | quote* dec_sep)* quote*
let lident = ['a'-'z' '_'] suffix
let uident = ['A'-'Z'] suffix

let core_suffix = quote alpha suffix
let core_lident = lident core_suffix+
let core_uident = uident core_suffix+
125

126
let op_char_1 = ['=' '<' '>' '~']
127
let op_char_2 = ['+' '-']
128
let op_char_3 = ['*' '/' '\\' '%']
129
let op_char_4 = ['!' '$' '&' '?' '@' '^' '.' ':' '|' '#']
130 131 132 133
let op_char_34 = op_char_3 | op_char_4
let op_char_234 = op_char_2 | op_char_34
let op_char_1234 = op_char_1 | op_char_234

134 135
let op_char_pref = ['!' '?']

136
rule token = parse
137 138
  | "[##" space* ("\"" ([^ '\010' '\013' '"' ]* as file) "\"")?
    space* (dec+ as line) space* (dec+ as char) space* "]"
139 140
      { Lexlib.update_loc lexbuf file (int_of_string line) (int_of_string char);
        token lexbuf }
141
  | "[#" space* "\"" ([^ '\010' '\013' '"' ]* as file) "\""
142
    space* (dec+ as line) space* (dec+ as bchar) space*
143
    (dec+ as echar) space* "]"
144 145
      { POSITION (Loc.user_position file (int_of_string line)
                 (int_of_string bchar) (int_of_string echar)) }
146 147
  | "[@" space* ([^ ' ' '\n' ']']+ (' '+ [^ ' ' '\n' ']']+)* as lbl) space* ']'
      { ATTRIBUTE lbl }
148
  | '\n'
149
      { Lexing.new_line lexbuf; token lexbuf }
150
  | space+
151
      { token lexbuf }
152 153
  | '_'
      { UNDERSCORE }
154
  | lident as id
155
      { try Hashtbl.find keywords id with Not_found -> LIDENT id }
156 157
  | core_lident as id
      { CORE_LIDENT id }
158
  | uident as id
159
      { UIDENT id }
160 161 162
  | core_uident as id
      { CORE_UIDENT id }
  | dec dec_sep* as s
163
      { INTEGER Number.(int_literal ILitDec ~neg:false (Lexlib.remove_underscores s)) }
164
  | '0' ['x' 'X'] (hex hex_sep* as s)
165
      { INTEGER Number.(int_literal ILitHex ~neg:false (Lexlib.remove_underscores s)) }
166
  | '0' ['o' 'O'] (oct oct_sep* as s)
167
      { INTEGER Number.(int_literal ILitOct ~neg:false (Lexlib.remove_underscores s)) }
168
  | '0' ['b' 'B'] (bin bin_sep* as s)
169
      { INTEGER Number.(int_literal ILitBin ~neg:false (Lexlib.remove_underscores s)) }
170
  | (dec+ as i) ".."
171
      { Lexlib.backjump lexbuf 2; INTEGER Number.(int_literal ILitDec ~neg:false i) }
172
  | '0' ['x' 'X'] (hex+ as i) ".."
173
      { Lexlib.backjump lexbuf 2; INTEGER Number.(int_literal ILitHex ~neg:false i) }
174 175 176
  | (dec+ as i)     ("" as f)    ['e' 'E'] (['-' '+']? dec+ as e)
  | (dec+ as i) '.' (dec* as f) (['e' 'E'] (['-' '+']? dec+ as e))?
  | (dec* as i) '.' (dec+ as f) (['e' 'E'] (['-' '+']? dec+ as e))?
177
      { REAL (Number.real_literal ~radix:10 ~neg:false ~int:i ~frac:f ~exp:(Opt.map Lexlib.remove_leading_plus e)) }
178 179 180 181 182
  | '0' ['x' 'X'] (hex+ as i) ("" as f) ['p' 'P'] (['-' '+']? dec+ as e)
  | '0' ['x' 'X'] (hex+ as i) '.' (hex* as f)
        (['p' 'P'] (['-' '+']? dec+ as e))?
  | '0' ['x' 'X'] (hex* as i) '.' (hex+ as f)
        (['p' 'P'] (['-' '+']? dec+ as e))?
183
      { REAL (Number.real_literal ~radix:16 ~neg:false ~int:i ~frac:f ~exp:(Opt.map Lexlib.remove_leading_plus e)) }
184 185 186
  | "(*)"
      { Lexlib.backjump lexbuf 2; LEFTPAR }
  | "(*"
187
      { Lexlib.comment lexbuf; token lexbuf }
188
  | "'" (lalpha suffix as id)
189
      { QUOTE_LIDENT id }
190 191 192 193 194 195
  | ","
      { COMMA }
  | "("
      { LEFTPAR }
  | ")"
      { RIGHTPAR }
196 197 198 199
  | "{"
      { LEFTBRC }
  | "}"
      { RIGHTBRC }
200 201
  | ":"
      { COLON }
202 203
  | ";"
      { SEMICOLON }
204 205
  | "->"
      { ARROW }
206 207
  | "->'"
      { Lexlib.backjump lexbuf 1; ARROW }
208 209
  | "<-"
      { LARROW }
210 211
  | "<->"
      { LRARROW }
212 213 214 215
  | "&&"
      { AMPAMP }
  | "||"
      { BARBAR }
216 217 218 219
  | "/\\"
      { AND }
  | "\\/"
      { OR }
220 221
  | "."
      { DOT }
222 223
  | ".."
      { DOTDOT }
224 225
  | "&"
      { AMP }
226 227
  | "|"
      { BAR }
Clément Fumex's avatar
Clément Fumex committed
228 229 230 231
  | "<"
      { LT }
  | ">"
      { GT }
232 233
  | "<>"
      { LTGT }
Clément Fumex's avatar
Clément Fumex committed
234 235
  | "="
      { EQUAL }
236 237
  | "-"
      { MINUS }
238 239 240 241
  | "["
      { LEFTSQ }
  | "]"
      { RIGHTSQ }
242 243
  | "]" (quote+ as s)
      { RIGHTSQ_QUOTE s }
244
  | ")" ('\'' alpha suffix core_suffix* as s)
245
      { RIGHTPAR_QUOTE s }
246 247
  | ")" ('_' alpha suffix core_suffix* as s)
      { RIGHTPAR_USCORE s }
248
  | op_char_pref op_char_4* quote* as s
249
      { OPPREF s }
250
  | op_char_1234* op_char_1 op_char_1234* quote* as s
251
      { OP1 s }
252
  | op_char_234* op_char_2 op_char_234* quote* as s
253
      { OP2 s }
254
  | op_char_34* op_char_3 op_char_34* quote* as s
255 256 257
      { OP3 s }
  | op_char_4+ as s
      { OP4 s }
258
  | "\""
259
      { STRING (Lexlib.string lexbuf) }
260
  | eof
261 262
      { EOF }
  | _ as c
263
      { Lexlib.illegal_character c lexbuf }
264 265

{
266

267 268
  let debug = Debug.register_info_flag "print_modules"
    ~desc:"Print@ program@ modules@ after@ typechecking."
269

270
  let build_parsing_function entry lb = Loc.with_location (entry token) lb
271

272
  let parse_term = build_parsing_function Parser.term_eof
273

274
  let parse_term_list = build_parsing_function Parser.term_comma_list_eof
275

276 277 278 279 280
  let parse_qualid = build_parsing_function Parser.qualid_eof

  let parse_list_ident = build_parsing_function Parser.ident_comma_list_eof

  let parse_list_qualid = build_parsing_function Parser.qualid_comma_list_eof
281

282
  open Wstdlib
283 284 285
  open Ident
  open Theory
  open Pmodule
286

287
  let read_channel env path file c =
288 289
    let lb = Lexing.from_channel c in
    Loc.set_file file lb;
290
    Typing.open_file env path;
291 292 293 294 295 296
    let mm =
      try
        build_parsing_function Parser.mlw_file lb
      with
        e -> ignore (Typing.discard_file ()); raise e
    in
297
    if path = [] && Debug.test_flag debug then begin
Andrei Paskevich's avatar
Andrei Paskevich committed
298
      let print_m _ m = Format.eprintf "%a@\n@." print_module m in
299
      let add_m _ m mm = Mid.add m.mod_theory.th_name m mm in
Andrei Paskevich's avatar
Andrei Paskevich committed
300
      Mid.iter print_m (Mstr.fold add_m mm Mid.empty)
301 302
    end;
    mm
303

304
  let () = Env.register_format mlw_language "whyml" ["mlw";"why"] read_channel
305
    ~desc:"WhyML@ programming@ and@ specification@ language"
306

307
}