py_lexer.mll 4.46 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
(********************************************************************)
(*                                                                  *)
(*  The Why3 Verification Platform   /   The Why3 Development Team  *)
(*  Copyright 2010-2016   --   INRIA - CNRS - Paris-Sud University  *)
(*                                                                  *)
(*  This software is distributed under the terms of the GNU Lesser  *)
(*  General Public License version 2.1, with the special exception  *)
(*  on linking described in file LICENSE.                           *)
(*                                                                  *)
(********************************************************************)
11 12 13 14 15 16 17 18

{
  open Lexing
  open Py_ast
  open Py_parser

  exception Lexing_error of string

19 20 21 22
  let () = Why3.Exn_printer.register (fun fmt exn -> match exn with
  | Lexing_error s -> Format.fprintf fmt "syntax error: %s" s
  | _ -> raise exn)

23 24 25
  let id_or_kwd =
    let h = Hashtbl.create 32 in
    List.iter (fun (s, tok) -> Hashtbl.add h s tok)
26
      ["def", DEF; "if", IF; "else", ELSE; "elif", ELIF;
27
       "return", RETURN; "while", WHILE;
28 29
       "for", FOR; "in", IN;
       "and", AND; "or", OR; "not", NOT;
30
       "True", TRUE; "False", FALSE; "None", NONE;
31
       "from", FROM; "import", IMPORT; "break", BREAK;
32 33 34
       (* annotations *)
       "forall", FORALL; "exists", EXISTS; "then", THEN; "let", LET;
      ];
35 36
   fun s -> try Hashtbl.find h s with Not_found -> IDENT s

37 38 39 40 41 42
  let annotation =
    let h = Hashtbl.create 32 in
    List.iter (fun (s, tok) -> Hashtbl.add h s tok)
      ["invariant", INVARIANT; "variant", VARIANT;
       "assert", ASSERT; "assume", ASSUME; "check", CHECK;
       "requires", REQUIRES; "ensures", ENSURES;
43
       "label", LABEL;
44 45 46 47
      ];
    fun s -> try Hashtbl.find h s with Not_found ->
      raise (Lexing_error ("no such annotation '" ^ s ^ "'"))

48 49 50 51 52 53 54 55
  let newline lexbuf =
    let pos = lexbuf.lex_curr_p in
    lexbuf.lex_curr_p <-
      { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum }

  let string_buffer = Buffer.create 1024

  let stack = ref [0]  (* indentation stack *)
56

57 58 59 60
  let rec unindent n = match !stack with
    | m :: _ when m = n -> []
    | m :: st when m > n -> stack := st; END :: unindent n
    | _ -> raise (Lexing_error "bad indentation")
61 62 63 64 65 66 67 68 69

  let update_stack n =
    match !stack with
    | m :: _ when m < n ->
      stack := n :: !stack;
      [NEWLINE; BEGIN]
    | _ ->
      NEWLINE :: unindent n

70 71 72 73 74 75 76
}

let letter = ['a'-'z' 'A'-'Z']
let digit = ['0'-'9']
let ident = letter (letter | digit | '_')*
let integer = ['0'-'9']+
let space = ' ' | '\t'
77
let comment = "#" [^'@''\n'] [^'\n']*
78 79

rule next_tokens = parse
80
  | '\n'    { newline lexbuf; update_stack (indentation lexbuf) }
81
  | (space | comment)+
82
            { next_tokens lexbuf }
83 84
  | "#@" space* (ident as id)
            { [annotation id] }
85
  | "#@"    { raise (Lexing_error "expecting an annotation") }
86 87
  | ident as id
            { [id_or_kwd id] }
88 89 90
  | '+'     { [PLUS] }
  | '-'     { [MINUS] }
  | '*'     { [TIMES] }
91
  | "//"    { [DIV] }
92 93 94 95 96 97 98 99
  | '%'     { [MOD] }
  | '='     { [EQUAL] }
  | "=="    { [CMP Beq] }
  | "!="    { [CMP Bneq] }
  | "<"     { [CMP Blt] }
  | "<="    { [CMP Ble] }
  | ">"     { [CMP Bgt] }
  | ">="    { [CMP Bge] }
100 101 102 103
  | '('     { [LEFTPAR] }
  | ')'     { [RIGHTPAR] }
  | '['     { [LEFTSQ] }
  | ']'     { [RIGHTSQ] }
104 105
  | ','     { [COMMA] }
  | ':'     { [COLON] }
106 107 108
  (* logic symbols *)
  | "->"    { [ARROW] }
  | "->"    { [LRARROW] }
109
  | "."     { [DOT] }
110
  | integer as s
111 112
            { [INTEGER s] }
  | '"'     { [STRING (string lexbuf)] }
113 114 115
  | eof     { [EOF] }
  | _ as c  { raise (Lexing_error ("illegal character: " ^ String.make 1 c)) }

116
(* count the indentation, i.e. the number of space characters from bol *)
117 118
and indentation = parse
  | (space | comment)* '\n'
119
      (* skip empty lines *)
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
      { newline lexbuf; indentation lexbuf }
  | space* as s
      { String.length s }

and string = parse
  | '"'
      { let s = Buffer.contents string_buffer in
	Buffer.reset string_buffer;
	s }
  | "\\n"
      { Buffer.add_char string_buffer '\n';
	string lexbuf }
  | "\\\""
      { Buffer.add_char string_buffer '"';
	string lexbuf }
  | _ as c
      { Buffer.add_char string_buffer c;
	string lexbuf }
  | eof
      { raise (Lexing_error "unterminated string") }

{

  let next_token =
144
    let tokens = Queue.create () in
145 146 147 148 149 150 151 152 153 154
    fun lb ->
      if Queue.is_empty tokens then begin
	let l = next_tokens lb in
	List.iter (fun t -> Queue.add t tokens) l
      end;
      Queue.pop tokens
}