py_lexer.mll 3.76 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
(********************************************************************)
(*                                                                  *)
(*  The Why3 Verification Platform   /   The Why3 Development Team  *)
(*  Copyright 2010-2016   --   INRIA - CNRS - Paris-Sud University  *)
(*                                                                  *)
(*  This software is distributed under the terms of the GNU Lesser  *)
(*  General Public License version 2.1, with the special exception  *)
(*  on linking described in file LICENSE.                           *)
(*                                                                  *)
(********************************************************************)
11 12 13 14 15 16 17 18 19 20 21 22

{
  open Lexing
  open Py_ast
  open Py_parser

  exception Lexing_error of string

  let id_or_kwd =
    let h = Hashtbl.create 32 in
    List.iter (fun (s, tok) -> Hashtbl.add h s tok)
      ["def", DEF; "if", IF; "else", ELSE;
23
       "return", RETURN; "print", PRINT; "while", WHILE;
24 25
       "for", FOR; "in", IN;
       "and", AND; "or", OR; "not", NOT;
26 27 28
       "True", TRUE;
       "False", FALSE;
       "None", NONE;];
29 30 31 32 33 34 35 36 37 38
   fun s -> try Hashtbl.find h s with Not_found -> IDENT s

  let newline lexbuf =
    let pos = lexbuf.lex_curr_p in
    lexbuf.lex_curr_p <-
      { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum }

  let string_buffer = Buffer.create 1024

  let stack = ref [0]  (* indentation stack *)
39

40 41 42 43
  let rec unindent n = match !stack with
    | m :: _ when m = n -> []
    | m :: st when m > n -> stack := st; END :: unindent n
    | _ -> raise (Lexing_error "bad indentation")
44 45 46 47 48 49 50 51 52

  let update_stack n =
    match !stack with
    | m :: _ when m < n ->
      stack := n :: !stack;
      [NEWLINE; BEGIN]
    | _ ->
      NEWLINE :: unindent n

53 54 55 56 57 58 59
}

let letter = ['a'-'z' 'A'-'Z']
let digit = ['0'-'9']
let ident = letter (letter | digit | '_')*
let integer = ['0'-'9']+
let space = ' ' | '\t'
60
let comment = "#" [^'#''\n'] [^'\n']*
61 62

rule next_tokens = parse
63
  | '\n'    { newline lexbuf; update_stack (indentation lexbuf) }
64
  | (space | comment)+
65 66 67 68 69 70
            { next_tokens lexbuf }
  | "##" space* "invariant" space+ { [INVARIANT] }
  | "##" space* "variant"   space+ { [VARIANT] }
  | "##" space* "assert"    space+ { [ASSERT] }
  | ident as id
            { [id_or_kwd id] }
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
  | '+'     { [PLUS] }
  | '-'     { [MINUS] }
  | '*'     { [TIMES] }
  | '/'     { [DIV] }
  | '%'     { [MOD] }
  | '='     { [EQUAL] }
  | "=="    { [CMP Beq] }
  | "!="    { [CMP Bneq] }
  | "<"     { [CMP Blt] }
  | "<="    { [CMP Ble] }
  | ">"     { [CMP Bgt] }
  | ">="    { [CMP Bge] }
  | '('     { [LP] }
  | ')'     { [RP] }
  | '['     { [LSQ] }
  | ']'     { [RSQ] }
  | ','     { [COMMA] }
  | ':'     { [COLON] }
89 90 91
  (* logic symbols *)
  | "->"    { [ARROW] }
  | "->"    { [LRARROW] }
92
  | integer as s
93 94
            { [INTEGER s] }
  | '"'     { [STRING (string lexbuf)] }
95 96 97
  | eof     { [EOF] }
  | _ as c  { raise (Lexing_error ("illegal character: " ^ String.make 1 c)) }

98
(* count the indentation, i.e. the number of space characters from bol *)
99 100
and indentation = parse
  | (space | comment)* '\n'
101
      (* skip empty lines *)
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
      { newline lexbuf; indentation lexbuf }
  | space* as s
      { String.length s }

and string = parse
  | '"'
      { let s = Buffer.contents string_buffer in
	Buffer.reset string_buffer;
	s }
  | "\\n"
      { Buffer.add_char string_buffer '\n';
	string lexbuf }
  | "\\\""
      { Buffer.add_char string_buffer '"';
	string lexbuf }
  | _ as c
      { Buffer.add_char string_buffer c;
	string lexbuf }
  | eof
      { raise (Lexing_error "unterminated string") }

{

  let next_token =
126
    let tokens = Queue.create () in
127 128 129 130 131 132 133 134 135 136
    fun lb ->
      if Queue.is_empty tokens then begin
	let l = next_tokens lb in
	List.iter (fun t -> Queue.add t tokens) l
      end;
      Queue.pop tokens
}