Commit 815970d1 authored by Andrei Paskevich

Lexer: better organize the definitions of basic primitives

parent 382c98bd
......@@ -97,18 +97,29 @@
}
let space = [' ' '\t' '\r']
let lalpha = ['a'-'z' '_']
let quote = '\''
let bin = ['0' '1']
let oct = ['0'-'7']
let dec = ['0'-'9']
let hex = ['0'-'9' 'a'-'f' 'A'-'F']
let bin_sep = ['0' '1' '_']
let oct_sep = ['0'-'7' '_']
let dec_sep = ['0'-'9' '_']
let hex_sep = ['0'-'9' 'a'-'f' 'A'-'F' '_']
let lalpha = ['a'-'z']
let ualpha = ['A'-'Z']
let alpha = lalpha | ualpha
let digit = ['0'-'9']
let digit_or_us = ['0'-'9' '_']
let alpha_no_us = ['a'-'z' 'A'-'Z']
let suffix = (alpha_no_us | '\''* digit_or_us)* '\''*
let lident = lalpha suffix
let uident = ualpha suffix
let lident_quote = lident ('\'' alpha_no_us suffix)+
let uident_quote = uident ('\'' alpha_no_us suffix)+
let hexadigit = ['0'-'9' 'a'-'f' 'A'-'F']
let alpha = ['a'-'z' 'A'-'Z']
let suffix = (alpha | quote* dec_sep)* quote*
let lident = ['a'-'z' '_'] suffix
let uident = ['A'-'Z'] suffix
let core_suffix = quote alpha suffix
let core_lident = lident core_suffix+
let core_uident = uident core_suffix+
let op_char_1 = ['=' '<' '>' '~']
let op_char_2 = ['+' '-']
......@@ -122,12 +133,12 @@ let op_char_pref = ['!' '?']
rule token = parse
| "##" space* ("\"" ([^ '\010' '\013' '"' ]* as file) "\"")?
space* (digit+ as line) space* (digit+ as char) space* "##"
space* (dec+ as line) space* (dec+ as char) space* "##"
{ Lexlib.update_loc lexbuf file (int_of_string line) (int_of_string char);
token lexbuf }
| "#" space* "\"" ([^ '\010' '\013' '"' ]* as file) "\""
space* (digit+ as line) space* (digit+ as bchar) space*
(digit+ as echar) space* "#"
space* (dec+ as line) space* (dec+ as bchar) space*
(dec+ as echar) space* "#"
{ POSITION (Loc.user_position file (int_of_string line)
(int_of_string bchar) (int_of_string echar)) }
| "[@" space* ([^ ' ' '\n' ']']+ (' '+ [^ ' ' '\n' ']']+)* as lbl) space* ']'
......@@ -140,30 +151,30 @@ rule token = parse
{ UNDERSCORE }
| lident as id
{ try Hashtbl.find keywords id with Not_found -> LIDENT id }
| lident_quote as id
{ LIDENT_QUOTE id }
| core_lident as id
{ CORE_LIDENT id }
| uident as id
{ UIDENT id }
| uident_quote as id
{ UIDENT_QUOTE id }
| ['0'-'9'] ['0'-'9' '_']* as s
| core_uident as id
{ CORE_UIDENT id }
| dec dec_sep* as s
{ INTEGER (Number.int_literal_dec (Lexlib.remove_underscores s)) }
| '0' ['x' 'X'] (['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']* as s)
| '0' ['x' 'X'] (hex hex_sep* as s)
{ INTEGER (Number.int_literal_hex (Lexlib.remove_underscores s)) }
| '0' ['o' 'O'] (['0'-'7'] ['0'-'7' '_']* as s)
| '0' ['o' 'O'] (oct oct_sep* as s)
{ INTEGER (Number.int_literal_oct (Lexlib.remove_underscores s)) }
| '0' ['b' 'B'] (['0'-'1'] ['0'-'1' '_']* as s)
| '0' ['b' 'B'] (bin bin_sep* as s)
{ INTEGER (Number.int_literal_bin (Lexlib.remove_underscores s)) }
| (digit+ as i) ("" as f) ['e' 'E'] (['-' '+']? digit+ as e)
| (digit+ as i) '.' (digit* as f) (['e' 'E'] (['-' '+']? digit+ as e))?
| (digit* as i) '.' (digit+ as f) (['e' 'E'] (['-' '+']? digit+ as e))?
| (dec+ as i) ("" as f) ['e' 'E'] (['-' '+']? dec+ as e)
| (dec+ as i) '.' (dec* as f) (['e' 'E'] (['-' '+']? dec+ as e))?
| (dec* as i) '.' (dec+ as f) (['e' 'E'] (['-' '+']? dec+ as e))?
{ REAL (Number.real_const_dec i f
(Opt.map Lexlib.remove_leading_plus e)) }
| '0' ['x' 'X'] (hexadigit+ as i) ("" as f) ['p' 'P'] (['-' '+']? digit+ as e)
| '0' ['x' 'X'] (hexadigit+ as i) '.' (hexadigit* as f)
(['p' 'P'] (['-' '+']? digit+ as e))?
| '0' ['x' 'X'] (hexadigit* as i) '.' (hexadigit+ as f)
(['p' 'P'] (['-' '+']? digit+ as e))?
| '0' ['x' 'X'] (hex+ as i) ("" as f) ['p' 'P'] (['-' '+']? dec+ as e)
| '0' ['x' 'X'] (hex+ as i) '.' (hex* as f)
(['p' 'P'] (['-' '+']? dec+ as e))?
| '0' ['x' 'X'] (hex* as i) '.' (hex+ as f)
(['p' 'P'] (['-' '+']? dec+ as e))?
{ REAL (Number.real_const_hex i f
(Opt.map Lexlib.remove_leading_plus e)) }
| "(**)"
......
......@@ -116,7 +116,7 @@
(* Tokens *)
%token <string> LIDENT LIDENT_QUOTE UIDENT UIDENT_QUOTE
%token <string> LIDENT CORE_LIDENT UIDENT CORE_UIDENT
%token <Number.integer_literal> INTEGER
%token <string> OP1 OP2 OP3 OP4 OPPREF
%token <Number.real_literal> REAL
......@@ -1145,22 +1145,22 @@ ident_nq:
uident:
| UIDENT { mk_id $1 $startpos $endpos }
| UIDENT_QUOTE { mk_id $1 $startpos $endpos }
| CORE_UIDENT { mk_id $1 $startpos $endpos }
uident_nq:
| UIDENT { mk_id $1 $startpos $endpos }
| UIDENT_QUOTE { let loc = floc $startpos($1) $endpos($1) in
| CORE_UIDENT { let loc = floc $startpos($1) $endpos($1) in
Loc.errorm ~loc "Symbol %s cannot be user-defined" $1 }
lident:
| LIDENT { mk_id $1 $startpos $endpos }
| lident_keyword { mk_id $1 $startpos $endpos }
| LIDENT_QUOTE { mk_id $1 $startpos $endpos }
| CORE_LIDENT { mk_id $1 $startpos $endpos }
lident_nq:
| LIDENT { mk_id $1 $startpos $endpos }
| lident_keyword { mk_id $1 $startpos $endpos }
| LIDENT_QUOTE { let loc = floc $startpos($1) $endpos($1) in
| CORE_LIDENT { let loc = floc $startpos($1) $endpos($1) in
Loc.errorm ~loc "Symbol %s cannot be user-defined" $1 }
lident_keyword:
......@@ -1168,7 +1168,7 @@ lident_keyword:
| FLOAT { "float" }
quote_lident:
| QUOTE_LIDENT { mk_id $1 $startpos $endpos }
| QUOTE_LIDENT { mk_id $1 $startpos $endpos }
(* Idents + symbolic operation names *)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment