sentenceLexer.mll 1.86 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
(* This lexer is used to read the sentences provided on the standard input
   channel when [--interpret] is enabled. *)

{

  open Lexing
  open SentenceParser
  open Grammar

  (* A short-hand. *)

12 13
  (* [error2 lexbuf msg] reports [msg] through [Error.error], at the location
     delimited by the start and end positions of the lexeme that [lexbuf]
     currently holds. *)
  let error2 lexbuf msg =
    let start_pos = lexbuf.lex_start_p
    and end_pos = lexbuf.lex_curr_p in
    let location = Positions.two start_pos end_pos in
    Error.error location msg
14 15 16 17 18 19 20 21 22 23 24 25 26

}

(* A line terminator: a line feed, a carriage return, or a carriage return
   immediately followed by a line feed. *)
let newline = ("\r\n" | '\n' | '\r')

(* Blank characters. Note that a semicolon is treated as whitespace too. *)
let whitespace = [' ' '\t' ';']

(* Characters that may begin a lowercase identifier: ASCII lowercase letters,
   the underscore, and the 8-bit codes 223-246 and 248-255 (presumably the
   ISO Latin-1 lowercase accented letters). *)
let lowercase = ['a'-'z' '_' '\223'-'\246' '\248'-'\255']

(* Characters that may begin an uppercase identifier: ASCII uppercase letters
   and the 8-bit codes 192-214 and 216-222 (presumably the ISO Latin-1
   uppercase accented letters). *)
let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']

(* Characters that may follow the first character of an identifier: any
   lowercase or uppercase character as defined above, or a decimal digit. *)
let identchar = lowercase | uppercase | ['0'-'9'] (* '\'' forbidden *)

27 28 29 30
(* A comment starts with a hash sign and runs up to, and including, the line
   terminator that ends the line. *)
let comment = '#' [^ '\n' '\r']* newline

(* Two line terminators separated only by whitespace, i.e. a blank line.
   This definition is not referenced by the rule visible in this file. *)
let skip = newline whitespace * newline

31
(* [lex lexbuf] reads the next token of a sentence from [lexbuf]. It produces
   [NONTERMINAL], [TERMINAL], [COLON], [EOL] or [EOF]; whitespace and comments
   are skipped. Unknown symbols, non-start nonterminals and unexpected
   characters are reported through [error2]. *)
rule lex = parse
  (* An identifier that begins with a lowercase letter is considered a
     non-terminal symbol. It should be a start symbol. *)
  | (lowercase identchar *) as lid
      { try
          let nt = Nonterminal.lookup lid in
          if StringSet.mem lid Front.grammar.UnparameterizedSyntax.start_symbols then
            NONTERMINAL nt
          else
            error2 lexbuf (Printf.sprintf "\"%s\" is not a start symbol." lid)
        with Not_found ->
          (* [Nonterminal.lookup] did not find [lid]. *)
          error2 lexbuf (Printf.sprintf "\"%s\" is not a known non-terminal symbol." lid)
      }
  (* An identifier that begins with an uppercase letter is considered a
     terminal symbol. *)
  | (uppercase identchar *) as uid
      { try
          TERMINAL (Terminal.lookup uid)
        with Not_found ->
          error2 lexbuf (Printf.sprintf "\"%s\" is not a known terminal symbol." uid)
      }
  (* Whitespace is ignored. *)
  | whitespace
      { lex lexbuf }
  (* The end of a line is translated to [EOL]. *)
  | newline
      { new_line lexbuf; EOL }
  (* A comment is ignored, together with the line terminator that ends it. *)
  | comment
      { new_line lexbuf; lex lexbuf }
  (* A comment on the very last line of the input may lack a line terminator,
     in which case [comment] does not match it. Such a comment is ignored as
     well, and the end of file that follows it is reported directly. *)
  | '#' [^'\010''\013']* eof
      { EOF }
  (* The end of file is translated to [EOF]. *)
  | eof
      { EOF }
  (* A colon. *)
  | ':'
      { COLON }
  (* Anything else is an error. *)
  | _
      { error2 lexbuf "unexpected character." }
69