(* sentenceLexer.mll *)
(* This lexer is used to read the sentences provided on the standard input
   channel when [--interpret] is enabled. *)

{

  open Lexing
  open SentenceParser
  open Grammar

  (* A short-hand. [error2 lexbuf msg] passes [msg] to [Error.error],
     together with a position built by [Positions.two] out of the start
     position ([lex_start_p]) and the current position ([lex_curr_p]) of
     [lexbuf], that is, the extent of the lexeme that was just matched. *)

  let error2 lexbuf msg =
    Error.error (Positions.two lexbuf.lex_start_p lexbuf.lex_curr_p) msg

}

(* An end-of-line marker: LF, CR, or CR followed by LF. *)

let newline   = ('\010' | '\013' | "\013\010")

(* Blank characters. A semicolon is treated like a space or a tab. *)

let whitespace = [ ' ' '\t' ';' ]

(* The first character of a lowercase identifier: an ASCII lowercase letter,
   an underscore, or a character whose code lies in 223-246 or 248-255
   (presumably the ISO-8859-1 lowercase accented letters -- to be
   confirmed). *)

let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']

(* The first character of an uppercase identifier: an ASCII uppercase letter
   or a character whose code lies in 192-214 or 216-222 (presumably the
   ISO-8859-1 uppercase accented letters -- to be confirmed). *)

let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']

(* A character that may appear after the first character of an identifier:
   any of the letters above, an underscore, or a digit. *)

let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *)

(* An auto-generated comment begins with "##" and extends up to and
   including the end of the line. *)

let autocomment = "##" [^'\010''\013']* newline

(* A comment begins with "#" and extends up to and including the end of the
   line. Every [autocomment] is also matched by this expression. *)

let comment = "#" [^'\010''\013']* newline

(* Two end-of-line markers separated by optional whitespace. This
   definition is not used by the [lex] rule. *)

let skip = newline whitespace* newline

(* The lexer's entry point. Each call produces one token among
   [NONTERMINAL], [TERMINAL], [EOL], [COMMENT], [COLON] and [EOF], or
   reports an error via [error2]. *)
rule lex = parse
  (* An identifier that begins with a lowercase letter is considered a
     non-terminal symbol. It should be a start symbol. *)
  | (lowercase identchar *) as lid
      { try
          let nt = Nonterminal.lookup lid in
          if StringSet.mem lid Front.grammar.UnparameterizedSyntax.start_symbols then
            NONTERMINAL (nt, lexbuf.lex_start_p, lexbuf.lex_curr_p)
          else
            error2 lexbuf (Printf.sprintf "\"%s\" is not a start symbol." lid)
        with Not_found ->
          (* [Nonterminal.lookup] did not find [lid]. *)
          error2 lexbuf (Printf.sprintf "\"%s\" is not a known non-terminal symbol." lid)
      }
  (* An identifier that begins with an uppercase letter is considered a
     terminal symbol. *)
  | (uppercase identchar *) as uid
      { try
          TERMINAL (Terminal.lookup uid, lexbuf.lex_start_p, lexbuf.lex_curr_p)
        with Not_found ->
          (* [Terminal.lookup] did not find [uid]. *)
          error2 lexbuf (Printf.sprintf "\"%s\" is not a known terminal symbol." uid)
      }
  (* Whitespace is ignored. *)
  | whitespace
      { lex lexbuf }
  (* The end of a line is translated to [EOL]. *)
  | newline
      { new_line lexbuf; EOL }
  (* An auto-generated comment is ignored. This case must appear before the
     [comment] case: on a line that begins with "##", both expressions match
     the same text, and ocamllex then selects the case that comes first.
     The matched text includes the end of the line, hence the call to
     [new_line]. *)
  | autocomment
      { new_line lexbuf; lex lexbuf }
  (* A manually-written comment is preserved. Its text [c] includes the
     end-of-line marker that terminates it. *)
  | comment as c
      { new_line lexbuf; COMMENT c }
  (* The end of file is translated to [EOF]. *)
  | eof
      { EOF }
  (* A colon. *)
  | ':'
      { COLON }
  (* Any other character is an error. *)
  | _
      { error2 lexbuf "unexpected character." }