(* sentenceLexer.mll *)
(* This lexer is used to read the sentences provided on the standard input
   channel when [--interpret] is enabled. *)

{

  open Lexing
  open SentenceParser
  open Grammar

  (* [update_loc lexbuf] records a line break in [lexbuf]'s current
     position: the line number is incremented and the beginning-of-line
     offset is set to the current character offset. Some error messages
     rely on this line counter. *)

  let update_loc lexbuf =
    let here = lexbuf.lex_curr_p in
    let next_line = here.pos_lnum + 1
    and line_start = here.pos_cnum in
    lexbuf.lex_curr_p <-
      { here with pos_bol = line_start; pos_lnum = next_line }

  (* A short-hand: [error1 lexbuf msg] calls [Error.error] with [msg] and
     the value that [Positions.one] builds from the start position of the
     lexeme currently held in [lexbuf]. *)

  let error1 lexbuf msg =
    let start = lexeme_start_p lexbuf in
    Error.error (Positions.one start) msg

}

(* A line break: CR LF, a lone CR, or a lone LF. *)
let newline = ("\013\010" | '\013' | '\010')

(* Blank characters. Note that ';' is a member of this set. *)
let whitespace = [ ';' '\t' ' ' ]

(* Characters that may begin a lowercase identifier: '_', ASCII lowercase
   letters, and the character codes 223-246 and 248-255. *)
let lowercase = ['_' 'a'-'z' '\223'-'\246' '\248'-'\255']

(* Characters that may begin an uppercase identifier: ASCII uppercase
   letters, and the character codes 192-214 and 216-222. *)
let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']

(* Characters that may continue an identifier: any [lowercase] or
   [uppercase] character, or a decimal digit. '\'' is forbidden. *)
let identchar = lowercase | uppercase | ['0'-'9']

rule lex = parse
  (* An identifier that begins with a [lowercase] character is looked up
     as a nonterminal symbol; it is accepted only if it also belongs to
     the grammar's set of start symbols. *)
  | (lowercase identchar *) as lid
      { try
          let nt = Nonterminal.lookup lid in
          let start_symbols =
            Front.grammar.UnparameterizedSyntax.start_symbols
          in
          if not (StringSet.mem lid start_symbols) then
            let msg = Printf.sprintf "\"%s\" is not a start symbol." lid in
            error1 lexbuf msg
          else
            NONTERMINAL nt
        with Not_found ->
          let msg =
            Printf.sprintf "\"%s\" is not a known non-terminal symbol." lid
          in
          error1 lexbuf msg
      }
  (* An identifier that begins with an [uppercase] character is looked up
     as a terminal symbol. *)
  | (uppercase identchar *) as uid
      { try
          TERMINAL (Terminal.lookup uid)
        with Not_found ->
          let msg =
            Printf.sprintf "\"%s\" is not a known terminal symbol." uid
          in
          error1 lexbuf msg
      }
  (* Blank characters are skipped. *)
  | whitespace
      { lex lexbuf }
  (* A line break bumps the line counter and produces [EOL]. *)
  | newline
      { update_loc lexbuf;
        EOL }
  | eof
      { EOF }
  | ':'
      { COLON }
  (* Any other character is reported through [error1]. This clause must
     remain last, since it also matches every single character that the
     clauses above accept. *)
  | _
      { error1 lexbuf "unexpected character(s)." }