Commit 903a0b53 authored by Guillaume Melquiond's avatar Guillaume Melquiond

Make error messages more robust to illegal characters (fixes #95).

parent fa80b355
......@@ -19,14 +19,12 @@
(* lexical errors *)
exception IllegalCharacter of char
exception IllegalLexeme of string
exception UnterminatedComment
exception UnknownDDW of string
exception UnknownDW of string
let () = Exn_printer.register (fun fmt e -> match e with
| IllegalCharacter c -> fprintf fmt "illegal character %c" c
| IllegalLexeme s -> fprintf fmt "illegal lexeme %s" s
| UnterminatedComment -> fprintf fmt "unterminated comment"
| UnknownDDW s -> fprintf fmt "unknown system_word %s" s
......@@ -211,7 +209,7 @@ rule token = parse
| eof
{ EOF }
| _ as c
{ raise (IllegalCharacter c) }
{ Lexlib.illegal_character c lexbuf }
and comment_block = parse
| "*/"
......
......@@ -10,16 +10,9 @@
(********************************************************************)
{
open Format
open Lexing
open Driver_parser
exception IllegalCharacter of char
let () = Exn_printer.register (fun fmt e -> match e with
| IllegalCharacter c -> fprintf fmt "illegal character %c" c
| _ -> raise e)
let keywords = Hashtbl.create 97
let () =
List.iter
......@@ -110,7 +103,7 @@ rule token = parse
| eof
{ EOF }
| _ as c
{ raise (IllegalCharacter c) }
{ Lexlib.illegal_character c lexbuf }
{
let parse_file_gen parse input_lexbuf lexbuf =
......
......@@ -10,15 +10,8 @@
(********************************************************************)
{
open Format
open Parser
exception IllegalCharacter of char
let () = Exn_printer.register (fun fmt e -> match e with
| IllegalCharacter c -> fprintf fmt "illegal character %c" c
| _ -> raise e)
let keywords = Hashtbl.create 97
let () =
List.iter
......@@ -242,7 +235,7 @@ rule token = parse
| eof
{ EOF }
| _ as c
{ raise (IllegalCharacter c) }
{ Lexlib.illegal_character c lexbuf }
{
......
......@@ -20,3 +20,5 @@ val update_loc : Lexing.lexbuf -> string option -> int -> int -> unit
val remove_leading_plus : string -> string
val remove_underscores : string -> string
val illegal_character : char -> Lexing.lexbuf -> 'a
......@@ -17,10 +17,12 @@
exception UnterminatedComment
exception UnterminatedString
exception IllegalCharacter of string
let () = Exn_printer.register (fun fmt e -> match e with
| UnterminatedComment -> fprintf fmt "unterminated comment"
| UnterminatedString -> fprintf fmt "unterminated string"
| IllegalCharacter s -> fprintf fmt "illegal character %s" s
| _ -> raise e)
let string_start_loc = ref Loc.dummy_position
......@@ -37,7 +39,15 @@
let newline = '\r'* '\n'
rule comment = parse
rule utf8_tail b n = parse
| eof
{ false }
| ['\128'-'\191'] as c
{ Buffer.add_char b c;
n == 1 || utf8_tail b (n - 1) lexbuf }
| _ { false }
and comment = parse
| "(*)"
{ comment lexbuf }
| "*)"
......@@ -104,4 +114,24 @@ and string_skip_spaces = parse
Bytes.unsafe_to_string t
end else s
let illegal_character c lexbuf =
let loc = loc lexbuf in
let b = Buffer.create 2 in
Buffer.add_char b c;
let n =
match c with
| '\000'..'\127' -> 0
| '\192'..'\223' -> 1
| '\224'..'\239' -> 2
| '\240'..'\247' -> 3
| _ -> -1 in
if n <> 0 && (n == -1 || not (utf8_tail b n lexbuf)) then begin
(* invalid encoding, convert the first character to a utf8 one *)
Buffer.reset b;
let c = Char.code c in
Buffer.add_char b (Char.chr (0xC0 lor (c lsr 6)));
Buffer.add_char b (Char.chr (c land 0xBF));
end;
(* TODO: check that the buffer does not hold a utf8 character in one of the invalid ranges *)
raise (Loc.Located (loc, IllegalCharacter (Buffer.contents b)))
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment