Commit 7f1275d2 authored by POTTIER Francois's avatar POTTIER Francois

Modified the sentence lexer/parser to drop ## comments and keep # comments.

Also simplified [Interpret] slightly to report just one syntax error.
[Segment] still drops # comments when in idle mode, this should be fixed.
parent 96944356
......@@ -5,27 +5,39 @@ module I = Invariant (* artificial dependency; ensures that [Invariant] runs fir
(* --------------------------------------------------------------------------- *)
(* The following definitions are in sync with [SentenceParser]. *)
open Grammar
type terminals = Terminal.t list
type sentence = Nonterminal.t option * terminals
type located_sentence = Positions.positions * sentence
type message = string
open SentenceParserAux
(* An error message. *)
(* A run is a series of sentences together with an error message. *)
type message =
string
type run = located_sentence list * message
(* A run is a series of sentences or comments together with an error message. *)
type run =
located_sentence or_comment list * message
(* A targeted sentence is a located sentence together with the state
into which it leads. *)
type targeted_sentence = located_sentence * Lr1.node
type targeted_sentence =
located_sentence * Lr1.node
(* A targeted run is a series of targeted sentences or comments together with
an error message. *)
(* A targeted run is a series of targeted sentences together with an error
message. *)
type maybe_targeted_run =
targeted_sentence option or_comment list * message
type targeted_run = targeted_sentence list * message
type targeted_run =
targeted_sentence or_comment list * message
(* A filtered targeted run is a series of targeted sentences together with an
error message. (The comments have been filtered out.) *)
type filtered_targeted_run =
targeted_sentence list * message
(* --------------------------------------------------------------------------- *)
......@@ -237,29 +249,45 @@ let interpret_error sentence =
let fail poss msg =
Error.signal poss (Printf.sprintf
"This sentence does not end with a syntax error, as desired.\n%s"
"This sentence does not end with a syntax error, as it should.\n%s"
msg
);
[] (* dummy result *)
None (* no result *)
let target_sentence : located_sentence -> targeted_sentence list =
let target_sentence : located_sentence -> targeted_sentence option =
fun (poss, sentence) ->
interpret_error_aux poss sentence
(fail poss)
(fun _nt _terminals s' -> [ (poss, sentence), s' ])
(fun _nt _terminals s' -> Some ((poss, sentence), s'))
let target_run_1 : run -> maybe_targeted_run =
fun (sentences, message) ->
List.map (or_comment_map target_sentence) sentences, message
let target_run : run -> targeted_run =
let target_run_2 : maybe_targeted_run -> targeted_run =
fun (sentences, message) ->
List.flatten (List.map target_sentence sentences), message
List.map (or_comment_map Misc.unSome) sentences, message
let target_runs : run list -> targeted_run list =
fun runs ->
let runs = List.map target_run runs in
(* Interpret all sentences, possibly displaying multiple errors. *)
let runs = List.map target_run_1 runs in
(* Abort if an error occurred. *)
if Error.errors() then exit 1;
(* Remove the options introduced by the first phase above. *)
let runs = List.map target_run_2 runs in
runs
(* --------------------------------------------------------------------------- *)
(* [filter_run] filters out the comments in a run. *)
let filter_run : targeted_run -> filtered_targeted_run =
fun (sentences, message) ->
List.flatten (List.map unSentence sentences), message
(* --------------------------------------------------------------------------- *)
(* [setup()] returns a function [read] which reads one sentence from the
standard input channel. *)
......@@ -328,30 +356,24 @@ let read_messages filename : run list =
match segments with
| [] ->
List.rev accu
| (_, lexbuf) :: [] ->
(* Oops, we are desynchronized. *)
Error.signal
(Positions.one (Lexing.lexeme_end_p lexbuf))
"Syntax error: missing a final message. I may be desynchronized.";
List.rev accu
| (_, lexbuf) :: (text, _) :: segments ->
(* Read a non-empty series of located sentences. *)
| (_, lexbuf) :: segments ->
(* Read a series of located sentences. *)
match SentenceParser.entry SentenceLexer.lex lexbuf with
| exception Parsing.Parse_error ->
(* Report an error. *)
Error.signal
Error.error
(Positions.one (Lexing.lexeme_start_p lexbuf))
"Syntax error: ill-formed sentence.";
(* Continue anyway. *)
loop accu segments
"Ill-formed sentence."
| sentences ->
loop ((sentences, text) :: accu) segments
(* Read a segment of text. *)
match segments with
| [] ->
Error.error
(Positions.one (Lexing.lexeme_end_p lexbuf))
"Syntax error: missing a final message. I may be desynchronized."
| (text, _) :: segments ->
loop ((sentences, text) :: accu) segments
in
let runs = loop [] segments in
if Error.errors() then exit 1;
(* Although we try to report several errors, [SentenceLexer.lex] may
abort the whole process after just one error. This could be improved. *)
runs
loop [] segments
(* --------------------------------------------------------------------------- *)
......@@ -359,7 +381,7 @@ let read_messages filename : run list =
states to located sentences and messages. Optionally, it can detect that
two sentences lead to the same state, and report an error. *)
let message_table (detect_redundancy : bool) (runs : targeted_run list)
let message_table (detect_redundancy : bool) (runs : filtered_targeted_run list)
: (located_sentence * message) Lr1.NodeMap.t =
let table =
......@@ -387,7 +409,7 @@ let message_table (detect_redundancy : bool) (runs : targeted_run list)
a mapping of state numbers to error messages. The code is sent to the
standard output channel. *)
let compile_runs filename (runs : targeted_run list) : unit =
let compile_runs filename (runs : filtered_targeted_run list) : unit =
(* We wish to produce a function that maps a state number to a message.
By convention, we call this function [message]. *)
......@@ -450,6 +472,9 @@ let () =
sentence does not end in an error, as expected. *)
let runs = target_runs runs in
(* Remove comments. *)
let runs = List.map filter_run runs in
(* Build a mapping of states to located sentences. This allows us to
detect if two sentences lead to the same state. *)
let _ = message_table true runs in
......@@ -484,6 +509,8 @@ let () =
and runs2 = read_messages filename2 in
let runs1 = target_runs runs1
and runs2 = target_runs runs2 in (* here, it would be OK to ignore errors *)
let runs1 = List.map filter_run runs1
and runs2 = List.map filter_run runs2 in
let table1 = message_table false runs1
and table2 = message_table false runs2 in
......
......@@ -24,7 +24,9 @@ let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *)
let comment = '#' [^'\010''\013']* newline
let autocomment = "##" [^'\010''\013']* newline
let comment = "#" [^'\010''\013']* newline
let skip = newline whitespace* newline
......@@ -55,9 +57,12 @@ rule lex = parse
(* The end of a line is translated to [EOL]. *)
| newline
{ new_line lexbuf; EOL }
(* A comment is ignored. *)
| comment
(* An auto-generated comment is ignored. *)
| autocomment
{ new_line lexbuf; lex lexbuf }
(* A manually-written comment is preserved. *)
| comment as c
{ new_line lexbuf; COMMENT c }
(* The end of file is translated to [EOF]. *)
| eof
{ EOF }
......
......@@ -11,16 +11,15 @@
%token COLON EOF EOL
%token<Grammar.Terminal.t> TERMINAL
%token<Grammar.Nonterminal.t> NONTERMINAL
%token<string> COMMENT
/* only manually-written comments, beginning with a single # */
/* ------------------------------------------------------------------------ */
/* Types. */
%{
open Grammar
type terminals = Terminal.t list
type sentence = Nonterminal.t option * terminals
type located_sentence = Positions.positions * sentence
open SentenceParserAux
%}
......@@ -28,28 +27,25 @@
%type <sentence> sentence
%type <located_sentence> located_sentence
/* %start <sentence option> optional_sentence */
%type
<(Grammar.Nonterminal.t option * Grammar.Terminal.t list) option>
optional_sentence
%type <SentenceParserAux.sentence option> optional_sentence
%start optional_sentence
/* %start<located_sentence list> entry */
%type
<(Positions.positions * (Grammar.Nonterminal.t option * Grammar.Terminal.t list)) list>
entry
%type<SentenceParserAux.located_sentence SentenceParserAux.or_comment list> entry
%start entry
%%
/* ------------------------------------------------------------------------ */
/* An entry is a non-empty list of located sentences. */
entry: located_sentence located_sentences EOF
{ $1 :: $2 }
/* An entry is a list of located sentences or comments. */
entry: located_sentences_or_comments EOF
{ $1 }
/* A list of located sentences. */
located_sentences: { [] } | located_sentence located_sentences { $1 :: $2 }
/* A list of located sentences or comments. */
located_sentences_or_comments:
{ [] }
| located_sentence located_sentences_or_comments { Sentence $1 :: $2 }
| COMMENT located_sentences_or_comments { Comment $1 :: $2 }
/* A located sentence. */
located_sentence: sentence
......
open Grammar
type terminals =
Terminal.t list
type sentence =
Nonterminal.t option * terminals
type located_sentence =
Positions.positions * sentence
type comment =
string
type 'a or_comment =
| Sentence of 'a
| Comment of comment
let or_comment_map f = function
| Sentence s ->
Sentence (f s)
| Comment c ->
Comment c
let unSentence = function
| Sentence x ->
[ x ]
| Comment _ ->
[]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment