Commit 7afbfc58 authored by POTTIER Francois's avatar POTTIER Francois
Browse files

Implement [--merge-errors].

parent 049fb34b
Pipeline #190784 passed with stages
in 53 seconds
......@@ -326,6 +326,16 @@ let write_run : maybe_targeted_run or_comment -> unit =
(* Must begin with a blank line. *)
print_string comments
let untarget_sentence (sentence, target) =
(sentence, Some target)
let untarget_run (run : targeted_run) : maybe_targeted_run =
{ run with elements = (or_comment_map untarget_sentence) run.elements }
let write_targeted_run (run : targeted_run or_comment) =
write_run (or_comment_map untarget_run run)
(* --------------------------------------------------------------------------- *)
(* [interpret_error] interprets a sentence, expecting it to end in an error.
......@@ -728,6 +738,368 @@ let () =
(* --------------------------------------------------------------------------- *)
(* Auxiliary functions for [merge_errors]. *)
(* [is_blank c] determines whether the comment [c] is blank. *)
let is_blank_char c =
match c with
| ' ' | '\n' | '\r' | '\t' ->
| _ ->
let rec is_blank c i n =
i = n || is_blank_char c.[i] && is_blank c (i+1) n
let is_blank c =
is_blank c 0 (String.length c)
(* [remove_leading_blank_comment] removes a leading blank comment
from a list. *)
let remove_leading_blank_comment xs =
match xs with
| [] ->
| Comment c :: xs when is_blank c ->
| _ :: xs ->
(* A simple queue where [emit] inserts an element at the end and [elements]
returns the current list of all elements and clears the queue. *)
module Q = struct
let create () =
let q = ref [] in
let emit x =
q := x :: !q
and elements () =
let xs = List.rev !q in
q := [];
emit, elements
let conflict_comment filename =
"#@ WARNING:\n\
#@ The following sentence has been copied from \"%s\".\n\
#@ It is redundant with a sentence that appears earlier in this file,\n\
#@ so one of them must be removed.\n"
let toplevel_comment filename =
"#@ WARNING:\n\
#@ The following comment has been copied from \"%s\".\n\
#@ It may need to be proofread, updated, moved, or removed.\n"
(* [is_default_run p run] tests whether [run] is a default run, that is, a
run that consists of a single sentence and a default message. If so, it
additionally tests whether the sentence's target state satisfies [p]. *)
let is_default_run (p : Lr1.node -> bool) (run : targeted_run) =
run.message = default_message &&
let sentences : targeted_sentence list =
List.fold_left (or_comment_fold (fun xs x -> x :: xs)) [] run.elements
match sentences with
| [ (_sentence, target) ] ->
let s = target2state target in
p s
| _ ->
(* [remove_default_runs] removes from the list [runs] the default runs
whose target state satisfies [p]. *)
(* We make the assumption that a default run does not contain interesting
comments, so it is not a problem to lose these comments when the run
is removed. *)
let rec remove_default_runs p (runs : targeted_run or_comment list) =
match runs with
| [] ->
| Comment c :: runs ->
Comment c :: remove_default_runs p runs
| Thing run :: runs ->
if is_default_run p run then
remove_default_runs p (remove_leading_blank_comment runs)
Thing run :: remove_default_runs p runs
(* [keep_default_runs] keeps from the list [runs] just the default runs. *)
let keep_default_runs (runs : targeted_run or_comment list) =
List.flatten ( (function
| Comment _ ->
| Thing run ->
if is_default_run (fun _ -> true) run then
[ Thing run ]
) runs)
(* [targets run] is the set of target states of a run. *)
let targets (run : targeted_run) : Lr1.NodeSet.t =
List.fold_left (or_comment_fold (fun states (_, target) ->
let s = target2state target in
Lr1.NodeSet.add s states
)) Lr1.NodeSet.empty run.elements
(* [insert_runs inserts runs] inserts the content of the table [insert] into
the list [runs] at appropriate points that are determined by the target
states. *)
let insert_runs
(inserts : targeted_run or_comment list Lr1.NodeMap.t)
(runs : targeted_run or_comment list)
: targeted_run or_comment list =
let emit, emitted = Q.create() in
runs |> List.iter begin function
| Thing run ->
(* Emit this run. *)
emit (Thing run);
(* Then, check if the states reached by the sentences in this run appear
in the table [inserts]. If so, emit the corresponding data. *)
targets run |> Lr1.NodeSet.iter begin fun s ->
match Lr1.NodeMap.find s inserts with
| data ->
List.iter emit data
| exception Not_found ->
| Comment c ->
emit (Comment c)
(* [gather_followers] turns a list of things and comments into a list of
things-followed-with-comments. Any leading comments are silently lost. *)
let rec gather_followers (xs : 'a or_comment list) : ('a * comment list) list =
match xs with
| Comment _ :: _xs ->
(* If there is a leading comment, ignore it. I believe that in a list
of sentences, our current lexer never produces a leading comment.
Indeed, a leading comment would be considered part of the previous
toplevel comment. *)
gather_followers xs
| Thing x :: xs ->
gather_followers_thing x [] xs
| [] ->
and gather_followers_thing x cs xs =
match xs with
| Comment c :: xs ->
gather_followers_thing x (c :: cs) xs
| _ ->
(x, List.rev cs) :: gather_followers xs
(* [space xs] ensures that every thing is followed with a least one newline.
If that is not the case, a blank line is inserted. This is unpleasant, but
I have difficulty dealing with my own baroque file format. *)
let has_leading_newline = function
| Comment c ->
assert (c <> "");
c.[0] = '\n'
| Thing _ ->
let rec space (xs : 'a or_comment list) : 'a or_comment list =
match xs with
| [] ->
| Thing x1 :: x2 :: xs when not (has_leading_newline x2) ->
Thing x1 :: Comment "\n" :: space (x2 :: xs)
| x :: xs ->
x :: space xs
(* --------------------------------------------------------------------------- *)
(* If two [--merge-errors <filename>] directives are provided, compare the two
message descriptions files and produce a merged .messages file. *)
(* The code is modeled after [compare_errors] above. When we find that an
entry exists on the left-hand side yet is missing on the right-hand side,
we note that it should be added. *)
(* If multiple sentences on the left-hand side share an error message, we
attempt to preserve this feature when these sentences are copied to the
right-hand side. This prevents us from using [foreach_targeted_sentence];
we use two nested loops instead. *)
(* If the target state of a sentence on the left-hand side does not exist on
the right-hand side, then this sentence/message pair is inserted at the end
of the right-hand side.
If the target state of a sentence on the left-hand side exists also on the
right-hand side, albeit with a different message, then the left-hand
sentence/message pair must be inserted into the right-hand side at a
suitable position (that is, after the sentence/message pair that already
exists on the right-hand side). Furthermore, if the sentence/message pair
on the right-hand side involves the default message, then it should be
removed and replaced. *)
let merge_errors filename1 filename2 =
let runs1 = read_messages filename1
and runs2 = read_messages filename2 in
let runs1 = target_runs runs1
and runs2 = target_runs runs2 in
(* Remove the default runs on the right-hand side whose target state also
appears on the left-hand side. We lose no information in doing so. *)
let table1 = message_table false runs1 in
let covered1 s = Lr1.NodeMap.mem s table1 in
let runs2 = remove_default_runs covered1 runs2 in
(* Remove the default runs on the left-hand side whose target state also
appears on the right-hand side. Again, we lose nothing in doing so. *)
let table2 = message_table false runs2 in
let covered2 s = Lr1.NodeMap.mem s table2 in
let runs1 = remove_default_runs covered2 runs1 in
(* The default runs that remain on either side are unique. Set them aside,
to be copied at the end. *)
let default1 = keep_default_runs runs1
and default2 = keep_default_runs runs2
and runs1 = remove_default_runs (fun _ -> true) runs1
and runs2 = remove_default_runs (fun _ -> true) runs2 in
(* Use [append] when a run must be appended at the end. *)
let (append : targeted_run or_comment -> unit), appended =
(* Use [insert] when a run must be inserted at a specific point. *)
let inserts : targeted_run or_comment list Lr1.NodeMap.t ref =
ref Lr1.NodeMap.empty in
let insert (s : Lr1.node) (newer : targeted_run or_comment list) =
let earlier = try Lr1.NodeMap.find s !inserts with Not_found -> [] in
inserts := Lr1.NodeMap.add s (earlier @ newer) !inserts
runs1 |> List.iter begin fun entry ->
match entry with
| Comment c ->
(* We do not want to lose the toplevel comments in the left-hand
file, so we append them. This is not great, as they may become
badly placed. We cannot really do better, though, as we do not
know with what sentence they should be attached. (It may even
be the case that they should be split and attached partly with
the previous sentence and partly with the next one.) *)
if not (is_blank c) then begin
append (Comment (toplevel_comment filename1));
append entry
| Thing run1 ->
let message1 = run1.message in
assert (message1 <> default_message);
(* The sentences in the queue [retained] are to be associated with
[message1], forming a run, which is to be inserted at the end. *)
let retain, retained = Q.create() in
(* The fact that [run1.elements] is a mixture of sentences and comments is
problematic. We do not know which comments are intended to be paired
with which sentences. We adopt the convention that a comment is
associated with the sentence that precedes it. The auxiliary
function [gather_followers] helps us follow this convention. *)
|> gather_followers
|> List.iter begin fun ((sentence1, target1), comments) ->
let comments = (fun c -> Comment c) comments in
let s = target2state target1 in
match Lr1.NodeMap.find s table2 with
| exception Not_found ->
(* This sentence is missing on the right-hand side, so this pair
of a sentence and message must be retained. The accompanying
comments are preserved. *)
retain (Thing (sentence1, target1));
List.iter retain comments
| _sentence2, message2 ->
assert (message2 <> default_message);
if message1 <> message2 then begin
(* This sentence exists on the right-hand side, with a different
message, so this sentence and message must be inserted in the
right-hand side. We construct a singleton run (consisting of
just one sentence and one message) and schedule it for
insertion. If this sentence was part of a group of several
sentences that share a message, then this sharing is lost.
Preserving it would be difficult. The user can manually
recreate it if desired. *)
let c = conflict_comment filename1 in
let elements = Thing (sentence1, target1) :: comments in
let run = { run1 with elements } in
insert s [Comment c; Thing run]
end; (* end of the loop over the elements of this run *)
(* If the queue [retained] is nonempty, then all of the sentences in it
must be associated with [message1], forming a run, which must be
inserted at the end. *)
let retained = retained() in
if retained <> [] then begin
let elements = retained in
let run = { run1 with elements } in
append (Thing run)
end; (* end of the loop over runs *)
(* The new data is constructed as follows: *)
let runs =
(* The non-default runs in [runs2], into which we insert some runs
from [run1]. *)
insert_runs !inserts runs2 @
(* The non-default runs from [runs1] that we have decided to append
at the end. *)
appended() @
(* The default runs from both sides. *)
default1 @
(* Print. *)
List.iter write_targeted_run (space runs)
let () =
Settings.merge_errors |> Option.iter (fun (filename1, filename2) ->
merge_errors filename1 filename2;
exit 0
(* --------------------------------------------------------------------------- *)
(* If [--update-errors <filename>] is set, update the error message
descriptions found in file [filename]. The idea is to re-generate
the auto-comments, which are marked with ##, while leaving the
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment