conflict.ml 18.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15
open Grammar

16 17 18 19 20
let () =
  if Settings.graph then
    DependencyGraph.print_dependency_graph()
  (* artificial dependency *)

21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
(* -------------------------------------------------------------------------- *)
(* Explaining shift actions. *)

(* The existence of a shift action stems from the existence of a shift
   item in the LR(0) core that underlies the LR(1) state of interest.
   That is, lookahead sets are not relevant. The existence of a shift
   item in the LR(0) core is explained by finding a path from a start
   item to the shift item in the LR(0) nondeterministic automaton,
   such that the symbols read along this path form the (previously
   fixed) symbol string that leads to the conflict state in the LR(1)
   automaton. There may be several such paths: a shortest one is
   chosen. There may also be several shift items in the conflict
   state: an arbitrary one is chosen. I believe it would not be
   interesting to display traces for several shift items: they would
   be identical except in their last line (where the desired shift
   item actually appears). *)

(* Symbolic execution of the nondeterministic LR(0) automaton. *)

(* Configurations are pairs of an LR(0) item and an offset into
   the input string, which indicates how much has been read so
   far. *)

type configuration0 =
    Item.t * int

(* This function builds a derivation out of a (nonempty, reversed) sequence of
   configurations. The derivation is constructed from bottom to top, that is,
   beginning at the last configuration and moving back towards to the start
   configuration. *)

let rec follow derivation offset' = function
  | [] ->
      assert (offset' = 0);
      derivation
  | (item, offset) :: configs ->
      let _, _, rhs, pos, _ = Item.def item in
      let derivation =
59 60 61 62 63 64 65 66
        if offset = offset' then
          (* This is an epsilon transition. Put a new root node on top of
             the existing derivation. *)
          Derivation.build pos rhs derivation None
        else
          (* This was a shift transition. Tack symbol in front of the
             forest. *)
          Derivation.prepend rhs.(pos) derivation
67 68 69 70 71 72 73
      in
      follow derivation offset configs

(* Symbolic execution begins with a start item (corresponding
   to one of the automaton's entry nodes), a fixed string of
   input symbols, to be fully consumed, and a goal item. The
   objective is to find a path through the automaton that
74
   leads from the start configuration [(start, 0)] to the goal
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
   configuration [(stop, n)], where [n] is the length of the
   input string. The automaton is explored via breadth-first
   search. A hash table is used to record which configurations
   have been visited and to build a spanning tree of shortest
   paths. *)

exception Done

let explain_shift_item
    (start : Item.t)
    (input : Symbol.t array)
    (stop : Item.t)
    : Derivation.t =

  let n =
    Array.length input
  in

  let table : (configuration0, configuration0 option) Hashtbl.t =
    Hashtbl.create 1023
  in

  let queue : configuration0 Queue.t =
    Queue.create()
  in

  let enqueue ancestor config =
    try
      let _ = Hashtbl.find table config in
      ()
    with Not_found ->
      Hashtbl.add table config ancestor;
      Queue.add config queue
  in

  enqueue None (start, 0);
  try
    Misc.qiter (function (item, offset) as config ->

      (* If the item we're looking at is the goal item and if
115
         we have read all of the input symbols, stop. *)
116 117

      if (Item.equal item stop) && (offset = n) then
118
        raise Done;
119 120 121 122 123 124

      (* Otherwise, explore the transitions out of this item. *)

      let prod, _, rhs, pos, length = Item.def item in

      (* Shift transition, followed only if the symbol matches
125
         the symbol found in the input string. *)
126 127 128 129

      if (pos < length)
      && (offset < n)
      && (Symbol.equal rhs.(pos) input.(offset)) then begin
130 131
        let config' = (Item.import (prod, pos+1), offset+1) in
        enqueue (Some config) config'
132 133 134 135 136
      end;

      (* Epsilon transitions. *)

      if pos < length then
137 138 139 140 141
        match rhs.(pos) with
        | Symbol.N nt ->
            Production.iternt nt (fun prod ->
              let config' = (Item.import (prod, 0), offset) in
              enqueue (Some config) config'
142
            )
143 144
        | Symbol.T _ ->
            ()
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196

    ) queue;
    assert false
  with Done ->

    (* We have found a (shortest) path from the start configuration to
       the goal configuration. Turn it into an explicit derivation. *)

    let configs = Misc.materialize table (stop, n) in
    let _, _, rhs, pos, _ = Item.def stop in
    let derivation = Derivation.tail pos rhs in
    let derivation = follow derivation n configs in
    derivation

(* -------------------------------------------------------------------------- *)
(* Explaining reduce actions. *)

(* The existence of a reduce action stems from the existence of a
   reduce item, whose lookahead set contains the token of interest, in
   the state of interest. Here, lookahead sets are relevant only
   insofar as they contain or do not contain the token of interest --
   in other words, lookahead sets can be abstracted by Boolean
   values. The existence of the reduce item is explained by finding a
   path from a start item to the reduce item in the LR(1)
   nondeterministic automaton, such that the symbols read along this
   path form the (previously fixed) symbol string that leads to the
   conflict state in the LR(1) automaton. There may be several such
   paths: a shortest one is chosen. *)

(* Symbolic execution of the nondeterministic LR(1) automaton. *)

(* Configurations are pairs of an LR(1) item and an offset into the
   input string, which indicates how much has been read so far. An
   LR(1) item is itself represented as the combination of an LR(0)
   item and a Boolean flag, telling whether the token of interest
   appears or does not appear in the lookahead set. *)

type configuration1 =
    Item.t * bool * int

(* This function builds a derivation out of a sequence of configurations. The
   end of the sequence is dealt with specially -- we want to explain how the
   lookahead symbol appears and is inherited. Once that is done, the rest
   (that is, the beginning) of the derivation is dealt with as above. *)

let config1toconfig0 (item, _, offset) =
  (item, offset)

let rec follow1 tok derivation offset' = function
  | [] ->
      assert (Terminal.equal tok Terminal.sharp);
      (* One could emit a comment saying that the lookahead token is
197 198 199
         initially [#]. That comment would have to be displayed above
         the derivation, though, and there is no support for that
         at the moment, so let's skip it. *)
200 201
      derivation
  | (item, _, offset) :: configs ->
202
      let prod, _, rhs, pos, length = Item.def item in
203 204
      if offset = offset' then

205 206 207
        (* This is an epsilon transition. Attack a new line and add
           a comment that explains why the lookahead symbol is
           produced or inherited. *)
208

209
        let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in
210

211
        if TerminalSet.mem tok first then
212

213 214
          (* The lookahead symbol is produced (and perhaps also inherited,
             but let's ignore that). *)
215

216 217 218 219 220 221 222
          let e = Analysis.explain_first_rhs tok rhs (pos + 1) in
          let comment =
            "lookahead token appears" ^ (if e = "" then "" else " because " ^ e)
          in
          let derivation =
            Derivation.build pos rhs derivation (Some comment)
          in
223

224 225
          (* Print the rest of the derivation without paying attention to
             the lookahead symbols. *)
226

227
          follow derivation offset (List.map config1toconfig0 configs)
228

229
        else begin
230

231
          (* The lookahead symbol is not produced, so it is definitely inherited. *)
232

233
          assert nullable;
234

235 236 237 238 239 240 241
          let comment =
            "lookahead token is inherited" ^
            (if pos + 1 < length then Printf.sprintf " because %scan vanish" (Symbol.printao (pos + 1) rhs) else "")
          in
          let derivation =
            Derivation.build pos rhs derivation (Some comment)
          in
242

243
          follow1 tok derivation offset configs
244

245
        end
246 247 248

      else

249
        (* This is a shift transition. Tack symbol in front of forest. *)
250

251
        let derivation =
252 253
          Derivation.prepend rhs.(pos) derivation
        in
254

255
        follow1 tok derivation offset configs
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294

(* Symbolic execution is performed in the same manner as above. *)

let explain_reduce_item
    (tok : Terminal.t)
    (start : Item.t)
    (input : Symbol.t array)
    (stop : Item.t)
    : Derivation.t =

  let n =
    Array.length input
  in

  let table : (configuration1, configuration1 option) Hashtbl.t =
    Hashtbl.create 1023
  in

  let queue : configuration1 Queue.t =
    Queue.create()
  in

  let enqueue ancestor config =
    try
      let _ = Hashtbl.find table config in
      ()
    with Not_found ->
      Hashtbl.add table config ancestor;
      Queue.add config queue
  in

  (* If the lookahead token is #, then it initially appear in the lookahead
     set, otherwise it doesn't. *)

  enqueue None (start, Terminal.equal tok Terminal.sharp, 0);
  try
    Misc.qiter (function (item, lookahead, offset) as config ->

      (* If the item we're looking at is the goal item and if
295
         we have read all of the input symbols, stop. *)
296 297

      if (Item.equal item stop) && lookahead && (offset = n) then
298
        raise Done;
299 300 301

      (* Otherwise, explore the transitions out of this item. *)

302
      let prod, _nt, rhs, pos, length = Item.def item in
303 304

      (* Shift transition, followed only if the symbol matches
305
         the symbol found in the input string. *)
306 307 308 309

      if (pos < length)
      && (offset < n)
      && (Symbol.equal rhs.(pos) input.(offset)) then begin
310 311
        let config' = (Item.import (prod, pos+1), lookahead, offset+1) in
        enqueue (Some config) config'
312 313 314 315 316
      end;

      (* Epsilon transitions. *)

      if pos < length then
317 318 319 320 321 322 323 324 325 326
        match rhs.(pos) with
        | Symbol.N nt ->
            let nullable, first = Analysis.nullable_first_prod prod (pos + 1) in
            let first : bool = TerminalSet.mem tok first in
            let lookahead' =
              if nullable then first || lookahead else first
            in
            Production.iternt nt (fun prod ->
              let config' = (Item.import (prod, 0), lookahead', offset) in
              enqueue (Some config) config'
327
            )
328 329
        | Symbol.T _ ->
            ()
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348

    ) queue;
    assert false
  with Done ->

    (* We have found a (shortest) path from the start configuration to
       the goal configuration. Turn it into an explicit derivation. *)

    let configs = Misc.materialize table (stop, true, n) in
    let derivation = Derivation.empty in
    let derivation = follow1 tok derivation n configs in
    derivation

(* -------------------------------------------------------------------------- *)
(* Putting it all together. *)

let () =
  if Settings.explain then begin

349 350 351 352 353 354 355 356 357
    (* 2018/09/05: when [--explain] is enabled, always create a fresh
       .conflicts file (wiping out any pre-existing file), even if
       there are in fact no conflicts. This should avoid confusion with
       outdated .conflicts files. *)

    let out =
      open_out (Settings.base ^ ".conflicts")
    in

358
    Lr1.conflicts (fun toks node ->
359
    try
360 361

      (* Construct a partial LR(1) automaton, looking for a conflict
362 363 364 365 366 367
         in a state that corresponds to this node. Because Pager's
         algorithm can merge two states as soon as one of them has a
         conflict, we can't be too specific about the conflict that we
         expect to find in the canonical automaton. So, we must supply
         a set of conflict tokens and accept any kind of conflict that
         involves one of them. *)
368 369

      (* TEMPORARY with the new compatibility criterion, we can be
370 371
         sure that every conflict token is indeed involved in a
         conflict. Exploit that? Avoid focusing on a single token? *)
372 373

      let module P = Lr1partial.Run (struct
374 375
        let tokens = toks
        let goal = node
376 377 378
      end) in

      let closure =
379
        Lr0.closure P.goal in
380 381 382 383

      (* Determine what kind of conflict was found. *)

      let shift, reduce = Item.Map.fold (fun item toks (shift, reduce) ->
384 385 386 387 388 389 390 391 392
        match Item.classify item with
        | Item.Shift (Symbol.T tok, _)
          when Terminal.equal tok P.token ->
              shift + 1, reduce
        | Item.Reduce _
          when TerminalSet.mem P.token toks ->
            shift, reduce + 1
        | _ ->
            shift, reduce
393 394 395
      ) closure (0, 0) in

      let kind =
396 397 398 399 400 401
        if (shift > 0) && (reduce > 1) then
          "shift/reduce/reduce"
        else if (shift > 0) then
          "shift/reduce"
        else
          "reduce/reduce"
402 403 404 405 406 407
      in

      (* Explain how the conflict state is reached. *)

      Printf.fprintf out "\n\
        ** Conflict (%s) in state %d.\n\
408 409
        ** Token%s involved: %s\n%s\
        ** This state is reached from %s after reading:\n\n%s\n"
410 411 412 413
      kind (Lr1.number node)
      (if TerminalSet.cardinal toks > 1 then "s" else "")
      (TerminalSet.print toks)
      (if TerminalSet.cardinal toks > 1 then
414
        Printf.sprintf "** The following explanations concentrate on token %s.\n" (Terminal.print P.token)
415 416 417 418 419
      else "")
      (Nonterminal.print false (Item.startnt P.source))
      (Symbol.printa P.path);

      (* Examine the items in that state, focusing on one particular
420 421
         token. Out of the shift items, we explain just one -- this
         seems enough. We explain each of the reduce items. *)
422 423 424 425

      (* First, build a mapping of items to derivations. *)

      let (_ : bool), derivations =
426 427
        Item.Map.fold (fun item toks (still_looking_for_shift_item, derivations) ->
          match Item.classify item with
428

429 430
          | Item.Shift (Symbol.T tok, _)
            when still_looking_for_shift_item && (Terminal.equal tok P.token) ->
431

432 433 434
              false,
              let derivation = explain_shift_item P.source P.path item in
              Item.Map.add item derivation derivations
435

436 437
          | Item.Reduce _
            when TerminalSet.mem P.token toks ->
438

439 440 441
              still_looking_for_shift_item,
              let derivation = explain_reduce_item P.token P.source P.path item in
              Item.Map.add item derivation derivations
442

443
          | _ ->
444

445 446
              still_looking_for_shift_item,
              derivations
447 448 449 450 451

        ) closure (true, Item.Map.empty)
      in

      (* Factor out the common context among all derivations, so as to avoid
452 453
         repeating it. This helps prevent derivation trees from drifting too
         far away towards the right. It also helps produce sub-derivations
454 455 456
         that are quite compact. *)

      let context, derivations =
457
        Derivation.factor derivations
458 459 460 461 462
      in

      (* Display the common context. *)

      Printf.fprintf out
463 464
        "\n** The derivations that appear below have the following common factor:\
         \n** (The question mark symbol (?) represents the spot where the derivations begin to differ.)\n\n";
465 466 467 468 469 470
      Derivation.printc out context;

      (* Then, display the sub-derivations. *)

      Item.Map.iter (fun item derivation ->

471 472 473 474
        Printf.fprintf out
          "\n** In state %d, looking ahead at %s, "
          (Lr1.number node)
          (Terminal.print P.token);
475

476 477 478 479 480 481 482 483
        begin match Item.classify item with
        | Item.Shift _ ->
            Printf.fprintf out "shifting is permitted\n** because of the following sub-derivation:\n\n"
        | Item.Reduce prod ->
            Printf.fprintf out
              "reducing production\n** %s\n** is permitted because of the following sub-derivation:\n\n"
              (Production.print prod)
        end;
484

485
        Derivation.print out derivation
486 487 488 489 490

      ) derivations;

      flush out

491
    with Lr1partial.Oops ->
492

493 494 495 496 497 498 499
      (* Ha ha! We were unable to explain this conflict. This could happen
         because the automaton was butchered by conflict resolution directives,
         or because [--lalr] was enabled and we have unexplainable LALR conflicts.
         Anyway, send the error message to the .conflicts file and continue. *)

      Printf.fprintf out "\n\
        ** Conflict (unexplainable) in state %d.\n\
500 501
        ** Token%s involved: %s\n\
        ** %s.\n%!"
502 503 504
      (Lr1.number node)
      (if TerminalSet.cardinal toks > 1 then "s" else "")
      (TerminalSet.print toks)
505 506 507 508 509 510 511 512
      (match Settings.construction_mode with
      | Settings.ModeLALR ->
          "This may be an artificial conflict caused by your use of --lalr"
      | Settings.ModeCanonical
      | Settings.ModeInclusionOnly
      | Settings.ModePager ->
          "Please send your grammar to Menhir's developers"
      )
513

514 515 516 517 518 519 520 521 522 523 524 525
    );
    Time.tick "Explaining conflicts"

  end

(* ------------------------------------------------------------------------ *)
(* Resolve the conflicts that remain in the automaton. *)

let () =
  Lr1.default_conflict_resolution();
  Time.tick "Resolving remaining conflicts"

POTTIER Francois's avatar
POTTIER Francois committed
526 527 528
(* ------------------------------------------------------------------------ *)
(* Now is as good a time as any to add extra reductions, if requested by the
   user. This must be done after conflicts have been resolved. *)
529 530 531 532 533

let () =
  Lr1.extra_reductions();
  Time.tick "Adding extra reductions"

534 535 536 537 538 539 540
(* ------------------------------------------------------------------------ *)
(* If any warnings about the grammar have been emitted up to this point,
   and if [--strict] is enabled, now is the time to stop, before going
   into the back-end. *)

let () =
  Error.exit_if Error.grammatical_error