LRijkstra.ml 18.4 KB
Newer Older
1 2
open Grammar

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
(* ------------------------------------------------------------------------ *)

(* This returns the list of reductions of [state] on token [z]. This
   is a list of zero or one elements. *)

let reductions s z =
  assert (not (Terminal.equal z Terminal.error));
  try
    TerminalMap.find z (Lr1.reductions s)
  with Not_found ->
    []

(* This tests whether state [s] is willing to reduce some production
   when the lookahead symbol is [z]. This test takes a possible default
   reduction into account. *)

let has_reduction s z : Production.index option =
  assert (not (Terminal.equal z Terminal.error));
  match Invariant.has_default_reduction s with
  | Some (prod, _) ->
      Some prod
  | None ->
      match reductions s z with
      | prod :: prods ->
          assert (prods = []);
          Some prod
      | [] ->
          None

(* This tests whether state [s] will initiate an error on the lookahead
   symbol [z]. *)

let causes_an_error s z =
  assert (not (Terminal.equal z Terminal.error));
  match Invariant.has_default_reduction s with
  | Some _ ->
      false
  | None ->
      reductions s z = [] &&
      not (SymbolMap.mem (Symbol.T z) (Lr1.transitions s))

44 45 46 47 48 49 50 51
  let id x = x
  let some x = Some x

let update_ref r f : bool =
  let v = !r in
  let v' = f v in
  v != v' && (r := v'; true)

52 53 54 55 56 57 58 59 60 61 62 63
let update add find none some key m f =
  match find key m with
  | data ->
      let data' = f (some data) in
      if data' == data then
        m
      else
        add key data' m
  | exception Not_found ->
      let data' = f none in
      add key data' m

64 65 66
module MyMap (X : Map.OrderedType) = struct
  include Map.Make(X)
  let update none some key m f =
67
    update add find none some key m f
68 69
end

70 71 72 73 74 75 76
module W : sig

  type word
  val epsilon: word
  val singleton: Terminal.t -> word
  val append: word -> word -> word
  val length: word -> int
77
  val first: word -> Terminal.t -> Terminal.t
78
  val elements: word -> Terminal.t list
79
  val print: word -> string
80

81 82
  val stats: unit -> unit

83 84
end = struct

85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
  let () =
    assert (Terminal.n < 256)

  let t2c (t : Terminal.t) : char =
    Char.chr (Terminal.t2i t)
  let c2t (t : char) : Terminal.t =
    Obj.magic (Char.code t)

  module H = Hashtbl.Make(struct
      type t = string
      let equal = (=)
      let hash = Hashtbl.hash
  end)
  let table = H.create 1023
  let c = ref 0
  let n = ref 0
  let intern s =
    incr c;
    try
      H.find table s
    with Not_found ->
      incr n;
      H.add table s s;
      s

  type word = string
  let epsilon = ""
  let singleton t = intern (String.make 1 (t2c t))
  let append w1 w2 = intern (w1 ^ w2)
  let length = String.length
  let first w z = if length w > 0 then c2t w.[0] else z
  let rec chars i n w =
    if i = n then
      []
    else
      c2t w.[i] :: chars (i + 1) n w
  let elements w =
    chars 0 (String.length w) w
123
  let print w =
124
    string_of_int (length w) ^ " " ^
125
    String.concat " " (List.map Terminal.print (elements w))
126

127 128 129 130 131 132
  let stats () =
    Printf.fprintf stderr
      "Number of calls to intern: %d\n\
       Number of elements in intern table: %d\n%!"
      !c !n

133 134 135 136
end

module Q = LowIntegerPriorityQueue

137 138 139 140
module Trie = struct

  let c = ref 0

141 142
  type trie = {
    identity: int;
143 144
    source: Lr1.node;
    target: Lr1.node;
145
    productions: Production.index list;
146
    transitions: trie SymbolMap.t;
147
  }
148

149
  let mktrie source target productions transitions =
150
    let identity = Misc.postincrement c in
151
    { identity; source; target; productions; transitions }
152

153 154
  let empty source =
    mktrie source source [] SymbolMap.empty
155

156 157
  let is_empty t =
    t.productions = [] && SymbolMap.is_empty t.transitions
158

159 160
  let accepts prod t =
    List.mem prod t.productions
161

162
  let rec insert target w prod t =
163 164
    match w with
    | [] ->
165
        mktrie t.source target (prod :: t.productions) t.transitions
166
    | a :: w ->
167 168 169 170 171 172 173 174 175 176
        match SymbolMap.find a (Lr1.transitions target) with
        | successor ->
            let child = mktrie t.source successor [] SymbolMap.empty in
            mktrie t.source target t.productions
              (update SymbolMap.add SymbolMap.find child id a t.transitions (insert successor w prod))
        | exception Not_found ->
            t

  let insert w prod t =
    insert t.source w prod t
177

178
  let derivative a t =
179
    try
180
      SymbolMap.find a t.transitions
181
    with Not_found ->
182 183 184 185
      assert false

  let has_derivative a t =
    SymbolMap.mem a t.transitions
186

187 188
  let compare t1 t2 =
    Pervasives.compare (t1.identity : int) t2.identity
189

190 191
  let rec size t =
    SymbolMap.fold (fun _ child accu -> size child + accu) t.transitions 1
192

193 194
end

195
type fact = {
196
  future: Trie.trie;
197
  word: W.word;
198
  lookahead: Terminal.t
199 200
}

201 202 203 204 205 206
let source fact =
  fact.future.Trie.source

let target fact =
  fact.future.Trie.target

207
let extensible fact sym =
208
  Trie.has_derivative sym fact.future
209

210 211 212 213 214 215
let foreach_terminal f =
  Terminal.iter (fun t ->
    if not (Terminal.equal t Terminal.error) then
      f t
  )

216 217 218 219 220 221 222 223
let star s : Trie.trie =
  SymbolMap.fold (fun sym _ accu ->
    match sym with
    | Symbol.T _ ->
        accu
    | Symbol.N nt ->
        Production.foldnt nt accu (fun prod accu ->
          let w = Array.to_list (Production.rhs prod) in
224
          (* could insert this branch only if viable -- leads to 12600 instead of 12900 in ocaml.mly --lalr *)
225
          Trie.insert w prod accu
226
        )
227
  ) (Lr1.transitions s) (Trie.empty s)
228

229 230 231 232
let q =
  Q.create()

let add fact =
233 234
  (* assert (not (causes_an_error (target fact) fact.lookahead)); *)

235 236
  (* The length of the word serves as the priority of this fact. *)
  Q.add q fact (W.length fact.word)
POTTIER Francois's avatar
POTTIER Francois committed
237 238
    (* In principle, there is no need to insert the fact into the queue
       if [T] already stores a comparable fact. *)
239

240 241
let stars = ref 0

242
let init s =
243
  let trie = star s in
244 245 246 247
  let size = (Trie.size trie) in
  stars := !stars + size;
  Printf.fprintf stderr "State %d has a star of size %d\n.%!"
    (Lr1.number s) size;
POTTIER Francois's avatar
POTTIER Francois committed
248
  if not (Trie.is_empty trie) then
249
    foreach_terminal (fun z ->
250 251 252 253 254 255
      if not (causes_an_error s z) then
        add {
          future = trie;
          word = W.epsilon;
          lookahead = z
        }
256
    )
257 258

module T : sig
259 260

  (* [register fact] registers the fact [fact]. It returns [true] if this fact
261
     is new, i.e., no fact concerning the same quintuple of [source], [future],
262 263 264 265 266
     [target], [a], and [z] was previously known. *)
  val register: fact -> bool

  (* [query target z f] enumerates all known facts whose target state is [target]
     and whose lookahead assumption is [z]. *)
267
  val query: Lr1.node -> Terminal.t -> (fact -> unit) -> unit
268

269 270
  val stats: unit -> unit

271
end = struct
272

273 274 275 276 277 278 279 280 281 282 283
  (* This module implements a set of facts. Two facts are considered equal
     (for the purposes of this set) if they have the same [future], [a], and
     [z] fields. The [word] is not considered. Indeed, we are not interested
     in keeping track of several words that produce the same effect. Only the
     shortest such word is of interest. *)

  (* We need to query the set of facts in two ways. In [register], we need to
     test whether a fact is in the set. In [query], we need to find all facts
     that match a pair [target, z]. For this reason, we use a two-level table.
     The first level is a matrix indexed by [target] and [z]. At the second
     level, we find sets of facts. *)
284 285 286
(**)

  module M =
287
    MySet.Make(struct
288 289 290
      type t = fact
      let compare fact1 fact2 =
        let c = Trie.compare fact1.future fact2.future in
291
        if c <> 0 then c else
292 293
        let a1 = W.first fact1.word fact1.lookahead
        and a2 = W.first fact2.word fact2.lookahead in
294 295
        Terminal.compare a1 a2
    end)
296

297 298 299 300 301
  let table = (* a pretty large table... *)
    Array.make (Lr1.n * Terminal.n) M.empty

  let index target z =
    Terminal.n * (Lr1.number target) + Terminal.t2i z
302

303 304
  let count = ref 0

305
  let register fact =
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
    let target = target fact in
    let z = fact.lookahead in
    let i = index target z in
    let m = table.(i) in
    (* We crucially rely on the fact that [M.add] guarantees not to
       change the set if an ``equal'' fact already exists. Thus, a
       later, longer path is ignored in favor of an earlier, shorter
       path. *)
    let m' = M.add fact m in
    m != m' && begin
      incr count;
      table.(i) <- m';
      true
    end

  let query target z f =
    let i = index target z in
    let m = table.(i) in
    M.iter f m
325

326 327 328
  let stats () =
    Printf.fprintf stderr "T stores %d facts.\n%!" !count

329 330 331 332 333 334 335 336 337
end

(* The module [E] is in charge of recording the non-terminal edges that we have
   discovered, or more precisely, the conditions under which these edges can be
   taken. *)

module E : sig

  (* [register s nt w z] records that, in state [s], the outgoing edge labeled
338 339 340
     [nt] can be taken by consuming the word [w], if the next symbol is [z].
     It returns [true] if this information is new. *)
  val register: Lr1.node -> Nonterminal.t -> W.word -> Terminal.t -> bool
341 342 343 344

  (* [query s nt a z] answers whether, in state [s], the outgoing edge labeled
     [nt] can be taken by consuming some word [w], under the assumption that
     the next symbol is [z], and under the constraint that the first symbol of
345 346
     [w.z] is [a]. *)
  val query: Lr1.node -> Nonterminal.t -> Terminal.t -> Terminal.t -> (W.word -> unit) -> unit
347

348 349
  val stats: unit -> unit

350 351 352 353 354
end = struct

  (* For now, we implement a mapping of [s, nt, a, z] to [w]. *)

  module M =
355
    MyMap(struct
356 357 358 359 360 361 362 363 364 365 366 367 368 369
      type t = Lr1.node * Nonterminal.t * Terminal.t * Terminal.t
      let compare (s1, nt1, a1, z1) (s2, nt2, a2, z2) =
        let c = Lr1.Node.compare s1 s2 in
        if c <> 0 then c else
        let c = Nonterminal.compare nt1 nt2 in
        if c <> 0 then c else
        let c = Terminal.compare a1 a2 in
        if c <> 0 then c else
        Terminal.compare z1 z2
    end)

  let m =
    ref M.empty

370 371
  let count = ref 0

372
  let register s nt w z =
373
    let a = W.first w z in
374 375 376
    update_ref m (fun m ->
      M.update None some (s, nt, a, z) m (function
      | None ->
377
          incr count;
378 379
          w
      | Some earlier_w ->
380
          (* assert (W.length earlier_w <= W.length w); *)
381 382 383
          earlier_w
      )
    )
384

385 386 387 388
  let query s nt a z f =
    match M.find (s, nt, a, z) !m with
    | w -> f w
    | exception Not_found -> ()
389

390 391 392
  let stats () =
    Printf.fprintf stderr "E stores %d facts.\n%!" !count

393 394
end

395
let new_edge s nt w z =
POTTIER Francois's avatar
POTTIER Francois committed
396 397 398 399
  (*
  Printf.fprintf stderr "Considering reduction on %s in state %d\n"
    (Terminal.print z) (Lr1.number s);
  *)
400
  if E.register s nt w z then
401
    let sym = (Symbol.N nt) in
402
    T.query s (W.first w z) (fun fact ->
403 404 405 406 407 408 409 410 411 412
      if extensible fact sym then begin
        let future = Trie.derivative sym fact.future in
        assert (Terminal.equal fact.lookahead (W.first w z));
        if not (causes_an_error future.Trie.target z) then
          add {
            future;
            word = W.append fact.word w;
            lookahead = z
          }
      end
413
    )
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435

(* [consequences fact] is invoked when we discover a new fact (i.e., one that
   was not previously known). It studies the consequences of this fact. These
   consequences are of two kinds:

   - As in Dijkstra's algorithm, the new fact can be viewed as a newly
   discovered vertex. We study its (currently known) outgoing edges,
   and enqueue new facts in the priority queue.

   - Sometimes, a fact can also be viewed as a newly discovered edge.
   This is the case when the word from [fact.source] to [fact.target]
   represents a production of the grammar and [fact.target] is willing
   to reduce this production. We record the existence of this edge,
   and re-inspect any previously discovered vertices which are
   interested in this outgoing edge.
*)
(**)

let consequences fact =

  (* 1. View [fact] as a vertex. Examine the transitions out of [fact.target]. *)
  
436
  SymbolMap.iter (fun sym s' ->
437
    if extensible fact sym then
438 439 440
      match sym with
      | Symbol.T t ->

441 442 443 444
          (* 1a. There is a transition labeled [t] out of [fact.target]. If
             the lookahead assumption [fact.lookahead] is compatible with [t],
             then we derive a new fact, where one more edge has been taken. We
             enqueue this new fact for later examination. *)
445 446 447
          (**)

          if Terminal.equal fact.lookahead t then
448 449
            let future = Trie.derivative sym fact.future
            and word = W.append fact.word (W.singleton t) in
450 451 452 453
            (* assert (Lr1.Node.compare future.Trie.target s' = 0); *)
            foreach_terminal (fun z ->
              if not (causes_an_error s' z) then
                add { future; word; lookahead = z }
454 455 456 457 458 459 460 461 462 463 464 465
            )

      | Symbol.N nt ->

          (* 1b. There is a transition labeled [nt] out of [fact.target]. We
             need to know how this nonterminal edge can be taken. We query for a
             word [w] that allows us to take this edge. The answer depends on
             the terminal symbol [z] that comes *after* this word: we try all
             such symbols. Furthermore, we need the first symbol of [w.z] to
             satisfy the lookahead assumption [fact.lookahead], so the answer
             also depends on this assumption. *)
          (**)
466

467
          let future = Trie.derivative sym fact.future in
468
          foreach_terminal (fun z ->
469 470 471 472 473 474 475 476
            if not (causes_an_error s' z) then
              E.query (target fact) nt fact.lookahead z (fun w ->
                assert (Terminal.equal fact.lookahead (W.first w z));
                add {
                  future;
                  word = W.append fact.word w;
                  lookahead = z
                }
477
            )
478
          )
479

480
  ) (Lr1.transitions (target fact));
481 482 483 484

  (* 2. View [fact] as a possible edge. This is possible if the path from
     [fact.source] to [fact.target] represents a production [prod] and
     [fact.target] is willing to reduce this production. We check that
485 486 487 488 489
     [fact.future] accepts [epsilon]. This guarantees that reducing [prod]
     takes us all the way back to [fact.source]. Thus, this production gives
     rise to an edge labeled [nt] -- the left-hand side of [prod] -- out of
     [fact.source]. This edge is subject to the lookahead assumption
     [fact.lookahead], so we record that. *)
490 491
  (**)

492
  match has_reduction (target fact) fact.lookahead with
493
  | Some prod when Trie.accepts prod fact.future ->
494
      new_edge (source fact) (Production.nt prod) fact.word fact.lookahead
495 496 497
  | _ ->
      ()

498
let level = ref 0
499

POTTIER Francois's avatar
POTTIER Francois committed
500 501
let done_with_level () =
  Printf.fprintf stderr "Done with level %d.\n" !level;
502
  W.stats();
POTTIER Francois's avatar
POTTIER Francois committed
503 504 505 506
  T.stats();
  E.stats();
  Printf.fprintf stderr "Q stores %d facts.\n%!" (Q.cardinal q)

507
let discover fact =
508
  if T.register fact then begin
509
    if W.length fact.word > ! level then begin
POTTIER Francois's avatar
POTTIER Francois committed
510
      done_with_level();
511 512
      level := W.length fact.word;
    end;
513
    consequences fact
514
  end
515

POTTIER Francois's avatar
POTTIER Francois committed
516
let () =
517
  Lr1.iter init;
518
  Printf.fprintf stderr "Cumulated star size: %d\n%!" !stars;
519
  Q.repeat q discover;
520
  Time.tick "Running LRijkstra";
POTTIER Francois's avatar
POTTIER Francois committed
521
  done_with_level()
522

523 524
(* ------------------------------------------------------------------------ *)

525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
(* The following code validates the fact that an error can be triggered in
   state [s'] by beginning in the initial state [s] and reading the
   sequence of terminal symbols [w]. We use this for debugging purposes. *)

let fail msg =
  Printf.fprintf stderr "coverage: internal error: %s.\n%!" msg;
  false

open ReferenceInterpreter

let validate s s' w : bool =
  match
    ReferenceInterpreter.check_error_path (Lr1.nt_of_entry s) (W.elements w)
  with
  | OInputReadPastEnd ->
      fail "input was read past its end"
  | OInputNotFullyConsumed ->
      fail "input was not fully consumed"
  | OUnexpectedAccept ->
      fail "input was unexpectedly accepted"
  | OK state ->
      Lr1.Node.compare state s' = 0 ||
      fail (
        Printf.sprintf "error occurred in state %d instead of %d"
          (Lr1.number state)
          (Lr1.number s')
      )

(* ------------------------------------------------------------------------ *)

555 556 557 558 559 560 561
(* We now wish to determine, given a state [s'] and a terminal symbol [z], a
   minimal path that takes us from some entry state to state [s'] with [z] as
   the next (unconsumed) symbol. *)

(* This can be formulated as a search for a shortest path in a graph. The
   graph is not just the automaton, though. It is a (much) larger graph whose
   vertices are pairs [s, z] and whose edges are obtained by querying the
562
   module [E] above. *)
563

564
let forward () =
565

566
  let module A = Astar.Make(struct
567

568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
    (* A vertex is a pair [s, z].
       [z] cannot be the [error] token. *)
    type node =
        Lr1.node * Terminal.t

    let equal (s'1, z1) (s'2, z2) =
      Lr1.Node.compare s'1 s'2 = 0 && Terminal.compare z1 z2 = 0

    let hash (s, z) =
      Hashtbl.hash (Lr1.number s, z)

    (* An edge is labeled with a word. *)
    type label =
      W.word

    (* Forward search from every [s, z], where [s] is an initial state. *)
    let sources f =
      foreach_terminal (fun z ->
        ProductionMap.iter (fun _ s ->
          f (s, z)
        ) Lr1.entry
589 590
      )

591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
    let successors (s, z) edge =
      assert (not (Terminal.equal z Terminal.error));
      SymbolMap.iter (fun sym s' ->
        match sym with
        | Symbol.T t ->
            if Terminal.equal z t then
              let w = W.singleton t in
              foreach_terminal (fun z ->
                edge w 1 (s', z)
              )
        | Symbol.N nt ->
           foreach_terminal (fun z' ->
             E.query s nt z z' (fun w ->
               edge w (W.length w) (s', z')
             )
           )
      ) (Lr1.transitions s)

    let estimate _ =
      0

  end) in

  (* Search forward. *)

616
  Printf.fprintf stderr "Forward search:\n%!";
617
  let seen = ref Lr1.NodeSet.empty in
618
  let _, _ = A.search (fun ((s', z), path) ->
619 620 621 622 623 624 625 626 627 628 629 630 631 632
    if causes_an_error s' z && not (Lr1.NodeSet.mem s' !seen) then begin
      seen := Lr1.NodeSet.add s' !seen;
      (* An error can be triggered in state [s'] by beginning in the initial
         state [s] and reading the sequence of terminal symbols [w]. *)
      let (s, _), ws = A.reverse path in
      let w = List.fold_right W.append ws (W.singleton z) in
      Printf.fprintf stderr
        "An error can be reached from state %d to state %d:\n%!"
        (Lr1.number s)
        (Lr1.number s');
      Printf.fprintf stderr "%s\n%!" (W.print w);
      assert (validate s s' w)
    end
  ) in
633 634 635
  Printf.fprintf stderr "Reachable (forward): %d states\n%!"
    (Lr1.NodeSet.cardinal !seen);
  !seen
636

637 638
(* TEMPORARY what about the pseudo-token [#]? *)
(* TEMPORARY the code in this module should run only if --coverage is set *)
639 640

let () =
641 642
  let f = forward() in
  Time.tick "Forward search";
643
  ignore f