LRijkstra.ml 19.6 KB
Newer Older
1 2
open Grammar

3 4 5 6
(* Throughout, we ignore the [error] pseudo-token completely. We consider that
   it never appears on the input stream. Hence, any state whose incoming
   symbol is [error] is considered unreachable. *)

7 8
(* ------------------------------------------------------------------------ *)

9 10 11 12 13 14 15 16
(* We begin with a number of auxiliary functions that provide information
   about the LR(1) automaton. These functions could perhaps be moved to a
   separate module. We keep them here, for the moment, because they are not
   used anywhere else. *)

(* [reductions s z] is the list of reductions permitted in state [s] when the
   lookahead symbol is [z]. This is a list of zero or one elements. This does
   not take default reductions into account. *)
17

18
let reductions s z : Production.index list =
19 20 21 22 23
  try
    TerminalMap.find z (Lr1.reductions s)
  with Not_found ->
    []

24 25 26
(* [has_reduction s z] tells whether state [s] is willing to reduce some
   production (and if so, which one) when the lookahead symbol is [z]. It
   takes a possible default reduction into account. *)
27 28 29 30 31 32 33 34 35 36 37 38 39

let has_reduction s z : Production.index option =
  match Invariant.has_default_reduction s with
  | Some (prod, _) ->
      Some prod
  | None ->
      match reductions s z with
      | prod :: prods ->
          assert (prods = []);
          Some prod
      | [] ->
          None

40 41
(* [causes_an_error s z] tells whether state [s] will initiate an error on the
   lookahead symbol [z]. *)
42

43
let causes_an_error s z : bool =
44 45 46 47 48 49 50
  match Invariant.has_default_reduction s with
  | Some _ ->
      false
  | None ->
      reductions s z = [] &&
      not (SymbolMap.mem (Symbol.T z) (Lr1.transitions s))

51 52 53
(* [foreach_terminal f] applies the function [f] to every terminal symbol in
   turn, except [error]. *)

54 55 56 57 58 59
let foreach_terminal f =
  Terminal.iter (fun t ->
    if not (Terminal.equal t Terminal.error) then
      f t
  )

60 61 62 63 64
(* [foreach_terminal_not_causing_an_error s f] applies the function [f] to
   every terminal symbol [z] such that [causes_an_error s z] is false. This
   could be implemented in a naive manner using [foreach_terminal] and
   [causes_an_error]. This implementation is slightly more efficient. *)

65 66 67
let foreach_terminal_not_causing_an_error s f =
  match Invariant.has_default_reduction s with
  | Some _ ->
68
      (* There is a default reduction. No symbol causes an error. *)
69 70
      foreach_terminal f
  | None ->
71 72 73 74 75
      (* Enumerate every terminal symbol [z] for which there is a
         reduction. *)
      TerminalMap.iter (fun z _ ->
        if not (Terminal.equal z Terminal.error) then
          f z
76
      ) (Lr1.reductions s);
77 78
      (* Enumerate every terminal symbol [z] for which there is a
         transition. *)
79 80
      SymbolMap.iter (fun sym _ ->
        match sym with
81 82 83
        | Symbol.T z ->
            if not (Terminal.equal z Terminal.error) then
              f z
84 85 86 87
        | Symbol.N _ ->
            ()
      ) (Lr1.transitions s)

88 89
(* ------------------------------------------------------------------------ *)

90 91
let id x = x
let some x = Some x
92 93 94 95 96 97

let update_ref r f : bool =
  let v = !r in
  let v' = f v in
  v != v' && (r := v'; true)

98 99 100 101 102 103 104 105 106 107 108 109
let update add find none some key m f =
  match find key m with
  | data ->
      let data' = f (some data) in
      if data' == data then
        m
      else
        add key data' m
  | exception Not_found ->
      let data' = f none in
      add key data' m

110 111 112
module MyMap (X : Map.OrderedType) = struct
  include Map.Make(X)
  let update none some key m f =
113
    update add find none some key m f
114 115
end

116 117 118 119 120 121 122
module W : sig

  type word
  val epsilon: word
  val singleton: Terminal.t -> word
  val append: word -> word -> word
  val length: word -> int
123
  val first: word -> Terminal.t -> Terminal.t
124
  val elements: word -> Terminal.t list
125
  val print: word -> string
126

127 128
  val stats: unit -> unit

129 130
end = struct

131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
  let () =
    assert (Terminal.n < 256)

  let t2c (t : Terminal.t) : char =
    Char.chr (Terminal.t2i t)
  let c2t (t : char) : Terminal.t =
    Obj.magic (Char.code t)

  module H = Hashtbl.Make(struct
      type t = string
      let equal = (=)
      let hash = Hashtbl.hash
  end)
  let table = H.create 1023
  let c = ref 0
  let n = ref 0
  let intern s =
    incr c;
    try
      H.find table s
    with Not_found ->
      incr n;
      H.add table s s;
      s

  type word = string
  let epsilon = ""
  let singleton t = intern (String.make 1 (t2c t))
  let append w1 w2 = intern (w1 ^ w2)
  let length = String.length
  let first w z = if length w > 0 then c2t w.[0] else z
  let rec chars i n w =
    if i = n then
      []
    else
      c2t w.[i] :: chars (i + 1) n w
  let elements w =
    chars 0 (String.length w) w
169
  let print w =
170
    string_of_int (length w) ^ " " ^
171
    String.concat " " (List.map Terminal.print (elements w))
172

173 174 175 176 177 178
  let stats () =
    Printf.fprintf stderr
      "Number of calls to intern: %d\n\
       Number of elements in intern table: %d\n%!"
      !c !n

179 180 181 182
end

module Q = LowIntegerPriorityQueue

183 184 185 186
module Trie = struct

  let c = ref 0

187 188
  type trie = {
    identity: int;
189 190
    source: Lr1.node;
    target: Lr1.node;
191
    productions: Production.index list;
192
    transitions: trie SymbolMap.t;
193
  }
194

195
  let mktrie source target productions transitions =
196
    let identity = Misc.postincrement c in
197
    { identity; source; target; productions; transitions }
198

199 200
  let empty source =
    mktrie source source [] SymbolMap.empty
201

202 203
  let is_empty t =
    t.productions = [] && SymbolMap.is_empty t.transitions
204

205 206
  let accepts prod t =
    List.mem prod t.productions
207

208
  let rec insert target w prod t =
209 210
    match w with
    | [] ->
211
        mktrie t.source target (prod :: t.productions) t.transitions
212
    | a :: w ->
213 214 215 216 217 218 219 220 221 222
        match SymbolMap.find a (Lr1.transitions target) with
        | successor ->
            let child = mktrie t.source successor [] SymbolMap.empty in
            mktrie t.source target t.productions
              (update SymbolMap.add SymbolMap.find child id a t.transitions (insert successor w prod))
        | exception Not_found ->
            t

  let insert w prod t =
    insert t.source w prod t
223

224
  let derivative a t =
225
    SymbolMap.find a t.transitions (* careful: may raise [Not_found] *)
226

227 228
  let compare t1 t2 =
    Pervasives.compare (t1.identity : int) t2.identity
229

230 231
  let rec size t =
    SymbolMap.fold (fun _ child accu -> size child + accu) t.transitions 1
232

233 234
end

235
type fact = {
236
  future: Trie.trie;
237
  word: W.word;
238
  lookahead: Terminal.t
239 240
}

241 242 243 244 245 246
let source fact =
  fact.future.Trie.source

let target fact =
  fact.future.Trie.target

247 248 249 250 251 252 253 254
let star s : Trie.trie =
  SymbolMap.fold (fun sym _ accu ->
    match sym with
    | Symbol.T _ ->
        accu
    | Symbol.N nt ->
        Production.foldnt nt accu (fun prod accu ->
          let w = Array.to_list (Production.rhs prod) in
255
          (* could insert this branch only if viable -- leads to 12600 instead of 12900 in ocaml.mly --lalr *)
256
          Trie.insert w prod accu
257
        )
258
  ) (Lr1.transitions s) (Trie.empty s)
259

260 261 262 263
let q =
  Q.create()

let add fact =
264 265
  (* assert (not (causes_an_error (target fact) fact.lookahead)); *)

266 267
  (* The length of the word serves as the priority of this fact. *)
  Q.add q fact (W.length fact.word)
POTTIER Francois's avatar
POTTIER Francois committed
268 269
    (* In principle, there is no need to insert the fact into the queue
       if [T] already stores a comparable fact. *)
270

271 272
let stars = ref 0

273
let init s =
274
  let trie = star s in
275 276
  let size = (Trie.size trie) in
  stars := !stars + size;
POTTIER Francois's avatar
POTTIER Francois committed
277
  if not (Trie.is_empty trie) then
278 279 280 281 282 283
    foreach_terminal_not_causing_an_error s (fun z ->
      add {
        future = trie;
        word = W.epsilon;
        lookahead = z
      }
284
    )
285 286

module T : sig
287 288

  (* [register fact] registers the fact [fact]. It returns [true] if this fact
289
     is new, i.e., no fact concerning the same quintuple of [source], [future],
290 291 292 293 294
     [target], [a], and [z] was previously known. *)
  val register: fact -> bool

  (* [query target z f] enumerates all known facts whose target state is [target]
     and whose lookahead assumption is [z]. *)
295
  val query: Lr1.node -> Terminal.t -> (fact -> unit) -> unit
296

297 298
  val stats: unit -> unit

299
end = struct
300

301 302 303 304 305 306 307 308 309 310 311
  (* This module implements a set of facts. Two facts are considered equal
     (for the purposes of this set) if they have the same [future], [a], and
     [z] fields. The [word] is not considered. Indeed, we are not interested
     in keeping track of several words that produce the same effect. Only the
     shortest such word is of interest. *)

  (* We need to query the set of facts in two ways. In [register], we need to
     test whether a fact is in the set. In [query], we need to find all facts
     that match a pair [target, z]. For this reason, we use a two-level table.
     The first level is a matrix indexed by [target] and [z]. At the second
     level, we find sets of facts. *)
312 313 314
(**)

  module M =
315
    MySet.Make(struct
316 317 318
      type t = fact
      let compare fact1 fact2 =
        let c = Trie.compare fact1.future fact2.future in
319
        if c <> 0 then c else
320 321
        let a1 = W.first fact1.word fact1.lookahead
        and a2 = W.first fact2.word fact2.lookahead in
322 323
        Terminal.compare a1 a2
    end)
324

325 326 327 328 329
  let table = (* a pretty large table... *)
    Array.make (Lr1.n * Terminal.n) M.empty

  let index target z =
    Terminal.n * (Lr1.number target) + Terminal.t2i z
330

331 332
  let count = ref 0

333
  let register fact =
334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
    let target = target fact in
    let z = fact.lookahead in
    let i = index target z in
    let m = table.(i) in
    (* We crucially rely on the fact that [M.add] guarantees not to
       change the set if an ``equal'' fact already exists. Thus, a
       later, longer path is ignored in favor of an earlier, shorter
       path. *)
    let m' = M.add fact m in
    m != m' && begin
      incr count;
      table.(i) <- m';
      true
    end

  let query target z f =
    let i = index target z in
    let m = table.(i) in
    M.iter f m
353

354 355 356
  let stats () =
    Printf.fprintf stderr "T stores %d facts.\n%!" !count

357 358 359 360 361 362 363 364 365
end

(* The module [E] is in charge of recording the non-terminal edges that we have
   discovered, or more precisely, the conditions under which these edges can be
   taken. *)

module E : sig

  (* [register s nt w z] records that, in state [s], the outgoing edge labeled
366 367 368
     [nt] can be taken by consuming the word [w], if the next symbol is [z].
     It returns [true] if this information is new. *)
  val register: Lr1.node -> Nonterminal.t -> W.word -> Terminal.t -> bool
369 370 371 372

  (* [query s nt a z] answers whether, in state [s], the outgoing edge labeled
     [nt] can be taken by consuming some word [w], under the assumption that
     the next symbol is [z], and under the constraint that the first symbol of
373 374
     [w.z] is [a]. *)
  val query: Lr1.node -> Nonterminal.t -> Terminal.t -> Terminal.t -> (W.word -> unit) -> unit
375

376 377
  val stats: unit -> unit

378 379 380 381 382
end = struct

  (* For now, we implement a mapping of [s, nt, a, z] to [w]. *)

  module M =
383
    MyMap(struct
384 385 386 387 388 389 390 391 392 393 394 395 396 397
      type t = Lr1.node * Nonterminal.t * Terminal.t * Terminal.t
      let compare (s1, nt1, a1, z1) (s2, nt2, a2, z2) =
        let c = Lr1.Node.compare s1 s2 in
        if c <> 0 then c else
        let c = Nonterminal.compare nt1 nt2 in
        if c <> 0 then c else
        let c = Terminal.compare a1 a2 in
        if c <> 0 then c else
        Terminal.compare z1 z2
    end)

  let m =
    ref M.empty

398 399
  let count = ref 0

400
  let register s nt w z =
401
    let a = W.first w z in
402 403 404
    update_ref m (fun m ->
      M.update None some (s, nt, a, z) m (function
      | None ->
405
          incr count;
406 407
          w
      | Some earlier_w ->
408
          (* assert (W.length earlier_w <= W.length w); *)
409 410 411
          earlier_w
      )
    )
412

413 414 415 416
  let query s nt a z f =
    match M.find (s, nt, a, z) !m with
    | w -> f w
    | exception Not_found -> ()
417

418 419 420
  let stats () =
    Printf.fprintf stderr "E stores %d facts.\n%!" !count

421 422
end

423
let new_edge s nt w z =
POTTIER Francois's avatar
POTTIER Francois committed
424 425 426 427
  (*
  Printf.fprintf stderr "Considering reduction on %s in state %d\n"
    (Terminal.print z) (Lr1.number s);
  *)
428
  if E.register s nt w z then
429
    let sym = Symbol.N nt in
430
    T.query s (W.first w z) (fun fact ->
431 432 433 434 435 436 437 438 439 440 441
      assert (Terminal.equal fact.lookahead (W.first w z));
      match Trie.derivative sym fact.future with
      | future ->
          if not (causes_an_error future.Trie.target z) then
            add {
              future;
              word = W.append fact.word w;
              lookahead = z
            }
      | exception Not_found ->
          ()
442
    )
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462

(* [consequences fact] is invoked when we discover a new fact (i.e., one that
   was not previously known). It studies the consequences of this fact. These
   consequences are of two kinds:

   - As in Dijkstra's algorithm, the new fact can be viewed as a newly
   discovered vertex. We study its (currently known) outgoing edges,
   and enqueue new facts in the priority queue.

   - Sometimes, a fact can also be viewed as a newly discovered edge.
   This is the case when the word from [fact.source] to [fact.target]
   represents a production of the grammar and [fact.target] is willing
   to reduce this production. We record the existence of this edge,
   and re-inspect any previously discovered vertices which are
   interested in this outgoing edge.
*)
(**)

let consequences fact =

463 464 465
  let target = target fact in

  (* 1. View [fact] as a vertex. Examine the transitions out of [target]. *)
466
  
467
  SymbolMap.iter (fun sym s' ->
468 469 470 471
    match Trie.derivative sym fact.future, sym with
    | exception Not_found -> ()
    | future, Symbol.T t ->

472
        (* 1a. There is a transition labeled [t] out of [target]. If
473 474 475 476 477 478 479 480
           the lookahead assumption [fact.lookahead] is compatible with [t],
           then we derive a new fact, where one more edge has been taken. We
           enqueue this new fact for later examination. *)
        (**)

        if Terminal.equal fact.lookahead t then
          let word = W.append fact.word (W.singleton t) in
          (* assert (Lr1.Node.compare future.Trie.target s' = 0); *)
481
          foreach_terminal_not_causing_an_error s' (fun z ->
482
            add { future; word; lookahead = z }
483
          )
484 485 486

    | future, Symbol.N nt ->

487
        (* 1b. There is a transition labeled [nt] out of [target]. We
488 489 490 491 492 493 494 495 496
           need to know how this nonterminal edge can be taken. We query for a
           word [w] that allows us to take this edge. The answer depends on
           the terminal symbol [z] that comes *after* this word: we try all
           such symbols. Furthermore, we need the first symbol of [w.z] to
           satisfy the lookahead assumption [fact.lookahead], so the answer
           also depends on this assumption. *)
        (**)

        foreach_terminal_not_causing_an_error s' (fun z ->
497
          E.query target nt fact.lookahead z (fun w ->
498 499 500 501 502 503 504 505
            assert (Terminal.equal fact.lookahead (W.first w z));
            add {
              future;
              word = W.append fact.word w;
              lookahead = z
            }
          )
        )
506

507
  ) (Lr1.transitions target);
508 509

  (* 2. View [fact] as a possible edge. This is possible if the path from
510 511
     [fact.source] to [target] represents a production [prod] and
     [target] is willing to reduce this production. We check that
512 513 514 515 516
     [fact.future] accepts [epsilon]. This guarantees that reducing [prod]
     takes us all the way back to [fact.source]. Thus, this production gives
     rise to an edge labeled [nt] -- the left-hand side of [prod] -- out of
     [fact.source]. This edge is subject to the lookahead assumption
     [fact.lookahead], so we record that. *)
517 518
  (**)

519
  match has_reduction target fact.lookahead with
520
  | Some prod when Trie.accepts prod fact.future ->
521
      new_edge (source fact) (Production.nt prod) fact.word fact.lookahead
522 523 524
  | _ ->
      ()

525
let level = ref 0
526

POTTIER Francois's avatar
POTTIER Francois committed
527 528
let done_with_level () =
  Printf.fprintf stderr "Done with level %d.\n" !level;
529
  W.stats();
POTTIER Francois's avatar
POTTIER Francois committed
530 531 532 533
  T.stats();
  E.stats();
  Printf.fprintf stderr "Q stores %d facts.\n%!" (Q.cardinal q)

534
let discover fact =
535
  if T.register fact then begin
536
    if W.length fact.word > ! level then begin
POTTIER Francois's avatar
POTTIER Francois committed
537
      done_with_level();
538 539
      level := W.length fact.word;
    end;
540
    consequences fact
541
  end
542

POTTIER Francois's avatar
POTTIER Francois committed
543
let () =
544
  Lr1.iter init;
545
  Printf.fprintf stderr "Cumulated star size: %d\n%!" !stars;
546
  Q.repeat q discover;
547
  Time.tick "Running LRijkstra";
POTTIER Francois's avatar
POTTIER Francois committed
548
  done_with_level()
549

550 551
(* ------------------------------------------------------------------------ *)

552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581
(* The following code validates the fact that an error can be triggered in
   state [s'] by beginning in the initial state [s] and reading the
   sequence of terminal symbols [w]. We use this for debugging purposes. *)

let fail msg =
  Printf.fprintf stderr "coverage: internal error: %s.\n%!" msg;
  false

open ReferenceInterpreter

let validate s s' w : bool =
  match
    ReferenceInterpreter.check_error_path (Lr1.nt_of_entry s) (W.elements w)
  with
  | OInputReadPastEnd ->
      fail "input was read past its end"
  | OInputNotFullyConsumed ->
      fail "input was not fully consumed"
  | OUnexpectedAccept ->
      fail "input was unexpectedly accepted"
  | OK state ->
      Lr1.Node.compare state s' = 0 ||
      fail (
        Printf.sprintf "error occurred in state %d instead of %d"
          (Lr1.number state)
          (Lr1.number s')
      )

(* ------------------------------------------------------------------------ *)

582 583 584 585 586 587 588
(* We now wish to determine, given a state [s'] and a terminal symbol [z], a
   minimal path that takes us from some entry state to state [s'] with [z] as
   the next (unconsumed) symbol. *)

(* This can be formulated as a search for a shortest path in a graph. The
   graph is not just the automaton, though. It is a (much) larger graph whose
   vertices are pairs [s, z] and whose edges are obtained by querying the
589
   module [E] above. *)
590

591
let forward () =
592

593
  let module A = Astar.Make(struct
594

595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
    (* A vertex is a pair [s, z].
       [z] cannot be the [error] token. *)
    type node =
        Lr1.node * Terminal.t

    let equal (s'1, z1) (s'2, z2) =
      Lr1.Node.compare s'1 s'2 = 0 && Terminal.compare z1 z2 = 0

    let hash (s, z) =
      Hashtbl.hash (Lr1.number s, z)

    (* An edge is labeled with a word. *)
    type label =
      W.word

    (* Forward search from every [s, z], where [s] is an initial state. *)
    let sources f =
      foreach_terminal (fun z ->
        ProductionMap.iter (fun _ s ->
          f (s, z)
        ) Lr1.entry
616 617
      )

618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
    let successors (s, z) edge =
      assert (not (Terminal.equal z Terminal.error));
      SymbolMap.iter (fun sym s' ->
        match sym with
        | Symbol.T t ->
            if Terminal.equal z t then
              let w = W.singleton t in
              foreach_terminal (fun z ->
                edge w 1 (s', z)
              )
        | Symbol.N nt ->
           foreach_terminal (fun z' ->
             E.query s nt z z' (fun w ->
               edge w (W.length w) (s', z')
             )
           )
      ) (Lr1.transitions s)

    let estimate _ =
      0

  end) in

  (* Search forward. *)

643
  Printf.fprintf stderr "Forward search:\n%!";
644
  let seen = ref Lr1.NodeSet.empty in
645
  let _, _ = A.search (fun ((s', z), path) ->
646 647 648 649 650 651 652 653 654 655 656 657 658 659
    if causes_an_error s' z && not (Lr1.NodeSet.mem s' !seen) then begin
      seen := Lr1.NodeSet.add s' !seen;
      (* An error can be triggered in state [s'] by beginning in the initial
         state [s] and reading the sequence of terminal symbols [w]. *)
      let (s, _), ws = A.reverse path in
      let w = List.fold_right W.append ws (W.singleton z) in
      Printf.fprintf stderr
        "An error can be reached from state %d to state %d:\n%!"
        (Lr1.number s)
        (Lr1.number s');
      Printf.fprintf stderr "%s\n%!" (W.print w);
      assert (validate s s' w)
    end
  ) in
660 661 662
  Printf.fprintf stderr "Reachable (forward): %d states\n%!"
    (Lr1.NodeSet.cardinal !seen);
  !seen
663

664
(* TEMPORARY the code in this module should run only if --coverage is set *)
665 666

let () =
667 668
  let f = forward() in
  Time.tick "Forward search";
669
  ignore f