LRijkstra.ml 18.7 KB
Newer Older
1 2
open Grammar

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
(* ------------------------------------------------------------------------ *)

(* This returns the list of reductions of [state] on token [z]. This
   is a list of zero or one elements. *)

let reductions s z =
  assert (not (Terminal.equal z Terminal.error));
  try
    TerminalMap.find z (Lr1.reductions s)
  with Not_found ->
    []

(* This tests whether state [s] is willing to reduce some production
   when the lookahead symbol is [z]. This test takes a possible default
   reduction into account. *)

let has_reduction s z : Production.index option =
  assert (not (Terminal.equal z Terminal.error));
  match Invariant.has_default_reduction s with
  | Some (prod, _) ->
      Some prod
  | None ->
      match reductions s z with
      | prod :: prods ->
          assert (prods = []);
          Some prod
      | [] ->
          None

(* This tests whether state [s] will initiate an error on the lookahead
   symbol [z]. *)

let causes_an_error s z =
  assert (not (Terminal.equal z Terminal.error));
  match Invariant.has_default_reduction s with
  | Some _ ->
      false
  | None ->
      reductions s z = [] &&
      not (SymbolMap.mem (Symbol.T z) (Lr1.transitions s))

44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
let foreach_terminal f =
  Terminal.iter (fun t ->
    if not (Terminal.equal t Terminal.error) then
      f t
  )

let foreach_terminal_not_causing_an_error s f =
  match Invariant.has_default_reduction s with
  | Some _ ->
      (* No terminal causes an error. *)
      foreach_terminal f
  | None ->
      TerminalMap.iter (fun t _ ->
        if not (Terminal.equal t Terminal.error) then
          f t
      ) (Lr1.reductions s);
      SymbolMap.iter (fun sym _ ->
        match sym with
        | Symbol.T t ->
            if not (Terminal.equal t Terminal.error) then
              f t
        | Symbol.N _ ->
            ()
      ) (Lr1.transitions s)

69 70 71 72 73 74 75 76 77 78 79 80
(* TEMPORARY
let number2node : int -> Lr1.node =
  let table = Array.make Lr1.n None in
  Lr1.iter (fun node ->
    table.(Lr1.number node) <- Some node
  );
  let table =
    Array.map (function Some x -> x | None -> assert false) table
  in
  Array.get table
*)

81 82
let id x = x
let some x = Some x
83 84 85 86 87 88

let update_ref r f : bool =
  let v = !r in
  let v' = f v in
  v != v' && (r := v'; true)

89 90 91 92 93 94 95 96 97 98 99 100
let update add find none some key m f =
  match find key m with
  | data ->
      let data' = f (some data) in
      if data' == data then
        m
      else
        add key data' m
  | exception Not_found ->
      let data' = f none in
      add key data' m

101 102 103
module MyMap (X : Map.OrderedType) = struct
  include Map.Make(X)
  let update none some key m f =
104
    update add find none some key m f
105 106
end

107 108 109 110 111 112 113
module W : sig

  type word
  val epsilon: word
  val singleton: Terminal.t -> word
  val append: word -> word -> word
  val length: word -> int
114
  val first: word -> Terminal.t -> Terminal.t
115
  val elements: word -> Terminal.t list
116
  val print: word -> string
117

118 119
  val stats: unit -> unit

120 121
end = struct

122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
  let () =
    assert (Terminal.n < 256)

  let t2c (t : Terminal.t) : char =
    Char.chr (Terminal.t2i t)
  let c2t (t : char) : Terminal.t =
    Obj.magic (Char.code t)

  module H = Hashtbl.Make(struct
      type t = string
      let equal = (=)
      let hash = Hashtbl.hash
  end)
  let table = H.create 1023
  let c = ref 0
  let n = ref 0
  let intern s =
    incr c;
    try
      H.find table s
    with Not_found ->
      incr n;
      H.add table s s;
      s

  type word = string
  let epsilon = ""
  let singleton t = intern (String.make 1 (t2c t))
  let append w1 w2 = intern (w1 ^ w2)
  let length = String.length
  let first w z = if length w > 0 then c2t w.[0] else z
  let rec chars i n w =
    if i = n then
      []
    else
      c2t w.[i] :: chars (i + 1) n w
  let elements w =
    chars 0 (String.length w) w
160
  let print w =
161
    string_of_int (length w) ^ " " ^
162
    String.concat " " (List.map Terminal.print (elements w))
163

164 165 166 167 168 169
  let stats () =
    Printf.fprintf stderr
      "Number of calls to intern: %d\n\
       Number of elements in intern table: %d\n%!"
      !c !n

170 171 172 173
end

module Q = LowIntegerPriorityQueue

174 175 176 177
module Trie = struct

  let c = ref 0

178 179
  type trie = {
    identity: int;
180 181
    source: Lr1.node;
    target: Lr1.node;
182
    productions: Production.index list;
183
    transitions: trie SymbolMap.t;
184
  }
185

186
  let mktrie source target productions transitions =
187
    let identity = Misc.postincrement c in
188
    { identity; source; target; productions; transitions }
189

190 191
  let empty source =
    mktrie source source [] SymbolMap.empty
192

193 194
  let is_empty t =
    t.productions = [] && SymbolMap.is_empty t.transitions
195

196 197
  let accepts prod t =
    List.mem prod t.productions
198

199
  let rec insert target w prod t =
200 201
    match w with
    | [] ->
202
        mktrie t.source target (prod :: t.productions) t.transitions
203
    | a :: w ->
204 205 206 207 208 209 210 211 212 213
        match SymbolMap.find a (Lr1.transitions target) with
        | successor ->
            let child = mktrie t.source successor [] SymbolMap.empty in
            mktrie t.source target t.productions
              (update SymbolMap.add SymbolMap.find child id a t.transitions (insert successor w prod))
        | exception Not_found ->
            t

  let insert w prod t =
    insert t.source w prod t
214

215
  let derivative a t =
216
    SymbolMap.find a t.transitions (* careful: may raise [Not_found] *)
217

218 219
  let compare t1 t2 =
    Pervasives.compare (t1.identity : int) t2.identity
220

221 222
  let rec size t =
    SymbolMap.fold (fun _ child accu -> size child + accu) t.transitions 1
223

224 225
end

226
type fact = {
227
  future: Trie.trie;
228
  word: W.word;
229
  lookahead: Terminal.t
230 231
}

232 233 234 235 236 237
let source fact =
  fact.future.Trie.source

let target fact =
  fact.future.Trie.target

238 239 240 241 242 243 244 245
let star s : Trie.trie =
  SymbolMap.fold (fun sym _ accu ->
    match sym with
    | Symbol.T _ ->
        accu
    | Symbol.N nt ->
        Production.foldnt nt accu (fun prod accu ->
          let w = Array.to_list (Production.rhs prod) in
246
          (* could insert this branch only if viable -- leads to 12600 instead of 12900 in ocaml.mly --lalr *)
247
          Trie.insert w prod accu
248
        )
249
  ) (Lr1.transitions s) (Trie.empty s)
250

251 252 253 254
let q =
  Q.create()

let add fact =
255 256
  (* assert (not (causes_an_error (target fact) fact.lookahead)); *)

257 258
  (* The length of the word serves as the priority of this fact. *)
  Q.add q fact (W.length fact.word)
POTTIER Francois's avatar
POTTIER Francois committed
259 260
    (* In principle, there is no need to insert the fact into the queue
       if [T] already stores a comparable fact. *)
261

262 263
let stars = ref 0

264
let init s =
265
  let trie = star s in
266 267
  let size = (Trie.size trie) in
  stars := !stars + size;
POTTIER Francois's avatar
POTTIER Francois committed
268
  if not (Trie.is_empty trie) then
269 270 271 272 273 274
    foreach_terminal_not_causing_an_error s (fun z ->
      add {
        future = trie;
        word = W.epsilon;
        lookahead = z
      }
275
    )
276 277

module T : sig
278 279

  (* [register fact] registers the fact [fact]. It returns [true] if this fact
280
     is new, i.e., no fact concerning the same quintuple of [source], [future],
281 282 283 284 285
     [target], [a], and [z] was previously known. *)
  val register: fact -> bool

  (* [query target z f] enumerates all known facts whose target state is [target]
     and whose lookahead assumption is [z]. *)
286
  val query: Lr1.node -> Terminal.t -> (fact -> unit) -> unit
287

288 289
  val stats: unit -> unit

290
end = struct
291

292 293 294 295 296 297 298 299 300 301 302
  (* This module implements a set of facts. Two facts are considered equal
     (for the purposes of this set) if they have the same [future], [a], and
     [z] fields. The [word] is not considered. Indeed, we are not interested
     in keeping track of several words that produce the same effect. Only the
     shortest such word is of interest. *)

  (* We need to query the set of facts in two ways. In [register], we need to
     test whether a fact is in the set. In [query], we need to find all facts
     that match a pair [target, z]. For this reason, we use a two-level table.
     The first level is a matrix indexed by [target] and [z]. At the second
     level, we find sets of facts. *)
303 304 305
(**)

  module M =
306
    MySet.Make(struct
307 308 309
      type t = fact
      let compare fact1 fact2 =
        let c = Trie.compare fact1.future fact2.future in
310
        if c <> 0 then c else
311 312
        let a1 = W.first fact1.word fact1.lookahead
        and a2 = W.first fact2.word fact2.lookahead in
313 314
        Terminal.compare a1 a2
    end)
315

316 317 318 319 320
  let table = (* a pretty large table... *)
    Array.make (Lr1.n * Terminal.n) M.empty

  let index target z =
    Terminal.n * (Lr1.number target) + Terminal.t2i z
321

322 323
  let count = ref 0

324
  let register fact =
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
    let target = target fact in
    let z = fact.lookahead in
    let i = index target z in
    let m = table.(i) in
    (* We crucially rely on the fact that [M.add] guarantees not to
       change the set if an ``equal'' fact already exists. Thus, a
       later, longer path is ignored in favor of an earlier, shorter
       path. *)
    let m' = M.add fact m in
    m != m' && begin
      incr count;
      table.(i) <- m';
      true
    end

  let query target z f =
    let i = index target z in
    let m = table.(i) in
    M.iter f m
344

345 346 347
  let stats () =
    Printf.fprintf stderr "T stores %d facts.\n%!" !count

348 349 350 351 352 353 354 355 356
end

(* The module [E] is in charge of recording the non-terminal edges that we have
   discovered, or more precisely, the conditions under which these edges can be
   taken. *)

module E : sig

  (* [register s nt w z] records that, in state [s], the outgoing edge labeled
357 358 359
     [nt] can be taken by consuming the word [w], if the next symbol is [z].
     It returns [true] if this information is new. *)
  val register: Lr1.node -> Nonterminal.t -> W.word -> Terminal.t -> bool
360 361 362 363

  (* [query s nt a z] answers whether, in state [s], the outgoing edge labeled
     [nt] can be taken by consuming some word [w], under the assumption that
     the next symbol is [z], and under the constraint that the first symbol of
364 365
     [w.z] is [a]. *)
  val query: Lr1.node -> Nonterminal.t -> Terminal.t -> Terminal.t -> (W.word -> unit) -> unit
366

367 368
  val stats: unit -> unit

369 370 371 372 373
end = struct

  (* For now, we implement a mapping of [s, nt, a, z] to [w]. *)

  module M =
374
    MyMap(struct
375 376 377 378 379 380 381 382 383 384 385 386 387 388
      type t = Lr1.node * Nonterminal.t * Terminal.t * Terminal.t
      let compare (s1, nt1, a1, z1) (s2, nt2, a2, z2) =
        let c = Lr1.Node.compare s1 s2 in
        if c <> 0 then c else
        let c = Nonterminal.compare nt1 nt2 in
        if c <> 0 then c else
        let c = Terminal.compare a1 a2 in
        if c <> 0 then c else
        Terminal.compare z1 z2
    end)

  let m =
    ref M.empty

389 390
  let count = ref 0

391
  let register s nt w z =
392
    let a = W.first w z in
393 394 395
    update_ref m (fun m ->
      M.update None some (s, nt, a, z) m (function
      | None ->
396
          incr count;
397 398
          w
      | Some earlier_w ->
399
          (* assert (W.length earlier_w <= W.length w); *)
400 401 402
          earlier_w
      )
    )
403

404 405 406 407
  let query s nt a z f =
    match M.find (s, nt, a, z) !m with
    | w -> f w
    | exception Not_found -> ()
408

409 410 411
  let stats () =
    Printf.fprintf stderr "E stores %d facts.\n%!" !count

412 413
end

414
let new_edge s nt w z =
POTTIER Francois's avatar
POTTIER Francois committed
415 416 417 418
  (*
  Printf.fprintf stderr "Considering reduction on %s in state %d\n"
    (Terminal.print z) (Lr1.number s);
  *)
419
  if E.register s nt w z then
420
    let sym = Symbol.N nt in
421
    T.query s (W.first w z) (fun fact ->
422 423 424 425 426 427 428 429 430 431 432
      assert (Terminal.equal fact.lookahead (W.first w z));
      match Trie.derivative sym fact.future with
      | future ->
          if not (causes_an_error future.Trie.target z) then
            add {
              future;
              word = W.append fact.word w;
              lookahead = z
            }
      | exception Not_found ->
          ()
433
    )
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453

(* [consequences fact] is invoked when we discover a new fact (i.e., one that
   was not previously known). It studies the consequences of this fact. These
   consequences are of two kinds:

   - As in Dijkstra's algorithm, the new fact can be viewed as a newly
   discovered vertex. We study its (currently known) outgoing edges,
   and enqueue new facts in the priority queue.

   - Sometimes, a fact can also be viewed as a newly discovered edge.
   This is the case when the word from [fact.source] to [fact.target]
   represents a production of the grammar and [fact.target] is willing
   to reduce this production. We record the existence of this edge,
   and re-inspect any previously discovered vertices which are
   interested in this outgoing edge.
*)
(**)

let consequences fact =

454 455 456
  let target = target fact in

  (* 1. View [fact] as a vertex. Examine the transitions out of [target]. *)
457
  
458
  SymbolMap.iter (fun sym s' ->
459 460 461 462
    match Trie.derivative sym fact.future, sym with
    | exception Not_found -> ()
    | future, Symbol.T t ->

463
        (* 1a. There is a transition labeled [t] out of [target]. If
464 465 466 467 468 469 470 471
           the lookahead assumption [fact.lookahead] is compatible with [t],
           then we derive a new fact, where one more edge has been taken. We
           enqueue this new fact for later examination. *)
        (**)

        if Terminal.equal fact.lookahead t then
          let word = W.append fact.word (W.singleton t) in
          (* assert (Lr1.Node.compare future.Trie.target s' = 0); *)
472
          foreach_terminal_not_causing_an_error s' (fun z ->
473
            add { future; word; lookahead = z }
474
          )
475 476 477

    | future, Symbol.N nt ->

478
        (* 1b. There is a transition labeled [nt] out of [target]. We
479 480 481 482 483 484 485 486 487
           need to know how this nonterminal edge can be taken. We query for a
           word [w] that allows us to take this edge. The answer depends on
           the terminal symbol [z] that comes *after* this word: we try all
           such symbols. Furthermore, we need the first symbol of [w.z] to
           satisfy the lookahead assumption [fact.lookahead], so the answer
           also depends on this assumption. *)
        (**)

        foreach_terminal_not_causing_an_error s' (fun z ->
488
          E.query target nt fact.lookahead z (fun w ->
489 490 491 492 493 494 495 496
            assert (Terminal.equal fact.lookahead (W.first w z));
            add {
              future;
              word = W.append fact.word w;
              lookahead = z
            }
          )
        )
497

498
  ) (Lr1.transitions target);
499 500

  (* 2. View [fact] as a possible edge. This is possible if the path from
501 502
     [fact.source] to [target] represents a production [prod] and
     [target] is willing to reduce this production. We check that
503 504 505 506 507
     [fact.future] accepts [epsilon]. This guarantees that reducing [prod]
     takes us all the way back to [fact.source]. Thus, this production gives
     rise to an edge labeled [nt] -- the left-hand side of [prod] -- out of
     [fact.source]. This edge is subject to the lookahead assumption
     [fact.lookahead], so we record that. *)
508 509
  (**)

510
  match has_reduction target fact.lookahead with
511
  | Some prod when Trie.accepts prod fact.future ->
512
      new_edge (source fact) (Production.nt prod) fact.word fact.lookahead
513 514 515
  | _ ->
      ()

516
let level = ref 0
517

POTTIER Francois's avatar
POTTIER Francois committed
518 519
let done_with_level () =
  Printf.fprintf stderr "Done with level %d.\n" !level;
520
  W.stats();
POTTIER Francois's avatar
POTTIER Francois committed
521 522 523 524
  T.stats();
  E.stats();
  Printf.fprintf stderr "Q stores %d facts.\n%!" (Q.cardinal q)

525
let discover fact =
526
  if T.register fact then begin
527
    if W.length fact.word > ! level then begin
POTTIER Francois's avatar
POTTIER Francois committed
528
      done_with_level();
529 530
      level := W.length fact.word;
    end;
531
    consequences fact
532
  end
533

POTTIER Francois's avatar
POTTIER Francois committed
534
let () =
535
  Lr1.iter init;
536
  Printf.fprintf stderr "Cumulated star size: %d\n%!" !stars;
537
  Q.repeat q discover;
538
  Time.tick "Running LRijkstra";
POTTIER Francois's avatar
POTTIER Francois committed
539
  done_with_level()
540

541 542
(* ------------------------------------------------------------------------ *)

543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
(* The following code validates the fact that an error can be triggered in
   state [s'] by beginning in the initial state [s] and reading the
   sequence of terminal symbols [w]. We use this for debugging purposes. *)

let fail msg =
  Printf.fprintf stderr "coverage: internal error: %s.\n%!" msg;
  false

open ReferenceInterpreter

let validate s s' w : bool =
  match
    ReferenceInterpreter.check_error_path (Lr1.nt_of_entry s) (W.elements w)
  with
  | OInputReadPastEnd ->
      fail "input was read past its end"
  | OInputNotFullyConsumed ->
      fail "input was not fully consumed"
  | OUnexpectedAccept ->
      fail "input was unexpectedly accepted"
  | OK state ->
      Lr1.Node.compare state s' = 0 ||
      fail (
        Printf.sprintf "error occurred in state %d instead of %d"
          (Lr1.number state)
          (Lr1.number s')
      )

(* ------------------------------------------------------------------------ *)

573 574 575 576 577 578 579
(* We now wish to determine, given a state [s'] and a terminal symbol [z], a
   minimal path that takes us from some entry state to state [s'] with [z] as
   the next (unconsumed) symbol. *)

(* This can be formulated as a search for a shortest path in a graph. The
   graph is not just the automaton, though. It is a (much) larger graph whose
   vertices are pairs [s, z] and whose edges are obtained by querying the
580
   module [E] above. *)
581

582
let forward () =
583

584
  let module A = Astar.Make(struct
585

586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
    (* A vertex is a pair [s, z].
       [z] cannot be the [error] token. *)
    type node =
        Lr1.node * Terminal.t

    let equal (s'1, z1) (s'2, z2) =
      Lr1.Node.compare s'1 s'2 = 0 && Terminal.compare z1 z2 = 0

    let hash (s, z) =
      Hashtbl.hash (Lr1.number s, z)

    (* An edge is labeled with a word. *)
    type label =
      W.word

    (* Forward search from every [s, z], where [s] is an initial state. *)
    let sources f =
      foreach_terminal (fun z ->
        ProductionMap.iter (fun _ s ->
          f (s, z)
        ) Lr1.entry
607 608
      )

609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633
    let successors (s, z) edge =
      assert (not (Terminal.equal z Terminal.error));
      SymbolMap.iter (fun sym s' ->
        match sym with
        | Symbol.T t ->
            if Terminal.equal z t then
              let w = W.singleton t in
              foreach_terminal (fun z ->
                edge w 1 (s', z)
              )
        | Symbol.N nt ->
           foreach_terminal (fun z' ->
             E.query s nt z z' (fun w ->
               edge w (W.length w) (s', z')
             )
           )
      ) (Lr1.transitions s)

    let estimate _ =
      0

  end) in

  (* Search forward. *)

634
  Printf.fprintf stderr "Forward search:\n%!";
635
  let seen = ref Lr1.NodeSet.empty in
636
  let _, _ = A.search (fun ((s', z), path) ->
637 638 639 640 641 642 643 644 645 646 647 648 649 650
    if causes_an_error s' z && not (Lr1.NodeSet.mem s' !seen) then begin
      seen := Lr1.NodeSet.add s' !seen;
      (* An error can be triggered in state [s'] by beginning in the initial
         state [s] and reading the sequence of terminal symbols [w]. *)
      let (s, _), ws = A.reverse path in
      let w = List.fold_right W.append ws (W.singleton z) in
      Printf.fprintf stderr
        "An error can be reached from state %d to state %d:\n%!"
        (Lr1.number s)
        (Lr1.number s');
      Printf.fprintf stderr "%s\n%!" (W.print w);
      assert (validate s s' w)
    end
  ) in
651 652 653
  Printf.fprintf stderr "Reachable (forward): %d states\n%!"
    (Lr1.NodeSet.cardinal !seen);
  !seen
654

655
(* TEMPORARY the code in this module should run only if --coverage is set *)
656 657

let () =
658 659
  let f = forward() in
  Time.tick "Forward search";
660
  ignore f