LRijkstra.ml 17.2 KB
Newer Older
1 2
open Grammar

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
(* ------------------------------------------------------------------------ *)

(* This returns the list of reductions of [state] on token [z]. This
   is a list of zero or one elements. *)

let reductions s z =
  assert (not (Terminal.equal z Terminal.error));
  try
    TerminalMap.find z (Lr1.reductions s)
  with Not_found ->
    []

(* This tests whether state [s] is willing to reduce some production
   when the lookahead symbol is [z]. This test takes a possible default
   reduction into account. *)

let has_reduction s z : Production.index option =
  assert (not (Terminal.equal z Terminal.error));
  match Invariant.has_default_reduction s with
  | Some (prod, _) ->
      Some prod
  | None ->
      match reductions s z with
      | prod :: prods ->
          assert (prods = []);
          Some prod
      | [] ->
          None

(* This tests whether state [s] will initiate an error on the lookahead
   symbol [z]. *)

let causes_an_error s z =
  assert (not (Terminal.equal z Terminal.error));
  match Invariant.has_default_reduction s with
  | Some _ ->
      false
  | None ->
      reductions s z = [] &&
      not (SymbolMap.mem (Symbol.T z) (Lr1.transitions s))

44 45 46 47 48 49 50 51
  let id x = x
  let some x = Some x

let update_ref r f : bool =
  let v = !r in
  let v' = f v in
  v != v' && (r := v'; true)

52 53 54 55 56 57 58 59 60 61 62 63
let update add find none some key m f =
  match find key m with
  | data ->
      let data' = f (some data) in
      if data' == data then
        m
      else
        add key data' m
  | exception Not_found ->
      let data' = f none in
      add key data' m

64 65 66
module MyMap (X : Map.OrderedType) = struct
  include Map.Make(X)
  let update none some key m f =
67
    update add find none some key m f
68 69
end

70 71 72 73 74 75 76
module W : sig

  type word
  val epsilon: word
  val singleton: Terminal.t -> word
  val append: word -> word -> word
  val length: word -> int
77
  val first: word -> Terminal.t -> Terminal.t
78
  val elements: word -> Terminal.t list
79
  val print: word -> string
80 81 82

end = struct

83 84 85 86 87 88 89
  type word = Terminal.t list
  let epsilon = []
  let singleton t = [t]
  let append = (@)
  let length = List.length
  let first w z = match w with a :: _ -> a | [] -> z
  let elements w = w
90
  let print w =
91
    string_of_int (length w) ^ " " ^
92
    String.concat " " (List.map Terminal.print (elements w))
93

94 95 96 97
end

module Q = LowIntegerPriorityQueue

98 99 100 101
module Trie = struct

  let c = ref 0

102 103
  type trie = {
    identity: int;
104 105
    source: Lr1.node;
    target: Lr1.node;
106
    productions: Production.index list;
107
    transitions: trie SymbolMap.t;
108
  }
109

110
  let mktrie source target productions transitions =
111
    let identity = Misc.postincrement c in
112
    { identity; source; target; productions; transitions }
113

114 115
  let empty source =
    mktrie source source [] SymbolMap.empty
116

117 118
  let is_empty t =
    t.productions = [] && SymbolMap.is_empty t.transitions
119

120 121
  let accepts prod t =
    List.mem prod t.productions
122

123
  let rec insert target w prod t =
124 125
    match w with
    | [] ->
126
        mktrie t.source target (prod :: t.productions) t.transitions
127
    | a :: w ->
128 129 130 131 132 133 134 135 136 137
        match SymbolMap.find a (Lr1.transitions target) with
        | successor ->
            let child = mktrie t.source successor [] SymbolMap.empty in
            mktrie t.source target t.productions
              (update SymbolMap.add SymbolMap.find child id a t.transitions (insert successor w prod))
        | exception Not_found ->
            t

  let insert w prod t =
    insert t.source w prod t
138

139
  let derivative a t =
140
    try
141
      SymbolMap.find a t.transitions
142
    with Not_found ->
143 144 145 146
      assert false

  let has_derivative a t =
    SymbolMap.mem a t.transitions
147

148 149
  let compare t1 t2 =
    Pervasives.compare (t1.identity : int) t2.identity
150

151 152
  let rec size t =
    SymbolMap.fold (fun _ child accu -> size child + accu) t.transitions 1
153

154 155
end

156
type fact = {
157
  future: Trie.trie;
158
  word: W.word;
159
  lookahead: Terminal.t
160 161
}

162 163 164 165 166 167
let source fact =
  fact.future.Trie.source

let target fact =
  fact.future.Trie.target

168 169
let print_fact fact =
  Printf.fprintf stderr
170
    "from state %d to state %d via %s . %s\n%!"
171 172
    (Lr1.number (source fact))
    (Lr1.number (target fact))
173 174 175
    (W.print fact.word)
    (Terminal.print fact.lookahead)

176
let extensible fact sym =
177
  Trie.has_derivative sym fact.future
178

179 180 181 182 183 184
let foreach_terminal f =
  Terminal.iter (fun t ->
    if not (Terminal.equal t Terminal.error) then
      f t
  )

185 186 187 188 189 190 191 192
let star s : Trie.trie =
  SymbolMap.fold (fun sym _ accu ->
    match sym with
    | Symbol.T _ ->
        accu
    | Symbol.N nt ->
        Production.foldnt nt accu (fun prod accu ->
          let w = Array.to_list (Production.rhs prod) in
193
          (* could insert this branch only if viable -- leads to 12600 instead of 12900 in ocaml.mly --lalr *)
194
          Trie.insert w prod accu
195
        )
196
  ) (Lr1.transitions s) (Trie.empty s)
197

198 199 200 201 202 203
let q =
  Q.create()

let add fact =
  (* The length of the word serves as the priority of this fact. *)
  Q.add q fact (W.length fact.word)
POTTIER Francois's avatar
POTTIER Francois committed
204 205
    (* In principle, there is no need to insert the fact into the queue
       if [T] already stores a comparable fact. *)
206

207 208
let stars = ref 0

209
let init s =
210
  let trie = star s in
211 212 213 214
  let size = (Trie.size trie) in
  stars := !stars + size;
  Printf.fprintf stderr "State %d has a star of size %d\n.%!"
    (Lr1.number s) size;
POTTIER Francois's avatar
POTTIER Francois committed
215
  if not (Trie.is_empty trie) then
216 217
    foreach_terminal (fun z ->
      add {
218
        future = trie;
219 220 221 222
        word = W.epsilon;
        lookahead = z
      }
    )
223 224

module T : sig
225 226

  (* [register fact] registers the fact [fact]. It returns [true] if this fact
227
     is new, i.e., no fact concerning the same quintuple of [source], [future],
228 229 230 231 232
     [target], [a], and [z] was previously known. *)
  val register: fact -> bool

  (* [query target z f] enumerates all known facts whose target state is [target]
     and whose lookahead assumption is [z]. *)
233
  val query: Lr1.node -> Terminal.t -> (fact -> unit) -> unit
234

235 236
  val stats: unit -> unit

237
end = struct
238

239 240 241 242 243 244 245 246 247 248 249
  (* This module implements a set of facts. Two facts are considered equal
     (for the purposes of this set) if they have the same [future], [a], and
     [z] fields. The [word] is not considered. Indeed, we are not interested
     in keeping track of several words that produce the same effect. Only the
     shortest such word is of interest. *)

  (* We need to query the set of facts in two ways. In [register], we need to
     test whether a fact is in the set. In [query], we need to find all facts
     that match a pair [target, z]. For this reason, we use a two-level table.
     The first level is a matrix indexed by [target] and [z]. At the second
     level, we find sets of facts. *)
250 251 252
(**)

  module M =
253
    MySet.Make(struct
254 255 256
      type t = fact
      let compare fact1 fact2 =
        let c = Trie.compare fact1.future fact2.future in
257
        if c <> 0 then c else
258 259
        let a1 = W.first fact1.word fact1.lookahead
        and a2 = W.first fact2.word fact2.lookahead in
260 261
        Terminal.compare a1 a2
    end)
262

263 264 265 266 267
  let table = (* a pretty large table... *)
    Array.make (Lr1.n * Terminal.n) M.empty

  let index target z =
    Terminal.n * (Lr1.number target) + Terminal.t2i z
268

269 270
  let count = ref 0

271
  let register fact =
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
    let target = target fact in
    let z = fact.lookahead in
    let i = index target z in
    let m = table.(i) in
    (* We crucially rely on the fact that [M.add] guarantees not to
       change the set if an ``equal'' fact already exists. Thus, a
       later, longer path is ignored in favor of an earlier, shorter
       path. *)
    let m' = M.add fact m in
    m != m' && begin
      incr count;
      table.(i) <- m';
      true
    end

  let query target z f =
    let i = index target z in
    let m = table.(i) in
    M.iter f m
291

292 293 294
  let stats () =
    Printf.fprintf stderr "T stores %d facts.\n%!" !count

295 296 297 298 299 300 301 302 303
end

(* The module [E] is in charge of recording the non-terminal edges that we have
   discovered, or more precisely, the conditions under which these edges can be
   taken. *)

module E : sig

  (* [register s nt w z] records that, in state [s], the outgoing edge labeled
304 305 306
     [nt] can be taken by consuming the word [w], if the next symbol is [z].
     It returns [true] if this information is new. *)
  val register: Lr1.node -> Nonterminal.t -> W.word -> Terminal.t -> bool
307 308 309 310

  (* [query s nt a z] answers whether, in state [s], the outgoing edge labeled
     [nt] can be taken by consuming some word [w], under the assumption that
     the next symbol is [z], and under the constraint that the first symbol of
311 312
     [w.z] is [a]. *)
  val query: Lr1.node -> Nonterminal.t -> Terminal.t -> Terminal.t -> (W.word -> unit) -> unit
313

314 315
  val stats: unit -> unit

316 317 318 319 320
end = struct

  (* For now, we implement a mapping of [s, nt, a, z] to [w]. *)

  module M =
321
    MyMap(struct
322 323 324 325 326 327 328 329 330 331 332 333 334 335
      type t = Lr1.node * Nonterminal.t * Terminal.t * Terminal.t
      let compare (s1, nt1, a1, z1) (s2, nt2, a2, z2) =
        let c = Lr1.Node.compare s1 s2 in
        if c <> 0 then c else
        let c = Nonterminal.compare nt1 nt2 in
        if c <> 0 then c else
        let c = Terminal.compare a1 a2 in
        if c <> 0 then c else
        Terminal.compare z1 z2
    end)

  let m =
    ref M.empty

336 337
  let count = ref 0

338
  let register s nt w z =
339
    let a = W.first w z in
340 341 342
    update_ref m (fun m ->
      M.update None some (s, nt, a, z) m (function
      | None ->
343
          incr count;
344 345
          w
      | Some earlier_w ->
346
          (* assert (W.length earlier_w <= W.length w); *)
347 348 349
          earlier_w
      )
    )
350

351 352 353 354
  let query s nt a z f =
    match M.find (s, nt, a, z) !m with
    | w -> f w
    | exception Not_found -> ()
355

356 357 358
  let stats () =
    Printf.fprintf stderr "E stores %d facts.\n%!" !count

359 360
end

361 362
let extend fact sym w z =
  assert (Terminal.equal fact.lookahead (W.first w z));
363
  {
364
    future = Trie.derivative sym fact.future;
365
    word = W.append fact.word w;
366
    lookahead = z
367 368
  }

369
let new_edge s nt w z =
POTTIER Francois's avatar
POTTIER Francois committed
370 371 372 373
  (*
  Printf.fprintf stderr "Considering reduction on %s in state %d\n"
    (Terminal.print z) (Lr1.number s);
  *)
374
  if E.register s nt w z then
375
    let sym = (Symbol.N nt) in
376
    T.query s (W.first w z) (fun fact ->
377
      if extensible fact sym then
378
        add (extend fact sym w z)
379
    )
380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401

(* [consequences fact] is invoked when we discover a new fact (i.e., one that
   was not previously known). It studies the consequences of this fact. These
   consequences are of two kinds:

   - As in Dijkstra's algorithm, the new fact can be viewed as a newly
   discovered vertex. We study its (currently known) outgoing edges,
   and enqueue new facts in the priority queue.

   - Sometimes, a fact can also be viewed as a newly discovered edge.
   This is the case when the word from [fact.source] to [fact.target]
   represents a production of the grammar and [fact.target] is willing
   to reduce this production. We record the existence of this edge,
   and re-inspect any previously discovered vertices which are
   interested in this outgoing edge.
*)
(**)

let consequences fact =

  (* 1. View [fact] as a vertex. Examine the transitions out of [fact.target]. *)
  
402
  SymbolMap.iter (fun sym _ ->
403
    if extensible fact sym then
404 405 406
      match sym with
      | Symbol.T t ->

407 408 409 410
          (* 1a. There is a transition labeled [t] out of [fact.target]. If
             the lookahead assumption [fact.lookahead] is compatible with [t],
             then we derive a new fact, where one more edge has been taken. We
             enqueue this new fact for later examination. *)
411 412 413 414
          (**)

          if Terminal.equal fact.lookahead t then
            foreach_terminal (fun z ->
415
              add (extend fact sym (W.singleton t) z)
416 417 418 419 420 421 422 423 424 425 426 427
            )

      | Symbol.N nt ->

          (* 1b. There is a transition labeled [nt] out of [fact.target]. We
             need to know how this nonterminal edge can be taken. We query for a
             word [w] that allows us to take this edge. The answer depends on
             the terminal symbol [z] that comes *after* this word: we try all
             such symbols. Furthermore, we need the first symbol of [w.z] to
             satisfy the lookahead assumption [fact.lookahead], so the answer
             also depends on this assumption. *)
          (**)
428

429
          foreach_terminal (fun z ->
430 431
            E.query (target fact) nt fact.lookahead z (fun w ->
              add (extend fact sym w z)
432
            )
433
          )
434

435
  ) (Lr1.transitions (target fact));
436 437 438 439

  (* 2. View [fact] as a possible edge. This is possible if the path from
     [fact.source] to [fact.target] represents a production [prod] and
     [fact.target] is willing to reduce this production. We check that
440 441 442 443 444
     [fact.future] accepts [epsilon]. This guarantees that reducing [prod]
     takes us all the way back to [fact.source]. Thus, this production gives
     rise to an edge labeled [nt] -- the left-hand side of [prod] -- out of
     [fact.source]. This edge is subject to the lookahead assumption
     [fact.lookahead], so we record that. *)
445 446
  (**)

447
  match has_reduction (target fact) fact.lookahead with
448
  | Some prod when Trie.accepts prod fact.future ->
449
      new_edge (source fact) (Production.nt prod) fact.word fact.lookahead
450 451 452
  | _ ->
      ()

453
let level = ref 0
454 455

let discover fact =
456
  if T.register fact then begin
457 458 459 460 461

    if W.length fact.word > ! level then begin
      Printf.fprintf stderr "Done with level %d.\n" !level;
      level := W.length fact.word;
      T.stats();
462 463
      E.stats();
      Printf.fprintf stderr "Q stores %d facts.\n%!" (Q.cardinal q)
464
    end;
465
(*
466
    incr facts;
467
    Printf.fprintf stderr "Facts = %d, current length = %d\n%!"
468
      !facts ();
469
    Printf.fprintf stderr "New fact:\n";
470
    print_fact fact;
471
*)
472
    consequences fact
473
  end
474

475
let main =
476
  Lr1.iter init;
477
  Printf.fprintf stderr "Cumulated star size: %d\n%!" !stars;
478
  Q.repeat q discover;
479 480
  Time.tick "Running LRijkstra";
  T.stats();
481
  E.stats()
482

483 484
(* ------------------------------------------------------------------------ *)

485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
(* The following code validates the fact that an error can be triggered in
   state [s'] by beginning in the initial state [s] and reading the
   sequence of terminal symbols [w]. We use this for debugging purposes. *)

let fail msg =
  Printf.fprintf stderr "coverage: internal error: %s.\n%!" msg;
  false

open ReferenceInterpreter

let validate s s' w : bool =
  match
    ReferenceInterpreter.check_error_path (Lr1.nt_of_entry s) (W.elements w)
  with
  | OInputReadPastEnd ->
      fail "input was read past its end"
  | OInputNotFullyConsumed ->
      fail "input was not fully consumed"
  | OUnexpectedAccept ->
      fail "input was unexpectedly accepted"
  | OK state ->
      Lr1.Node.compare state s' = 0 ||
      fail (
        Printf.sprintf "error occurred in state %d instead of %d"
          (Lr1.number state)
          (Lr1.number s')
      )

(* ------------------------------------------------------------------------ *)

515 516 517 518 519 520 521
(* We now wish to determine, given a state [s'] and a terminal symbol [z], a
   minimal path that takes us from some entry state to state [s'] with [z] as
   the next (unconsumed) symbol. *)

(* This can be formulated as a search for a shortest path in a graph. The
   graph is not just the automaton, though. It is a (much) larger graph whose
   vertices are pairs [s, z] and whose edges are obtained by querying the
522
   module [E] above. *)
523

524
let forward () =
525

526
  let module A = Astar.Make(struct
527

528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
    (* A vertex is a pair [s, z].
       [z] cannot be the [error] token. *)
    type node =
        Lr1.node * Terminal.t

    let equal (s'1, z1) (s'2, z2) =
      Lr1.Node.compare s'1 s'2 = 0 && Terminal.compare z1 z2 = 0

    let hash (s, z) =
      Hashtbl.hash (Lr1.number s, z)

    (* An edge is labeled with a word. *)
    type label =
      W.word

    (* Forward search from every [s, z], where [s] is an initial state. *)
    let sources f =
      foreach_terminal (fun z ->
        ProductionMap.iter (fun _ s ->
          f (s, z)
        ) Lr1.entry
549 550
      )

551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
    let successors (s, z) edge =
      assert (not (Terminal.equal z Terminal.error));
      SymbolMap.iter (fun sym s' ->
        match sym with
        | Symbol.T t ->
            if Terminal.equal z t then
              let w = W.singleton t in
              foreach_terminal (fun z ->
                edge w 1 (s', z)
              )
        | Symbol.N nt ->
           foreach_terminal (fun z' ->
             E.query s nt z z' (fun w ->
               edge w (W.length w) (s', z')
             )
           )
      ) (Lr1.transitions s)

    let estimate _ =
      0

  end) in

  (* Search forward. *)

576
  Printf.fprintf stderr "Forward search:\n%!";
577
  let seen = ref Lr1.NodeSet.empty in
578
  let _, _ = A.search (fun ((s', z), path) ->
579 580 581 582 583 584 585 586 587 588 589 590 591 592
    if causes_an_error s' z && not (Lr1.NodeSet.mem s' !seen) then begin
      seen := Lr1.NodeSet.add s' !seen;
      (* An error can be triggered in state [s'] by beginning in the initial
         state [s] and reading the sequence of terminal symbols [w]. *)
      let (s, _), ws = A.reverse path in
      let w = List.fold_right W.append ws (W.singleton z) in
      Printf.fprintf stderr
        "An error can be reached from state %d to state %d:\n%!"
        (Lr1.number s)
        (Lr1.number s');
      Printf.fprintf stderr "%s\n%!" (W.print w);
      assert (validate s s' w)
    end
  ) in
593 594 595
  Printf.fprintf stderr "Reachable (forward): %d states\n%!"
    (Lr1.NodeSet.cardinal !seen);
  !seen
596

597 598
(* TEMPORARY what about the pseudo-token [#]? *)
(* TEMPORARY the code in this module should run only if --coverage is set *)
599 600

let () =
601 602
  let f = forward() in
  Time.tick "Forward search";
603
  ignore f