Commit 27a0227f authored by POTTIER Francois
Browse files

Moved [Word], which is now a sub-module of [Grammar.Terminal].

parent cc07e0a0
open Grammar
module Q = LowIntegerPriorityQueue
module W = Terminal.Word(struct end)
(* Throughout, we ignore the [error] pseudo-token completely. We consider that
it never appears on the input stream. Hence, any state whose incoming
......@@ -102,49 +103,6 @@ let update add find none some key m f =
let data' = f none in
add key data' m
module W : sig
  (* The abstract type of words, that is, sequences of terminal symbols. *)
  type word
  (* The empty word. *)
  val epsilon: word
  (* [singleton t] is the one-symbol word made of [t]. *)
  val singleton: Terminal.t -> word
  (* [append w1 w2] is the concatenation of [w1] and [w2]. *)
  val append: word -> word -> word
  (* [length w] is the number of symbols in [w]. *)
  val length: word -> int
  (* [first w z] is the first symbol of [w] if [w] is nonempty,
     and [z] otherwise. *)
  val first: word -> Terminal.t -> Terminal.t
  (* [elements w] lists the symbols of [w], from first to last. *)
  val elements: word -> Terminal.t list
  (* [print w] renders the length of [w], followed by its symbols,
     separated by single spaces. *)
  val print: word -> string
end = struct

  (* A word is stored as a string, one character per terminal symbol, so
     every terminal index must be a valid character code. *)
  let () =
    assert (Terminal.n < 256)

  (* Encoding of a terminal symbol as a character, via its integer index. *)
  let t2c (t : Terminal.t) : char =
    Char.chr (Terminal.t2i t)

  (* Decoding of a character back to a terminal symbol. The cast through
     [Obj.magic] is sound only if [Terminal.t] is represented at runtime
     by the integer that [Terminal.t2i] returns. *)
  let c2t (c : char) : Terminal.t =
    Obj.magic (Char.code c)

  (* Every freshly built string goes through [intern]; [Misc.new_intern] is
     presumably a hash-consing facility (to be confirmed against [Misc]). *)
  let intern =
    Misc.new_intern 1023

  type word = string

  let epsilon = ""

  let singleton t =
    let c = t2c t in
    intern (String.make 1 c)

  let append w1 w2 =
    let w = w1 ^ w2 in
    intern w

  let length w = String.length w

  let first w z =
    if String.length w = 0 then z else c2t w.[0]

  (* The list is built from the last character down to the first, so that
     the result comes out in left-to-right order. *)
  let elements w =
    let rec gather i suffix =
      if i < 0 then suffix
      else gather (i - 1) (c2t w.[i] :: suffix)
    in
    gather (String.length w - 1) []

  let print w =
    let symbols = List.map Terminal.print (elements w) in
    Printf.sprintf "%d %s" (length w) (String.concat " " symbols)

end
module Trie = struct
let c = ref 0
......
......@@ -250,6 +250,75 @@ module Terminal = struct
with Not_found ->
None
(* The sub-module [Word] offers an implementation of words (that is,
sequences) of terminal symbols. It is used by [LRijkstra]. We
make it a functor, because it has internal state (a hash table)
and a side effect (failure if there are more than 256 terminal
symbols). *)
module Word (X : sig end) = struct

  (* Representation. Lists, or the constant-time-concatenation sequences of
     the module [Seq], would be natural candidates, but [LRijkstra] keeps
     tens of millions of words alive, so compactness comes first. A word is
     therefore a string (8 bits per symbol), and every string that is built
     here is hash-consed. In practice, hash-consing saves 1000x in space. *)

  (* The price to pay is a limit of 256 terminal symbols. This is acceptable
     for now, as [LRijkstra] already struggles at 100 symbols or so. *)

  (* Applying the functor fails if the grammar has too many terminal
     symbols for the one-character-per-symbol encoding. *)
  let () =
    let too_many = n > 256 in
    if too_many then
      Error.error [] (Printf.sprintf
        "the --coverage analysis supports at most 256 terminal symbols.\n\
The grammar has %d terminal symbols." n)

  (* The hash-consing function; this is the functor's internal state. *)
  let intern : string -> string =
    Misc.new_intern 1023

  type word =
    string

  let epsilon =
    ""

  (* TEMPORARY tabulate? *)
  let singleton t =
    let c = Char.chr t in
    intern (String.make 1 c)

  let append w1 w2 =
    let w = w1 ^ w2 in
    intern w

  let length w =
    String.length w

  (* [first w z] is the first symbol of [w], or [z] if [w] is empty. *)
  let first w z =
    if String.length w = 0 then z else Char.code w.[0]

  (* The symbols of [w], in order. The list is accumulated from the last
     character down to the first. *)
  let elements w =
    let rec gather i suffix =
      if i < 0 then suffix
      else gather (i - 1) (Char.code w.[i] :: suffix)
    in
    gather (String.length w - 1) []

  (* The length of [w], then each of its symbols; every item, including the
     last one, is followed by a single space. Inside this definition,
     [print] still denotes the symbol printer of the enclosing module,
     since the definition is not recursive. *)
  let print w =
    let b = Buffer.create 128 in
    Buffer.add_string b (string_of_int (length w)); (* TEMPORARY *)
    Buffer.add_char b ' ';
    String.iter (fun c ->
      Buffer.add_string b (print (Char.code c));
      Buffer.add_char b ' '
    ) w;
    Buffer.contents b

end
end
(* Sets of terminals are used intensively in the LR(1) construction,
......
......@@ -153,6 +153,26 @@ module Terminal : sig
val map: (t -> 'a) -> 'a list
val mapx: (t -> 'a) -> 'a list
(* The sub-module [Word] offers an implementation of words (that is,
sequences) of terminal symbols. It is used by [LRijkstra]. We
make it a functor, because it has internal state (a hash table)
and a side effect (failure if there are more than 256 terminal
symbols). *)
module Word (X : sig end) : sig
(* The abstract type of words, that is, sequences of terminal symbols. *)
type word
(* The empty word. *)
val epsilon: word
(* [singleton t] is the word whose only symbol is [t]. *)
val singleton: t -> word
(* [append w1 w2] is the concatenation of the words [w1] and [w2]. *)
val append: word -> word -> word
(* [length w] is the number of symbols in the word [w]. *)
val length: word -> int
(* [first w z] returns the first symbol of the word [w.z]. That is, it
returns the first symbol of [w] if [w] is nonempty, and [z] otherwise. *)
val first: word -> t -> t
(* [elements w] is the list of the symbols of [w], from first to last. *)
val elements: word -> t list
(* [print w] is a textual rendering of [w]: its length, followed by its
symbols. *)
val print: word -> string
end
end
(* ------------------------------------------------------------------------ *)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment