Commit f3bfeab2 authored by POTTIER Francois

Progress.

parent fa61ac2b
......@@ -413,19 +413,17 @@ let dfa (e : regexp) : dfa =
more expressions being identified, therefore smaller automata. *)
let module G = struct
type t = regexp
let foreach_successor e f =
let foreach_successor e yield =
if may_have_successors e then
(* The successors of [e] are its derivatives along every character
[a], provided they are nonempty. *)
Char.foreach (fun a ->
let e' = delta a e in
if nonempty e' then
f e'
if nonempty e' then yield e'
)
(* The single root is [e], if it is nonempty. *)
let foreach_root f =
if nonempty e then
f e
let foreach_root yield =
if nonempty e then yield e
end in
let module N = Number.ForHashedType(R)(G) in
(* We have [n] states which are mapped to nonempty expressions by [decode]. *)
......
......@@ -416,46 +416,52 @@ type `dfa` is easy.
let dfa (e : regexp) : dfa =
let module G = struct
type t = regexp
let foreach_successor e f =
let foreach_successor e yield =
Char.foreach (fun a ->
let e' = delta a e in
if nonempty e' then
f e'
if nonempty e' then yield e'
)
let foreach_root f =
if nonempty e then
f e
let foreach_root yield =
if nonempty e then yield e
end in
let module N = Number.ForHashedType(R)(G) in
let n, decode = N.n, N.decode in
let encode e : state option =
if nonempty e then Some (N.encode e) else None
in
let encode e = if nonempty e then Some (N.encode e) else None in
let init = encode e in
let transition q a = encode (delta a (decode q)) in
{ n; init; decode; transition }
```
In the above code, the module `G` is a description of the graph that I wish to
traverse.
The functor application `Number.ForHashedType(R)(G)` performs a
traversal of this graph and constructs a numbering `N` of its vertices.
(The module
[Number](https://gitlab.inria.fr/fpottier/fix/blob/master/src/Number.mli)
is part of
[fix](https://gitlab.inria.fr/fpottier/fix/).)
The module `N` thus obtained contains the number `n` of vertices that have
been discovered as well as two functions `encode: regexp -> int` and `decode:
int -> regexp` which record the correspondence between vertices and numbers.
In other words, these functions convert, both ways,
between regular expressions and state numbers. Without any effort,
I know, for each automaton state, which regular expression it stands for.
Neat!
<!-- TEMPORARY actually, if we are interested only in running the automaton up to
the first match, then a smaller graph suffices: a final state need have no successors.
If we are interested in finding all matches, then this graph is fine. -->
In the above code, the module `G` is
a description of the graph that I wish to traverse.
The module
[Number](https://gitlab.inria.fr/fpottier/fix/blob/master/src/Number.mli),
which is part of [fix](https://gitlab.inria.fr/fpottier/fix/),
helps explore this graph.
The functor application `Number.ForHashedType(R)(G)` performs a traversal of
the graph `G` and constructs a numbering `N` of its vertices.
The module `N` contains the following data:
```
val n: int
val encode: regexp -> int
val decode: int -> regexp
```
Here, `n` is the number of vertices that have been discovered. The functions
`encode` and `decode` record the correspondence between vertices and numbers.
In other words, since states are represented as numbers, `decode` has type
`state -> regexp` and maps a state to the nonempty expression that this state
stands for. Conversely, `encode` has type `regexp -> state` and maps a
nonempty expression to a state.
I extend `encode` to a function of type `regexp -> state option`, which can be
applied to a possibly empty expression: it returns `None` if the expression is
empty, and `Some` state otherwise. Once this is done, the initial state
`init` is obtained by transporting the expression `e` through the encoding,
while the transition function `transition` is obtained by transporting the
function `delta` through the encoding.
That's all!
<!-- TEMPORARY -->
<!-- show an example of searching for one word, KMP -->
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment