Commit f27dddeb authored by POTTIER Francois's avatar POTTIER Francois Committed by POTTIER Francois
Browse files

New module [Fix.DataFlow], which implements a forward data flow analysis.

parent 4cec2717
# CHANGES
## 2020/02/XX
* New module `DataFlow`, which performs a forward data flow analysis over a
directed graph. (Such a computation could previously be performed by using
the generic solver `Fix.Make`, but it was somewhat awkward to write, as it
required access to predecessors. The new algorithm is easier to use and is
more efficient.)
## 2020/01/31
* In `Gensym`, new abstract type `generator`,
......
......@@ -103,3 +103,18 @@ release:
publish:
# Publish an opam description.
@ opam publish -v $(DATE) $(THIS) $(ARCHIVE) .
MENHIR_WORKING_COPY=$(HOME)/dev/menhir
.PHONY: menhir
menhir:
# Check if this is the menhir branch.
@ if [ "$$(git symbolic-ref --short HEAD)" != "menhir" ] ; then \
echo "Error: this is not the menhir branch." ; \
git branch ; \
exit 1 ; \
fi
# Copy our source files to the Menhir repository.
@ (cd .. && cp -r fix $(MENHIR_WORKING_COPY))
# Remove a number of unneeded subdirectories.
@ (cd $(MENHIR_WORKING_COPY)/fix && rm -rf .git demos misc)
......@@ -12,6 +12,12 @@ Type `opam install fix`.
At the top of an OCaml module, declare `open Fix`.
This gives you access to the following submodules:
* [`DataFlow`](src/DataFlow.ml) performs a forward data flow analysis
over a directed graph. Like [`Fix`](src/Core.mli), it computes the
least function of type `variable -> property` that satisfies a fixed
point equation. It is less widely applicable than `Fix`, but, when
it is applicable, it is easier to use and more efficient than `Fix`.
* [`Gensym`](src/Gensym.mli) offers a simple facility
for **generating fresh integer identifiers**.
......
(lang dune 2.0)
(context (opam (switch 4.02.3)))
(context (opam (switch 4.03.0)))
(context (opam (switch 4.04.2)))
(context (opam (switch 4.05.0)))
......
......@@ -11,7 +11,7 @@ build: [
["dune" "build" "-p" name "-j" jobs]
]
depends: [
"ocaml" { >= "4.03" }
"dune" { build & >= "1.3" }
"ocaml" { >= "4.02.3" }
"dune" { >= "1.3" }
]
synopsis: "Facilities for memoization and fixed points"
......@@ -46,6 +46,9 @@ type equations =
[node2]. Then, an update of the current property at [node2] causes a signal
to be sent to [node1]. A node can observe itself. *)
module Graph =
CoreDependencyGraph
type node =
data Graph.node
......
(******************************************************************************)
(* *)
(* Fix *)
(* *)
(* François Pottier, Inria Paris *)
(* *)
(* Copyright Inria. All rights reserved. This file is distributed under the *)
(* terms of the GNU Library General Public License version 2, with a *)
(* special exception on linking, as described in the file LICENSE. *)
(* *)
(******************************************************************************)
(* Using doubly-linked adjacency lists, one could implement [predecessors] in
worst-case linear time with respect to the length of the output list,
[set_successors] in worst-case linear time with respect to the length of
the input list, and [clear_successors] in worst-case linear time with
respect to the number of edges that are removed. We use a simpler
implementation, based on singly-linked adjacency lists, with deferred
removal of edges. It achieves the same complexity bounds, except
[predecessors] only offers an amortized complexity bound. This is good
enough for our purposes, and, in practice, is more efficient by a constant
factor. This simplification was suggested by Arthur Charguéraud. *)
(* -------------------------------------------------------------------------- *)
(* Nodes and edges. *)
type 'data node = {
(* The client information associated with this node. *)
data: 'data;
(* This node's incoming and outgoing edges. *)
mutable outgoing: 'data edge list;
mutable incoming: 'data edge list;
(* A transient mark, always set to [false], except when checking
against duplicate elements in a successor list. *)
mutable marked: bool;
}
and 'data edge = {
(* This edge's nodes. Edges are symmetric: source and destination are not
distinguished. Thus, an edge appears both in the outgoing edge list of
its source node and in the incoming edge list of its destination node.
This allows edges to be easily marked as destroyed. *)
node1: 'data node;
node2: 'data node;
(* Edges that are destroyed are marked as such, but are not immediately
removed from the adjacency lists. *)
mutable destroyed: bool;
}
(* -------------------------------------------------------------------------- *)
(* Node creation. *)
let create data = {
data = data;
outgoing = [];
incoming = [];
marked = false;
}
(* Data access. *)
let data node =
node.data
(* [follow src edge] returns the node that is connected to [src] by [edge].
Time complexity: constant. *)
let follow src edge =
if edge.node1 == src then
edge.node2
else begin
assert (edge.node2 == src);
edge.node1
end
(* The [predecessors] function removes edges that have been marked
destroyed. The cost of removing these has already been paid for,
so the amortized time complexity of [predecessors] is linear in
the length of the output list. *)
let predecessors (node : 'data node) : 'data node list =
let predecessors =
List.filter (fun edge -> not edge.destroyed) node.incoming
in
node.incoming <- predecessors;
List.map (follow node) predecessors
(* [link src dst] creates a new edge from [src] to [dst], together
with its reverse edge. Time complexity: constant. *)
let link (src : 'data node) (dst : 'data node) =
let edge = {
node1 = src;
node2 = dst;
destroyed = false;
} in
src.outgoing <- edge :: src.outgoing;
dst.incoming <- edge :: dst.incoming
let set_successors (src : 'data node) (dsts : 'data node list) =
assert (src.outgoing = []);
let rec loop = function
| [] ->
()
| dst :: dsts ->
if dst.marked then
loop dsts (* skip duplicate elements *)
else begin
dst.marked <- true;
link src dst;
loop dsts;
dst.marked <- false
end
in
loop dsts
let clear_successors node =
List.iter (fun edge ->
assert (not edge.destroyed);
edge.destroyed <- true;
) node.outgoing;
node.outgoing <- []
(******************************************************************************)
(* *)
(* Fix *)
(* *)
(* François Pottier, Inria Paris *)
(* *)
(* Copyright Inria. All rights reserved. This file is distributed under the *)
(* terms of the GNU Library General Public License version 2, with a *)
(* special exception on linking, as described in the file LICENSE. *)
(* *)
(******************************************************************************)
(* This module provides a data structure for maintaining and modifying
a directed graph. Each node is allowed to carry a piece of client
data. There are functions for creating a new node, looking up a
node's data, looking up a node's predecessors, and setting or
clearing a node's successors (all at once). *)
type 'data node
(* [create data] creates a new node, with no incident edges, with
client information [data]. Time complexity: constant. *)
val create: 'data -> 'data node
(* [data node] returns the client information associated with
the node [node]. Time complexity: constant. *)
val data: 'data node -> 'data
(* [predecessors node] returns a list of [node]'s predecessors.
Amortized time complexity: linear in the length of the output list. *)
val predecessors: 'data node -> 'data node list
(* [set_successors src dsts] creates an edge from the node [src] to
each of the nodes in the list [dsts]. Duplicate elements in the
list [dsts] are removed, so that no duplicate edges are created. It
is assumed that [src] initially has no successors. Time complexity:
linear in the length of the input list. *)
val set_successors: 'data node -> 'data node list -> unit
(* [clear_successors node] removes all of [node]'s outgoing edges.
Time complexity: linear in the number of edges that are removed. *)
val clear_successors: 'data node -> unit
(******************************************************************************)
(* *)
(* Fix *)
(* *)
(* François Pottier, Inria Paris *)
(* *)
(* Copyright Inria. All rights reserved. This file is distributed under the *)
(* terms of the GNU Library General Public License version 2, with a *)
(* special exception on linking, as described in the file LICENSE. *)
(* *)
(******************************************************************************)
open Sigs
(* Such a data flow analysis problem could also be solved by the generic least
fixed point computation algorithm [Fix.Make.lfp]. However, such an approach
would be less efficient, as (1) it would require reversing the graph first,
so to have access to predecessors; (2) whenever a dirty node is examined,
the contributions of all of its predecessors would be recomputed and
joined, whereas the forward data flow analysis algorithm pushes information
from a dirty node to its successors, thereby avoiding recomputation along
edges whose source is not dirty; (3) the generic algorithm performs dynamic
discovery of dependencies, whereas in this situation, all dependencies are
explicitly provided by the user. *)
module Run
(M : MINIMAL_IMPERATIVE_MAPS)
(P : SEMI_LATTICE)
(G : DATA_FLOW_GRAPH with type variable = M.key and type property = P.property)
= struct
open P
type variable = M.key
(* A mapping of variables to properties. This mapping is initially empty. *)
let properties =
M.create()
(* A set of dirty variables, whose outgoing transitions must be examined. *)
(* The set of dirty variables is represented as a combination of a stack and
a map of variables to Booleans. This map keeps track of which variables
are in the stack and allows us to avoid pushing a variable onto the stack
when it is already in the stack. (In principle, a map of variables to
[unit] should suffice, but our minimal map API does not offer a [remove]
function. Thus, we have to use a map of variables to Booleans.) *)
let pending : variable Stack.t =
Stack.create()
let dirty : bool M.t =
M.create()
let is_dirty (x : variable) =
try M.find x dirty with Not_found -> false
let schedule (x : variable) =
if not (is_dirty x) then begin
M.add x true dirty;
Stack.push x pending
end
(* [update x' p'] ensures that the property associated with the variable [x']
is at least [p']. If this causes a change in the property at [x'], then
[x] is scheduled or rescheduled. *)
let update (x' : variable) (p' : property) =
match M.find x' properties with
| exception Not_found ->
(* [x'] is newly discovered. *)
M.add x' p' properties;
schedule x'
| p ->
(* [x'] has been discovered earlier. *)
if not (P.leq p' p) then begin
(* [x'] is affected by this update and must itself be scheduled. *)
M.add x' (P.join p' p) properties;
schedule x'
end
(* [examine] examines a variable that has just been taken out of the stack.
Its outgoing transitions are inspected and its successors are updated. *)
let examine (x : variable) =
(* [x] is dirty, so a property must have been associated with it. *)
let p = try M.find x properties with Not_found -> assert false in
G.foreach_successor x p update
(* Populate the stack with the root variables. *)
let () =
G.foreach_root (fun x p ->
M.add x p properties;
schedule x
)
(* As long as the stack is nonempty, pop a variable and examine it. *)
let () =
try
while true do
let x = Stack.pop pending in
M.add x false dirty;
examine x
done
with Stack.Empty ->
()
(* Expose the solution. *)
type property = P.property option
let solution x =
try
Some (M.find x properties)
with Not_found ->
None
end
module ForOrderedType (T : OrderedType) =
Run(Glue.PersistentMapsToImperativeMaps(Map.Make(T)))
module ForHashedType (T : HashedType) =
Run(Glue.HashTablesAsImperativeMaps(T))
module ForType (T : TYPE) =
ForHashedType(Glue.TrivialHashedType(T))
module ForIntSegment (K : sig val n: int end) =
Run(Glue.ArraysAsImperativeMaps(K))
(******************************************************************************)
(* *)
(* Fix *)
(* *)
(* François Pottier, Inria Paris *)
(* *)
(* Copyright Inria. All rights reserved. This file is distributed under the *)
(* terms of the GNU Library General Public License version 2, with a *)
(* special exception on linking, as described in the file LICENSE. *)
(* *)
(******************************************************************************)
open Sigs
(* [Run] requires a type [variable] that is equipped with an implementation of
imperative maps, a type [property] that is equipped with [leq] and [join]
functions, and a data flow graph whose edges describe the propagation of
properties. It performs a forward data flow analysis and returns its
result. *)
(* The function [solution] has type [variable -> property option]. A reachable
variable is mapped to [Some _]; an unreachable one is mapped to [None]. *)
module Run
(M : MINIMAL_IMPERATIVE_MAPS)
(P : SEMI_LATTICE)
(G : DATA_FLOW_GRAPH with type variable = M.key and type property = P.property)
: SOLUTION
with type variable = G.variable
and type property = P.property option
(* [ForOrderedType] is a special case of [Run] where it
suffices to pass an ordered type [T] as an argument.
A reference to a persistent map is used to hold the
memoization table. *)
module ForOrderedType
(T : OrderedType)
(P : SEMI_LATTICE)
(G : DATA_FLOW_GRAPH with type variable = T.t and type property = P.property)
: SOLUTION
with type variable = G.variable
and type property = P.property option
(* [ForHashedType] is a special case of [Run] where it
suffices to pass a hashed type [T] as an argument. A
hash table is used to hold the memoization table. *)
module ForHashedType
(T : HashedType)
(P : SEMI_LATTICE)
(G : DATA_FLOW_GRAPH with type variable = T.t and type property = P.property)
: SOLUTION
with type variable = G.variable
and type property = P.property option
(* [ForType] is a special case of [Run] where it suffices
to pass an arbitrary type [T] as an argument. A hash table
is used to hold the memoization table. OCaml's built-in
generic equality and hash functions are used. *)
module ForType
(T : TYPE)
(P : SEMI_LATTICE)
(G : DATA_FLOW_GRAPH with type variable = T.t and type property = P.property)
: SOLUTION
with type variable = G.variable
and type property = P.property option
(* [ForIntSegment] is a special case of [Run] where the type of variables
is the integer segment [0..n). An array is used to hold the table. *)
module ForIntSegment
(K : sig val n: int end)
(P : SEMI_LATTICE)
(G : DATA_FLOW_GRAPH with type variable = int and type property = P.property)
: SOLUTION
with type variable = G.variable
and type property = P.property option
......@@ -30,6 +30,7 @@ module GraphNumbering = GraphNumbering
module Tabulate = Tabulate
module Gensym = Gensym
module HashCons = HashCons
module DataFlow = DataFlow
module Prop = struct
(* A number of ready-made implementations of the signature [PROPERTY]. *)
......
......@@ -82,7 +82,8 @@ end
(* -------------------------------------------------------------------------- *)
(* Properties. *)
(* The signature [PROPERTY] is used by [Fix.Make], the least fixed point
computation algorithm. *)
(* The type [property] must form a partial order. It must be equipped with a
least element [bottom] and with an equality test [equal]. (In the function
......@@ -107,6 +108,16 @@ end
(* -------------------------------------------------------------------------- *)
(* The signature [SEMI_LATTICE] is used by [Fix.DataFlow]. *)
module type SEMI_LATTICE = sig
type property
val leq: property -> property -> bool
val join: property -> property -> property
end
(* -------------------------------------------------------------------------- *)
(* Memoizers -- higher-order functions that construct memoizing functions. *)
module type MEMOIZER = sig
......@@ -191,6 +202,16 @@ end
(* -------------------------------------------------------------------------- *)
(* The signature [SOLUTION] is used to describe the result of [Fix.DataFlow]. *)
module type SOLUTION = sig
type variable
type property
val solution: variable -> property
end
(* -------------------------------------------------------------------------- *)
(* Directed, rooted graphs. *)
module type GRAPH = sig
......@@ -201,6 +222,28 @@ end
(* -------------------------------------------------------------------------- *)
(* The signature [DATA_FLOW_GRAPH] is used to describe a data flow analysis
problem. It is used to describe the input to [Fix.DataFlow]. *)
(* The function [foreach_root] describes the root nodes of the data flow graph
as well as the properties associated with them. *)
(* The function [foreach_successor] describes the edges of the data flow graph
as well as the manner in which a property at the source of an edge is
transformed into a property at the target. *)
module type DATA_FLOW_GRAPH = sig
type variable
type property
val foreach_root:
(variable -> property -> unit) -> unit
val foreach_successor:
variable -> property ->
(variable -> property -> unit) -> unit
end
(* -------------------------------------------------------------------------- *)
(* Numberings. *)
(* An ongoing numbering of (a subset of) a type [t] offers a function [encode]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment