SparseBitSet.ml 5.78 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14
15
16
17
18
(* This data structure implements sets of integers (of unbounded magnitude). *)

(* A sparse bit set is a list of pairs of integers. The first component of
   every pair is an index, while the second component is a bit field. The list
   is sorted by order of increasing indices. *)
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

type t =
  | N
  | C of int * int * t

type element =
    int

let word_size =
  Sys.word_size - 1

let empty =
  N

let is_empty = function
  | N ->
      true
  | C _ ->
      false

POTTIER Francois's avatar
POTTIER Francois committed
39
let add i s =
40
41
42
43
44
  let ioffset = i mod word_size in
  let iaddr = i - ioffset
  and imask = 1 lsl ioffset in
  let rec add = function
    | N ->
45
46
        (* Insert at end. *)
        C (iaddr, imask, N)
47
    | C (addr, ss, qs) as s ->
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
        if iaddr < addr then
          (* Insert in front. *)
          C (iaddr, imask, s)
        else if iaddr = addr then
          (* Found appropriate cell, update bit field. *)
          let ss' = ss lor imask in
          if ss' = ss then
            s
          else
            C (addr, ss', qs)
        else
          (* Not there yet, continue. *)
          let qs' = add qs in
          if qs == qs' then
            s
          else
            C (addr, ss, qs')
65
66
  in
  add s
POTTIER Francois's avatar
POTTIER Francois committed
67
68

let singleton i =
69
70
   add i N

POTTIER Francois's avatar
POTTIER Francois committed
71
let remove i s =
72
73
74
75
76
  let ioffset = i mod word_size in
  let iaddr = i - ioffset
  and imask = 1 lsl ioffset in
  let rec remove = function
    | N ->
77
        N
78
    | C (addr, ss, qs) as s ->
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
        if iaddr < addr then
          s
        else if iaddr = addr then
          (* Found appropriate cell, update bit field. *)
          let ss' = ss land (lnot imask) in
          if ss' = 0 then
            qs
          else if ss' = ss then
            s
          else
            C (addr, ss', qs)
        else
          (* Not there yet, continue. *)
          let qs' = remove qs in
          if qs == qs' then
            s
          else
            C (addr, ss, qs')
97
98
  in
  remove s
POTTIER Francois's avatar
POTTIER Francois committed
99
100

let rec fold f s accu =
101
102
103
104
  match s with
  | N ->
      accu
  | C (base, ss, qs) ->
105
      loop f qs base ss accu
106

107
108
and loop f qs i ss accu =
  if ss = 0 then
109
110
    fold f qs accu
  else
111
112
113
114
    (* One could in principle check whether [ss land 0x3] is zero and if
       so move to [i + 2] and [ss lsr 2], and similarly for various sizes.
       In practice, this does not seem to make a measurable difference. *)
    loop f qs (i + 1) (ss lsr 1) (if ss land 1 = 1 then f i accu else accu)
115
116
117
118

let iter f s =
  fold (fun x () -> f x) s ()

119
120
121
122
123
124
125
126
127
128
let is_singleton s =
  match s with
  | C (_, ss, N) ->
      (* Test whether only one bit is set in [ss]. We do this by turning
         off the rightmost bit, then comparing to zero. *)
      ss land (ss - 1) = 0
  | C (_, _, C _)
  | N ->
      false

POTTIER Francois's avatar
POTTIER Francois committed
129
let cardinal s =
130
131
132
133
134
  fold (fun _ m -> m + 1) s 0

let elements s =
  fold (fun tl hd -> tl :: hd) s []

POTTIER Francois's avatar
POTTIER Francois committed
135
let rec subset s1 s2 =
136
137
138
139
140
  match s1, s2 with
  | N, _ ->
      true
  | _, N ->
      false
POTTIER Francois's avatar
POTTIER Francois committed
141
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
142
      if addr1 < addr2 then
143
        false
144
      else if addr1 = addr2 then
POTTIER Francois's avatar
POTTIER Francois committed
145
        if (ss1 land ss2) <> ss1 then
146
147
148
          false
        else
          subset qs1 qs2
POTTIER Francois's avatar
POTTIER Francois committed
149
      else
150
        subset s1 qs2
151
152
153
154

let mem i s =
  subset (singleton i) s

POTTIER Francois's avatar
POTTIER Francois committed
155
let rec union s1 s2 =
156
157
158
159
160
161
  match s1, s2 with
  | N, s
  | s, N ->
      s
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
      if addr1 < addr2 then
162
        C (addr1, ss1, union qs1 s2)
163
      else if addr1 > addr2 then
164
165
166
        let s = union s1 qs2 in
        if s == qs2 then
          s2
POTTIER Francois's avatar
POTTIER Francois committed
167
        else
168
          C (addr2, ss2, s)
POTTIER Francois's avatar
POTTIER Francois committed
169
      else
170
171
        let ss = ss1 lor ss2 in
        let s = union qs1 qs2 in
POTTIER Francois's avatar
POTTIER Francois committed
172
173
        if ss == ss2 && s == qs2 then
          s2
174
175
        else
          C (addr1, ss, s)
176

POTTIER Francois's avatar
POTTIER Francois committed
177
let rec inter s1 s2 =
178
179
180
181
182
183
  match s1, s2 with
  | N, _
  | _, N ->
      N
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
      if addr1 < addr2 then
184
        inter qs1 s2
185
      else if addr1 > addr2 then
186
        inter s1 qs2
POTTIER Francois's avatar
POTTIER Francois committed
187
      else
188
189
190
191
192
193
194
195
196
        let ss = ss1 land ss2 in
        let s = inter qs1 qs2 in
        if ss = 0 then
          s
        else
          if (ss = ss1) && (s == qs1) then
            s1
          else
            C (addr1, ss, s)
197
198
199

exception Found of int

POTTIER Francois's avatar
POTTIER Francois committed
200
let choose s =
201
202
203
204
205
206
207
208
  try
    iter (fun x ->
      raise (Found x)
    ) s;
    raise Not_found
  with Found x ->
    x

POTTIER Francois's avatar
POTTIER Francois committed
209
let rec compare s1 s2 =
210
211
212
213
214
  match s1, s2 with
      N, N ->  0
    | _, N ->  1
    | N, _ -> -1
    | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
215
216
217
218
219
        if addr1 < addr2 then -1
        else if addr1 > addr2 then 1
        else if ss1 < ss2 then -1
        else if ss1 > ss2 then 1
        else compare qs1 qs2
220

221
let equal s1 s2 =
222
223
  compare s1 s2 = 0

POTTIER Francois's avatar
POTTIER Francois committed
224
let rec disjoint s1 s2 =
225
226
227
228
  match s1, s2 with
  | N, _
  | _, N ->
      true
POTTIER Francois's avatar
POTTIER Francois committed
229
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
230
      if addr1 = addr2 then
POTTIER Francois's avatar
POTTIER Francois committed
231
        if (ss1 land ss2) = 0 then
232
          disjoint qs1 qs2
POTTIER Francois's avatar
POTTIER Francois committed
233
        else
234
          false
POTTIER Francois's avatar
POTTIER Francois committed
235
      else if addr1 < addr2 then
236
        disjoint qs1 s2
POTTIER Francois's avatar
POTTIER Francois committed
237
      else
238
        disjoint s1 qs2