compressedBitSet.ml 4.29 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
(* A compressed (or should we say sparse?) bit set is a list of pairs
   of integers. The first component of every pair is an index, while
   the second component is a bit field. The list is sorted by order
   of increasing indices. *)

type t =
  | N
  | C of int * int * t

type element =
    int

let word_size =
  Sys.word_size - 1

let empty =
  N

let is_empty = function
  | N ->
      true
  | C _ ->
      false

let add i s = 
  let ioffset = i mod word_size in
  let iaddr = i - ioffset
  and imask = 1 lsl ioffset in
  let rec add = function
    | N ->
	(* Insert at end. *)
	C (iaddr, imask, N)
    | C (addr, ss, qs) as s ->
	if iaddr < addr then
	  (* Insert in front. *)
	  C (iaddr, imask, s)
	else if iaddr = addr then
	  (* Found appropriate cell, update bit field. *)
	  let ss' = ss lor imask in
	  if ss' = ss then
	    s
	  else
	    C (addr, ss', qs)
	else
	  (* Not there yet, continue. *)
	  let qs' = add qs in
	  if qs == qs' then
	    s
	  else
	    C (addr, ss, qs')
  in
  add s
    
let singleton i = 
   add i N

let remove i s = 
  let ioffset = i mod word_size in
  let iaddr = i - ioffset
  and imask = 1 lsl ioffset in
  let rec remove = function
    | N ->
	N
    | C (addr, ss, qs) as s ->
	if iaddr < addr then
	  s
	else if iaddr = addr then
	  (* Found appropriate cell, update bit field. *)
	  let ss' = ss land (lnot imask) in
	  if ss' = 0 then
	    qs
	  else if ss' = ss then
	    s
	  else
	    C (addr, ss', qs)
	else
	  (* Not there yet, continue. *)
	  let qs' = remove qs in
	  if qs == qs' then
	    s
	  else
	    C (addr, ss, qs')
  in
  remove s
    
let rec fold f s accu = 
  match s with
  | N ->
      accu
  | C (base, ss, qs) ->
91
      loop f qs base ss accu
92

93 94
and loop f qs i ss accu =
  if ss = 0 then
95 96
    fold f qs accu
  else
97 98 99 100
    (* One could in principle check whether [ss land 0x3] is zero and if
       so move to [i + 2] and [ss lsr 2], and similarly for various sizes.
       In practice, this does not seem to make a measurable difference. *)
    loop f qs (i + 1) (ss lsr 1) (if ss land 1 = 1 then f i accu else accu)
101 102 103 104

let iter f s =
  fold (fun x () -> f x) s ()

105 106 107 108 109 110 111 112 113 114
let is_singleton s =
  match s with
  | C (_, ss, N) ->
      (* Test whether only one bit is set in [ss]. We do this by turning
         off the rightmost bit, then comparing to zero. *)
      ss land (ss - 1) = 0
  | C (_, _, C _)
  | N ->
      false

115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
let cardinal s = 
  fold (fun _ m -> m + 1) s 0

let elements s =
  fold (fun tl hd -> tl :: hd) s []

let rec subset s1 s2 = 
  match s1, s2 with
  | N, _ ->
      true
  | _, N ->
      false
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> 
      if addr1 < addr2 then
	false
      else if addr1 = addr2 then
	if (ss1 land ss2) <> ss1 then 
	  false
	else
	  subset qs1 qs2
      else 
	subset s1 qs2

let mem i s =
  subset (singleton i) s

let rec union s1 s2 =  
  match s1, s2 with
  | N, s
  | s, N ->
      s
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
      if addr1 < addr2 then
	C (addr1, ss1, union qs1 s2)
      else if addr1 > addr2 then
	let s = union s1 qs2 in
	if s == qs2 then
	  s2
	else 
	  C (addr2, ss2, s)
      else 
	let ss = ss1 lor ss2 in
	let s = union qs1 qs2 in
	if ss == ss2 && s == qs2 then 
	  s2 
	else
	  C (addr1, ss, s)

let rec inter s1 s2 =  
  match s1, s2 with
  | N, _
  | _, N ->
      N
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
      if addr1 < addr2 then
	inter qs1 s2
      else if addr1 > addr2 then
	inter s1 qs2
      else 
	let ss = ss1 land ss2 in
	let s = inter qs1 qs2 in
	if ss = 0 then
	  s
	else
	  if (ss = ss1) && (s == qs1) then
	    s1
	  else
	    C (addr1, ss, s)

exception Found of int

let choose s = 
  try
    iter (fun x ->
      raise (Found x)
    ) s;
    raise Not_found
  with Found x ->
    x

let rec compare s1 s2 = 
  match s1, s2 with
      N, N ->  0
    | _, N ->  1
    | N, _ -> -1
    | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
	if addr1 < addr2 then -1
	else if addr1 > addr2 then 1
	else if ss1 < ss2 then -1
	else if ss1 > ss2 then 1
	else compare qs1 qs2

207
let equal s1 s2 =
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
  compare s1 s2 = 0

let rec disjoint s1 s2 = 
  match s1, s2 with
  | N, _
  | _, N ->
      true
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> 
      if addr1 = addr2 then
	if (ss1 land ss2) = 0 then 
	  disjoint qs1 qs2
	else 
	  false
      else if addr1 < addr2 then 
	disjoint qs1 s2
      else 
	disjoint s1 qs2