compressedBitSet.ml 3.86 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
(* A compressed (or should we say sparse?) bit set is a list of pairs
   of integers. The first component of every pair is an index, while
   the second component is a bit field. The list is sorted by order
   of increasing indices. *)

type t =
  | N
  | C of int * int * t

type element =
    int

let word_size =
  Sys.word_size - 1

let empty =
  N

let is_empty = function
  | N ->
      true
  | C _ ->
      false

let add i s = 
  let ioffset = i mod word_size in
  let iaddr = i - ioffset
  and imask = 1 lsl ioffset in
  let rec add = function
    | N ->
	(* Insert at end. *)
	C (iaddr, imask, N)
    | C (addr, ss, qs) as s ->
	if iaddr < addr then
	  (* Insert in front. *)
	  C (iaddr, imask, s)
	else if iaddr = addr then
	  (* Found appropriate cell, update bit field. *)
	  let ss' = ss lor imask in
	  if ss' = ss then
	    s
	  else
	    C (addr, ss', qs)
	else
	  (* Not there yet, continue. *)
	  let qs' = add qs in
	  if qs == qs' then
	    s
	  else
	    C (addr, ss, qs')
  in
  add s
    
let singleton i = 
   add i N

let remove i s = 
  let ioffset = i mod word_size in
  let iaddr = i - ioffset
  and imask = 1 lsl ioffset in
  let rec remove = function
    | N ->
	N
    | C (addr, ss, qs) as s ->
	if iaddr < addr then
	  s
	else if iaddr = addr then
	  (* Found appropriate cell, update bit field. *)
	  let ss' = ss land (lnot imask) in
	  if ss' = 0 then
	    qs
	  else if ss' = ss then
	    s
	  else
	    C (addr, ss', qs)
	else
	  (* Not there yet, continue. *)
	  let qs' = remove qs in
	  if qs == qs' then
	    s
	  else
	    C (addr, ss, qs')
  in
  remove s
    
let rec fold f s accu = 
  match s with
  | N ->
      accu
  | C (base, ss, qs) ->
      let limit = base + word_size in
      let rec loop i ss accu =
	if i = limit then
	  accu
	else
	  loop (i + 1) (ss lsr 1) (if ss land 1 = 1 then f i accu else accu)
      in
      fold f qs (loop base ss accu)

let iter f s =
  fold (fun x () -> f x) s ()

let cardinal s = 
  fold (fun _ m -> m + 1) s 0

let elements s =
  fold (fun tl hd -> tl :: hd) s []

let rec subset s1 s2 = 
  match s1, s2 with
  | N, _ ->
      true
  | _, N ->
      false
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> 
      if addr1 < addr2 then
	false
      else if addr1 = addr2 then
	if (ss1 land ss2) <> ss1 then 
	  false
	else
	  subset qs1 qs2
      else 
	subset s1 qs2

let mem i s =
  subset (singleton i) s

let rec union s1 s2 =  
  match s1, s2 with
  | N, s
  | s, N ->
      s
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
      if addr1 < addr2 then
	C (addr1, ss1, union qs1 s2)
      else if addr1 > addr2 then
	let s = union s1 qs2 in
	if s == qs2 then
	  s2
	else 
	  C (addr2, ss2, s)
      else 
	let ss = ss1 lor ss2 in
	let s = union qs1 qs2 in
	if ss == ss2 && s == qs2 then 
	  s2 
	else
	  C (addr1, ss, s)

let rec inter s1 s2 =  
  match s1, s2 with
  | N, _
  | _, N ->
      N
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
      if addr1 < addr2 then
	inter qs1 s2
      else if addr1 > addr2 then
	inter s1 qs2
      else 
	let ss = ss1 land ss2 in
	let s = inter qs1 qs2 in
	if ss = 0 then
	  s
	else
	  if (ss = ss1) && (s == qs1) then
	    s1
	  else
	    C (addr1, ss, s)

exception Found of int

let choose s = 
  try
    iter (fun x ->
      raise (Found x)
    ) s;
    raise Not_found
  with Found x ->
    x

let rec compare s1 s2 = 
  match s1, s2 with
      N, N ->  0
    | _, N ->  1
    | N, _ -> -1
    | C (addr1, ss1, qs1), C (addr2, ss2, qs2) ->
	if addr1 < addr2 then -1
	else if addr1 > addr2 then 1
	else if ss1 < ss2 then -1
	else if ss1 > ss2 then 1
	else compare qs1 qs2

195
let equal s1 s2 =
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
  compare s1 s2 = 0

let rec disjoint s1 s2 = 
  match s1, s2 with
  | N, _
  | _, N ->
      true
  | C (addr1, ss1, qs1), C (addr2, ss2, qs2) -> 
      if addr1 = addr2 then
	if (ss1 land ss2) = 0 then 
	  disjoint qs1 qs2
	else 
	  false
      else if addr1 < addr2 then 
	disjoint qs1 s2
      else 
	disjoint s1 qs2