packedIntArray.ml 4.62 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
(* A packed integer array is represented as a pair of an integer [k] and
   a string [s]. The integer [k] is the number of bits per integer that we
   use. The string [s] is just an array of bits, which is read in 8-bit
   chunks. *)

(* The ocaml programming language treats string literals and array literals
   in slightly different ways: the former are statically allocated, while
   the latter are dynamically allocated. (This is rather arbitrary.) In the
   context of Menhir's table-based back-end, where compact, immutable
   integer arrays are needed, ocaml strings are preferable to ocaml arrays. *)

type t =
  int * string

(* The magnitude [k] of an integer [v] is the number of bits required
   to represent [v]. It is rounded up to the nearest power of two, so
   that [k] divides [Sys.word_size]. *)

let magnitude (v : int) =
  if v < 0 then
    Sys.word_size
  else
    let rec check k max = (* [max] equals [2^k] *)
      if (max <= 0) || (v < max) then
	k
	  (* if [max] just overflew, then [v] requires a full ocaml
	     integer, and [k] is the number of bits in an ocaml integer
	     plus one, that is, [Sys.word_size]. *)
      else
	check (2 * k) (max * max)
    in
    check 1 2

(* [pack a] turns an array of integers into a packed integer array. *)

(* Because the sign bit is the most significant bit, the magnitude of
   any negative number is the word size. In other words, [pack] does
   not achieve any space savings as soon as [a] contains any negative
   numbers, even if they are ``small''. *)

let pack (a : int array) : t =

  let m = Array.length a in
  
  (* Compute the maximum magnitude of the array elements. This tells
     us how many bits per element we are going to use. *)

  let k =
    Array.fold_left (fun k v ->
      max k (magnitude v)
    ) 1 a
  in

  (* Because access to ocaml strings is performed on an 8-bit basis,
     two cases arise. If [k] is less than 8, then we can pack multiple
     array entries into a single character. If [k] is greater than 8,
     then we must use multiple characters to represent a single array
     entry. *)

  if k <= 8 then begin

    (* [w] is the number of array entries that we pack in a character. *)

    assert (8 mod k = 0);
    let w = 8 / k in

    (* [n] is the length of the string that we allocate. *)

    let n =
      if m mod w = 0 then
	m / w
      else
	m / w + 1
    in

    let s =
      String.create n
    in

    (* Define a reader for the source array. The reader might run off
       the end if [w] does not divide [m]. *)

    let i = ref 0 in
    let next () =
      let ii = !i in
      if ii = m then
	0 (* ran off the end, pad with zeroes *)
      else
	let v = a.(ii) in
	i := ii + 1;
	v
    in

    (* Fill up the string. *)

    for j = 0 to n - 1 do
      let c = ref 0 in
      for x = 1 to w do
	c := (!c lsl k) lor next()
      done;
      s.[j] <- Char.chr !c
    done;

    (* Done. *)

    k, s

  end
  else begin (* k > 8 *)

    (* [w] is the number of characters that we use to encode an array entry. *)

    assert (k mod 8 = 0);
    let w = k / 8 in

    (* [n] is the length of the string that we allocate. *)

    let n =
      m * w
    in

    let s =
      String.create n
    in

    (* Fill up the string. *)

    for i = 0 to m - 1 do
      let v = ref a.(i) in
      for x = 1 to w do
	s.[(i + 1) * w - x] <- Char.chr (!v land 255);
	v := !v lsr 8
      done
    done;

    (* Done. *)

    k, s

  end

(* Access to a string. *)

let read (s : string) (i : int) : int =
  Char.code (String.unsafe_get s i)

(* [get1 t i] returns the integer stored in the packed array [t] at index [i].
   It assumes (and does not check) that the array's bit width is [1]. The
   parameter [t] is just a string. *)

let get1 (s : string) (i : int) : int =
  let c = read s (i lsr 3) in
  let c = c lsr ((lnot i) land 0b111) in
  let c = c land 0b1 in
  c

(* [get t i] returns the integer stored in the packed array [t] at index [i]. *)

(* Together, [pack] and [get] satisfy the following property: if the index [i]
   is within bounds, then [get (pack a) i] equals [a.(i)]. *)

let get ((k, s) : t) (i : int) : int =
  match k with
  | 1 ->
      get1 s i
  | 2 ->
      let c = read s (i lsr 2) in
      let c = c lsr (2 * ((lnot i) land 0b11)) in
      let c = c land 0b11 in
      c
  | 4 ->
      let c = read s (i lsr 1) in
      let c = c lsr (4 * ((lnot i) land 0b1)) in
      let c = c land 0b1111 in
      c
  | 8 ->
      read s i
  | 16 ->
      let j = 2 * i in
      (read s j) lsl 8 + read s (j + 1)
  | _ ->
      assert (k = 32); (* 64 bits unlikely, not supported *)
      let j = 4 * i in
      (((read s j lsl 8) + read s (j + 1)) lsl 8 + read s (j + 2)) lsl 8 + read s (j + 3)