diff --git a/benchmark/src/parray_narytree.ml b/benchmark/src/parray_narytree.ml
index 2ac2833b1a7a0328b2f019c34715f2031a793a4f..1c2a345198fa2ce7dde746011a5e229fb052fdd9 100644
--- a/benchmark/src/parray_narytree.ml
+++ b/benchmark/src/parray_narytree.ml
@@ -1,3 +1,11 @@
+(*
+
+  NOTE: the file pdarray_narytree contains additional optimizations:
+  - packing of the two parameters (base and depth) on a single integer
+  - unrolling of the [get] function to avoid needless recursive calls
+
+*)
+
 let default_base = 5 (* chunk of size 32 *)
 
 let cast = Obj.magic
diff --git a/benchmark/src/pdarray_narytree.ml b/benchmark/src/pdarray_narytree.ml
index 6bfdab830ac6d5b9f2c44077dbb1a19f8a9a82ba..5bedaa117f51cf1b1e08ac35e86027400502cdeb 100644
--- a/benchmark/src/pdarray_narytree.ml
+++ b/benchmark/src/pdarray_narytree.ml
@@ -4,18 +4,44 @@ let cast = Obj.magic
 
 (** This data structure represents a tree of arity [2^base].
     Each node is represented as an array of elements or as an array of subtrees.
-    The array at the first level stores two extra cells at its front,
-    one for storing the [base], and one for storing the [depth].
+    The array at the first level stores one extra cell at its front,
+    used to represent the [base] (<=max_array_length=2^54) and
+    the [depth] (<=40, fits on 8 bits).
     Other cells store either elements or pointers to arrays, each of
     which stores elements or arrays of ...etc.
     All arrays in depth store between 1 and 2^base elements, inclusive.
-    The root array stores the base and the depth, then between 0 and 2^base elements. *)
+    The root array stores the base and the depth, then between 0 and 2^base elements.
+
+    The value [depth] used in the code is defined as:
+    - 0 if the structure is a plain array of elements
+    - 1 if the structure is an array of arrays, etc.
+    Beware that this might be shifted by one unit compared with other possible definitions of "depth".
+
+    Space usage bound: (probably missing a few "floor")
+      n + (n - 1) / (K - 1) + log_K(n) + 1
+
+    Simplified space bound for all practical purpose
+      (1 + 1/(K-1))*n + 10   where K=2^B is the chunk size
+
+*)
 
 type 'a t = 'a Darray.t  (* or ['a array array] or ['a array array array], etc, using Obj.magic casts *)
 
+let log_max_array_length = 54
+
+let int_of_params (base : int) (depth : int) : int =
+  assert (depth < 1 lsl 8); (* depth fits on 8 bits *)
+  (depth lsl log_max_array_length) + base  (* fits on a 63-bit signed integer representation *)
+
 let params (*[@inline]*) (s : 'a t) : int * int = (* returns [base] and [depth] *)
   let s = cast s in (* cast for reading parameters *)
-  Darray.get s 0, Darray.get s 1
+  let repr = Darray.get s 0 in
+  let depth = repr lsr log_max_array_length in
+  let base = repr land ((1 lsl log_max_array_length) - 1) in
+   (* base = repr - (depth lsl log_max_array_length) *)
+  (base, depth)
+
+let nb_cells_params = 1 (* number of array cells required for storing params *)
 
 let length (s : 'a t) : int =
   let b, d = params s in
@@ -31,7 +57,7 @@ let length (s : 'a t) : int =
       let items_in_last_subtree = aux (d - 1) subtree in
       items_in_full_subtrees + items_in_last_subtree
     end in
-  aux ~offset:2 d s
+  aux ~offset:nb_cells_params d s
 
 (** [get s i] returns the element at index [i].
     Requires [0 <= i < length t]. *)
@@ -40,7 +66,7 @@ let get (s : 'a t) (i : int) : 'a =
   let b, d = params s in
   (* optimizations *)
   if d = 0 then begin
-     Darray.get s (2 + i)
+     Darray.get s (nb_cells_params + i)
   end else if d = 1 then begin
 
     let i1 = i in
@@ -49,7 +75,7 @@ let get (s : 'a t) (i : int) : 'a =
     let w1 = b in
     let id1 = i1 lsr w1 in
     let i0 = i1 - id1 * (1 lsl w1) in
-    let s0 = Darray.get (cast s1) (2 + id1) in
+    let s0 = Darray.get (cast s1) (nb_cells_params + id1) in
 
     Darray.get s0 i0
 
@@ -61,7 +87,7 @@ let get (s : 'a t) (i : int) : 'a =
     let w2 = 2*b in
     let id2 = i2 lsr w2 in
     let i1 = i2 - id2 * (1 lsl w2) in
-    let s1 = Darray.get (cast s2) (2 + id2) in
+    let s1 = Darray.get (cast s2) (nb_cells_params + id2) in
 
     let w1 = b in
     let id1 = i1 lsr w1 in
@@ -78,7 +104,7 @@ let get (s : 'a t) (i : int) : 'a =
     let w3 = 3*b in
     let id3 = i3 lsr w3 in
     let i2 = i3 - id3 * (1 lsl w3) in
-    let s2 = Darray.get (cast s3) (2 + id3) in
+    let s2 = Darray.get (cast s3) (nb_cells_params + id3) in
 
     let w2 = 2*b in
     let id2 = i2 lsr w2 in
@@ -100,7 +126,39 @@ let get (s : 'a t) (i : int) : 'a =
     let w4 = 4*b in
     let id4 = i4 lsr w4 in
     let i3 = i4 - id4 * (1 lsl w4) in
-    let s3 = Darray.get (cast s4) (2 + id4) in
+    let s3 = Darray.get (cast s4) (nb_cells_params + id4) in
+
+    let w3 = 3*b in
+    let id3 = i3 lsr w3 in
+    let i2 = i3 - id3 * (1 lsl w3) in
+    let s2 = Darray.get (cast s3) id3 in
+
+    let w2 = 2*b in
+    let id2 = i2 lsr w2 in
+    let i1 = i2 - id2 * (1 lsl w2) in
+    let s1 = Darray.get (cast s2) id2 in
+
+    let w1 = b in
+    let id1 = i1 lsr w1 in
+    let i0 = i1 - id1 * (1 lsl w1) in
+    let s0 = Darray.get (cast s1) id1 in
+
+    Darray.get s0 i0
+
+  end else if d = 5 then begin
+
+    let i5 = i in
+    let s5 = s in
+
+    let w5 = 5*b in
+    let id5 = i5 lsr w5 in
+    let i4 = i5 - id5 * (1 lsl w5) in
+    let s4 = Darray.get (cast s5) (nb_cells_params + id5) in
+
+    let w4 = 4*b in
+    let id4 = i4 lsr w4 in
+    let i3 = i4 - id4 * (1 lsl w4) in
+    let s3 = Darray.get (cast s4) id4 in
 
     let w3 = 3*b in
     let id3 = i3 lsr w3 in
@@ -133,7 +191,7 @@ let get (s : 'a t) (i : int) : 'a =
       let subtree = Darray.get (cast s) (offset + id_subtree) in
       aux (d-1) subtree index_in_subtree
     end in
-  aux ~offset:2 d s i
+  aux ~offset:nb_cells_params d s i
 
 
 (** [set s i v] updates the element at index [i] with [v].
@@ -154,7 +212,7 @@ let set (s : 'a t) (i : int) (v : 'a) : 'a t =
       let updated_subtree = aux (d-1) subtree index_in_subtree (cast v) in
       Darray.set s (offset + id_subtree) (cast updated_subtree)
     end in
-  aux ~offset:2 d s i v
+  aux ~offset:nb_cells_params d s i v
 
 let depth_of_base_and_length (base:int) (n:int) : int =
   (* returns the least value of [d] such that [n <= 2^((d+1)*b)], that is, [d = log_(2*b)(n)-1].
@@ -174,7 +232,7 @@ let init ?(base:int = default_base) (n:int) (f: int -> 'a) : 'a t =
   (*Printf.printf "base=%d, depth=%d\n" b d;*)
   if d = 0 then begin
      (* assert (0 <= n && n <= 1 lsl base) *)
-     Darray.init (2 + n) (fun i ->
+     Darray.init (nb_cells_params + n) (fun i ->
        if i >= 2 then f i
        else if i = 0 then cast b
        else (* if i = 1 then *) cast d)
@@ -199,7 +257,7 @@ let init ?(base:int = default_base) (n:int) (f: int -> 'a) : 'a t =
           aux (cast f) (d - 1) (offset + i lsl w) size_of_subtree))
       end in
     let t = aux f d 0 n in
-    Darray.of_array_with_transfer (Array.append (cast [| b; d |]) (Darray.to_array_with_transfer t))
+    Darray.of_array_with_transfer (Array.append (cast [| int_of_params b d |]) (Darray.to_array_with_transfer t))
   end
 
 (** [iter f s] applies [f] to each of the elements from the sequence [s]. *)
@@ -218,7 +276,7 @@ let iter (type a) (f : a -> unit) (s : a t) : unit =
        done
     end
     in
-  aux f ~offset:2 d s
+  aux f ~offset:nb_cells_params d s
   (* note: if it wasn't for performance, the two for loops could be factorized with a if inside the body *)
 
 (** [print s] prints the sequence [s] for debugging purpose. *)
@@ -241,30 +299,29 @@ let _print (type a) (f:a -> unit) (s : a t) : unit =
     done;
     pr "]";
     in
-  aux f ~offset:2 d s;
+  aux f ~offset:nb_cells_params d s;
   pr "\n"
 
-
-(*
+ (*
 module Test = struct
 
-let _test () =
+let _test =
   let pr = Printf.printf in
   let print = _print (fun x -> pr "%d" x) in
 
- (*
+  let base = 2 in
+
   for n = 0 to 20 do
     pr "-----make %d:\n" n;
-    let t : int t = init ~base:1 n (fun i -> i)in
+    let t : int t = init ~base n (fun i -> i)in
     print t;
     pr "----->length:%d\n" (length t);
 
   done;
-   *)
 
   pr "-----make one:\n";
   let n = 30 in
-  let t : int t = init ~base:1 n (fun i -> i)in
+  let t : int t = init ~base n (fun i -> i)in
   print t;
 
   pr "-----length:%d\n" (length t);
@@ -290,7 +347,12 @@ let _test () =
   done;
   pr "\n"
 (**)
-
+*)
+(* Test using:
+ocamlc darray.mli
+ocamlc pdarray_narytree.mli
+ocamlc darray.ml pdarray_narytree.ml
+./a.out
+*)
 end
 
-*)
\ No newline at end of file