compact.ml 7.13 KB
Newer Older
charguer's avatar
charguer committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the Q Public License version 1.0.               *)
(*                                                                     *)
(***********************************************************************)

(* $Id: compact.ml 9547 2010-01-22 12:48:24Z doligez $ *)

(* Compaction of an automata *)

open Lexgen

(* Code for memory actions  *)
let code = Table.create 0

(* instructions are 2 8-bits integers, a 0xff byte means return *)

let emit_int i = Table.emit code i

let ins_mem i c =  match i with
  | Copy (dst, src) -> dst::src::c
  | Set dst         -> dst::0xff::c


let ins_tag i c = match i with
  | SetTag (dst, src) -> dst::src::c
  | EraseTag dst      -> dst::0xff::c


let do_emit_code c =
  let r = Table.size code in
  List.iter emit_int c ;
  emit_int 0xff ;
  r

let memory = Hashtbl.create 101

let mem_emit_code c =
  try Hashtbl.find memory c with
  | Not_found ->
      let r = do_emit_code c in
      Hashtbl.add memory c r ;
      r

(* Code address 0 is the empty code (ie do nothing) *)
let _ = mem_emit_code []

let emit_tag_code c = mem_emit_code (List.fold_right ins_tag c [])
and emit_mem_code c  =mem_emit_code (List.fold_right ins_mem c [])

(*******************************************)
(* Compact the transition and check arrays *)
(*******************************************)


(* Determine the integer occurring most frequently in an array *)

let most_frequent_elt v =
  let frequencies = Hashtbl.create 17 in
  let max_freq = ref 0 in
  let most_freq = ref (v.(0)) in
  for i = 0 to Array.length v - 1 do
    let e = v.(i) in
    let r =
      try
        Hashtbl.find frequencies e
      with Not_found ->
        let r = ref 1 in Hashtbl.add frequencies e r; r in
    incr r;
    if !r > !max_freq then begin max_freq := !r; most_freq := e end
  done;
  !most_freq

(* Transform an array into a list of (position, non-default element) *)

let non_default_elements def v =
  let rec nondef i =
    if i >= Array.length v then [] else begin
      let e = v.(i) in
      if e = def then nondef(i+1) else (i, e) :: nondef(i+1)
    end in
  nondef 0


type t_compact =
 {mutable c_trans : int array ;
  mutable c_check : int array ;
  mutable c_last_used : int ; }

let create_compact () =
97 98
  { c_trans = Array.make 1024 0 ;
    c_check = Array.make 1024 (-1) ;
charguer's avatar
charguer committed
99 100 101
    c_last_used = 0 ; }

let reset_compact c =
102 103
  c.c_trans <- Array.make 1024 0 ;
  c.c_check <- Array.make 1024 (-1) ;
charguer's avatar
charguer committed
104 105 106 107 108 109 110 111 112 113 114
  c.c_last_used <- 0

(* One compacted table for transitions, one other for memory actions *)
let trans = create_compact ()
and moves = create_compact ()


let grow_compact c =
  let old_trans = c.c_trans
  and old_check = c.c_check in
  let n = Array.length old_trans in
115
  c.c_trans <- Array.make (2*n) 0;
charguer's avatar
charguer committed
116
  Array.blit old_trans 0 c.c_trans 0 c.c_last_used;
117
  c.c_check <- Array.make (2*n) (-1);
charguer's avatar
charguer committed
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
  Array.blit old_check 0 c.c_check 0 c.c_last_used

let do_pack state_num orig compact =
  let default = most_frequent_elt orig in
  let nondef = non_default_elements default orig in
  let rec pack_from b =
    while
      b + 257 > Array.length compact.c_trans
    do
      grow_compact compact
    done;
    let rec try_pack = function
      [] -> b
    | (pos, v) :: rem ->
        if compact.c_check.(b + pos) = -1 then
          try_pack rem
        else pack_from (b+1) in
    try_pack nondef in
  let base = pack_from 0 in
  List.iter
    (fun (pos, v) ->
      compact.c_trans.(base + pos) <- v;
      compact.c_check.(base + pos) <- state_num)
    nondef;
  if base + 257 > compact.c_last_used then
    compact.c_last_used <- base + 257;
  (base, default)

let pack_moves state_num move_t =
147 148
  let move_v = Array.make 257 0
  and move_m = Array.make 257 0 in
charguer's avatar
charguer committed
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
  for i = 0 to 256 do
    let act,c = move_t.(i) in
    move_v.(i) <- (match act with Backtrack -> -1 | Goto n -> n) ;
    move_m.(i) <- emit_mem_code c
  done ;
  let pk_trans = do_pack state_num move_v trans
  and pk_moves = do_pack state_num move_m moves in
  pk_trans, pk_moves


(* Build the tables *)

type lex_tables =
  { tbl_base: int array;                 (* Perform / Shift *)
    tbl_backtrk: int array;              (* No_remember / Remember *)
    tbl_default: int array;              (* Default transition *)
    tbl_trans: int array;                (* Transitions (compacted) *)
    tbl_check: int array;                (* Check (compacted) *)
(* code addresses are managed in a similar fashion as transitions *)
    tbl_base_code : int array;           (* code ptr / base for Shift *)
    tbl_backtrk_code : int array;        (* nothing / code when Remember *)
(* moves to execute before transitions (compacted) *)
    tbl_default_code : int array;
    tbl_trans_code : int array;
    tbl_check_code : int array;
(* byte code itself *)
    tbl_code: int array;}


let compact_tables state_v =
  let n = Array.length state_v in
180 181 182 183 184 185
  let base = Array.make n 0
  and backtrk = Array.make n (-1)
  and default = Array.make n 0
  and base_code = Array.make n 0
  and backtrk_code = Array.make n 0
  and default_code = Array.make n 0 in
charguer's avatar
charguer committed
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
  for i = 0 to n - 1 do
    match state_v.(i) with
    | Perform (n,c) ->
        base.(i) <- -(n+1) ;
        base_code.(i) <- emit_tag_code c
    | Shift(trans, move) ->
        begin match trans with
        | No_remember -> ()
        | Remember (n,c) ->
            backtrk.(i) <- n ;
            backtrk_code.(i) <- emit_tag_code c
        end;
        let (b_trans, d_trans),(b_moves,d_moves) = pack_moves i move in
        base.(i) <- b_trans; default.(i) <- d_trans ;
        base_code.(i) <- b_moves; default_code.(i) <- d_moves ;
  done;
  let code = Table.trim code in
  let tables =
    if Array.length code > 1 then
      { tbl_base = base;
        tbl_backtrk = backtrk;
        tbl_default = default;
        tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
        tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
        tbl_base_code = base_code ;
        tbl_backtrk_code = backtrk_code;
        tbl_default_code = default_code;
        tbl_trans_code = Array.sub moves.c_trans 0 moves.c_last_used;
        tbl_check_code = Array.sub moves.c_check 0 moves.c_last_used;
        tbl_code = code}
    else (* when no memory moves, do not emit related tables *)
       { tbl_base = base;
        tbl_backtrk = backtrk;
        tbl_default = default;
        tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
        tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
        tbl_base_code = [||] ;
        tbl_backtrk_code = [||];
        tbl_default_code = [||];
        tbl_trans_code = [||];
        tbl_check_code = [||];
        tbl_code = [||]}
  in
  reset_compact trans ;
  reset_compact moves ;
  tables