derivation.ml 9.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
open Grammar

(* -------------------------------------------------------------------------- *)

(* This is a data structure for linear derivation trees. These are derivation
   trees that are list-like (that is, they do not branch), because a single
   path is of interest.

   A tree is either empty or formed of a non-terminal symbol at the root and a
   forest below the root.

   A forest is an ordered list of elements. However, its elements are not
   trees, as one would perhaps expect. Because we are interested in *linear*
   derivation trees, only one element of the forest receives focus and is a
   tree. All other elements remain un-expanded, so they are just symbols.

   In other words, a linear derivation tree is roughly just a list of levels,
   where each forest corresponds to one level. *)

type 'focus level = {
    prefix: Symbol.t list;
    focus: 'focus;
    suffix: Symbol.t list;
    comment: string
  }

type tree =
  | TEmpty
  | TRooted of Symbol.t * forest

and forest =
    tree level

(* We make use of contexts with a forest-shaped hole. We have tree contexts
   and forest contexts.

   Tree contexts do not have a case for holes, since we work with
   forest-shaped holes only. Forest contexts have one. *)

type ctree =
  | CRooted of Symbol.t * cforest

and cforest =
  | CHole
  | CCons of ctree level

(* Make a few types visible to clients. *)

type t =
    forest

type context =
    cforest

(* -------------------------------------------------------------------------- *)

(* Construction. *)

let rec array_to_list a i j =
  if i = j then
    []
  else
    a.(i) :: array_to_list a (i + 1) j

let empty =
  {
    prefix = [];
    focus = TEmpty;
    suffix = [];
    comment = ""
  }

let tail pos rhs =
  let length = Array.length rhs in
  assert (pos < length);
  {
    prefix = [];
    focus = TEmpty;
    suffix = array_to_list rhs pos length;
    comment = ""
  }

let build pos rhs forest comment =
  let length = Array.length rhs in
  assert (pos < length);
  match rhs.(pos) with
  | Symbol.T _ ->
      assert false
  | Symbol.N _ as symbol ->
      {
104 105 106 107
        prefix = [];
        focus = TRooted (symbol, forest);
        suffix = array_to_list rhs (pos + 1) length;
        comment = (match comment with None -> "" | Some comment -> comment)
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
      }

let prepend symbol forest =
  { forest with
    prefix = symbol :: forest.prefix }

(* -------------------------------------------------------------------------- *)

(* Display. *)

let buffer =
  Buffer.create 32768

let rec print_blank k =
  if k > 0 then begin
    Buffer.add_char buffer ' ';
    print_blank (k - 1)
  end

let print_symbol symbol =
  let word = Symbol.print symbol in
  Buffer.add_string buffer word;
  Buffer.add_char buffer ' ';
  String.length word + 1

let print_symbols symbols =
  List.fold_left (fun offset symbol ->
    offset + print_symbol symbol
  ) 0 symbols

let print_level print_focus_root print_focus_remainder offset forest =
  print_blank offset;
  let offset = offset + print_symbols forest.prefix in
  print_focus_root forest.focus;
  let (_ : int) = print_symbols forest.suffix in
  if String.length forest.comment > 0 then begin
    Buffer.add_string buffer "// ";
    Buffer.add_string buffer forest.comment
  end;
  Buffer.add_char buffer '\n';
  print_focus_remainder offset forest.focus

let print_tree_root = function
  | TEmpty ->
      Buffer.add_string buffer ". "
  | TRooted (symbol, _) ->
      let (_ : int) = print_symbol symbol in
      ()

let rec print_forest offset forest =
  print_level print_tree_root print_tree_remainder offset forest

and print_tree_remainder offset = function
  | TEmpty ->
      ()
  | TRooted (_, forest) ->
      print_forest offset forest

let print_ctree_root = function
  | CRooted (symbol, _) ->
      let (_ : int) = print_symbol symbol in
      ()

let rec print_cforest offset cforest =
  match cforest with
  | CHole ->
      print_blank offset;
      Buffer.add_string buffer "(?)\n"
  | CCons forest ->
      print_level print_ctree_root print_ctree_remainder offset forest

and print_ctree_remainder offset = function
  | CRooted (_, cforest) ->
      print_cforest offset cforest

let wrap print channel x =
  Buffer.clear buffer;
  print 0 x;
  Buffer.output_buffer channel buffer

let print =
  wrap print_forest

let printc =
  wrap print_cforest

(* -------------------------------------------------------------------------- *)

(* [punch] turns a (tree or forest) into a pair of a (tree or forest) context
   and a residual forest, where the context is chosen maximal. In other words,
   the residual forest consists of a single level -- its focus is [TEmpty]. *)

let rec punch_tree tree : (ctree * forest) option =
  match tree with
  | TEmpty ->
      None
  | TRooted (symbol, forest) ->
      let forest1, forest2 = punch_forest forest in
      Some (CRooted (symbol, forest1), forest2)

and punch_forest forest : cforest * forest =
  match punch_tree forest.focus with
  | None ->
      CHole, forest
  | Some (ctree1, forest2) ->
      CCons {
        prefix = forest.prefix;
        focus = ctree1;
        suffix = forest.suffix;
        comment = forest.comment
      }, forest2

(* [fill] fills a (tree or forest) context with a forest so as to produce
   a new (tree or forest). *)

let rec fill_tree ctree1 forest2 : tree =
  match ctree1 with
  | CRooted (symbol1, cforest1) ->
      TRooted (symbol1, fill_forest cforest1 forest2)

and fill_forest cforest1 forest2 : forest =
  match cforest1 with
  | CHole ->
      forest2
  | CCons level1 ->
      {
        prefix = level1.prefix;
        focus = fill_tree level1.focus forest2;
        suffix = level1.suffix;
        comment = level1.comment
      }

(* [common] factors the maximal common (tree or forest) context out of a pair
   of a (tree or forest) context and a (tree or forest). It returns the (tree
   or forest) context as well as the residuals of the two parameters. *)

let rec common_tree ctree1 tree2 : (ctree * cforest * forest) option =
  match ctree1, tree2 with
  | CRooted _, TEmpty ->
      None
  | CRooted (symbol1, cforest1), TRooted (symbol2, forest2) ->
      if Symbol.equal symbol1 symbol2 then
250 251 252 253
        let cforest, cforest1, forest2 =
          common_forest cforest1 forest2
        in
        Some (CRooted (symbol1, cforest), cforest1, forest2)
254
      else
255
        None
256 257 258 259 260 261 262 263 264 265

and common_forest cforest1 forest2 : cforest * cforest * forest =
  match cforest1 with
  | CHole ->
      CHole, cforest1, forest2
  | CCons forest1 ->
      if Symbol.lequal forest1.prefix forest2.prefix
      && Symbol.lequal forest1.suffix forest2.suffix
      && forest1.comment = forest2.comment
      then begin
266 267 268 269 270 271 272 273 274 275 276
        match common_tree forest1.focus forest2.focus with
        | None ->
            CHole, cforest1, forest2
        | Some (ctree, csubforest1, subforest2) ->
            let cforest = {
              prefix = forest1.prefix;
              focus = ctree;
              suffix = forest1.suffix;
              comment = forest1.comment
            } in
            CCons cforest, csubforest1, subforest2
277 278
      end
      else
279
        CHole, cforest1, forest2
280 281 282 283 284 285

(* [factor] factors the maximal common forest context out of a nonempty family
   of forests. We assume that the family is represented as a map indexed by
   items, because this is convenient for the application that we have in mind,
   but this assumption is really irrelevant. *)

286
let factor forests =
287 288 289 290 291
  match
    Item.Map.fold (fun item forest accu ->
      match accu with
      | None ->

292 293 294
          (* First time through the loop, so [forest] is the first forest
             that we examine. Punch it, so as to produce a maximal forest
             context and a residual forest. *)
295

296 297
          let context, residual = punch_forest forest in
          Some (context, Item.Map.singleton item residual)
298 299 300 301

      | Some (context, residuals) ->

          (* Another iteration through the loop. [context] and [residuals] are
302 303
             the maximal common context and the residuals of the forests
             examined so far. *)
304 305

          (* Combine the common context obtained so far with the forest at hand.
306 307
             This yields a new, smaller common context, as well as residuals for
             the previous common context and for the forest at hand. *)
308

309
          let context, contextr, forestr = common_forest context forest in
310

311
          (* The residual forests are now: (i) the residual forest [forestr];
312 313
             and (ii) the previous residual forests [residuals], each of which
             must be placed with the residual context [contextr]. *)
314 315

          let residuals =
316 317
            Item.Map.add item forestr (Item.Map.map (fill_forest contextr) residuals)
          in
318

319
          Some (context, residuals)
320 321 322 323 324 325 326 327

    ) forests None
  with
  | None ->
      assert false (* parameter [forests] was an empty map *)
  | Some (context, residuals) ->
      context, residuals