grew_graph.ml 37.9 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11 12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17 18
open Grew_types

pj2m's avatar
pj2m committed
19 20 21 22
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
23
(* ================================================================================ *)
24
module P_deco = struct
bguillaum's avatar
bguillaum committed
25 26 27 28
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
29 30

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
31
end (* module P_deco *)
pj2m's avatar
pj2m committed
32

bguillaum's avatar
bguillaum committed
33
(* ================================================================================ *)
34 35
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
36

37
  let empty = Pid_map.empty
38

39
  let find = Pid_map.find
pj2m's avatar
pj2m committed
40

41
  let pid_name_list t = Pid_map.fold (fun _ node acc -> (P_node.get_name node)::acc) t []
42

43 44 45 46 47 48 49 50 51 52 53
  let to_json ?domain t =
    `List (
      Pid_map.fold
        (fun pid p_node acc ->
          (`Assoc [
            ("id", `String (Pid.to_string pid));
            ("node", P_node.to_json ?domain p_node)
          ]) :: acc
        ) t []
      )

54
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
55
  let map_add_edge map id_src label id_tar =
56
    let node_src =
pj2m's avatar
pj2m committed
57
      (* Not found can be raised when adding an edge from pos to neg *)
58 59
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
60 61
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
62

bguillaum's avatar
bguillaum committed
63
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
64
  let build_filter ?domain table (ast_node, loc) =
65
    let pid = Id.build ~loc ast_node.Ast.node_id table in
bguillaum's avatar
bguillaum committed
66
    let fs = P_fs.build ?domain ast_node.Ast.fs in
67 68
    (pid, fs)

bguillaum's avatar
bguillaum committed
69
  (* -------------------------------------------------------------------------------- *)
Bruno Guillaume's avatar
Bruno Guillaume committed
70
  let build ?domain ?pat_vars (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
71

72
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
73
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
74
    let rec insert (ast_node, loc) = function
bguillaum's avatar
bguillaum committed
75
      | [] -> [P_node.build ?domain ?pat_vars (ast_node, loc)]
76
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
77
        begin
bguillaum's avatar
bguillaum committed
78
          try (node_id, P_node.unif_fs (P_fs.build ?domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
79 80
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
81
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
82

83
    let (named_nodes : (Id.name * P_node.t) list) =
84
      List.fold_left
85 86
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
87

pj2m's avatar
pj2m committed
88 89 90
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

91 92
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
93 94

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
95 96
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
97
      Pid_map.empty node_list in
98

bguillaum's avatar
bguillaum committed
99
    let (map : t) =
pj2m's avatar
pj2m committed
100
      List.fold_left
bguillaum's avatar
bguillaum committed
101 102 103
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
bguillaum's avatar
bguillaum committed
104
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
105 106 107
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
108
              (P_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
109
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
110 111
          )
        ) map_without_edges full_edge_list in
112
    (map, pos_table)
bguillaum's avatar
bguillaum committed
113

114

bguillaum's avatar
bguillaum committed
115
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
116 117
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
118
  type extension = {
bguillaum's avatar
bguillaum committed
119 120 121
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
122

bguillaum's avatar
bguillaum committed
123
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
124
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
Bruno Guillaume's avatar
Bruno Guillaume committed
125
  let build_extension ?domain ?pat_vars pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
126

bguillaum's avatar
bguillaum committed
127
    let built_nodes = List.map (P_node.build ?domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
128

129 130
    let (old_nodes, new_nodes) =
      List.partition
131
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
132
        built_nodes in
bguillaum's avatar
bguillaum committed
133

pj2m's avatar
pj2m committed
134 135 136 137 138 139
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
140 141 142 143

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
144 145 146
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
147 148 149

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
150 151 152 153 154 155 156
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
157

158
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
159
      List.fold_left
bguillaum's avatar
bguillaum committed
160
        (fun acc (ast_edge, loc) ->
161 162
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
163 164
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
165
              | Some i -> Pid.Pos i
166
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
167 168
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
169
              | Some i -> Pid.Pos i
170
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
bguillaum's avatar
bguillaum committed
171
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
172 173
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
174
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
175
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
176 177
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

178
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
179 180 181 182 183 184
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
185
      Pid_map.fold
bguillaum's avatar
bguillaum committed
186 187 188 189 190 191 192 193 194 195 196
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
197 198

    let roots =
199
      Pid_map.fold
bguillaum's avatar
bguillaum committed
200 201 202 203 204
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
205

pj2m's avatar
pj2m committed
206 207
    (!tree_prop, roots)

208
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
209
  let roots graph = snd (tree_and_roots graph)
210
end (* module P_graph *)
pj2m's avatar
pj2m committed
211

bguillaum's avatar
bguillaum committed
212
(* ================================================================================ *)
213 214
module G_deco = struct
  type t = {
215 216
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
217 218 219 220
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
221

bguillaum's avatar
bguillaum committed
222
(* ================================================================================ *)
223
module G_graph = struct
224
  type t = {
bguillaum's avatar
bguillaum committed
225 226
    meta: string list;                       (* meta-informations *)
    map: G_node.t Gid_map.t;                 (* node description *)
227
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
228
    highest_index: int;                      (* the next free integer index *)
229 230
  }

bguillaum's avatar
bguillaum committed
231
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; }
232

bguillaum's avatar
bguillaum committed
233
  (* ---------------------------------------------------------------------- *)
234 235 236 237 238 239 240 241 242 243
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
  exception Not_a_tree
  type tree = T of (Gid.t * tree list)

  let rec tree_to_string = function
  | T (gid, daughters) ->
    sprintf "%s [%s]"
      (Gid.to_string gid)
      ((String.concat) ";" (List.map tree_to_string daughters))

  let graph_to_tree (g : t) : tree =
    let rec build_sub_tree map forest gid =
      if List.mem_assoc gid forest
      then (map, forest)
      else
        let (new_map, daugthers, new_forest) = Massoc_gid.fold
          (fun (acc_map, sub_trees, acc_forest) gid2 edge ->
            if edge = G_edge.sub
            then
              (* ensure that gid2 is in forest *)
              let (new_acc_map, new_acc_forest) = build_sub_tree acc_map acc_forest gid2 in
              let sub = List.assoc gid2 new_acc_forest in
              ( new_acc_map,
                sub::sub_trees,
                List.remove_assoc gid2 new_acc_forest
              )
            else (acc_map, sub_trees, acc_forest)
        ) (map,[],forest) (G_node.get_next (Gid_map.find gid map)) in
      (Gid_map.remove gid new_map, (gid, T (gid, List.rev daugthers))::new_forest) in

    let rec loop (unused_map, forest) =
      match (Gid_map.is_empty unused_map, forest) with
      | (true, [(_,tree)]) -> tree
      | (true, _) -> raise Not_a_tree
      | _ ->
        (* pick one unused node *)
        let (gid,_) = Gid_map.choose unused_map in
        loop (build_sub_tree unused_map forest gid) in
    loop (g.map, [])

bguillaum's avatar
bguillaum committed
283
  let get_highest g = g.highest_index
284 285 286 287

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
288

bguillaum's avatar
bguillaum committed
289
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
290

291 292
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
293

bguillaum's avatar
bguillaum committed
294
  (* is there an edge e out of node i ? *)
bguillaum's avatar
bguillaum committed
295
  let edge_out ?domain graph node_id label_cst =
296
    let node = Gid_map.find node_id graph.map in
bguillaum's avatar
bguillaum committed
297
    Massoc_gid.exists (fun _ e -> Label_cst.match_ ?domain label_cst e) (G_node.get_next node)
298

299 300
  let get_annot_info graph = failwith "Unused function !"
    (* let annot_info =
301 302 303 304
      Gid_map.fold
        (fun _ node acc ->
          match (G_node.get_annot_info node, acc) with
            | (None,_) -> acc
305
            | (Some f, None) -> Some (f,G_node.get_position node)
306 307 308 309
            | (Some _, Some _) -> Error.build "[G_node.get_annot_info] Two nodes with annot info"
        ) graph.map None in
    match annot_info with
      | Some x -> x
310
      | None -> Error.build "[G_node.get_annot_info] No nodes with annot info" *)
311

bguillaum's avatar
bguillaum committed
312
  (* -------------------------------------------------------------------------------- *)
313
  let map_add_edge_opt map id_src label id_tar =
314
    let node_src =
315
      (* Not found can be raised when adding an edge from pos to neg *)
316
      try Gid_map.find id_src map with Not_found -> G_node.empty in
317
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
318 319
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
320

321 322 323 324 325 326 327
  (* -------------------------------------------------------------------------------- *)
  let map_add_edge map id_src label id_tar =
    let node_src = Gid_map.find id_src map in
    match G_node.add_edge label id_tar node_src with
      | Some new_node -> Gid_map.add id_src new_node map
      | None -> Log.fbug "[Graph.map_add_edge] duplicate"; exit 2

328
  (* -------------------------------------------------------------------------------- *)
329
  let add_edge graph id_src label id_tar =
330
    match map_add_edge_opt graph.map id_src label id_tar with
331 332
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
333

bguillaum's avatar
bguillaum committed
334
  (* -------------------------------------------------------------------------------- *)
Bruno Guillaume's avatar
Bruno Guillaume committed
335
  let build ?domain ?(grewpy=false) gr_ast =
336 337 338 339
    let full_node_list =
      if grewpy
      then List.sort (Ast.grewpy_compare) gr_ast.Ast.nodes
      else gr_ast.Ast.nodes
340
    and full_edge_list = gr_ast.Ast.edges in
341

342

bguillaum's avatar
bguillaum committed
343 344
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
345

bguillaum's avatar
bguillaum committed
346 347 348 349 350
      | (ast_node, loc)::tail ->
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
351 352
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some index) tail in
          let succ = if tail = [] then None else Some (index+1) in
353
          let new_node = G_node.build ?domain ?prec ?succ ~position:(float index) (ast_node, loc) in
bguillaum's avatar
bguillaum committed
354
            (
355
              Gid_map.add index new_node new_tail,
bguillaum's avatar
bguillaum committed
356 357
              (node_id,index)::table
            ) in
358

bguillaum's avatar
bguillaum committed
359
    let (map_without_edges, table) = loop [] 0 None full_node_list in
360

361 362
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
363
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
364 365
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
bguillaum's avatar
bguillaum committed
366
          let edge = G_edge.build ?domain (ast_edge, loc) in
367
          (match map_add_edge_opt acc i1 edge i2 with
bguillaum's avatar
bguillaum committed
368 369
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
370
              (G_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
371
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
372 373
          )
        ) map_without_edges full_edge_list in
374

bguillaum's avatar
bguillaum committed
375
    {meta=gr_ast.Ast.meta; map=map; fusion = []; highest_index = (List.length full_node_list) -1}
376

bguillaum's avatar
bguillaum committed
377
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
378
  let of_conll ?domain conll =
379

380
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
bguillaum's avatar
bguillaum committed
381

382
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), Conll.Id.to_dot) in
383

384 385 386 387
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
bguillaum's avatar
bguillaum committed
388
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec last) Gid_map.empty
389 390
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
bguillaum's avatar
bguillaum committed
391
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec ~succ:(index+1) line)
392
          (loop (index+1) (Some index) tail) in
393 394

    let map_without_edges = loop 0 None sorted_lines in
395

bguillaum's avatar
bguillaum committed
396
    let map_with_edges =
bguillaum's avatar
bguillaum committed
397 398
      List.fold_left
        (fun acc line ->
399 400
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
401 402
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
403
              let gov_id = Id.gbuild ~loc gov gtable in
bguillaum's avatar
bguillaum committed
404
              let edge = G_edge.make ?domain ~loc dep_lab in
405
              (match map_add_edge_opt acc2 gov_id edge dep_id with
bguillaum's avatar
bguillaum committed
406 407
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
408
                  (G_edge.to_string ?domain edge)
409
                  (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
410
              )
411
            ) acc line.Conll.deps
412
        ) map_without_edges conll.Conll.lines in
413 414 415

      let fusion =
        List.map
416
          (fun {Conll.first; last; fusion; mw_line_num} ->
417
              let loc = Loc.file_opt_line_opt conll.Conll.file mw_line_num in
418
              (
419
                Id.gbuild ~loc (first,None) gtable,
420
                (
421
                  Id.gbuild ~loc (last, None) gtable,
422 423
                  fusion
                )
424
              )
425
          ) conll.Conll.multiwords in
426

bguillaum's avatar
bguillaum committed
427
    {meta = conll.Conll.meta; map=map_with_edges; fusion; highest_index= (List.length sorted_lines) -1 }
bguillaum's avatar
bguillaum committed
428

429 430
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
bguillaum's avatar
bguillaum committed
431
  let of_brown ?domain ?sentid brown =
432
    let units = Str.split (Str.regexp " ") brown in
433
      let conll_lines = List.mapi
434
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
435
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
436
        let pos = String.sub pos 1 ((String.length pos)-2) in
437
        let feats = match (i,sentid) with
438 439
          | (0,Some id) -> [("sentid", id)]
          | _ -> [] in
440
        Conll.build_line ~id:(i+1,None) ~form ~lemma ~xpos:pos ~feats ~deps:([(i, "SUC")]) ()
441
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\") in >>>%s<<<" item brown
442
      ) units in
bguillaum's avatar
bguillaum committed
443
    of_conll ?domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
444

445
  (* -------------------------------------------------------------------------------- *)
446 447 448
  let of_pst ?domain pst =
    let cpt = ref 0 in
    let get_pos () = incr cpt; !cpt - 1 in
449

450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
    let leaf_list = ref [] in

    let rec loop nodes = function
    | Ast.Leaf (loc, phon) ->
      let fresh_id = get_pos () in
      let node = G_node.pst_leaf ~loc ?domain phon fresh_id in
      leaf_list := fresh_id :: ! leaf_list;
      (fresh_id, Gid_map.add fresh_id node nodes)

    | Ast.T (loc, cat, daughters) ->
      let fresh_id = get_pos () in
      let new_node = G_node.pst_node ~loc ?domain cat fresh_id in
      let with_mother = Gid_map.add fresh_id new_node nodes in
      let new_nodes = List.fold_left
        (fun map daughter ->
          let (daughter_id, new_map) = loop map daughter in
          map_add_edge new_map fresh_id G_edge.sub daughter_id
        ) with_mother daughters in
      (fresh_id, new_nodes) in

    let (_,map) = loop Gid_map.empty pst in

    let rec prec_loop map = function
    | [] | [_] -> map
    | n1 :: n2 :: tail ->
      let new_map = prec_loop map (n2 :: tail) in

477 478 479 480 481 482 483
      let node1 = Gid_map.find n1 new_map
      and node2 = Gid_map.find n2 new_map in
      new_map
      |> (Gid_map.add n1 (G_node.set_succ n2 node1))
      |> (Gid_map.add n2 (G_node.set_prec n1 node2)) in

    {meta=[]; map=prec_loop map (List.rev !leaf_list); fusion = []; highest_index = !cpt}
bguillaum's avatar
bguillaum committed
484

bguillaum's avatar
bguillaum committed
485
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
486
  let del_edge ?domain ?edge_ident loc graph id_src label id_tar =
487
    let node_src =
488
      try Gid_map.find id_src graph.map
489
      with Not_found ->
490
        match edge_ident with
bguillaum's avatar
bguillaum committed
491 492
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
493
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
bguillaum's avatar
bguillaum committed
494
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string ?domain label)
pj2m's avatar
pj2m committed
495

bguillaum's avatar
bguillaum committed
496
  (* -------------------------------------------------------------------------------- *)
497
  let del_node graph node_id =
498 499 500 501 502 503 504
    let map_wo_node =
      Gid_map.fold
        (fun id value acc ->
          if id = node_id
          then acc
          else Gid_map.add id (G_node.remove_key node_id value) acc
        ) graph.map Gid_map.empty in
505
    let node = Gid_map.find node_id graph.map in
506 507 508 509
    let new_map =
      match (G_node.get_prec node, G_node.get_succ node) with
      | (Some id_prec, Some id_succ) ->
        begin
510 511
          let prec = Gid_map.find id_prec map_wo_node
          and succ = Gid_map.find id_succ map_wo_node in
512 513 514 515 516 517
          map_wo_node
          |> (Gid_map.add id_prec (G_node.set_succ id_succ prec))
          |> (Gid_map.add id_succ (G_node.set_prec id_prec succ))
        end
      | (Some id_prec, None) ->
        begin
518
          let prec = Gid_map.find id_prec map_wo_node in
519 520 521 522 523
          map_wo_node
          |> (Gid_map.add id_prec (G_node.remove_succ prec))
        end
      | (None, Some id_succ) ->
        begin
524
          let succ = Gid_map.find id_succ map_wo_node in
525 526 527 528 529
          map_wo_node
          |> (Gid_map.add id_succ (G_node.remove_prec succ))
        end
      | (None, None) -> map_wo_node in
    { graph with map = new_map }
pj2m's avatar
pj2m committed
530

bguillaum's avatar
bguillaum committed
531
  (* -------------------------------------------------------------------------------- *)
532
  let insert id1 id2 graph =
bguillaum's avatar
bguillaum committed
533 534
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
535 536 537
    let new_pos = match (G_node.get_position node1, G_node.get_position node2) with
    | (G_node.Ordered pos1, G_node.Ordered pos2) -> (pos1 +. pos2) /. 2.
    | _ -> Error.run "Try to insert into non ordered nodes" in
538
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
539
    let map = graph.map
540
      |> (Gid_map.add new_gid (G_node.fresh ~prec:id1 ~succ:id2 new_pos))
bguillaum's avatar
bguillaum committed
541 542
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
543
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
544 545

  (* -------------------------------------------------------------------------------- *)
546
  let append id graph =
bguillaum's avatar
bguillaum committed
547
    let node = Gid_map.find id graph.map in
548 549 550
    let new_pos = match G_node.get_position node with
    | G_node.Ordered pos -> pos +. 1.
    | _ -> Error.run "Try to append into non ordered nodes" in
551
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
552
    let map = graph.map
553
      |> (Gid_map.add new_gid (G_node.fresh ~prec:id new_pos))
bguillaum's avatar
bguillaum committed
554
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
555
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
556 557

  (* -------------------------------------------------------------------------------- *)
558
  let prepend id graph =
bguillaum's avatar
bguillaum committed
559
    let node = Gid_map.find id graph.map in
560 561 562
    let new_pos = match G_node.get_position node with
    | G_node.Ordered pos -> pos -. 1.
    | _ -> Error.run "Try to prepend into non ordered nodes" in
563
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
564
    let map = graph.map
565
      |> (Gid_map.add new_gid (G_node.fresh ~succ:id new_pos))
bguillaum's avatar
bguillaum committed
566
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
567
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
568 569

  (* -------------------------------------------------------------------------------- *)
570
  let add_after node_id graph =
bguillaum's avatar
bguillaum committed
571 572
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
573 574
    | Some gid_succ -> insert node_id gid_succ graph
    | None -> append node_id graph
bguillaum's avatar
bguillaum committed
575 576

  (* -------------------------------------------------------------------------------- *)
577
  let add_before node_id graph =
bguillaum's avatar
bguillaum committed
578 579
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
580 581 582 583 584 585 586 587
    | Some gid_prec -> insert gid_prec node_id graph
    | None -> prepend node_id graph

  (* -------------------------------------------------------------------------------- *)
  let add_unordered graph =
    let new_gid = graph.highest_index + 1 in
    let map = Gid_map.add new_gid (G_node.fresh_unordered ()) graph.map in
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
588

bguillaum's avatar
bguillaum committed
589
  (* -------------------------------------------------------------------------------- *)
590
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
591
  let shift_out loc ?domain src_gid tar_gid is_gid_local label_cst graph =
592
    let src_node = Gid_map.find src_gid graph.map in
593
    let tar_node = Gid_map.find tar_gid graph.map in
594

595 596 597
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

598
    let (new_src_next, new_tar_next) =
599 600
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
601
        if Label_cst.match_ ?domain label_cst edge && not (is_gid_local next_gid)
602 603 604
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
bguillaum's avatar
bguillaum committed
605
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
606 607 608
        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
609

610
    { graph with map =
611 612 613
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
614
    }
pj2m's avatar
pj2m committed
615

bguillaum's avatar
bguillaum committed
616
  (* -------------------------------------------------------------------------------- *)
617
  let shift_in loc ?domain src_gid tar_gid is_gid_local label_cst graph =
618
    { graph with map =
619 620 621 622 623
      Gid_map.mapi
        (fun node_id node ->
          if is_gid_local node_id
          then node
          else
624 625 626 627 628 629 630 631
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
bguillaum's avatar
bguillaum committed
632
                if Label_cst.match_ ?domain label_cst edge
633 634
                then
                  match List_.usort_insert edge acc_node_tar_edges with
bguillaum's avatar
bguillaum committed
635
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
636 637 638 639 640 641 642 643 644
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
645 646
          ) graph.map
    }
647

bguillaum's avatar
bguillaum committed
648
  (* -------------------------------------------------------------------------------- *)
649
  let shift_edges loc ?domain src_gid tar_gid is_gid_local label_cst graph =
650
    graph
651 652
    |> (shift_in loc ?domain src_gid tar_gid is_gid_local label_cst)
    |> (shift_out loc ?domain src_gid tar_gid is_gid_local label_cst)
pj2m's avatar
pj2m committed
653

bguillaum's avatar
bguillaum committed
654
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
655
  let set_feat ?loc ?domain graph node_id feat_name new_value =
656
    let node = Gid_map.find node_id graph.map in
657 658 659 660
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
bguillaum's avatar
bguillaum committed
661
          let new_fs = G_fs.set_feat ?loc ?domain feat_name new_value (G_node.get_fs node) in
662
          (G_node.set_fs new_fs node) in
663
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
664

bguillaum's avatar
bguillaum committed
665
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
666
  let update_feat ?loc ?domain graph tar_id tar_feat_name item_list =
667 668 669
    let strings_to_concat =
      List.map
        (function
670 671
          | Concat_item.Feat (node_gid, "position") ->
            let node = Gid_map.find node_gid graph.map in
672 673 674 675 676
            begin
              match G_node.get_position node with
              | G_node.Ordered p -> sprintf "%g" p
              | _ -> Error.run ?loc "Try to read position of an unordered node"
            end
bguillaum's avatar
bguillaum committed
677
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
678 679
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
680
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
681
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
682
            )
bguillaum's avatar
bguillaum committed
683
          | Concat_item.String s -> s
684 685
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
bguillaum's avatar
bguillaum committed
686
    (set_feat ?loc ?domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
687

bguillaum's avatar
bguillaum committed
688
  (* -------------------------------------------------------------------------------- *)
689
  let del_feat graph node_id feat_name =
690
    let node = Gid_map.find node_id graph.map in
691
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
692
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
693

bguillaum's avatar
bguillaum committed
694
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
695
  let to_gr ?domain graph =
696 697 698

    let gr_id id = G_node.get_name id (Gid_map.find id graph.map) in

699
    let buff = Buffer.create 32 in
700

701 702
    bprintf buff "graph {\n";

703 704
    (* meta data *)
    List.iter
705 706
      (fun (s) ->
        bprintf buff "  %s;\n" s
707 708
      ) graph.meta;

709
    (* nodes *)
710 711 712 713 714 715 716 717
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
718

719 720
    List.iter
      (fun (id,node) ->
721
        bprintf buff "  %s %s;\n" (gr_id id) (G_node.to_gr node)
722
      ) sorted_nodes;
723

724
    (* edges *)
725 726
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
727 728
        Massoc_gid.iter
          (fun tar edge ->
729
            bprintf buff "  %s -[%s]-> %s;\n" (gr_id id) (G_edge.to_string ?domain edge) (gr_id tar)
bguillaum's avatar
bguillaum committed
730
          ) (G_node.get_next node)
731
      ) sorted_nodes;
732

733 734 735
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
736
  (* -------------------------------------------------------------------------------- *)
737
  let to_sentence ?main_feat graph =
738
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
739
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
740 741 742 743 744 745 746 747 748 749

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
750 751
        " -t-", "-t-";
        "_-_", "-";
752 753
        "_", " ";
        "' ", "'";
754 755
        " ,", ",";
        " .", ".";
756 757 758 759
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
760

bguillaum's avatar
bguillaum committed
761
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
762
  let to_dep ?domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
763
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
764
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
765

766 767 768 769
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

770
    (* nodes *)
771
    List.iter
772
      (fun (id, node) ->
773
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
774
        let fs = G_node.get_fs node in
775 776
        let pos= match G_node.get_position node with G_node.Ordered x -> Some x | _ -> None in
        let dep_fs = G_fs.to_dep ~decorated_feat ?position:pos ?filter ?main_feat fs in
777 778 779

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
780 781 782
          | _ -> match G_fs.get_string_atom "_UD_empty" fs with
            | Some "Yes" -> "; forecolor=purple; subcolor=purple; "
            | _ -> "" in
bguillaum's avatar
bguillaum committed
783 784 785 786 787

        bprintf buff "N_%s { %s%s }\n"
          (Gid.to_string id)
          dep_fs
          style
788 789
      ) snodes;
    bprintf buff "} \n";
790

791
    (* edges *)
792
    bprintf buff "[EDGES] { \n";
793

bguillaum's avatar
bguillaum committed
794 795 796 797 798 799 800 801 802 803 804 805 806 807 808
    if !Global.debug
    then
      List.iter
        (fun (id, node) ->
          begin
            match G_node.get_prec node with
            | None -> ()
            | Some p -> bprintf buff "N_%s -> N_%s { label=\"__PREC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string p)
          end;
          begin
            match G_node.get_succ node with
            | None -> ()
            | Some s -> bprintf buff "N_%s -> N_%s { label=\"__SUCC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string s)
          end
        ) snodes;
809

810
    Gid_map.iter
811
      (fun gid elt ->
bguillaum's avatar
bguillaum committed
812 813
        Massoc_gid.iter
          (fun tar g_edge ->
bguillaum's avatar
bguillaum committed
814
            if not (G_edge.is_void ?domain g_edge)
815 816
            then
              let deco = List.mem (gid,g_edge,tar) deco.G_deco.edges in
bguillaum's avatar
bguillaum committed
817
              bprintf buff "N_%s -> N_%s %s\n" (Gid.to_string gid) (Gid.to_string tar) (G_edge.to_dep ?domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
818
          ) (G_node.get_next elt)
819
      ) graph.map;
820 821

    bprintf buff "} \n";
822 823
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
824
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
825
  let to_dot ?domain ?main_feat ?(deco=G_deco.empty) graph =
826
    let buff = Buffer.create 32 in
827

828
    bprintf buff "digraph G {\n";
bguillaum's avatar
bguillaum committed
829
    (* bprintf buff "  rankdir=LR;\n"; *)
830
    (* bprintf buff "  node [shape=none];\n"; *)
831

832
    (* nodes *)
833 834
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
835
        let decorated_feat =
836 837
          try List.assoc id deco.G_deco.nodes
          with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
838 839
        bprintf buff "  N_%s [label=<%s>, color=%s]\n"
          (Gid.to_string id)
840 841 842
          (G_fs.to_dot ~decorated_feat ?main_feat (G_node.get_fs node))
          (* TODO: add bgcolor in dot output *)
          (if List.mem_assoc id deco.G_deco.nodes then "red" else "black")
843
      ) graph.map;
844 845

    (* edges *)
846 847
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
848 849 850
        Massoc_gid.iter
          (fun tar g_edge ->
            let deco = List.mem (id,g_edge,tar) deco.G_deco.edges in
851 852 853 854 855 856 857 858 859 860 861
            if g_edge = G_edge.sub
            then bprintf buff "  N_%s -> N_%s [dir=none];\n" (Gid.to_string id) (Gid.to_string tar)
            else bprintf buff "  N_%s -> N_%s%s;\n" (Gid.to_string id) (Gid.to_string tar) (G_edge.to_dot ?domain ~deco g_edge)
          ) (G_node.get_next node)
      ) graph.map;

    Gid_map.iter
      (fun id node ->
        begin
          match G_node.get_succ node with
          | Some s when !Global.debug ->
862
              bprintf buff "  N_%s -> N_%s [label=\"SUCC\", style=dotted, fontcolor=lightblue, color=lightblue]; {rank=same; N_%s; N_%s };\n"
863
                (Gid.to_string id) (Gid.to_string s) (Gid.to_string id) (Gid.to_string s)
864
          (* | Some s ->
865
              bprintf buff "  N_%s -> N_%s [style=invis]; {rank=same; N_%s; N_%s };\n"
866