grew_graph.ml 38.4 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11 12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17 18
open Grew_types

pj2m's avatar
pj2m committed
19 20 21 22
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
23
(* ================================================================================ *)
24
module P_deco = struct
bguillaum's avatar
bguillaum committed
25 26 27 28
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
29 30

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
31
end (* module P_deco *)
pj2m's avatar
pj2m committed
32

bguillaum's avatar
bguillaum committed
33
(* ================================================================================ *)
34 35
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
36

37
  let empty = Pid_map.empty
38

39
  let find = Pid_map.find
pj2m's avatar
pj2m committed
40

41
  let pid_name_list t = Pid_map.fold (fun _ node acc -> (P_node.get_name node)::acc) t []
42

43
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
44
  let map_add_edge map id_src label id_tar =
45
    let node_src =
pj2m's avatar
pj2m committed
46
      (* Not found can be raised when adding an edge from pos to neg *)
47 48
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
49 50
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
51

bguillaum's avatar
bguillaum committed
52
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
53
  let build_filter ?domain table (ast_node, loc) =
54
    let pid = Id.build ~loc ast_node.Ast.node_id table in
bguillaum's avatar
bguillaum committed
55
    let fs = P_fs.build ?domain ast_node.Ast.fs in
56 57
    (pid, fs)

bguillaum's avatar
bguillaum committed
58
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
59
  let build ?domain ?pat_vars ?(locals=[||]) (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
60

61
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
62
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
63
    let rec insert (ast_node, loc) = function
bguillaum's avatar
bguillaum committed
64
      | [] -> [P_node.build ?domain ?pat_vars (ast_node, loc)]
65
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
66
        begin
bguillaum's avatar
bguillaum committed
67
          try (node_id, P_node.unif_fs (P_fs.build ?domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
68 69
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
70
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
71

72
    let (named_nodes : (Id.name * P_node.t) list) =
73
      List.fold_left
74 75
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
76

pj2m's avatar
pj2m committed
77 78 79
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

80 81
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
82 83

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
84 85
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
86
      Pid_map.empty node_list in
87

bguillaum's avatar
bguillaum committed
88
    let (map : t) =
pj2m's avatar
pj2m committed
89
      List.fold_left
bguillaum's avatar
bguillaum committed
90 91 92
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
bguillaum's avatar
bguillaum committed
93
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
94 95 96
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
97
              (P_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
98
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
99 100
          )
        ) map_without_edges full_edge_list in
101
    (map, pos_table)
bguillaum's avatar
bguillaum committed
102

103

bguillaum's avatar
bguillaum committed
104
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
105 106
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
107
  type extension = {
bguillaum's avatar
bguillaum committed
108 109 110
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
111

bguillaum's avatar
bguillaum committed
112
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
113
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
bguillaum's avatar
bguillaum committed
114
  let build_extension ?domain ?pat_vars ?(locals=[||]) pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
115

bguillaum's avatar
bguillaum committed
116
    let built_nodes = List.map (P_node.build ?domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
117

118 119
    let (old_nodes, new_nodes) =
      List.partition
120
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
121
        built_nodes in
bguillaum's avatar
bguillaum committed
122

pj2m's avatar
pj2m committed
123 124 125 126 127 128
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
129 130 131 132

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
133 134 135
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
136 137 138

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
139 140 141 142 143 144 145
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
146

147
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
148
      List.fold_left
bguillaum's avatar
bguillaum committed
149
        (fun acc (ast_edge, loc) ->
150 151
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
152 153
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
154
              | Some i -> Pid.Pos i
155
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
156 157
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
158
              | Some i -> Pid.Pos i
159
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
bguillaum's avatar
bguillaum committed
160
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
161 162
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
163
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
164
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
165 166
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

167
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
168 169 170 171 172 173
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
174
      Pid_map.fold
bguillaum's avatar
bguillaum committed
175 176 177 178 179 180 181 182 183 184 185
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
186 187

    let roots =
188
      Pid_map.fold
bguillaum's avatar
bguillaum committed
189 190 191 192 193
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
194

pj2m's avatar
pj2m committed
195 196
    (!tree_prop, roots)

197
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
198
  let roots graph = snd (tree_and_roots graph)
199
end (* module P_graph *)
pj2m's avatar
pj2m committed
200

bguillaum's avatar
bguillaum committed
201
(* ================================================================================ *)
202 203
module G_deco = struct
  type t = {
204 205
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
206 207 208 209
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
210

bguillaum's avatar
bguillaum committed
211
(* ================================================================================ *)
212
module G_graph = struct
213
  type t = {
bguillaum's avatar
bguillaum committed
214 215
    meta: string list;                       (* meta-informations *)
    map: G_node.t Gid_map.t;                 (* node description *)
bguillaum's avatar
bguillaum committed
216
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
217
    highest_index: int;                      (* the next free integer index *)
218 219
  }

bguillaum's avatar
bguillaum committed
220
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; }
221

bguillaum's avatar
bguillaum committed
222
  (* ---------------------------------------------------------------------- *)
223 224 225 226 227 228 229 230 231 232
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

bguillaum's avatar
bguillaum committed
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
  exception Not_a_tree
  type tree = T of (Gid.t * tree list)

  let rec tree_to_string = function
  | T (gid, daughters) ->
    sprintf "%s [%s]"
      (Gid.to_string gid)
      ((String.concat) ";" (List.map tree_to_string daughters))

  let graph_to_tree (g : t) : tree =
    let rec build_sub_tree map forest gid =
      if List.mem_assoc gid forest
      then (map, forest)
      else
        let (new_map, daugthers, new_forest) = Massoc_gid.fold
          (fun (acc_map, sub_trees, acc_forest) gid2 edge ->
            if edge = G_edge.sub
            then
              (* ensure that gid2 is in forest *)
              let (new_acc_map, new_acc_forest) = build_sub_tree acc_map acc_forest gid2 in
              let sub = List.assoc gid2 new_acc_forest in
              ( new_acc_map,
                sub::sub_trees,
                List.remove_assoc gid2 new_acc_forest
              )
            else (acc_map, sub_trees, acc_forest)
        ) (map,[],forest) (G_node.get_next (Gid_map.find gid map)) in
      (Gid_map.remove gid new_map, (gid, T (gid, List.rev daugthers))::new_forest) in

    let rec loop (unused_map, forest) =
      match (Gid_map.is_empty unused_map, forest) with
      | (true, [(_,tree)]) -> tree
      | (true, _) -> raise Not_a_tree
      | _ ->
        (* pick one unused node *)
        let (gid,_) = Gid_map.choose unused_map in
        loop (build_sub_tree unused_map forest gid) in
    loop (g.map, [])

bguillaum's avatar
bguillaum committed
272
  let get_highest g = g.highest_index
273 274 275 276

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
277

bguillaum's avatar
bguillaum committed
278
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
279

280 281
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
282

bguillaum's avatar
bguillaum committed
283
  (* is there an edge e out of node i ? *)
bguillaum's avatar
bguillaum committed
284
  let edge_out ?domain graph node_id label_cst =
285
    let node = Gid_map.find node_id graph.map in
bguillaum's avatar
bguillaum committed
286
    Massoc_gid.exists (fun _ e -> Label_cst.match_ ?domain label_cst e) (G_node.get_next node)
287

bguillaum's avatar
bguillaum committed
288 289 290 291 292 293
  let get_annot_info graph =
    let annot_info =
      Gid_map.fold
        (fun _ node acc ->
          match (G_node.get_annot_info node, acc) with
            | (None,_) -> acc
bguillaum's avatar
bguillaum committed
294
            | (Some f, None) -> Some (f,G_node.get_position node)
bguillaum's avatar
bguillaum committed
295 296 297 298 299 300
            | (Some _, Some _) -> Error.build "[G_node.get_annot_info] Two nodes with annot info"
        ) graph.map None in
    match annot_info with
      | Some x -> x
      | None -> Error.build "[G_node.get_annot_info] No nodes with annot info"

bguillaum's avatar
bguillaum committed
301
  (* -------------------------------------------------------------------------------- *)
302
  let map_add_edge_opt map id_src label id_tar =
303
    let node_src =
304
      (* Not found can be raised when adding an edge from pos to neg *)
305
      try Gid_map.find id_src map with Not_found -> G_node.empty in
306
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
307 308
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
309

310 311 312 313 314 315 316
  (* -------------------------------------------------------------------------------- *)
  let map_add_edge map id_src label id_tar =
    let node_src = Gid_map.find id_src map in
    match G_node.add_edge label id_tar node_src with
      | Some new_node -> Gid_map.add id_src new_node map
      | None -> Log.fbug "[Graph.map_add_edge] duplicate"; exit 2

317
  (* -------------------------------------------------------------------------------- *)
318
  let add_edge graph id_src label id_tar =
319
    match map_add_edge_opt graph.map id_src label id_tar with
320 321
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
322

bguillaum's avatar
bguillaum committed
323
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
324
  let build ?domain ?(grewpy=false) ?(locals=[||]) gr_ast =
325 326 327 328
    let full_node_list =
      if grewpy
      then List.sort (Ast.grewpy_compare) gr_ast.Ast.nodes
      else gr_ast.Ast.nodes
329
    and full_edge_list = gr_ast.Ast.edges in
330

331

bguillaum's avatar
bguillaum committed
332 333
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
334

bguillaum's avatar
bguillaum committed
335 336 337 338 339
      | (ast_node, loc)::tail ->
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
340 341
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some index) tail in
          let succ = if tail = [] then None else Some (index+1) in
342
          let new_node = G_node.build ?domain ?prec ?succ index (ast_node, loc) in
bguillaum's avatar
bguillaum committed
343
            (
344
              Gid_map.add index new_node new_tail,
bguillaum's avatar
bguillaum committed
345 346
              (node_id,index)::table
            ) in
347

bguillaum's avatar
bguillaum committed
348
    let (map_without_edges, table) = loop [] 0 None full_node_list in
349

350 351
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
352
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
353 354
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
bguillaum's avatar
bguillaum committed
355
          let edge = G_edge.build ?domain (ast_edge, loc) in
356
          (match map_add_edge_opt acc i1 edge i2 with
bguillaum's avatar
bguillaum committed
357 358
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
359
              (G_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
360
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
361 362
          )
        ) map_without_edges full_edge_list in
363

bguillaum's avatar
bguillaum committed
364
    {meta=gr_ast.Ast.meta; map=map; fusion = []; highest_index = (List.length full_node_list) -1}
bguillaum's avatar
bguillaum committed
365

bguillaum's avatar
bguillaum committed
366
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
367
  let of_conll ?domain conll =
368

369
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
bguillaum's avatar
bguillaum committed
370

371
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), Conll.Id.to_dot) in
372

373 374 375 376
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
bguillaum's avatar
bguillaum committed
377
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec last) Gid_map.empty
378 379
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
bguillaum's avatar
bguillaum committed
380
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec ~succ:(index+1) line)
381
          (loop (index+1) (Some index) tail) in
382 383

    let map_without_edges = loop 0 None sorted_lines in
384

bguillaum's avatar
bguillaum committed
385
    let map_with_edges =
bguillaum's avatar
bguillaum committed
386 387
      List.fold_left
        (fun acc line ->
388 389
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
390 391
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
392
              let gov_id = Id.gbuild ~loc gov gtable in
bguillaum's avatar
bguillaum committed
393
              let edge = G_edge.make ?domain ~loc dep_lab in
394
              (match map_add_edge_opt acc2 gov_id edge dep_id with
bguillaum's avatar
bguillaum committed
395 396
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
397
                  (G_edge.to_string ?domain edge)
398
                  (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
399
              )
400
            ) acc line.Conll.deps
401
        ) map_without_edges conll.Conll.lines in
bguillaum's avatar
bguillaum committed
402 403 404

      let fusion =
        List.map
405
          (fun {Conll.first; last; fusion; mw_line_num} ->
406
              let loc = Loc.file_opt_line_opt conll.Conll.file mw_line_num in
407
              (
408
                Id.gbuild ~loc (first,None) gtable,
409
                (
410
                  Id.gbuild ~loc (last, None) gtable,
411 412
                  fusion
                )
bguillaum's avatar
bguillaum committed
413
              )
414
          ) conll.Conll.multiwords in
bguillaum's avatar
bguillaum committed
415

bguillaum's avatar
bguillaum committed
416
    {meta = conll.Conll.meta; map=map_with_edges; fusion; highest_index= (List.length sorted_lines) -1 }
bguillaum's avatar
bguillaum committed
417

418 419
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
bguillaum's avatar
bguillaum committed
420
  let of_brown ?domain ?sentid brown =
421
    let units = Str.split (Str.regexp " ") brown in
422
      let conll_lines = List.mapi
423
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
424
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
425
        let pos = String.sub pos 1 ((String.length pos)-2) in
426
        let feats = match (i,sentid) with
427 428
          | (0,Some id) -> [("sentid", id)]
          | _ -> [] in
429
        Conll.build_line ~id:(i+1,None) ~form ~lemma ~xpos:pos ~feats ~deps:([(i, "SUC")]) ()
430
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\") in >>>%s<<<" item brown
Bruno Guillaume's avatar
Bruno Guillaume committed
431
      ) units in
bguillaum's avatar
bguillaum committed
432
    of_conll ?domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
433

434
  (* -------------------------------------------------------------------------------- *)
435 436 437
  let of_pst ?domain pst =
    let cpt = ref 0 in
    let get_pos () = incr cpt; !cpt - 1 in
438

439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465
    let leaf_list = ref [] in

    let rec loop nodes = function
    | Ast.Leaf (loc, phon) ->
      let fresh_id = get_pos () in
      let node = G_node.pst_leaf ~loc ?domain phon fresh_id in
      leaf_list := fresh_id :: ! leaf_list;
      (fresh_id, Gid_map.add fresh_id node nodes)

    | Ast.T (loc, cat, daughters) ->
      let fresh_id = get_pos () in
      let new_node = G_node.pst_node ~loc ?domain cat fresh_id in
      let with_mother = Gid_map.add fresh_id new_node nodes in
      let new_nodes = List.fold_left
        (fun map daughter ->
          let (daughter_id, new_map) = loop map daughter in
          map_add_edge new_map fresh_id G_edge.sub daughter_id
        ) with_mother daughters in
      (fresh_id, new_nodes) in

    let (_,map) = loop Gid_map.empty pst in

    let rec prec_loop map = function
    | [] | [_] -> map
    | n1 :: n2 :: tail ->
      let new_map = prec_loop map (n2 :: tail) in

466 467 468 469 470 471 472
      let node1 = Gid_map.find n1 new_map
      and node2 = Gid_map.find n2 new_map in
      new_map
      |> (Gid_map.add n1 (G_node.set_succ n2 node1))
      |> (Gid_map.add n2 (G_node.set_prec n1 node2)) in

    {meta=[]; map=prec_loop map (List.rev !leaf_list); fusion = []; highest_index = !cpt}
bguillaum's avatar
bguillaum committed
473

bguillaum's avatar
bguillaum committed
474
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
475
  let del_edge ?domain ?edge_ident loc graph id_src label id_tar =
476
    let node_src =
477
      try Gid_map.find id_src graph.map
478
      with Not_found ->
479
        match edge_ident with
bguillaum's avatar
bguillaum committed
480 481
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
482
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
bguillaum's avatar
bguillaum committed
483
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string ?domain label)
pj2m's avatar
pj2m committed
484

bguillaum's avatar
bguillaum committed
485
  (* -------------------------------------------------------------------------------- *)
486
  let del_node graph node_id =
487 488 489 490 491 492 493
    let map_wo_node =
      Gid_map.fold
        (fun id value acc ->
          if id = node_id
          then acc
          else Gid_map.add id (G_node.remove_key node_id value) acc
        ) graph.map Gid_map.empty in
494
    let node = Gid_map.find node_id graph.map in
495 496 497 498
    let new_map =
      match (G_node.get_prec node, G_node.get_succ node) with
      | (Some id_prec, Some id_succ) ->
        begin
499 500
          let prec = Gid_map.find id_prec map_wo_node
          and succ = Gid_map.find id_succ map_wo_node in
501 502 503 504 505 506
          map_wo_node
          |> (Gid_map.add id_prec (G_node.set_succ id_succ prec))
          |> (Gid_map.add id_succ (G_node.set_prec id_prec succ))
        end
      | (Some id_prec, None) ->
        begin
507
          let prec = Gid_map.find id_prec map_wo_node in
508 509 510 511 512
          map_wo_node
          |> (Gid_map.add id_prec (G_node.remove_succ prec))
        end
      | (None, Some id_succ) ->
        begin
513
          let succ = Gid_map.find id_succ map_wo_node in
514 515 516 517 518
          map_wo_node
          |> (Gid_map.add id_succ (G_node.remove_prec succ))
        end
      | (None, None) -> map_wo_node in
    { graph with map = new_map }
pj2m's avatar
pj2m committed
519

bguillaum's avatar
bguillaum committed
520
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
521
  let add_neighbour loc ?domain graph node_id label = failwith "no more add_neighbour"
pj2m's avatar
pj2m committed
522

bguillaum's avatar
bguillaum committed
523
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
524
  let insert ?domain id1 id2 graph =
bguillaum's avatar
bguillaum committed
525 526 527 528 529
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
    let pos1 = G_node.get_position node1 in
    let pos2 = G_node.get_position node2 in
    let new_pos= (pos1 +. pos2) /. 2. in
530
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
531
    let map = graph.map
bguillaum's avatar
bguillaum committed
532
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~prec:id1 ~succ:id2 new_pos))
bguillaum's avatar
bguillaum committed
533 534
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
535
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
536 537

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
538
  let append ?domain id graph =
bguillaum's avatar
bguillaum committed
539 540 541
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos +. 1. in
542
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
543
    let map = graph.map
bguillaum's avatar
bguillaum committed
544
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~prec:id new_pos))
bguillaum's avatar
bguillaum committed
545
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
546
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
547 548

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
549
  let prepend ?domain id graph =
bguillaum's avatar
bguillaum committed
550 551 552
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos -. 1. in
553
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
554
    let map = graph.map
bguillaum's avatar
bguillaum committed
555
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~succ:id new_pos))
bguillaum's avatar
bguillaum committed
556
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
557
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
558 559

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
560
  let add_after loc ?domain node_id graph =
bguillaum's avatar
bguillaum committed
561 562
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
bguillaum's avatar
bguillaum committed
563 564
    | Some gid_succ -> insert ?domain node_id gid_succ graph
    | None -> append ?domain node_id graph
bguillaum's avatar
bguillaum committed
565 566

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
567
  let add_before loc ?domain node_id graph =
bguillaum's avatar
bguillaum committed
568 569
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
bguillaum's avatar
bguillaum committed
570 571
    | Some gid_prec -> insert ?domain gid_prec node_id graph
    | None -> prepend ?domain node_id graph
bguillaum's avatar
bguillaum committed
572

bguillaum's avatar
bguillaum committed
573
  (* -------------------------------------------------------------------------------- *)
574
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
bguillaum's avatar
bguillaum committed
575
  let shift_out loc ?domain src_gid tar_gid label_cst graph =
576
    let src_node = Gid_map.find src_gid graph.map in
577
    let tar_node = Gid_map.find tar_gid graph.map in
578

579 580 581 582 583 584 585
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

    (* Error if a loop is created by the shift_out *)
    let src_tar_edges = Massoc_gid.assoc tar_gid src_next in
    let _ =
      try
bguillaum's avatar
bguillaum committed
586 587
        let loop_edge = List.find (fun edge -> Label_cst.match_ ?domain label_cst edge) src_tar_edges in
        Error.run ~loc "The shfit_out command tries to build a loop (with label %s)" (Label.to_string ?domain loop_edge)
588 589 590 591 592
      with Not_found -> () in

    let (new_src_next,new_tar_next) =
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
bguillaum's avatar
bguillaum committed
593
        if Label_cst.match_ ?domain label_cst edge
594 595 596
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
bguillaum's avatar
bguillaum committed
597
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
598 599 600 601

        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
602

603
    { graph with map =
604 605 606
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
607
    }
pj2m's avatar
pj2m committed
608

bguillaum's avatar
bguillaum committed
609
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
610
  let shift_in loc ?domain src_gid tar_gid label_cst graph =
611
    let tar_node = Gid_map.find tar_gid graph.map in
612
    let tar_next = G_node.get_next tar_node in
613

614 615 616 617
    (* Error if a loop is created by the shift_in *)
    let tar_src_edges = Massoc_gid.assoc src_gid tar_next in
    let _ =
      try
bguillaum's avatar
bguillaum committed
618 619
        let loop_edge = List.find (fun edge -> Label_cst.match_ ?domain label_cst edge) tar_src_edges in
        Error.run ~loc "The [shift_in] command tries to build a loop (with label \"%s\")" (Label.to_string ?domain loop_edge)
620 621 622
      with Not_found -> () in

    { graph with map =
623 624
        Gid_map.mapi
          (fun node_id node ->
625 626 627 628 629 630 631 632
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
bguillaum's avatar
bguillaum committed
633
                if Label_cst.match_ ?domain label_cst edge
634 635
                then
                  match List_.usort_insert edge acc_node_tar_edges with
bguillaum's avatar
bguillaum committed
636
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
637 638 639 640 641 642 643 644 645
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
646 647
          ) graph.map
    }
648

bguillaum's avatar
bguillaum committed
649
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
650
  let shift_edges loc ?domain src_gid tar_gid label_cst graph =
651
    graph
bguillaum's avatar
bguillaum committed
652 653
    |> (shift_in loc ?domain src_gid tar_gid label_cst)
    |> (shift_out loc ?domain src_gid tar_gid label_cst)
pj2m's avatar
pj2m committed
654

bguillaum's avatar
bguillaum committed
655
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
656 657
  let merge_node loc ?domain graph src_gid tar_gid =
    let se_graph = shift_edges loc ?domain src_gid tar_gid Label_cst.all graph in
pj2m's avatar
pj2m committed
658

659 660
    let src_node = Gid_map.find src_gid se_graph.map in
    let tar_node = Gid_map.find tar_gid se_graph.map in
661

662
    match G_fs.unif (G_node.get_fs src_node) (G_node.get_fs tar_node) with
bguillaum's avatar
bguillaum committed
663 664 665
      | Some new_fs ->
        Some {graph with map =
            (Gid_map.add
bguillaum's avatar
bguillaum committed
666
               tar_gid
667
               (G_node.set_fs new_fs tar_node)
bguillaum's avatar
bguillaum committed
668
               (Gid_map.remove src_gid se_graph.map)
bguillaum's avatar
bguillaum committed
669 670 671
            )
             }
      | None -> None
bguillaum's avatar
bguillaum committed
672

bguillaum's avatar
bguillaum committed
673
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
674
  let set_feat ?loc ?domain graph node_id feat_name new_value =
675
    let node = Gid_map.find node_id graph.map in
676 677 678 679
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
bguillaum's avatar
bguillaum committed
680
          let new_fs = G_fs.set_feat ?loc ?domain feat_name new_value (G_node.get_fs node) in
681
          (G_node.set_fs new_fs node) in
682
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
683

bguillaum's avatar
bguillaum committed
684
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
685
  let update_feat ?loc ?domain graph tar_id tar_feat_name item_list =
686 687 688
    let strings_to_concat =
      List.map
        (function
bguillaum's avatar
bguillaum committed
689
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
690 691
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
692
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
693
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
694
            )
bguillaum's avatar
bguillaum committed
695
          | Concat_item.String s -> s
696 697
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
bguillaum's avatar
bguillaum committed
698
    (set_feat ?loc ?domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
699

bguillaum's avatar
bguillaum committed
700
  (* -------------------------------------------------------------------------------- *)
701
  let del_feat graph node_id feat_name =
702
    let node = Gid_map.find node_id graph.map in
703
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
704
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
705

bguillaum's avatar
bguillaum committed
706
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
707
  let to_gr ?domain graph =
708 709 710

    let gr_id id = G_node.get_name id (Gid_map.find id graph.map) in

711
    let buff = Buffer.create 32 in
712

713 714
    bprintf buff "graph {\n";

715 716
    (* meta data *)
    List.iter
717 718
      (fun (s) ->
        bprintf buff "  %s;\n" s
719 720
      ) graph.meta;

bguillaum's avatar
bguillaum committed
721
    (* nodes *)
722 723 724 725 726 727 728 729
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
730

731 732
    List.iter
      (fun (id,node) ->
733
        bprintf buff "  %s %s;\n" (gr_id id) (G_node.to_gr node)
734
      ) sorted_nodes;
735

bguillaum's avatar
bguillaum committed
736
    (* edges *)
737 738
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
739 740
        Massoc_gid.iter
          (fun tar edge ->
741
            bprintf buff "  %s -[%s]-> %s;\n" (gr_id id) (G_edge.to_string ?domain edge) (gr_id tar)
bguillaum's avatar
bguillaum committed
742
          ) (G_node.get_next node)
743
      ) sorted_nodes;
744

745 746 747
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
748
  (* -------------------------------------------------------------------------------- *)
749
  let to_sentence ?main_feat graph =
750
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
751
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
752 753 754 755 756 757 758 759 760 761

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
762 763
        " -t-", "-t-";
        "_-_", "-";
764 765
        "_", " ";
        "' ", "'";
766 767
        " ,", ",";
        " .", ".";
768 769 770 771
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
772

bguillaum's avatar
bguillaum committed
773
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
774
  let to_dep ?domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
775
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
776
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
777

778 779 780 781
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

bguillaum's avatar
bguillaum committed
782
    (* nodes *)
783
    List.iter
784
      (fun (id, node) ->
785
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
786
        let fs = G_node.get_fs node in
787 788 789 790
        let dep_fs = G_fs.to_dep ~decorated_feat ~position:(G_node.get_position node) ?filter ?main_feat fs in

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
791 792 793
          | _ -> match G_fs.get_string_atom "_UD_empty" fs with
            | Some "Yes" -> "; forecolor=purple; subcolor=purple; "
            | _ -> "" in
bguillaum's avatar
bguillaum committed
794 795 796 797 798

        bprintf buff "N_%s { %s%s }\n"
          (Gid.to_string id)
          dep_fs
          style
799 800
      ) snodes;
    bprintf buff "} \n";
801

bguillaum's avatar
bguillaum committed
802
    (* edges *)
803
    bprintf buff "[EDGES] { \n";
804

bguillaum's avatar
bguillaum committed
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819
    if !Global.debug
    then
      List.iter
        (fun (id, node) ->
          begin
            match G_node.get_prec node with
            | None -> ()
            | Some p -> bprintf buff "N_%s -> N_%s { label=\"__PREC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string p)
          end;
          begin
            match G_node.get_succ node with
            | None -> ()
            | Some s -> bprintf buff "N_%s -> N_%s { label=\"__SUCC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string s)
          end
        ) snodes;
820

821
    Gid_map.iter
822
      (fun gid elt ->
bguillaum's avatar
bguillaum committed
823 824
        Massoc_gid.iter
          (fun tar g_edge ->
bguillaum's avatar
bguillaum committed
825
            if not (G_edge.is_void ?domain g_edge)
826 827
            then
              let deco = List.mem (gid,g_edge,tar) deco.G_deco.edges in
bguillaum's avatar
bguillaum committed
828
              bprintf buff "N_%s -> N_%s %s\n" (Gid.to_string gid) (Gid.to_string tar) (G_edge.to_dep ?domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
829
          ) (G_node.get_next elt)
830
      ) graph.map;
bguillaum's avatar
bguillaum committed
831 832

    bprintf buff "} \n";
833 834
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
835
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
836
  let to_dot ?domain ?main_feat ?(deco=G_deco.empty) graph =
837
    let buff = Buffer.create 32 in
838

839
    bprintf buff "digraph G {\n";
bguillaum's avatar
bguillaum committed
840
    (* bprintf buff "  rankdir=LR;\n"; *)
841
    bprintf buff "  node [shape=none];\n";
842

bguillaum's avatar
bguillaum committed
843
    (* nodes *)
844 845
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
846
        let decorated_feat =
847 848
          try List.assoc id deco.G_deco.nodes
          with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
849 850
        bprintf buff "  N_%s [label=<%s>, color=%s]\n"
          (Gid.to_string id)
851 852 853
          (G_fs.to_dot ~decorated_feat ?main_feat (G_node.get_fs node))
          (* TODO: add bgcolor in dot output *)
          (if List.mem_assoc id deco.G_deco.nodes then "red" else "black")
854