grew_graph.ml 40.5 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11 12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17 18
open Grew_types

pj2m's avatar
pj2m committed
19 20 21 22
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
23
(* ================================================================================ *)
24
module P_deco = struct
bguillaum's avatar
bguillaum committed
25 26 27 28
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
29 30

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
31
end (* module P_deco *)
pj2m's avatar
pj2m committed
32

bguillaum's avatar
bguillaum committed
33
(* ================================================================================ *)
34 35
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
36

37
  let empty = Pid_map.empty
38

39
  let find = Pid_map.find
pj2m's avatar
pj2m committed
40

41
  let pid_name_list t = Pid_map.fold (fun _ node acc -> (P_node.get_name node)::acc) t []
42

Bruno Guillaume's avatar
Bruno Guillaume committed
43 44 45 46 47 48 49 50 51 52 53
  let to_json ?domain t =
    `List (
      Pid_map.fold
        (fun pid p_node acc ->
          (`Assoc [
            ("id", `String (Pid.to_string pid));
            ("node", P_node.to_json ?domain p_node)
          ]) :: acc
        ) t []
      )

54
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
55
  let map_add_edge map id_src label id_tar =
56
    let node_src =
pj2m's avatar
pj2m committed
57
      (* Not found can be raised when adding an edge from pos to neg *)
58 59
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
60 61
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
62

bguillaum's avatar
bguillaum committed
63
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
64
  let build_filter ?domain table (ast_node, loc) =
65
    let pid = Id.build ~loc ast_node.Ast.node_id table in
bguillaum's avatar
bguillaum committed
66
    let fs = P_fs.build ?domain ast_node.Ast.fs in
67 68
    (pid, fs)

bguillaum's avatar
bguillaum committed
69
  (* -------------------------------------------------------------------------------- *)
Bruno Guillaume's avatar
Bruno Guillaume committed
70
  let build ?domain ?pat_vars (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
71

72
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
73
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
74
    let rec insert (ast_node, loc) = function
bguillaum's avatar
bguillaum committed
75
      | [] -> [P_node.build ?domain ?pat_vars (ast_node, loc)]
76
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
77
        begin
bguillaum's avatar
bguillaum committed
78
          try (node_id, P_node.unif_fs (P_fs.build ?domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
79 80
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
81
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
82

83
    let (named_nodes : (Id.name * P_node.t) list) =
84
      List.fold_left
85 86
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
87

pj2m's avatar
pj2m committed
88 89 90
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

91 92
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
93 94

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
95 96
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
97
      Pid_map.empty node_list in
98

bguillaum's avatar
bguillaum committed
99
    let (map : t) =
pj2m's avatar
pj2m committed
100
      List.fold_left
bguillaum's avatar
bguillaum committed
101 102 103
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
bguillaum's avatar
bguillaum committed
104
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
105 106 107
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
108
              (P_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
109
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
110 111
          )
        ) map_without_edges full_edge_list in
112
    (map, pos_table)
bguillaum's avatar
bguillaum committed
113

114

bguillaum's avatar
bguillaum committed
115
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
116 117
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
118
  type extension = {
bguillaum's avatar
bguillaum committed
119 120 121
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
122

bguillaum's avatar
bguillaum committed
123
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
124
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
Bruno Guillaume's avatar
Bruno Guillaume committed
125
  let build_extension ?domain ?pat_vars pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
126

bguillaum's avatar
bguillaum committed
127
    let built_nodes = List.map (P_node.build ?domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
128

129 130
    let (old_nodes, new_nodes) =
      List.partition
131
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
132
        built_nodes in
bguillaum's avatar
bguillaum committed
133

pj2m's avatar
pj2m committed
134 135 136 137 138 139
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
140 141 142 143

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
144 145 146
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
147 148 149

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
150 151 152 153 154 155 156
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
157

158
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
159
      List.fold_left
bguillaum's avatar
bguillaum committed
160
        (fun acc (ast_edge, loc) ->
161 162
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
163 164
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
165
              | Some i -> Pid.Pos i
166
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
167 168
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
169
              | Some i -> Pid.Pos i
170
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
bguillaum's avatar
bguillaum committed
171
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
172 173
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
174
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
175
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
176 177
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

178
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
179 180 181 182 183 184
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
185
      Pid_map.fold
bguillaum's avatar
bguillaum committed
186 187 188 189 190 191 192 193 194 195 196
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
197 198

    let roots =
199
      Pid_map.fold
bguillaum's avatar
bguillaum committed
200 201 202 203 204
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
205

pj2m's avatar
pj2m committed
206 207
    (!tree_prop, roots)

208
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
209
  let roots graph = snd (tree_and_roots graph)
210
end (* module P_graph *)
pj2m's avatar
pj2m committed
211

bguillaum's avatar
bguillaum committed
212
(* ================================================================================ *)
213 214
module G_deco = struct
  type t = {
215 216
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
217 218 219 220
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
221

bguillaum's avatar
bguillaum committed
222
(* ================================================================================ *)
223
module G_graph = struct
224
  type t = {
bguillaum's avatar
bguillaum committed
225 226
    meta: string list;                       (* meta-informations *)
    map: G_node.t Gid_map.t;                 (* node description *)
bguillaum's avatar
bguillaum committed
227
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
228
    highest_index: int;                      (* the next free integer index *)
229 230
  }

bguillaum's avatar
bguillaum committed
231
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; }
232

bguillaum's avatar
bguillaum committed
233
  (* ---------------------------------------------------------------------- *)
234 235 236 237 238 239 240 241 242 243
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

bguillaum's avatar
bguillaum committed
244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
  exception Not_a_tree
  type tree = T of (Gid.t * tree list)

  let rec tree_to_string = function
  | T (gid, daughters) ->
    sprintf "%s [%s]"
      (Gid.to_string gid)
      ((String.concat) ";" (List.map tree_to_string daughters))

  let graph_to_tree (g : t) : tree =
    let rec build_sub_tree map forest gid =
      if List.mem_assoc gid forest
      then (map, forest)
      else
        let (new_map, daugthers, new_forest) = Massoc_gid.fold
          (fun (acc_map, sub_trees, acc_forest) gid2 edge ->
            if edge = G_edge.sub
            then
              (* ensure that gid2 is in forest *)
              let (new_acc_map, new_acc_forest) = build_sub_tree acc_map acc_forest gid2 in
              let sub = List.assoc gid2 new_acc_forest in
              ( new_acc_map,
                sub::sub_trees,
                List.remove_assoc gid2 new_acc_forest
              )
            else (acc_map, sub_trees, acc_forest)
        ) (map,[],forest) (G_node.get_next (Gid_map.find gid map)) in
      (Gid_map.remove gid new_map, (gid, T (gid, List.rev daugthers))::new_forest) in

    let rec loop (unused_map, forest) =
      match (Gid_map.is_empty unused_map, forest) with
      | (true, [(_,tree)]) -> tree
      | (true, _) -> raise Not_a_tree
      | _ ->
        (* pick one unused node *)
        let (gid,_) = Gid_map.choose unused_map in
        loop (build_sub_tree unused_map forest gid) in
    loop (g.map, [])

bguillaum's avatar
bguillaum committed
283
  let get_highest g = g.highest_index
284 285 286 287

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
288

bguillaum's avatar
bguillaum committed
289
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
290

291 292
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
293

bguillaum's avatar
bguillaum committed
294
  (* is there an edge e out of node i ? *)
bguillaum's avatar
bguillaum committed
295
  let edge_out ?domain graph node_id label_cst =
296
    let node = Gid_map.find node_id graph.map in
bguillaum's avatar
bguillaum committed
297
    Massoc_gid.exists (fun _ e -> Label_cst.match_ ?domain label_cst e) (G_node.get_next node)
298

bguillaum's avatar
bguillaum committed
299
  (* -------------------------------------------------------------------------------- *)
300
  let map_add_edge_opt map id_src label id_tar =
301
    let node_src =
302
      (* Not found can be raised when adding an edge from pos to neg *)
303
      try Gid_map.find id_src map with Not_found -> G_node.empty in
304
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
305 306
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
307

308 309 310 311 312 313 314
  (* -------------------------------------------------------------------------------- *)
  let map_add_edge map id_src label id_tar =
    let node_src = Gid_map.find id_src map in
    match G_node.add_edge label id_tar node_src with
      | Some new_node -> Gid_map.add id_src new_node map
      | None -> Log.fbug "[Graph.map_add_edge] duplicate"; exit 2

315
  (* -------------------------------------------------------------------------------- *)
316
  let add_edge graph id_src label id_tar =
317
    match map_add_edge_opt graph.map id_src label id_tar with
318 319
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
320

bguillaum's avatar
bguillaum committed
321
  (* -------------------------------------------------------------------------------- *)
Bruno Guillaume's avatar
Bruno Guillaume committed
322
  let build ?domain ?(grewpy=false) gr_ast =
323 324 325 326
    let full_node_list =
      if grewpy
      then List.sort (Ast.grewpy_compare) gr_ast.Ast.nodes
      else gr_ast.Ast.nodes
327
    and full_edge_list = gr_ast.Ast.edges in
328

329

bguillaum's avatar
bguillaum committed
330 331
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
332

bguillaum's avatar
bguillaum committed
333 334 335 336 337
      | (ast_node, loc)::tail ->
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
338 339
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some index) tail in
          let succ = if tail = [] then None else Some (index+1) in
340
          let new_node = G_node.build ?domain ?prec ?succ ~position:(float index) (ast_node, loc) in
bguillaum's avatar
bguillaum committed
341
            (
342
              Gid_map.add index new_node new_tail,
bguillaum's avatar
bguillaum committed
343 344
              (node_id,index)::table
            ) in
345

bguillaum's avatar
bguillaum committed
346
    let (map_without_edges, table) = loop [] 0 None full_node_list in
347

348 349
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
350
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
351 352
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
bguillaum's avatar
bguillaum committed
353
          let edge = G_edge.build ?domain (ast_edge, loc) in
354
          (match map_add_edge_opt acc i1 edge i2 with
bguillaum's avatar
bguillaum committed
355 356
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
357
              (G_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
358
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
359 360
          )
        ) map_without_edges full_edge_list in
361

bguillaum's avatar
bguillaum committed
362
    {meta=gr_ast.Ast.meta; map=map; fusion = []; highest_index = (List.length full_node_list) -1}
bguillaum's avatar
bguillaum committed
363

bguillaum's avatar
bguillaum committed
364
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
365
  let of_conll ?domain conll =
366

367
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
bguillaum's avatar
bguillaum committed
368

369
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), Conll.Id.to_dot) in
370

371 372 373 374
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
bguillaum's avatar
bguillaum committed
375
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec last) Gid_map.empty
376 377
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
bguillaum's avatar
bguillaum committed
378
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec ~succ:(index+1) line)
379
          (loop (index+1) (Some index) tail) in
380 381

    let map_without_edges = loop 0 None sorted_lines in
382

bguillaum's avatar
bguillaum committed
383
    let map_with_edges =
bguillaum's avatar
bguillaum committed
384 385
      List.fold_left
        (fun acc line ->
386 387
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
388 389
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
390
              let gov_id = Id.gbuild ~loc gov gtable in
bguillaum's avatar
bguillaum committed
391
              let edge = G_edge.make ?domain ~loc dep_lab in
392
              (match map_add_edge_opt acc2 gov_id edge dep_id with
bguillaum's avatar
bguillaum committed
393 394
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
395
                  (G_edge.to_string ?domain edge)
396
                  (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
397
              )
398
            ) acc line.Conll.deps
399
        ) map_without_edges conll.Conll.lines in
bguillaum's avatar
bguillaum committed
400 401 402

      let fusion =
        List.map
403
          (fun {Conll.first; last; fusion; mw_line_num} ->
404
              let loc = Loc.file_opt_line_opt conll.Conll.file mw_line_num in
405
              (
406
                Id.gbuild ~loc (first,None) gtable,
407
                (
408
                  Id.gbuild ~loc (last, None) gtable,
409 410
                  fusion
                )
bguillaum's avatar
bguillaum committed
411
              )
412
          ) conll.Conll.multiwords in
bguillaum's avatar
bguillaum committed
413

bguillaum's avatar
bguillaum committed
414
    {meta = conll.Conll.meta; map=map_with_edges; fusion; highest_index= (List.length sorted_lines) -1 }
bguillaum's avatar
bguillaum committed
415

416 417
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
bguillaum's avatar
bguillaum committed
418
  let of_brown ?domain ?sentid brown =
419
    let units = Str.split (Str.regexp " ") brown in
420
      let conll_lines = List.mapi
421
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
422
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
423
        let pos = String.sub pos 1 ((String.length pos)-2) in
424
        let feats = match (i,sentid) with
425 426
          | (0,Some id) -> [("sentid", id)]
          | _ -> [] in
427
        Conll.build_line ~id:(i+1,None) ~form ~lemma ~xpos:pos ~feats ~deps:([(i, "SUC")]) ()
428
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\") in >>>%s<<<" item brown
Bruno Guillaume's avatar
Bruno Guillaume committed
429
      ) units in
bguillaum's avatar
bguillaum committed
430
    of_conll ?domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
431

432
  (* -------------------------------------------------------------------------------- *)
433 434 435
  let of_pst ?domain pst =
    let cpt = ref 0 in
    let get_pos () = incr cpt; !cpt - 1 in
436

437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
    let leaf_list = ref [] in

    let rec loop nodes = function
    | Ast.Leaf (loc, phon) ->
      let fresh_id = get_pos () in
      let node = G_node.pst_leaf ~loc ?domain phon fresh_id in
      leaf_list := fresh_id :: ! leaf_list;
      (fresh_id, Gid_map.add fresh_id node nodes)

    | Ast.T (loc, cat, daughters) ->
      let fresh_id = get_pos () in
      let new_node = G_node.pst_node ~loc ?domain cat fresh_id in
      let with_mother = Gid_map.add fresh_id new_node nodes in
      let new_nodes = List.fold_left
        (fun map daughter ->
          let (daughter_id, new_map) = loop map daughter in
          map_add_edge new_map fresh_id G_edge.sub daughter_id
        ) with_mother daughters in
      (fresh_id, new_nodes) in

    let (_,map) = loop Gid_map.empty pst in

    let rec prec_loop map = function
    | [] | [_] -> map
    | n1 :: n2 :: tail ->
      let new_map = prec_loop map (n2 :: tail) in

464 465 466 467 468 469 470
      let node1 = Gid_map.find n1 new_map
      and node2 = Gid_map.find n2 new_map in
      new_map
      |> (Gid_map.add n1 (G_node.set_succ n2 node1))
      |> (Gid_map.add n2 (G_node.set_prec n1 node2)) in

    {meta=[]; map=prec_loop map (List.rev !leaf_list); fusion = []; highest_index = !cpt}
bguillaum's avatar
bguillaum committed
471

bguillaum's avatar
bguillaum committed
472
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
473
  let del_edge ?domain ?edge_ident loc graph id_src label id_tar =
474
    let node_src =
475
      try Gid_map.find id_src graph.map
476
      with Not_found ->
477
        match edge_ident with
bguillaum's avatar
bguillaum committed
478 479
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
480
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
bguillaum's avatar
bguillaum committed
481
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string ?domain label)
pj2m's avatar
pj2m committed
482

bguillaum's avatar
bguillaum committed
483
  (* -------------------------------------------------------------------------------- *)
484
  let del_node graph node_id =
485 486 487 488 489 490 491
    let map_wo_node =
      Gid_map.fold
        (fun id value acc ->
          if id = node_id
          then acc
          else Gid_map.add id (G_node.remove_key node_id value) acc
        ) graph.map Gid_map.empty in
492
    let node = Gid_map.find node_id graph.map in
493 494 495 496
    let new_map =
      match (G_node.get_prec node, G_node.get_succ node) with
      | (Some id_prec, Some id_succ) ->
        begin
497 498
          let prec = Gid_map.find id_prec map_wo_node
          and succ = Gid_map.find id_succ map_wo_node in
499 500 501 502 503 504
          map_wo_node
          |> (Gid_map.add id_prec (G_node.set_succ id_succ prec))
          |> (Gid_map.add id_succ (G_node.set_prec id_prec succ))
        end
      | (Some id_prec, None) ->
        begin
505
          let prec = Gid_map.find id_prec map_wo_node in
506 507 508 509 510
          map_wo_node
          |> (Gid_map.add id_prec (G_node.remove_succ prec))
        end
      | (None, Some id_succ) ->
        begin
511
          let succ = Gid_map.find id_succ map_wo_node in
512 513 514 515 516
          map_wo_node
          |> (Gid_map.add id_succ (G_node.remove_prec succ))
        end
      | (None, None) -> map_wo_node in
    { graph with map = new_map }
pj2m's avatar
pj2m committed
517

bguillaum's avatar
bguillaum committed
518
  (* -------------------------------------------------------------------------------- *)
519
  let insert id1 id2 graph =
bguillaum's avatar
bguillaum committed
520 521
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
522 523 524
    let new_pos = match (G_node.get_position node1, G_node.get_position node2) with
    | (G_node.Ordered pos1, G_node.Ordered pos2) -> (pos1 +. pos2) /. 2.
    | _ -> Error.run "Try to insert into non ordered nodes" in
525
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
526
    let map = graph.map
527
      |> (Gid_map.add new_gid (G_node.fresh ~prec:id1 ~succ:id2 new_pos))
bguillaum's avatar
bguillaum committed
528 529
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
530
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
531 532

  (* -------------------------------------------------------------------------------- *)
533
  let append id graph =
bguillaum's avatar
bguillaum committed
534
    let node = Gid_map.find id graph.map in
535 536 537
    let new_pos = match G_node.get_position node with
    | G_node.Ordered pos -> pos +. 1.
    | _ -> Error.run "Try to append into non ordered nodes" in
538
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
539
    let map = graph.map
540
      |> (Gid_map.add new_gid (G_node.fresh ~prec:id new_pos))
bguillaum's avatar
bguillaum committed
541
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
542
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
543 544

  (* -------------------------------------------------------------------------------- *)
545
  let prepend id graph =
bguillaum's avatar
bguillaum committed
546
    let node = Gid_map.find id graph.map in
547 548 549
    let new_pos = match G_node.get_position node with
    | G_node.Ordered pos -> pos -. 1.
    | _ -> Error.run "Try to prepend into non ordered nodes" in
550
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
551
    let map = graph.map
552
      |> (Gid_map.add new_gid (G_node.fresh ~succ:id new_pos))
bguillaum's avatar
bguillaum committed
553
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
554
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
555 556

  (* -------------------------------------------------------------------------------- *)
557
  let add_after node_id graph =
bguillaum's avatar
bguillaum committed
558 559
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
560 561
    | Some gid_succ -> insert node_id gid_succ graph
    | None -> append node_id graph
bguillaum's avatar
bguillaum committed
562 563

  (* -------------------------------------------------------------------------------- *)
564
  let add_before node_id graph =
bguillaum's avatar
bguillaum committed
565 566
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
567 568 569 570 571 572 573 574
    | Some gid_prec -> insert gid_prec node_id graph
    | None -> prepend node_id graph

  (* -------------------------------------------------------------------------------- *)
  let add_unordered graph =
    let new_gid = graph.highest_index + 1 in
    let map = Gid_map.add new_gid (G_node.fresh_unordered ()) graph.map in
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
575

bguillaum's avatar
bguillaum committed
576
  (* -------------------------------------------------------------------------------- *)
577
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
578
  let shift_out loc ?domain src_gid tar_gid is_gid_local label_cst graph =
579
    let src_node = Gid_map.find src_gid graph.map in
580
    let tar_node = Gid_map.find tar_gid graph.map in
581

582 583 584
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

585
    let (new_src_next, new_tar_next) =
586 587
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
588
        if Label_cst.match_ ?domain label_cst edge && not (is_gid_local next_gid)
589 590 591
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
bguillaum's avatar
bguillaum committed
592
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
593 594 595
        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
596

597
    { graph with map =
598 599 600
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
601
    }
pj2m's avatar
pj2m committed
602

bguillaum's avatar
bguillaum committed
603
  (* -------------------------------------------------------------------------------- *)
604
  let shift_in loc ?domain src_gid tar_gid is_gid_local label_cst graph =
605
    { graph with map =
606 607 608 609 610
      Gid_map.mapi
        (fun node_id node ->
          if is_gid_local node_id
          then node
          else
611 612 613 614 615 616 617 618
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
bguillaum's avatar
bguillaum committed
619
                if Label_cst.match_ ?domain label_cst edge
620 621
                then
                  match List_.usort_insert edge acc_node_tar_edges with
bguillaum's avatar
bguillaum committed
622
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
623 624 625 626 627 628 629 630 631
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
632 633
          ) graph.map
    }
634

bguillaum's avatar
bguillaum committed
635
  (* -------------------------------------------------------------------------------- *)
636
  let shift_edges loc ?domain src_gid tar_gid is_gid_local label_cst graph =
637
    graph
638 639
    |> (shift_in loc ?domain src_gid tar_gid is_gid_local label_cst)
    |> (shift_out loc ?domain src_gid tar_gid is_gid_local label_cst)
pj2m's avatar
pj2m committed
640

bguillaum's avatar
bguillaum committed
641
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
642
  let set_feat ?loc ?domain graph node_id feat_name new_value =
643
    let node = Gid_map.find node_id graph.map in
644 645 646 647
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
bguillaum's avatar
bguillaum committed
648
          let new_fs = G_fs.set_feat ?loc ?domain feat_name new_value (G_node.get_fs node) in
649
          (G_node.set_fs new_fs node) in
650
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
651

bguillaum's avatar
bguillaum committed
652
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
653
  let update_feat ?loc ?domain graph tar_id tar_feat_name item_list =
654 655 656
    let strings_to_concat =
      List.map
        (function
657 658
          | Concat_item.Feat (node_gid, "position") ->
            let node = Gid_map.find node_gid graph.map in
659 660 661 662 663
            begin
              match G_node.get_position node with
              | G_node.Ordered p -> sprintf "%g" p
              | _ -> Error.run ?loc "Try to read position of an unordered node"
            end
bguillaum's avatar
bguillaum committed
664
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
665 666
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
667
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
668
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
669
            )
bguillaum's avatar
bguillaum committed
670
          | Concat_item.String s -> s
671 672
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
bguillaum's avatar
bguillaum committed
673
    (set_feat ?loc ?domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
674

bguillaum's avatar
bguillaum committed
675
  (* -------------------------------------------------------------------------------- *)
676
  let del_feat graph node_id feat_name =
677
    let node = Gid_map.find node_id graph.map in
678
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
679
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
680

bguillaum's avatar
bguillaum committed
681
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
682
  let to_gr ?domain graph =
683 684 685

    let gr_id id = G_node.get_name id (Gid_map.find id graph.map) in

686
    let buff = Buffer.create 32 in
687

688 689
    bprintf buff "graph {\n";

690 691
    (* meta data *)
    List.iter
692 693
      (fun (s) ->
        bprintf buff "  %s;\n" s
694 695
      ) graph.meta;

bguillaum's avatar
bguillaum committed
696
    (* nodes *)
697 698 699 700 701 702 703 704
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
705

706 707
    List.iter
      (fun (id,node) ->
708
        bprintf buff "  %s %s;\n" (gr_id id) (G_node.to_gr node)
709
      ) sorted_nodes;
710

bguillaum's avatar
bguillaum committed
711
    (* edges *)
712 713
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
714 715
        Massoc_gid.iter
          (fun tar edge ->
716
            bprintf buff "  %s -[%s]-> %s;\n" (gr_id id) (G_edge.to_string ?domain edge) (gr_id tar)
bguillaum's avatar
bguillaum committed
717
          ) (G_node.get_next node)
718
      ) sorted_nodes;
719

720 721 722
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
723
  (* -------------------------------------------------------------------------------- *)
724
  let to_sentence ?main_feat graph =
725
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
726
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
727 728 729 730 731 732 733 734 735 736

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
737 738
        " -t-", "-t-";
        "_-_", "-";
739 740
        "_", " ";
        "' ", "'";
741 742
        " ,", ",";
        " .", ".";
743 744 745 746
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
747

bguillaum's avatar
bguillaum committed
748
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
749
  let to_dep ?domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
750
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
751
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
752

753 754 755 756
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

bguillaum's avatar
bguillaum committed
757
    (* nodes *)
758
    List.iter
759
      (fun (id, node) ->
760
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
761
        let fs = G_node.get_fs node in
762 763
        let pos= match G_node.get_position node with G_node.Ordered x -> Some x | _ -> None in
        let dep_fs = G_fs.to_dep ~decorated_feat ?position:pos ?filter ?main_feat fs in
764 765 766

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
767 768 769
          | _ -> match G_fs.get_string_atom "_UD_empty" fs with
            | Some "Yes" -> "; forecolor=purple; subcolor=purple; "
            | _ -> "" in
bguillaum's avatar
bguillaum committed
770 771 772 773 774

        bprintf buff "N_%s { %s%s }\n"
          (Gid.to_string id)
          dep_fs
          style
775 776
      ) snodes;
    bprintf buff "} \n";
777

bguillaum's avatar
bguillaum committed
778
    (* edges *)
779
    bprintf buff "[EDGES] { \n";
780

bguillaum's avatar
bguillaum committed
781 782 783 784 785 786 787 788 789 790 791 792 793 794 795
    if !Global.debug
    then
      List.iter
        (fun (id, node) ->
          begin
            match G_node.get_prec node with
            | None -> ()
            | Some p -> bprintf buff "N_%s -> N_%s { label=\"__PREC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string p)
          end;
          begin
            match G_node.get_succ node with
            | None -> ()
            | Some s -> bprintf buff "N_%s -> N_%s { label=\"__SUCC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string s)
          end
        ) snodes;
796

797
    Gid_map.iter
798
      (fun gid elt ->
bguillaum's avatar
bguillaum committed
799 800
        Massoc_gid.iter
          (fun tar g_edge ->
bguillaum's avatar
bguillaum committed
801
            if not (G_edge.is_void ?domain g_edge)
802 803
            then
              let deco = List.mem (gid,g_edge,tar) deco.G_deco.edges in
bguillaum's avatar
bguillaum committed
804
              bprintf buff "N_%s -> N_%s %s\n" (Gid.to_string gid) (Gid.to_string tar) (G_edge.to_dep ?domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
805
          ) (G_node.get_next elt)
806
      ) graph.map;
bguillaum's avatar
bguillaum committed
807 808

    bprintf buff "} \n";
809 810
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
811
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
812
  let to_dot ?domain ?main_feat ?(deco=G_deco.empty) graph =
813
    let buff = Buffer.create 32 in
814

815
    bprintf buff "digraph G {\n";
bguillaum's avatar
bguillaum committed
816
    (* bprintf buff "  rankdir=LR;\n"; *)
Bruno Guillaume's avatar
Bruno Guillaume committed
817
    (* bprintf buff "  node [shape=none];\n"; *)
818

bguillaum's avatar
bguillaum committed
819
    (* nodes *)
820 821
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
822
        let decorated_feat =
823 824
          try List.assoc id deco.G_deco.nodes
          with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
825 826
        bprintf buff "  N_%s [label=<%s>, color=%s]\n"
          (Gid.to_string id)
827 828 829
          (G_fs.to_dot ~decorated_feat ?main_feat (G_node.get_fs node))
          (* TODO: add bgcolor in dot output *)
          (if List.mem_assoc id deco.G_deco.nodes then "red" else "black")
830
      ) graph.map;
bguillaum's avatar
bguillaum committed
831 832

    (* edges *)
833 834
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
835 836 837
        Massoc_gid.iter
          (fun tar g_edge ->
            let deco = List.mem (id,g_edge,tar) deco.G_deco.edges in
838 839 840 841 842 843 844 845 846 847 848
            if g_edge = G_edge.sub
            then bprintf buff "  N_%s -> N_%s [dir=none];\n" (Gid.to_string id) (Gid.to_string tar)
            else bprintf buff "  N_%s -> N_%s%s;\n" (Gid.to_string id) (Gid.to_string tar) (G_edge.to_dot ?domain ~deco g_edge)
          ) (G_node.get_next node)
      ) graph.map;

    Gid_map.iter
      (fun id node ->
        begin
          match G_node.get_succ node with
          | Some s when !Global.debug ->
849
              bprintf buff "  N_%s -> N_%s [label=\"SUCC\", style=dotted, fontcolor=lightblue, color=lightblue]; {rank=same; N_%s; N_%s };\n"
850
                (Gid.to_string id) (Gid.to_string s) (Gid.to_string id) (Gid.to_string s)
Bruno Guillaume's avatar
Bruno Guillaume committed
851
          (* | Some s ->
852
              bprintf buff "  N_%s -> N_%s [style=invis]; {rank=same; N_%s; N_%s };\n"
Bruno Guillaume's avatar
Bruno Guillaume committed
853 854
                (Gid.to_string id) (Gid.to_string s) (Gid.to_string id) (Gid.to_string s) *)
          | _ -> ()
855
        end
856
      ) graph.map;
857

858 859
    bprintf buff "}\n";
    Buffer.contents buff