grew_graph.ml 35.6 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11
12
13
open Printf
open Log

bguillaum's avatar
bguillaum committed
14
open Grew_base
bguillaum's avatar
bguillaum committed
15
open Grew_ast
16
17
open Grew_types

pj2m's avatar
pj2m committed
18
19
20
21
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
22
(* ================================================================================ *)
23
module P_deco = struct
bguillaum's avatar
bguillaum committed
24
25
26
27
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
28
29

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
30
end (* module P_deco *)
pj2m's avatar
pj2m committed
31

bguillaum's avatar
bguillaum committed
32
(* ================================================================================ *)
33
34
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
35

36
  let empty = Pid_map.empty
37

38
  let find = Pid_map.find
pj2m's avatar
pj2m committed
39

40
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
41
  let map_add_edge map id_src label id_tar =
42
    let node_src =
pj2m's avatar
pj2m committed
43
      (* Not found can be raised when adding an edge from pos to neg *)
44
45
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
46
47
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
48

bguillaum's avatar
bguillaum committed
49
  (* -------------------------------------------------------------------------------- *)
50
  let build_filter domain table (ast_node, loc) =
51
    let pid = Id.build ~loc ast_node.Ast.node_id table in
52
    let fs = P_fs.build domain ast_node.Ast.fs in
53
54
    (pid, fs)

bguillaum's avatar
bguillaum committed
55
  (* -------------------------------------------------------------------------------- *)
56
  let build domain label_domain ?pat_vars ?(locals=[||]) (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
57

58
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
59
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
60
    let rec insert (ast_node, loc) = function
61
      | [] -> [P_node.build domain ?pat_vars (ast_node, loc)]
62
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
63
        begin
64
          try (node_id, P_node.unif_fs (P_fs.build domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
65
66
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
67
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
68

69
    let (named_nodes : (Id.name * P_node.t) list) =
70
      List.fold_left
71
72
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
73

pj2m's avatar
pj2m committed
74
75
76
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

77
78
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
79
80

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
81
82
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
83
      Pid_map.empty node_list in
84

bguillaum's avatar
bguillaum committed
85
    let (map : t) =
pj2m's avatar
pj2m committed
86
      List.fold_left
bguillaum's avatar
bguillaum committed
87
88
89
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
90
          let edge = P_edge.build label_domain ~locals (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
91
92
93
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
94
              (P_edge.to_string label_domain edge)
bguillaum's avatar
bguillaum committed
95
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
96
97
          )
        ) map_without_edges full_edge_list in
98
    (map, pos_table)
bguillaum's avatar
bguillaum committed
99

100

bguillaum's avatar
bguillaum committed
101
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
102
103
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
104
  type extension = {
bguillaum's avatar
bguillaum committed
105
106
107
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
108

bguillaum's avatar
bguillaum committed
109
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
110
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
111
  let build_extension domain label_domain ?pat_vars ?(locals=[||]) pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
112

113
    let built_nodes = List.map (P_node.build domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
114

115
116
    let (old_nodes, new_nodes) =
      List.partition
117
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
118
        built_nodes in
bguillaum's avatar
bguillaum committed
119

pj2m's avatar
pj2m committed
120
121
122
123
124
125
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
126
127
128
129

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
130
131
132
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
133
134
135

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
136
137
138
139
140
141
142
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
143

144
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
145
      List.fold_left
bguillaum's avatar
bguillaum committed
146
        (fun acc (ast_edge, loc) ->
147
148
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
149
150
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
151
              | Some i -> Pid.Pos i
152
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
153
154
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
155
              | Some i -> Pid.Pos i
156
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
157
          let edge = P_edge.build label_domain ~locals (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
158
159
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
160
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
161
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
162
163
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

164
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
165
166
167
168
169
170
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
171
      Pid_map.fold
bguillaum's avatar
bguillaum committed
172
173
174
175
176
177
178
179
180
181
182
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
183
184

    let roots =
185
      Pid_map.fold
bguillaum's avatar
bguillaum committed
186
187
188
189
190
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
191

pj2m's avatar
pj2m committed
192
193
    (!tree_prop, roots)

194
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
195
  let roots graph = snd (tree_and_roots graph)
196
end (* module P_graph *)
pj2m's avatar
pj2m committed
197

bguillaum's avatar
bguillaum committed
198
(* ================================================================================ *)
199
200
module G_deco = struct
  type t = {
201
202
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
203
204
205
206
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
207

bguillaum's avatar
bguillaum committed
208
(* ================================================================================ *)
209
module G_graph = struct
210
  type t = {
211
212
    meta: (string * string) list; (* meta-informations *)
    map: G_node.t Gid_map.t;      (* node description *)
bguillaum's avatar
bguillaum committed
213
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
214
215
  }

bguillaum's avatar
bguillaum committed
216
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]}
217

bguillaum's avatar
bguillaum committed
218
  (* ---------------------------------------------------------------------- *)
219
220
221
222
223
224
225
226
227
228
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

bguillaum's avatar
bguillaum committed
229
  (* ---------------------------------------------------------------------- *)
230
231
232
233
234
235
236
237
238
239
240
241
  (* [normalize g] changes all graphs keys to Old _ (used when entering a new module) *)
  let normalize t =
    let (_, mapping) =
      Gid_map.fold
        (fun key value (max_binding, mapping) ->
          match key with
          | Gid.Old n -> (n, mapping)
          | Gid.New _ -> (max_binding, mapping)
          | Gid.Act (n,suffix) -> (max_binding+1, (key, (Gid.Old (max_binding+1)))::mapping)
        ) t.map (0, []) in
        rename mapping t

242
243
244
245

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
246

bguillaum's avatar
bguillaum committed
247
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
248

249
250
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
251

252
253
254
255
  let max_binding t =
    match Gid_map.max_binding t.map with
      | (Gid.Old i,_) -> i
      | _ -> Error.bug "[G_graph.max_binding]"
bguillaum's avatar
bguillaum committed
256

bguillaum's avatar
bguillaum committed
257
  let list_num test =
bguillaum's avatar
bguillaum committed
258
259
260
261
262
263
    let rec loop n = function
      | [] -> raise Not_found
      | x::_ when test x -> n
      | _::t -> loop (n+1) t
    in loop 0

bguillaum's avatar
bguillaum committed
264
  (* is there an edge e out of node i ? *)
265
  let edge_out label_domain graph node_id label_cst =
266
    let node = Gid_map.find node_id graph.map in
267
    Massoc_gid.exists (fun _ e -> Label_cst.match_ label_domain e label_cst) (G_node.get_next node)
268

bguillaum's avatar
bguillaum committed
269
270
271
272
273
274
  let get_annot_info graph =
    let annot_info =
      Gid_map.fold
        (fun _ node acc ->
          match (G_node.get_annot_info node, acc) with
            | (None,_) -> acc
bguillaum's avatar
bguillaum committed
275
            | (Some f, None) -> Some (f,G_node.get_position node)
bguillaum's avatar
bguillaum committed
276
277
278
279
280
281
            | (Some _, Some _) -> Error.build "[G_node.get_annot_info] Two nodes with annot info"
        ) graph.map None in
    match annot_info with
      | Some x -> x
      | None -> Error.build "[G_node.get_annot_info] No nodes with annot info"

bguillaum's avatar
bguillaum committed
282
  (* -------------------------------------------------------------------------------- *)
283
  let map_add_edge map id_src label id_tar =
284
    let node_src =
285
      (* Not found can be raised when adding an edge from pos to neg *)
286
      try Gid_map.find id_src map with Not_found -> G_node.empty in
287
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
288
289
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
290

291
  (* -------------------------------------------------------------------------------- *)
292
293
294
295
  let add_edge graph id_src label id_tar =
    match map_add_edge graph.map id_src label id_tar with
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
296

bguillaum's avatar
bguillaum committed
297
  (* -------------------------------------------------------------------------------- *)
298
  let build domain label_domain ?(locals=[||]) gr_ast =
299
300
    let full_node_list = gr_ast.Ast.nodes
    and full_edge_list = gr_ast.Ast.edges in
301

302
303
    let next_free_position = ref 1. in

304
    let named_nodes =
305
      let rec loop already_bound = function
bguillaum's avatar
bguillaum committed
306
        | [] -> []
307
        | (ast_node, loc) :: tail ->
308
          let node_id = ast_node.Ast.node_id in
309
310
311
          if List.mem node_id already_bound
          then Error.build "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
          else
312
            let (new_id,new_node) = G_node.build domain ~def_position:!next_free_position (ast_node, loc) in
313
314
315
            next_free_position := 1. +. (max !next_free_position (G_node.get_position new_node));
            let new_tail = loop (node_id :: already_bound) tail in
            (new_id,new_node) :: new_tail in
316
317
318
319
320
321
322
      loop [] full_node_list in

    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

    (* table contains the sorted list of node ids *)
    let table = Array.of_list sorted_ids in
323
324
325
326

    (* the nodes, in the same order *)
    let map_without_edges = List_.foldi_left (fun i acc elt -> Gid_map.add (Gid.Old i) elt acc) Gid_map.empty node_list in

327
328
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
329
330
331
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src table in
          let i2 = Id.build ~loc ast_edge.Ast.tar table in
332
          let edge = G_edge.build label_domain ~locals (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
333
334
335
          (match map_add_edge acc (Gid.Old i1) edge (Gid.Old i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
336
              (G_edge.to_string label_domain edge)
bguillaum's avatar
bguillaum committed
337
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
338
339
          )
        ) map_without_edges full_edge_list in
340

bguillaum's avatar
bguillaum committed
341
342
343
    {meta=gr_ast.Ast.meta; map=map; fusion = []}


344

bguillaum's avatar
bguillaum committed
345
  (* -------------------------------------------------------------------------------- *)
346
  let of_conll ?loc domain label_domain (meta, lines, range_lines) =
347
    let sorted_lines = Conll.root :: (List.sort Conll.compare lines) in
bguillaum's avatar
bguillaum committed
348

bguillaum's avatar
bguillaum committed
349
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.num) sorted_lines), string_of_int) in
350

bguillaum's avatar
bguillaum committed
351
    let map_without_edges =
bguillaum's avatar
bguillaum committed
352
      List_.foldi_left
353
354
        (fun i acc line ->
          let loc = Loc.opt_set_line i loc in
355
          Gid_map.add (Gid.Old i) (G_node.of_conll domain ?loc line) acc)
bguillaum's avatar
bguillaum committed
356
        Gid_map.empty sorted_lines in
bguillaum's avatar
bguillaum committed
357
    let map_with_edges =
bguillaum's avatar
bguillaum committed
358
359
      List.fold_left
        (fun acc line ->
bguillaum's avatar
bguillaum committed
360
361
          (* add line number information in loc *)
          let loc = Loc.opt_set_line line.Conll.line_num loc in
bguillaum's avatar
bguillaum committed
362
          let dep_id = Id.gbuild ?loc line.Conll.num gtable in
363
364
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
bguillaum's avatar
bguillaum committed
365
              let gov_id = Id.gbuild ?loc gov gtable in
366
              let edge = G_edge.make label_domain ?loc dep_lab in
bguillaum's avatar
bguillaum committed
367
              (match map_add_edge acc2 (Gid.Old gov_id) edge (Gid.Old dep_id) with
bguillaum's avatar
bguillaum committed
368
369
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
370
                  (G_edge.to_string label_domain edge)
bguillaum's avatar
bguillaum committed
371
                  (match loc with Some l -> Loc.to_string l | None -> "")
bguillaum's avatar
bguillaum committed
372
              )
373
            ) acc line.Conll.deps
bguillaum's avatar
bguillaum committed
374
        ) map_without_edges lines in
bguillaum's avatar
bguillaum committed
375
376
377
378
379
380
381
382
383
384
385

      let fusion =
        List.map
          (fun {Conll.first; last; fusion} ->
              ( Gid.Old (Id.gbuild ?loc first gtable),
                (Gid.Old (Id.gbuild ?loc last gtable),
                fusion)
              )
          ) range_lines in

    {meta; map=map_with_edges; fusion}
bguillaum's avatar
bguillaum committed
386

387
388
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
389
  let of_brown domain label_domain ?sentid brown =
390
    let units = Str.split (Str.regexp " ") brown in
391
      let conll_lines = List.mapi
392
393
394
395
396
397
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
        | [Str.Text phon; Str.Delim pos; Str.Text lemma] ->
        let pos = String.sub pos 1 ((String.length pos)-2) in
        let morph = match (i,sentid) with
        | (0,Some id) -> [("sentid", id)]
        | _ -> [] in
398
399
        {
          Conll.line_num=0;
bguillaum's avatar
bguillaum committed
400
          num = i+1;
401
402
403
404
          phon;
          lemma;
          pos1 = "_";
          pos2 = pos;
405
          morph;
bguillaum's avatar
bguillaum committed
406
          deps = [(i, "SUC")]
407
          }
408
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\")" item
409
      ) units in 
410
    of_conll domain label_domain ([], conll_lines, [])
411

bguillaum's avatar
bguillaum committed
412
413
414
415
416
  (* -------------------------------------------------------------------------------- *)
  let opt_att atts name =
    try Some (List.assoc name atts)
    with Not_found -> None

bguillaum's avatar
bguillaum committed
417
418
 (* -------------------------------------------------------------------------------- *)
 (** [of_xml d_xml] loads a graph in the xml format: [d_xml] must be a <D> xml element *)
419
  let of_xml domain label_domain d_xml =
bguillaum's avatar
bguillaum committed
420
421
422
423
424
425
426
427
428
429
430
431
    match d_xml with
      | Xml.Element ("D", _, t_or_r_list) ->
        let (t_list, r_list) = List.partition (function Xml.Element ("T",_,_) -> true | _ -> false) t_or_r_list in
        let (nodes_without_edges, mapping) =
          List_.foldi_left
            (fun i (acc, acc_map) t_xml ->
              match t_xml with
                | Xml.Element ("T", t_atts, [Xml.PCData phon]) ->
                  let id = List.assoc "id" t_atts in
                  let other_feats = List.filter (fun (n,_) -> not (List.mem n ["id"; "start"; "end"; "label"])) t_atts in
                  let new_fs =
                    List.fold_left
432
                      (fun acc2 (fn,fv) -> G_fs.set_feat domain fn fv acc2)
bguillaum's avatar
bguillaum committed
433
434
                      G_fs.empty
                      (("phon", phon) :: ("cat", (List.assoc "label" t_atts)) :: other_feats) in
435
                  let new_node = G_node.set_fs new_fs (G_node.set_position (float i) G_node.empty) in
bguillaum's avatar
bguillaum committed
436
                  (Gid_map.add (Gid.Old i) new_node acc, String_map.add id (Gid.Old i) acc_map)
bguillaum's avatar
bguillaum committed
437
                | _ -> Log.critical "[G_graph.of_xml] Not a wellformed <T> tag"
bguillaum's avatar
bguillaum committed
438
            ) (Gid_map.empty, String_map.empty) t_list in
bguillaum's avatar
bguillaum committed
439
440
441
442
443
444
445
446
        let final_map =
          List.fold_left
            (fun acc r_xml ->
              match r_xml with
                | Xml.Element ("R", r_atts, _) ->
                  let src = List.assoc "from" r_atts
                  and tar = List.assoc "to" r_atts
                  and label = List.assoc "label" r_atts in
bguillaum's avatar
bguillaum committed
447
448
                  let gid_tar = String_map.find tar mapping in
                  let gid_src = String_map.find src mapping in
bguillaum's avatar
bguillaum committed
449
450
                  let old_node = Gid_map.find gid_src acc in
                  let new_map =
451
                    match G_node.add_edge (G_edge.make label_domain label) gid_tar old_node with
bguillaum's avatar
bguillaum committed
452
453
454
455
456
                      | Some new_node -> Gid_map.add gid_src new_node acc
                      | None -> Log.critical "[G_graph.of_xml] Fail to add edge" in
                  new_map
                | _ -> Log.critical "[G_graph.of_xml] Not a wellformed <R> tag"
            ) nodes_without_edges r_list in
bguillaum's avatar
bguillaum committed
457
        {meta=[]; map=final_map; fusion=[]}
bguillaum's avatar
bguillaum committed
458
459
      | _ -> Log.critical "[G_graph.of_xml] Not a <D> tag"

bguillaum's avatar
bguillaum committed
460
  (* -------------------------------------------------------------------------------- *)
461
  let del_edge label_domain ?edge_ident loc graph id_src label id_tar =
462
    let node_src =
463
      try Gid_map.find id_src graph.map
464
      with Not_found ->
465
        match edge_ident with
bguillaum's avatar
bguillaum committed
466
467
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
468
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
469
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string label_domain label)
pj2m's avatar
pj2m committed
470

bguillaum's avatar
bguillaum committed
471
  (* -------------------------------------------------------------------------------- *)
472
  let del_node graph node_id =
bguillaum's avatar
bguillaum committed
473
    {graph with map =
474
475
        Gid_map.fold
          (fun id value acc ->
bguillaum's avatar
bguillaum committed
476
477
478
            if id = node_id
            then acc
            else Gid_map.add id (G_node.remove_key node_id value) acc
479
480
          ) graph.map Gid_map.empty
    }
pj2m's avatar
pj2m committed
481

bguillaum's avatar
bguillaum committed
482
  (* -------------------------------------------------------------------------------- *)
483
  let activate loc node_id suffix graph =
bguillaum's avatar
bguillaum committed
484
    let index = match node_id with
485
      | Gid.Old id -> Gid.Act (id, suffix)
bguillaum's avatar
bguillaum committed
486
487
488
      | _ -> Error.run ~loc "[Graph.activate] is possible only from a \"ground\" node" in

    if Gid_map.mem index graph.map
489
    then Error.run ~loc "[Graph.activate] try to activate twice the \"same\" node (with suffix '%s')" suffix;
bguillaum's avatar
bguillaum committed
490
491
492
493
494

    let node = Gid_map.find node_id graph.map in
    let new_map = Gid_map.add index (G_node.build_new node) graph.map in
    (index, {graph with map = new_map})

bguillaum's avatar
bguillaum committed
495
  (* -------------------------------------------------------------------------------- *)
496
  let add_neighbour loc label_domain graph node_id label =
497
    let index = match node_id with
bguillaum's avatar
bguillaum committed
498
      | Gid.Old id ->
bguillaum's avatar
bguillaum committed
499
500
501
502
        (match Label.to_int label with
          | Some label_int -> Gid.New (id, label_int)
          | None -> Error.run ~loc "[Graph.add_neighbour] try to add neighbour with a local label"
        )
503
      | Gid.New _ | Gid.Act _ -> Error.run ~loc "[Graph.add_neighbour] try to add neighbour node to a neighbour node" in
pj2m's avatar
pj2m committed
504

505
    if Gid_map.mem index graph.map
506
    then Error.run ~loc "[Graph.add_neighbour] try to build twice the \"same\" neighbour node (with label '%s')" (Label.to_string label_domain label);
pj2m's avatar
pj2m committed
507

508
    let node = Gid_map.find node_id graph.map in
pj2m's avatar
pj2m committed
509
    (* put the new node on the right of its "parent" *)
510
    let new_map = Gid_map.add index (G_node.build_neighbour node) graph.map in
bguillaum's avatar
bguillaum committed
511

512
    match map_add_edge new_map node_id label index with
bguillaum's avatar
bguillaum committed
513
514
      | Some g -> (index, {graph with map = g})
      | None -> Log.bug "[Graph.add_neighbour] add_edge must not fail"; exit 1
pj2m's avatar
pj2m committed
515

bguillaum's avatar
bguillaum committed
516
  (* -------------------------------------------------------------------------------- *)
517
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
518
  let shift_out loc label_domain src_gid tar_gid label_cst graph =
519
    let src_node = Gid_map.find src_gid graph.map in
520
    let tar_node = Gid_map.find tar_gid graph.map in
521

522
523
524
525
526
527
528
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

    (* Error if a loop is created by the shift_out *)
    let src_tar_edges = Massoc_gid.assoc tar_gid src_next in
    let _ =
      try
529
530
        let loop_edge = List.find (fun edge -> Label_cst.match_ label_domain edge label_cst) src_tar_edges in
        Error.run ~loc "The shfit_out command tries to build a loop (with label %s)" (Label.to_string label_domain loop_edge)
531
532
533
534
535
      with Not_found -> () in

    let (new_src_next,new_tar_next) =
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
536
        if Label_cst.match_ label_domain edge label_cst
537
538
539
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
540
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string label_domain edge)
541
542
543
544

        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
545

546
    { graph with map =
547
548
549
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
550
    }
pj2m's avatar
pj2m committed
551

bguillaum's avatar
bguillaum committed
552
  (* -------------------------------------------------------------------------------- *)
553
  let shift_in loc label_domain src_gid tar_gid label_cst graph =
554
    let tar_node = Gid_map.find tar_gid graph.map in
555
    let tar_next = G_node.get_next tar_node in
556

557
558
559
560
    (* Error if a loop is created by the shift_in *)
    let tar_src_edges = Massoc_gid.assoc src_gid tar_next in
    let _ =
      try
561
562
        let loop_edge = List.find (fun edge -> Label_cst.match_ label_domain edge label_cst) tar_src_edges in
        Error.run ~loc "The [shift_in] command tries to build a loop (with label \"%s\")" (Label.to_string label_domain loop_edge)
563
564
565
      with Not_found -> () in

    { graph with map =
566
567
        Gid_map.mapi
          (fun node_id node ->
568
569
570
571
572
573
574
575
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
576
                if Label_cst.match_ label_domain edge label_cst
577
578
                then
                  match List_.usort_insert edge acc_node_tar_edges with
579
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string label_domain edge)
580
581
582
583
584
585
586
587
588
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
589
590
          ) graph.map
    }
591

bguillaum's avatar
bguillaum committed
592
  (* -------------------------------------------------------------------------------- *)
593
  let shift_edges loc label_domain src_gid tar_gid label_cst graph =
594
    graph
595
596
    |> (shift_in loc label_domain src_gid tar_gid label_cst)
    |> (shift_out loc label_domain src_gid tar_gid label_cst)
pj2m's avatar
pj2m committed
597

bguillaum's avatar
bguillaum committed
598
  (* -------------------------------------------------------------------------------- *)
599
600
  let merge_node loc label_domain graph src_gid tar_gid =
    let se_graph = shift_edges loc label_domain src_gid tar_gid Label_cst.all graph in
pj2m's avatar
pj2m committed
601

602
603
    let src_node = Gid_map.find src_gid se_graph.map in
    let tar_node = Gid_map.find tar_gid se_graph.map in
604

605
    match G_fs.unif (G_node.get_fs src_node) (G_node.get_fs tar_node) with
bguillaum's avatar
bguillaum committed
606
607
608
      | Some new_fs ->
        Some {graph with map =
            (Gid_map.add
bguillaum's avatar
bguillaum committed
609
               tar_gid
610
               (G_node.set_fs new_fs tar_node)
bguillaum's avatar
bguillaum committed
611
               (Gid_map.remove src_gid se_graph.map)
bguillaum's avatar
bguillaum committed
612
613
614
            )
             }
      | None -> None
bguillaum's avatar
bguillaum committed
615

bguillaum's avatar
bguillaum committed
616
  (* -------------------------------------------------------------------------------- *)
617
  let set_feat ?loc domain graph node_id feat_name new_value =
618
    let node = Gid_map.find node_id graph.map in
619
620
621
622
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
623
          let new_fs = G_fs.set_feat ?loc domain feat_name new_value (G_node.get_fs node) in
624
          (G_node.set_fs new_fs node) in
625
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
626

bguillaum's avatar
bguillaum committed
627
  (* -------------------------------------------------------------------------------- *)
628
  let update_feat ?loc domain graph tar_id tar_feat_name item_list =
629
630
631
    let strings_to_concat =
      List.map
        (function
bguillaum's avatar
bguillaum committed
632
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
633
634
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
635
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
636
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
637
            )
bguillaum's avatar
bguillaum committed
638
          | Concat_item.String s -> s
639
640
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
641
    (set_feat ?loc domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
642

bguillaum's avatar
bguillaum committed
643
  (* -------------------------------------------------------------------------------- *)
644
  let del_feat graph node_id feat_name =
645
    let node = Gid_map.find node_id graph.map in
646
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
647
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
648

bguillaum's avatar
bguillaum committed
649
  (* -------------------------------------------------------------------------------- *)
650
  let to_gr label_domain graph =
651
    let buff = Buffer.create 32 in
652

653
654
    bprintf buff "graph {\n";

655
656
657
658
659
660
    (* meta data *)
    List.iter
      (fun (name, value) ->
        bprintf buff "  %s = \"%s\";\n" name value
      ) graph.meta;

bguillaum's avatar
bguillaum committed
661
    (* nodes *)
662
663
664
665
666
667
668
669
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
670
671
    List.iter
      (fun (id,node) ->
672
        bprintf buff "  N_%s %s;\n" (Gid.to_string id) (G_node.to_gr node)
673
      ) sorted_nodes;
674

bguillaum's avatar
bguillaum committed
675
    (* edges *)
676
677
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
678
679
        Massoc_gid.iter
          (fun tar edge ->
680
            bprintf buff "  N_%s -[%s]-> N_%s;\n" (Gid.to_string id) (G_edge.to_string label_domain edge) (Gid.to_string tar)
bguillaum's avatar
bguillaum committed
681
          ) (G_node.get_next node)
682
      ) sorted_nodes;
683

684
685
686
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
687
  (* -------------------------------------------------------------------------------- *)
688
  let to_sentence ?main_feat graph =
689
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
690
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
691
692
693
694
695
696
697
698
699
700

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
701
702
        " -t-", "-t-";
        "_-_", "-";
703
704
        "_", " ";
        "' ", "'";
705
706
        " ,", ",";
        " .", ".";
707
708
709
710
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
711

bguillaum's avatar
bguillaum committed
712
  (* -------------------------------------------------------------------------------- *)
713
  let to_dep label_domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
714
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
715
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
716

717
718
719
720
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

bguillaum's avatar
bguillaum committed
721
    (* nodes *)
722
    List.iter
723
      (fun (id, node) ->
724
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
725
        let fs = G_node.get_fs node in
726
727
728
729
        let dep_fs = G_fs.to_dep ~decorated_feat ~position:(G_node.get_position node) ?filter ?main_feat fs in

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
bguillaum's avatar
bguillaum committed
730
          | _ -> "" in
bguillaum's avatar
bguillaum committed
731
        bprintf buff "N_%s { %s%s }\n" (Gid.to_string id) dep_fs style
732
733
      ) snodes;
    bprintf buff "} \n";
734

bguillaum's avatar
bguillaum committed
735
    (* edges *)
736
    bprintf buff "[EDGES] { \n";
737
    Gid_map.iter
738
      (fun gid elt ->
bguillaum's avatar
bguillaum committed
739
740
741
        Massoc_gid.iter
          (fun tar g_edge ->
            let deco = List.mem (gid,g_edge,tar) deco.G_deco.edges in
742
            bprintf buff "N_%s -> N_%s %s\n" (Gid.to_string gid) (Gid.to_string tar) (G_edge.to_dep label_domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
743
          ) (G_node.get_next elt)
744
      ) graph.map;
bguillaum's avatar
bguillaum committed
745
746

    bprintf buff "} \n";
747
748
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
749
  (* -------------------------------------------------------------------------------- *)
750
  let to_dot label_domain ?main_feat ?(deco=G_deco.empty) graph =
751
    let buff = Buffer.create 32 in
752

753
    bprintf buff "digraph G {\n";
bguillaum's avatar
bguillaum committed
754
    (* bprintf buff "  rankdir=LR;\n"; *)
755
756
    bprintf buff "  node [shape=Mrecord];\n";

bguillaum's avatar
bguillaum committed
757
    (* nodes *)
758
759
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
760
        let decorated_feat =
761
762
          try List.assoc id deco.G_deco.nodes
          with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
763
764
        bprintf buff "  N_%s [label=<%s>, color=%s]\n"
          (Gid.to_string id)
765
766
767
          (G_fs.to_dot ~decorated_feat ?main_feat (G_node.get_fs node))
          (* TODO: add bgcolor in dot output *)
          (if List.mem_assoc id deco.G_deco.nodes then "red" else "black")
768
      ) graph.map;
bguillaum's avatar
bguillaum committed
769
770

    (* edges *)
771
772
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
773
774
775
        Massoc_gid.iter
          (fun tar g_edge ->
            let deco = List.mem (id,g_edge,tar) deco.G_deco.edges in
776
            bprintf buff "  N_%s -> N_%s%s\n" (Gid.to_string id) (Gid.to_string tar) (G_edge.to_dot label_domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
777
          ) (G_node.get_next node)
778
      ) graph.map;
779