grew_graph.ml 34.2 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11
12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17
18
open Grew_types

pj2m's avatar
pj2m committed
19
20
21
22
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
23
(* ================================================================================ *)
24
module P_deco = struct
bguillaum's avatar
bguillaum committed
25
26
27
28
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
29
30

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
31
end (* module P_deco *)
pj2m's avatar
pj2m committed
32

bguillaum's avatar
bguillaum committed
33
(* ================================================================================ *)
34
35
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
36

37
  let empty = Pid_map.empty
38

39
  let find = Pid_map.find
pj2m's avatar
pj2m committed
40

41
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
42
  let map_add_edge map id_src label id_tar =
43
    let node_src =
pj2m's avatar
pj2m committed
44
      (* Not found can be raised when adding an edge from pos to neg *)
45
46
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
47
48
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
49

bguillaum's avatar
bguillaum committed
50
  (* -------------------------------------------------------------------------------- *)
51
  let build_filter domain table (ast_node, loc) =
52
    let pid = Id.build ~loc ast_node.Ast.node_id table in
53
    let fs = P_fs.build domain ast_node.Ast.fs in
54
55
    (pid, fs)

bguillaum's avatar
bguillaum committed
56
  (* -------------------------------------------------------------------------------- *)
57
  let build domain ?pat_vars ?(locals=[||]) (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
58

59
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
60
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
61
    let rec insert (ast_node, loc) = function
62
      | [] -> [P_node.build domain ?pat_vars (ast_node, loc)]
63
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
64
        begin
65
          try (node_id, P_node.unif_fs (P_fs.build domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
66
67
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
68
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
69

70
    let (named_nodes : (Id.name * P_node.t) list) =
71
      List.fold_left
72
73
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
74

pj2m's avatar
pj2m committed
75
76
77
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

78
79
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
80
81

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
82
83
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
84
      Pid_map.empty node_list in
85

bguillaum's avatar
bguillaum committed
86
    let (map : t) =
pj2m's avatar
pj2m committed
87
      List.fold_left
bguillaum's avatar
bguillaum committed
88
89
90
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
91
          let edge = P_edge.build domain ~locals (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
92
93
94
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
95
              (P_edge.to_string domain edge)
bguillaum's avatar
bguillaum committed
96
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
97
98
          )
        ) map_without_edges full_edge_list in
99
    (map, pos_table)
bguillaum's avatar
bguillaum committed
100

101

bguillaum's avatar
bguillaum committed
102
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
103
104
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
105
  type extension = {
bguillaum's avatar
bguillaum committed
106
107
108
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
109

bguillaum's avatar
bguillaum committed
110
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
111
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
112
  let build_extension domain ?pat_vars ?(locals=[||]) pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
113

114
    let built_nodes = List.map (P_node.build domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
115

116
117
    let (old_nodes, new_nodes) =
      List.partition
118
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
119
        built_nodes in
bguillaum's avatar
bguillaum committed
120

pj2m's avatar
pj2m committed
121
122
123
124
125
126
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
127
128
129
130

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
131
132
133
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
134
135
136

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
137
138
139
140
141
142
143
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
144

145
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
146
      List.fold_left
bguillaum's avatar
bguillaum committed
147
        (fun acc (ast_edge, loc) ->
148
149
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
150
151
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
152
              | Some i -> Pid.Pos i
153
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
154
155
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
156
              | Some i -> Pid.Pos i
157
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
158
          let edge = P_edge.build domain ~locals (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
159
160
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
161
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
162
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
163
164
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

165
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
166
167
168
169
170
171
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
172
      Pid_map.fold
bguillaum's avatar
bguillaum committed
173
174
175
176
177
178
179
180
181
182
183
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
184
185

    let roots =
186
      Pid_map.fold
bguillaum's avatar
bguillaum committed
187
188
189
190
191
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
192

pj2m's avatar
pj2m committed
193
194
    (!tree_prop, roots)

195
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
196
  let roots graph = snd (tree_and_roots graph)
197
end (* module P_graph *)
pj2m's avatar
pj2m committed
198

bguillaum's avatar
bguillaum committed
199
(* ================================================================================ *)
200
201
module G_deco = struct
  type t = {
202
203
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
204
205
206
207
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
208

bguillaum's avatar
bguillaum committed
209
(* ================================================================================ *)
210
module G_graph = struct
211
  type t = {
bguillaum's avatar
bguillaum committed
212
213
    meta: string list;                       (* meta-informations *)
    map: G_node.t Gid_map.t;                 (* node description *)
bguillaum's avatar
bguillaum committed
214
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
bguillaum's avatar
bguillaum committed
215
    highest_index: int;                      (* the next free interger index *)
216
217
  }

bguillaum's avatar
bguillaum committed
218
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; }
219

bguillaum's avatar
bguillaum committed
220
  (* ---------------------------------------------------------------------- *)
221
222
223
224
225
226
227
228
229
230
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

bguillaum's avatar
bguillaum committed
231
  let get_highest g = g.highest_index
232
233
234
235

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
236

bguillaum's avatar
bguillaum committed
237
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
238

239
240
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
241

bguillaum's avatar
bguillaum committed
242
  (* is there an edge e out of node i ? *)
243
  let edge_out domain graph node_id label_cst =
244
    let node = Gid_map.find node_id graph.map in
245
    Massoc_gid.exists (fun _ e -> Label_cst.match_ domain e label_cst) (G_node.get_next node)
246

bguillaum's avatar
bguillaum committed
247
248
249
250
251
252
  let get_annot_info graph =
    let annot_info =
      Gid_map.fold
        (fun _ node acc ->
          match (G_node.get_annot_info node, acc) with
            | (None,_) -> acc
bguillaum's avatar
bguillaum committed
253
            | (Some f, None) -> Some (f,G_node.get_position node)
bguillaum's avatar
bguillaum committed
254
255
256
257
258
259
            | (Some _, Some _) -> Error.build "[G_node.get_annot_info] Two nodes with annot info"
        ) graph.map None in
    match annot_info with
      | Some x -> x
      | None -> Error.build "[G_node.get_annot_info] No nodes with annot info"

bguillaum's avatar
bguillaum committed
260
  (* -------------------------------------------------------------------------------- *)
261
  let map_add_edge map id_src label id_tar =
262
    let node_src =
263
      (* Not found can be raised when adding an edge from pos to neg *)
264
      try Gid_map.find id_src map with Not_found -> G_node.empty in
265
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
266
267
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
268

269
  (* -------------------------------------------------------------------------------- *)
270
271
272
273
  let add_edge graph id_src label id_tar =
    match map_add_edge graph.map id_src label id_tar with
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
274

bguillaum's avatar
bguillaum committed
275
  (* -------------------------------------------------------------------------------- *)
276
  let build domain ?(locals=[||]) gr_ast =
277
278
    let full_node_list = gr_ast.Ast.nodes
    and full_edge_list = gr_ast.Ast.edges in
279

bguillaum's avatar
bguillaum committed
280
281
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
282

bguillaum's avatar
bguillaum committed
283
284
285
286
287
      | (ast_node, loc)::tail ->
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
288
289
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some index) tail in
          let succ = if tail = [] then None else Some (index+1) in
bguillaum's avatar
bguillaum committed
290
291
          let (_,new_node) = G_node.build domain ?prec ?succ index (ast_node, loc) in
            (
292
              Gid_map.add index new_node new_tail,
bguillaum's avatar
bguillaum committed
293
294
              (node_id,index)::table
            ) in
295

bguillaum's avatar
bguillaum committed
296
    let (map_without_edges, table) = loop [] 0 None full_node_list in
297

298
299
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
300
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
301
302
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
303
          let edge = G_edge.build domain ~locals (ast_edge, loc) in
304
          (match map_add_edge acc i1 edge i2 with
bguillaum's avatar
bguillaum committed
305
306
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
307
              (G_edge.to_string domain edge)
bguillaum's avatar
bguillaum committed
308
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
309
310
          )
        ) map_without_edges full_edge_list in
311

bguillaum's avatar
bguillaum committed
312
    {meta=gr_ast.Ast.meta; map=map; fusion = []; highest_index = (List.length full_node_list) -1}
bguillaum's avatar
bguillaum committed
313
314


315

bguillaum's avatar
bguillaum committed
316
  (* -------------------------------------------------------------------------------- *)
317
318
  let of_conll domain conll =

319
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
bguillaum's avatar
bguillaum committed
320

321
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), string_of_int) in
322

323
324
325
326
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
327
        Gid_map.add index (G_node.of_conll domain ~loc ?prec last) Gid_map.empty
328
329
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
330
331
        Gid_map.add index (G_node.of_conll domain ~loc ?prec ~succ:(index+1) line)
          (loop (index+1) (Some index) tail) in
332
333

    let map_without_edges = loop 0 None sorted_lines in
334

bguillaum's avatar
bguillaum committed
335
    let map_with_edges =
bguillaum's avatar
bguillaum committed
336
337
      List.fold_left
        (fun acc line ->
338
339
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
340
341
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
342
343
              let gov_id = Id.gbuild ~loc gov gtable in
              let edge = G_edge.make domain ~loc dep_lab in
344
              (match map_add_edge acc2 gov_id edge dep_id with
bguillaum's avatar
bguillaum committed
345
346
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
347
                  (G_edge.to_string domain edge)
348
                  (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
349
              )
350
            ) acc line.Conll.deps
351
        ) map_without_edges conll.Conll.lines in
bguillaum's avatar
bguillaum committed
352
353
354

      let fusion =
        List.map
355
356
          (fun {Conll.first; last; fusion; mw_line_num} ->
              let loc = Loc.file_opt_line conll.Conll.file mw_line_num in
357
358
359
360
361
362
              (
                Id.gbuild ~loc first gtable,
                (
                  Id.gbuild ~loc last gtable,
                  fusion
                )
bguillaum's avatar
bguillaum committed
363
              )
364
          ) conll.Conll.multiwords in
bguillaum's avatar
bguillaum committed
365

bguillaum's avatar
bguillaum committed
366
    {meta = conll.Conll.meta; map=map_with_edges; fusion; highest_index= (List.length sorted_lines) -1 }
bguillaum's avatar
bguillaum committed
367

368
369
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
370
  let of_brown domain ?sentid brown =
371
    let units = Str.split (Str.regexp " ") brown in
372
      let conll_lines = List.mapi
373
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
374
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
375
        let pos = String.sub pos 1 ((String.length pos)-2) in
376
        let feats = match (i,sentid) with
377
378
        | (0,Some id) -> [("sentid", id)]
        | _ -> [] in
379
380
        {
          Conll.line_num=0;
381
382
          id = i+1;
          form;
383
          lemma;
384
385
386
          upos = "_";
          xpos = pos;
          feats;
bguillaum's avatar
bguillaum committed
387
          deps = [(i, "SUC")]
388
          }
389
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\")" item
390
      ) units in 
bguillaum's avatar
bguillaum committed
391
    of_conll domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
392

bguillaum's avatar
bguillaum committed
393
394
395
396
397
  (* -------------------------------------------------------------------------------- *)
  let opt_att atts name =
    try Some (List.assoc name atts)
    with Not_found -> None

bguillaum's avatar
bguillaum committed
398
399
400
  (* -------------------------------------------------------------------------------- *)
  (** [of_xml d_xml] loads a graph in the xml format: [d_xml] must be a <D> xml element *)
  let of_xml domain d_xml = failwith "of_xml not available"
bguillaum's avatar
bguillaum committed
401

bguillaum's avatar
bguillaum committed
402
  (* -------------------------------------------------------------------------------- *)
403
  let del_edge domain ?edge_ident loc graph id_src label id_tar =
404
    let node_src =
405
      try Gid_map.find id_src graph.map
406
      with Not_found ->
407
        match edge_ident with
bguillaum's avatar
bguillaum committed
408
409
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
410
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
411
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string domain label)
pj2m's avatar
pj2m committed
412

bguillaum's avatar
bguillaum committed
413
  (* -------------------------------------------------------------------------------- *)
414
  let del_node graph node_id =
bguillaum's avatar
bguillaum committed
415
    {graph with map =
416
417
        Gid_map.fold
          (fun id value acc ->
bguillaum's avatar
bguillaum committed
418
419
420
            if id = node_id
            then acc
            else Gid_map.add id (G_node.remove_key node_id value) acc
421
422
          ) graph.map Gid_map.empty
    }
pj2m's avatar
pj2m committed
423

bguillaum's avatar
bguillaum committed
424
  (* -------------------------------------------------------------------------------- *)
425
  let add_neighbour loc domain graph node_id label = failwith "no more add_neighbour"
pj2m's avatar
pj2m committed
426

bguillaum's avatar
bguillaum committed
427
428
429
430
431
432
433
  (* -------------------------------------------------------------------------------- *)
  let insert domain id1 id2 graph =
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
    let pos1 = G_node.get_position node1 in
    let pos2 = G_node.get_position node2 in
    let new_pos= (pos1 +. pos2) /. 2. in
434
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
435
436
437
438
    let map = graph.map
      |> (Gid_map.add new_gid (G_node.fresh domain ~prec:id1 ~succ:id2 new_pos))
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
439
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
440
441
442
443
444
445

  (* -------------------------------------------------------------------------------- *)
  let append domain id graph =
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos +. 1. in
446
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
447
448
449
    let map = graph.map
      |> (Gid_map.add new_gid (G_node.fresh domain ~prec:id new_pos))
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
450
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
451
452
453
454
455
456

  (* -------------------------------------------------------------------------------- *)
  let prepend domain id graph =
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos -. 1. in
457
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
458
459
460
    let map = graph.map
      |> (Gid_map.add new_gid (G_node.fresh domain ~succ:id new_pos))
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
461
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476

  (* -------------------------------------------------------------------------------- *)
  let add_after loc domain node_id graph =
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
    | Some gid_succ -> insert domain node_id gid_succ graph
    | None -> append domain node_id graph

  (* -------------------------------------------------------------------------------- *)
  let add_before loc domain node_id graph =
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
    | Some gid_prec -> insert domain gid_prec node_id graph
    | None -> prepend domain node_id graph

bguillaum's avatar
bguillaum committed
477
  (* -------------------------------------------------------------------------------- *)
478
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
479
  let shift_out loc domain src_gid tar_gid label_cst graph =
480
    let src_node = Gid_map.find src_gid graph.map in
481
    let tar_node = Gid_map.find tar_gid graph.map in
482

483
484
485
486
487
488
489
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

    (* Error if a loop is created by the shift_out *)
    let src_tar_edges = Massoc_gid.assoc tar_gid src_next in
    let _ =
      try
490
491
        let loop_edge = List.find (fun edge -> Label_cst.match_ domain edge label_cst) src_tar_edges in
        Error.run ~loc "The shfit_out command tries to build a loop (with label %s)" (Label.to_string domain loop_edge)
492
493
494
495
496
      with Not_found -> () in

    let (new_src_next,new_tar_next) =
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
497
        if Label_cst.match_ domain edge label_cst
498
499
500
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
501
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string domain edge)
502
503
504
505

        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
506

507
    { graph with map =
508
509
510
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
511
    }
pj2m's avatar
pj2m committed
512

bguillaum's avatar
bguillaum committed
513
  (* -------------------------------------------------------------------------------- *)
514
  let shift_in loc domain src_gid tar_gid label_cst graph =
515
    let tar_node = Gid_map.find tar_gid graph.map in
516
    let tar_next = G_node.get_next tar_node in
517

518
519
520
521
    (* Error if a loop is created by the shift_in *)
    let tar_src_edges = Massoc_gid.assoc src_gid tar_next in
    let _ =
      try
522
523
        let loop_edge = List.find (fun edge -> Label_cst.match_ domain edge label_cst) tar_src_edges in
        Error.run ~loc "The [shift_in] command tries to build a loop (with label \"%s\")" (Label.to_string domain loop_edge)
524
525
526
      with Not_found -> () in

    { graph with map =
527
528
        Gid_map.mapi
          (fun node_id node ->
529
530
531
532
533
534
535
536
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
537
                if Label_cst.match_ domain edge label_cst
538
539
                then
                  match List_.usort_insert edge acc_node_tar_edges with
540
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string domain edge)
541
542
543
544
545
546
547
548
549
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
550
551
          ) graph.map
    }
552

bguillaum's avatar
bguillaum committed
553
  (* -------------------------------------------------------------------------------- *)
554
  let shift_edges loc domain src_gid tar_gid label_cst graph =
555
    graph
556
557
    |> (shift_in loc domain src_gid tar_gid label_cst)
    |> (shift_out loc domain src_gid tar_gid label_cst)
pj2m's avatar
pj2m committed
558

bguillaum's avatar
bguillaum committed
559
  (* -------------------------------------------------------------------------------- *)
560
561
  let merge_node loc domain graph src_gid tar_gid =
    let se_graph = shift_edges loc domain src_gid tar_gid Label_cst.all graph in
pj2m's avatar
pj2m committed
562

563
564
    let src_node = Gid_map.find src_gid se_graph.map in
    let tar_node = Gid_map.find tar_gid se_graph.map in
565

566
    match G_fs.unif (G_node.get_fs src_node) (G_node.get_fs tar_node) with
bguillaum's avatar
bguillaum committed
567
568
569
      | Some new_fs ->
        Some {graph with map =
            (Gid_map.add
bguillaum's avatar
bguillaum committed
570
               tar_gid
571
               (G_node.set_fs new_fs tar_node)
bguillaum's avatar
bguillaum committed
572
               (Gid_map.remove src_gid se_graph.map)
bguillaum's avatar
bguillaum committed
573
574
575
            )
             }
      | None -> None
bguillaum's avatar
bguillaum committed
576

bguillaum's avatar
bguillaum committed
577
  (* -------------------------------------------------------------------------------- *)
578
  let set_feat ?loc domain graph node_id feat_name new_value =
579
    let node = Gid_map.find node_id graph.map in
580
581
582
583
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
584
          let new_fs = G_fs.set_feat ?loc domain feat_name new_value (G_node.get_fs node) in
585
          (G_node.set_fs new_fs node) in
586
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
587

bguillaum's avatar
bguillaum committed
588
  (* -------------------------------------------------------------------------------- *)
589
  let update_feat ?loc domain graph tar_id tar_feat_name item_list =
590
591
592
    let strings_to_concat =
      List.map
        (function
bguillaum's avatar
bguillaum committed
593
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
594
595
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
596
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
597
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
598
            )
bguillaum's avatar
bguillaum committed
599
          | Concat_item.String s -> s
600
601
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
602
    (set_feat ?loc domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
603

bguillaum's avatar
bguillaum committed
604
  (* -------------------------------------------------------------------------------- *)
605
  let del_feat graph node_id feat_name =
606
    let node = Gid_map.find node_id graph.map in
607
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
608
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
609

bguillaum's avatar
bguillaum committed
610
  (* -------------------------------------------------------------------------------- *)
611
  let to_gr domain graph =
612
    let buff = Buffer.create 32 in
613

614
615
    bprintf buff "graph {\n";

616
617
    (* meta data *)
    List.iter
618
619
      (fun (s) ->
        bprintf buff "  %s;\n" s
620
621
      ) graph.meta;

bguillaum's avatar
bguillaum committed
622
    (* nodes *)
623
624
625
626
627
628
629
630
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
631
632
    List.iter
      (fun (id,node) ->
633
        bprintf buff "  N_%s %s;\n" (Gid.to_string id) (G_node.to_gr node)
634
      ) sorted_nodes;
635

bguillaum's avatar
bguillaum committed
636
    (* edges *)
637
638
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
639
640
        Massoc_gid.iter
          (fun tar edge ->
641
            bprintf buff "  N_%s -[%s]-> N_%s;\n" (Gid.to_string id) (G_edge.to_string domain edge) (Gid.to_string tar)
bguillaum's avatar
bguillaum committed
642
          ) (G_node.get_next node)
643
      ) sorted_nodes;
644

645
646
647
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
648
  (* -------------------------------------------------------------------------------- *)
649
  let to_sentence ?main_feat graph =
650
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
651
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
652
653
654
655
656
657
658
659
660
661

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
662
663
        " -t-", "-t-";
        "_-_", "-";
664
665
        "_", " ";
        "' ", "'";
666
667
        " ,", ",";
        " .", ".";
668
669
670
671
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
672

bguillaum's avatar
bguillaum committed
673
  (* -------------------------------------------------------------------------------- *)
674
  let to_dep domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
675
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
676
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
677

678
679
680
681
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

bguillaum's avatar
bguillaum committed
682
    (* nodes *)
683
    List.iter
684
      (fun (id, node) ->
685
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
686
        let fs = G_node.get_fs node in
687
688
689
690
        let dep_fs = G_fs.to_dep ~decorated_feat ~position:(G_node.get_position node) ?filter ?main_feat fs in

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
bguillaum's avatar
bguillaum committed
691
          | _ -> "" in
bguillaum's avatar
bguillaum committed
692
693
694
695
696

        bprintf buff "N_%s { %s%s }\n"
          (Gid.to_string id)
          dep_fs
          style
697
698
      ) snodes;
    bprintf buff "} \n";
699

bguillaum's avatar
bguillaum committed
700
    (* edges *)
701
    bprintf buff "[EDGES] { \n";
702

bguillaum's avatar
bguillaum committed
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
    if !Global.debug
    then
      List.iter
        (fun (id, node) ->
          begin
            match G_node.get_prec node with
            | None -> ()
            | Some p -> bprintf buff "N_%s -> N_%s { label=\"__PREC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string p)
          end;
          begin
            match G_node.get_succ node with
            | None -> ()
            | Some s -> bprintf buff "N_%s -> N_%s { label=\"__SUCC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string s)
          end
        ) snodes;
718

719
    Gid_map.iter
720
      (fun gid elt ->
bguillaum's avatar
bguillaum committed
721
722
723
        Massoc_gid.iter
          (fun tar g_edge ->
            let deco = List.mem (gid,g_edge,tar) deco.G_deco.edges in
724
            bprintf buff "N_%s -> N_%s %s\n" (Gid.to_string gid) (Gid.to_string tar) (G_edge.to_dep domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
725
          ) (G_node.get_next elt)
726
      ) graph.map;
bguillaum's avatar
bguillaum committed
727
728

    bprintf buff "} \n";
729
730
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
731
  (* -------------------------------------------------------------------------------- *)
732
  let to_dot domain ?main_feat ?(deco=G_deco.empty) graph =
733
    let buff = Buffer.create 32 in
734

735
    bprintf buff "digraph G {\n";
bguillaum's avatar
bguillaum committed
736
    (* bprintf buff "  rankdir=LR;\n"; *)
737
738
    bprintf buff "  node [shape=Mrecord];\n";

bguillaum's avatar
bguillaum committed
739
    (* nodes *)
740
741
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
742
        let decorated_feat =
743
744
          try List.assoc id deco.G_deco.nodes
          with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
745
746
        bprintf buff "  N_%s [label=<%s>, color=%s]\n"
          (Gid.to_string id)
747
748
749
          (G_fs.to_dot ~decorated_feat ?main_feat (G_node.get_fs node))
          (* TODO: add bgcolor in dot output *)
          (if List.mem_assoc id deco.G_deco.nodes then "red" else "black")
750
      ) graph.map;
bguillaum's avatar
bguillaum committed
751
752

    (* edges *)
753
754
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
755
756
757
        Massoc_gid.iter
          (fun tar g_edge ->
            let deco = List.mem (id,g_edge,tar) deco.G_deco.edges in
758
            bprintf buff "  N_%s -> N_%s%s\n" (Gid.to_string id) (Gid.to_string tar) (G_edge.to_dot domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
759
          ) (G_node.get_next node)
760
      ) graph.map;
761

762
763
    bprintf buff "}\n";
    Buffer.contents buff
764
765

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
766
767
768
769
770
771
772
  let list_num test =
    let rec loop n = function
      | [] -> raise Not_found
      | x::_ when test x -> n
      | _::t -> loop (n+1) t
    in loop 0

773
  let to_raw domain graph =
774
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
775
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
776
    let raw_nodes = List.map (fun (gid,node) -> (gid, G_fs.to_raw (G_node.get_fs node))) snodes in
777

bguillaum's avatar
bguillaum committed
778
    let get_num gid = list_num (fun (x,_) -> x=gid) raw_nodes in
779
780
    let edge_list = ref [] in
    Gid_map.iter
bguillaum's avatar
bguillaum committed
781
      (fun src_gid node ->