grew_graph.ml 35.9 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11
12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17
18
open Grew_types

pj2m's avatar
pj2m committed
19
20
21
22
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
23
(* ================================================================================ *)
24
module P_deco = struct
bguillaum's avatar
bguillaum committed
25
26
27
28
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
29
30

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
31
end (* module P_deco *)
pj2m's avatar
pj2m committed
32

bguillaum's avatar
bguillaum committed
33
(* ================================================================================ *)
34
35
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
36

37
  let empty = Pid_map.empty
38

39
  let find = Pid_map.find
pj2m's avatar
pj2m committed
40

41
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
42
  let map_add_edge map id_src label id_tar =
43
    let node_src =
pj2m's avatar
pj2m committed
44
      (* Not found can be raised when adding an edge from pos to neg *)
45
46
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
47
48
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
49

bguillaum's avatar
bguillaum committed
50
  (* -------------------------------------------------------------------------------- *)
51
  let build_filter domain table (ast_node, loc) =
52
    let pid = Id.build ~loc ast_node.Ast.node_id table in
53
    let fs = P_fs.build domain ast_node.Ast.fs in
54
55
    (pid, fs)

bguillaum's avatar
bguillaum committed
56
  (* -------------------------------------------------------------------------------- *)
57
  let build domain ?pat_vars ?(locals=[||]) (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
58

59
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
60
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
61
    let rec insert (ast_node, loc) = function
62
      | [] -> [P_node.build domain ?pat_vars (ast_node, loc)]
63
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
64
        begin
65
          try (node_id, P_node.unif_fs (P_fs.build domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
66
67
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
68
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
69

70
    let (named_nodes : (Id.name * P_node.t) list) =
71
      List.fold_left
72
73
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
74

pj2m's avatar
pj2m committed
75
76
77
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

78
79
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
80
81

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
82
83
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
84
      Pid_map.empty node_list in
85

bguillaum's avatar
bguillaum committed
86
    let (map : t) =
pj2m's avatar
pj2m committed
87
      List.fold_left
bguillaum's avatar
bguillaum committed
88
89
90
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
91
          let edge = P_edge.build domain ~locals (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
92
93
94
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
95
              (P_edge.to_string domain edge)
bguillaum's avatar
bguillaum committed
96
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
97
98
          )
        ) map_without_edges full_edge_list in
99
    (map, pos_table)
bguillaum's avatar
bguillaum committed
100

101

bguillaum's avatar
bguillaum committed
102
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
103
104
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
105
  type extension = {
bguillaum's avatar
bguillaum committed
106
107
108
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
109

bguillaum's avatar
bguillaum committed
110
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
111
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
112
  let build_extension domain ?pat_vars ?(locals=[||]) pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
113

114
    let built_nodes = List.map (P_node.build domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
115

116
117
    let (old_nodes, new_nodes) =
      List.partition
118
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
119
        built_nodes in
bguillaum's avatar
bguillaum committed
120

pj2m's avatar
pj2m committed
121
122
123
124
125
126
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
127
128
129
130

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
131
132
133
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
134
135
136

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
137
138
139
140
141
142
143
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
144

145
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
146
      List.fold_left
bguillaum's avatar
bguillaum committed
147
        (fun acc (ast_edge, loc) ->
148
149
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
150
151
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
152
              | Some i -> Pid.Pos i
153
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
154
155
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
156
              | Some i -> Pid.Pos i
157
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
158
          let edge = P_edge.build domain ~locals (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
159
160
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
161
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
162
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
163
164
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

165
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
166
167
168
169
170
171
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
172
      Pid_map.fold
bguillaum's avatar
bguillaum committed
173
174
175
176
177
178
179
180
181
182
183
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
184
185

    let roots =
186
      Pid_map.fold
bguillaum's avatar
bguillaum committed
187
188
189
190
191
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
192

pj2m's avatar
pj2m committed
193
194
    (!tree_prop, roots)

195
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
196
  let roots graph = snd (tree_and_roots graph)
197
end (* module P_graph *)
pj2m's avatar
pj2m committed
198

bguillaum's avatar
bguillaum committed
199
(* ================================================================================ *)
200
201
module G_deco = struct
  type t = {
202
203
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
204
205
206
207
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
208

bguillaum's avatar
bguillaum committed
209
(* ================================================================================ *)
210
module G_graph = struct
211
  type t = {
bguillaum's avatar
bguillaum committed
212
213
    meta: string list;                       (* meta-informations *)
    map: G_node.t Gid_map.t;                 (* node description *)
bguillaum's avatar
bguillaum committed
214
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
bguillaum's avatar
bguillaum committed
215
    highest_index: int;                      (* the next free interger index *)
216
217
  }

bguillaum's avatar
bguillaum committed
218
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; }
219

bguillaum's avatar
bguillaum committed
220
  (* ---------------------------------------------------------------------- *)
221
222
223
224
225
226
227
228
229
230
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

bguillaum's avatar
bguillaum committed
231
  (* ---------------------------------------------------------------------- *)
232
233
234
235
236
237
238
239
240
241
242
  (* [normalize g] changes all graphs keys to Old _ (used when entering a new module) *)
  let normalize t =
    let (_, mapping) =
      Gid_map.fold
        (fun key value (max_binding, mapping) ->
          match key with
          | Gid.Old n -> (n, mapping)
          | Gid.New _ -> (max_binding, mapping)
        ) t.map (0, []) in
        rename mapping t

bguillaum's avatar
bguillaum committed
243
  let get_highest g = g.highest_index
244
245
246
247

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
248

bguillaum's avatar
bguillaum committed
249
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
250

251
252
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
253

254
255
256
257
  let max_binding t =
    match Gid_map.max_binding t.map with
      | (Gid.Old i,_) -> i
      | _ -> Error.bug "[G_graph.max_binding]"
bguillaum's avatar
bguillaum committed
258
259

  (* is there an edge e out of node i ? *)
260
  let edge_out domain graph node_id label_cst =
261
    let node = Gid_map.find node_id graph.map in
262
    Massoc_gid.exists (fun _ e -> Label_cst.match_ domain e label_cst) (G_node.get_next node)
263

bguillaum's avatar
bguillaum committed
264
265
266
267
268
269
  let get_annot_info graph =
    let annot_info =
      Gid_map.fold
        (fun _ node acc ->
          match (G_node.get_annot_info node, acc) with
            | (None,_) -> acc
bguillaum's avatar
bguillaum committed
270
            | (Some f, None) -> Some (f,G_node.get_position node)
bguillaum's avatar
bguillaum committed
271
272
273
274
275
276
            | (Some _, Some _) -> Error.build "[G_node.get_annot_info] Two nodes with annot info"
        ) graph.map None in
    match annot_info with
      | Some x -> x
      | None -> Error.build "[G_node.get_annot_info] No nodes with annot info"

bguillaum's avatar
bguillaum committed
277
  (* -------------------------------------------------------------------------------- *)
278
  let map_add_edge map id_src label id_tar =
279
    let node_src =
280
      (* Not found can be raised when adding an edge from pos to neg *)
281
      try Gid_map.find id_src map with Not_found -> G_node.empty in
282
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
283
284
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
285

286
  (* -------------------------------------------------------------------------------- *)
287
288
289
290
  let add_edge graph id_src label id_tar =
    match map_add_edge graph.map id_src label id_tar with
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
291

bguillaum's avatar
bguillaum committed
292
  (* -------------------------------------------------------------------------------- *)
293
  let build domain ?(locals=[||]) gr_ast =
294
295
    let full_node_list = gr_ast.Ast.nodes
    and full_edge_list = gr_ast.Ast.edges in
296

bguillaum's avatar
bguillaum committed
297
298
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
299

bguillaum's avatar
bguillaum committed
300
301
302
303
304
305
306
307
308
309
310
311
      | (ast_node, loc)::tail ->
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some (Gid.Old index)) tail in
          let succ = if tail = [] then None else Some (Gid.Old (index+1)) in
          let (_,new_node) = G_node.build domain ?prec ?succ index (ast_node, loc) in
            (
              Gid_map.add (Gid.Old index) new_node new_tail,
              (node_id,index)::table
            ) in
312

bguillaum's avatar
bguillaum committed
313
    let (map_without_edges, table) = loop [] 0 None full_node_list in
314

315
316
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
317
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
318
319
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
320
          let edge = G_edge.build domain ~locals (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
321
322
323
          (match map_add_edge acc (Gid.Old i1) edge (Gid.Old i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
324
              (G_edge.to_string domain edge)
bguillaum's avatar
bguillaum committed
325
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
326
327
          )
        ) map_without_edges full_edge_list in
328

bguillaum's avatar
bguillaum committed
329
    {meta=gr_ast.Ast.meta; map=map; fusion = []; highest_index = (List.length full_node_list) -1}
bguillaum's avatar
bguillaum committed
330
331


332

bguillaum's avatar
bguillaum committed
333
  (* -------------------------------------------------------------------------------- *)
334
335
  let of_conll domain conll =

336
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
bguillaum's avatar
bguillaum committed
337

338
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), string_of_int) in
339

340
341
342
343
344
345
346
347
348
349
350
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
        Gid_map.add (Gid.Old index) (G_node.of_conll domain ~loc ?prec last) Gid_map.empty
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
        Gid_map.add (Gid.Old index) (G_node.of_conll domain ~loc ?prec ~succ:(Gid.Old (index+1)) line)
          (loop (index+1) (Some (Gid.Old index)) tail) in

    let map_without_edges = loop 0 None sorted_lines in
351

bguillaum's avatar
bguillaum committed
352
    let map_with_edges =
bguillaum's avatar
bguillaum committed
353
354
      List.fold_left
        (fun acc line ->
355
356
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
357
358
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
359
360
              let gov_id = Id.gbuild ~loc gov gtable in
              let edge = G_edge.make domain ~loc dep_lab in
bguillaum's avatar
bguillaum committed
361
              (match map_add_edge acc2 (Gid.Old gov_id) edge (Gid.Old dep_id) with
bguillaum's avatar
bguillaum committed
362
363
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
364
                  (G_edge.to_string domain edge)
365
                  (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
366
              )
367
            ) acc line.Conll.deps
368
        ) map_without_edges conll.Conll.lines in
bguillaum's avatar
bguillaum committed
369
370
371

      let fusion =
        List.map
372
373
374
375
          (fun {Conll.first; last; fusion; mw_line_num} ->
              let loc = Loc.file_opt_line conll.Conll.file mw_line_num in
              ( Gid.Old (Id.gbuild ~loc first gtable),
                (Gid.Old (Id.gbuild ~loc last gtable),
bguillaum's avatar
bguillaum committed
376
377
                fusion)
              )
378
          ) conll.Conll.multiwords in
bguillaum's avatar
bguillaum committed
379

bguillaum's avatar
bguillaum committed
380
    {meta = conll.Conll.meta; map=map_with_edges; fusion; highest_index= (List.length sorted_lines) -1 }
bguillaum's avatar
bguillaum committed
381

382
383
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
384
  let of_brown domain ?sentid brown =
385
    let units = Str.split (Str.regexp " ") brown in
386
      let conll_lines = List.mapi
387
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
388
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
389
        let pos = String.sub pos 1 ((String.length pos)-2) in
390
        let feats = match (i,sentid) with
391
392
        | (0,Some id) -> [("sentid", id)]
        | _ -> [] in
393
394
        {
          Conll.line_num=0;
395
396
          id = i+1;
          form;
397
          lemma;
398
399
400
          upos = "_";
          xpos = pos;
          feats;
bguillaum's avatar
bguillaum committed
401
          deps = [(i, "SUC")]
402
          }
403
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\")" item
404
      ) units in 
bguillaum's avatar
bguillaum committed
405
    of_conll domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
406

bguillaum's avatar
bguillaum committed
407
408
409
410
411
  (* -------------------------------------------------------------------------------- *)
  let opt_att atts name =
    try Some (List.assoc name atts)
    with Not_found -> None

bguillaum's avatar
bguillaum committed
412
413
414
  (* -------------------------------------------------------------------------------- *)
  (** [of_xml d_xml] loads a graph in the xml format: [d_xml] must be a <D> xml element *)
  let of_xml domain d_xml = failwith "of_xml not available"
bguillaum's avatar
bguillaum committed
415

bguillaum's avatar
bguillaum committed
416
  (* -------------------------------------------------------------------------------- *)
417
  let del_edge domain ?edge_ident loc graph id_src label id_tar =
418
    let node_src =
419
      try Gid_map.find id_src graph.map
420
      with Not_found ->
421
        match edge_ident with
bguillaum's avatar
bguillaum committed
422
423
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
424
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
425
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string domain label)
pj2m's avatar
pj2m committed
426

bguillaum's avatar
bguillaum committed
427
  (* -------------------------------------------------------------------------------- *)
428
  let del_node graph node_id =
bguillaum's avatar
bguillaum committed
429
    {graph with map =
430
431
        Gid_map.fold
          (fun id value acc ->
bguillaum's avatar
bguillaum committed
432
433
434
            if id = node_id
            then acc
            else Gid_map.add id (G_node.remove_key node_id value) acc
435
436
          ) graph.map Gid_map.empty
    }
pj2m's avatar
pj2m committed
437

bguillaum's avatar
bguillaum committed
438
  (* -------------------------------------------------------------------------------- *)
439
  let add_neighbour loc domain graph node_id label =
440
    let index = match node_id with
bguillaum's avatar
bguillaum committed
441
      | Gid.Old id ->
bguillaum's avatar
bguillaum committed
442
443
444
445
        (match Label.to_int label with
          | Some label_int -> Gid.New (id, label_int)
          | None -> Error.run ~loc "[Graph.add_neighbour] try to add neighbour with a local label"
        )
bguillaum's avatar
bguillaum committed
446
      | Gid.New _ -> Error.run ~loc "[Graph.add_neighbour] try to add neighbour node to a neighbour node" in
pj2m's avatar
pj2m committed
447

448
    if Gid_map.mem index graph.map
449
    then Error.run ~loc "[Graph.add_neighbour] try to build twice the \"same\" neighbour node (with label '%s')" (Label.to_string domain label);
pj2m's avatar
pj2m committed
450

451
    let node = Gid_map.find node_id graph.map in
pj2m's avatar
pj2m committed
452
    (* put the new node on the right of its "parent" *)
453
    let new_map = Gid_map.add index (G_node.build_neighbour node) graph.map in
bguillaum's avatar
bguillaum committed
454

455
    match map_add_edge new_map node_id label index with
bguillaum's avatar
bguillaum committed
456
457
      | Some g -> (index, {graph with map = g})
      | None -> Log.bug "[Graph.add_neighbour] add_edge must not fail"; exit 1
pj2m's avatar
pj2m committed
458

bguillaum's avatar
bguillaum committed
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
  (* -------------------------------------------------------------------------------- *)
  let insert domain id1 id2 graph =
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
    let pos1 = G_node.get_position node1 in
    let pos2 = G_node.get_position node2 in
    let new_pos= (pos1 +. pos2) /. 2. in
    let new_index = graph.highest_index + 1 in
    let new_gid = Gid.Old new_index in
    let map = graph.map
      |> (Gid_map.add new_gid (G_node.fresh domain ~prec:id1 ~succ:id2 new_pos))
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
    (new_gid, { graph with map; highest_index = new_index })

  (* -------------------------------------------------------------------------------- *)
  let append domain id graph =
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos +. 1. in
    let new_index = graph.highest_index + 1 in
    let new_gid = Gid.Old new_index in
    let map = graph.map
      |> (Gid_map.add new_gid (G_node.fresh domain ~prec:id new_pos))
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
    (new_gid, { graph with map; highest_index = new_index })

  (* -------------------------------------------------------------------------------- *)
  let prepend domain id graph =
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos -. 1. in
    let new_index = graph.highest_index + 1 in
    let new_gid = Gid.Old new_index in
    let map = graph.map
      |> (Gid_map.add new_gid (G_node.fresh domain ~succ:id new_pos))
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
    (new_gid, { graph with map; highest_index = new_index })

  (* -------------------------------------------------------------------------------- *)
  let add_after loc domain node_id graph =
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
    | Some gid_succ -> insert domain node_id gid_succ graph
    | None -> append domain node_id graph

  (* -------------------------------------------------------------------------------- *)
  let add_before loc domain node_id graph =
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
    | Some gid_prec -> insert domain gid_prec node_id graph
    | None -> prepend domain node_id graph

bguillaum's avatar
bguillaum committed
512
  (* -------------------------------------------------------------------------------- *)
513
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
514
  let shift_out loc domain src_gid tar_gid label_cst graph =
515
    let src_node = Gid_map.find src_gid graph.map in
516
    let tar_node = Gid_map.find tar_gid graph.map in
517

518
519
520
521
522
523
524
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

    (* Error if a loop is created by the shift_out *)
    let src_tar_edges = Massoc_gid.assoc tar_gid src_next in
    let _ =
      try
525
526
        let loop_edge = List.find (fun edge -> Label_cst.match_ domain edge label_cst) src_tar_edges in
        Error.run ~loc "The shfit_out command tries to build a loop (with label %s)" (Label.to_string domain loop_edge)
527
528
529
530
531
      with Not_found -> () in

    let (new_src_next,new_tar_next) =
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
532
        if Label_cst.match_ domain edge label_cst
533
534
535
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
536
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string domain edge)
537
538
539
540

        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
541

542
    { graph with map =
543
544
545
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
546
    }
pj2m's avatar
pj2m committed
547

bguillaum's avatar
bguillaum committed
548
  (* -------------------------------------------------------------------------------- *)
549
  let shift_in loc domain src_gid tar_gid label_cst graph =
550
    let tar_node = Gid_map.find tar_gid graph.map in
551
    let tar_next = G_node.get_next tar_node in
552

553
554
555
556
    (* Error if a loop is created by the shift_in *)
    let tar_src_edges = Massoc_gid.assoc src_gid tar_next in
    let _ =
      try
557
558
        let loop_edge = List.find (fun edge -> Label_cst.match_ domain edge label_cst) tar_src_edges in
        Error.run ~loc "The [shift_in] command tries to build a loop (with label \"%s\")" (Label.to_string domain loop_edge)
559
560
561
      with Not_found -> () in

    { graph with map =
562
563
        Gid_map.mapi
          (fun node_id node ->
564
565
566
567
568
569
570
571
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
572
                if Label_cst.match_ domain edge label_cst
573
574
                then
                  match List_.usort_insert edge acc_node_tar_edges with
575
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string domain edge)
576
577
578
579
580
581
582
583
584
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
585
586
          ) graph.map
    }
587

bguillaum's avatar
bguillaum committed
588
  (* -------------------------------------------------------------------------------- *)
589
  let shift_edges loc domain src_gid tar_gid label_cst graph =
590
    graph
591
592
    |> (shift_in loc domain src_gid tar_gid label_cst)
    |> (shift_out loc domain src_gid tar_gid label_cst)
pj2m's avatar
pj2m committed
593

bguillaum's avatar
bguillaum committed
594
  (* -------------------------------------------------------------------------------- *)
595
596
  let merge_node loc domain graph src_gid tar_gid =
    let se_graph = shift_edges loc domain src_gid tar_gid Label_cst.all graph in
pj2m's avatar
pj2m committed
597

598
599
    let src_node = Gid_map.find src_gid se_graph.map in
    let tar_node = Gid_map.find tar_gid se_graph.map in
600

601
    match G_fs.unif (G_node.get_fs src_node) (G_node.get_fs tar_node) with
bguillaum's avatar
bguillaum committed
602
603
604
      | Some new_fs ->
        Some {graph with map =
            (Gid_map.add
bguillaum's avatar
bguillaum committed
605
               tar_gid
606
               (G_node.set_fs new_fs tar_node)
bguillaum's avatar
bguillaum committed
607
               (Gid_map.remove src_gid se_graph.map)
bguillaum's avatar
bguillaum committed
608
609
610
            )
             }
      | None -> None
bguillaum's avatar
bguillaum committed
611

bguillaum's avatar
bguillaum committed
612
  (* -------------------------------------------------------------------------------- *)
613
  let set_feat ?loc domain graph node_id feat_name new_value =
614
    let node = Gid_map.find node_id graph.map in
615
616
617
618
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
619
          let new_fs = G_fs.set_feat ?loc domain feat_name new_value (G_node.get_fs node) in
620
          (G_node.set_fs new_fs node) in
621
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
622

bguillaum's avatar
bguillaum committed
623
  (* -------------------------------------------------------------------------------- *)
624
  let update_feat ?loc domain graph tar_id tar_feat_name item_list =
625
626
627
    let strings_to_concat =
      List.map
        (function
bguillaum's avatar
bguillaum committed
628
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
629
630
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
631
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
632
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
633
            )
bguillaum's avatar
bguillaum committed
634
          | Concat_item.String s -> s
635
636
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
637
    (set_feat ?loc domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
638

bguillaum's avatar
bguillaum committed
639
  (* -------------------------------------------------------------------------------- *)
640
  let del_feat graph node_id feat_name =
641
    let node = Gid_map.find node_id graph.map in
642
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
643
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
644

bguillaum's avatar
bguillaum committed
645
  (* -------------------------------------------------------------------------------- *)
646
  let to_gr domain graph =
647
    let buff = Buffer.create 32 in
648

649
650
    bprintf buff "graph {\n";

651
652
    (* meta data *)
    List.iter
653
654
      (fun (s) ->
        bprintf buff "  %s;\n" s
655
656
      ) graph.meta;

bguillaum's avatar
bguillaum committed
657
    (* nodes *)
658
659
660
661
662
663
664
665
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
666
667
    List.iter
      (fun (id,node) ->
668
        bprintf buff "  N_%s %s;\n" (Gid.to_string id) (G_node.to_gr node)
669
      ) sorted_nodes;
670

bguillaum's avatar
bguillaum committed
671
    (* edges *)
672
673
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
674
675
        Massoc_gid.iter
          (fun tar edge ->
676
            bprintf buff "  N_%s -[%s]-> N_%s;\n" (Gid.to_string id) (G_edge.to_string domain edge) (Gid.to_string tar)
bguillaum's avatar
bguillaum committed
677
          ) (G_node.get_next node)
678
      ) sorted_nodes;
679

680
681
682
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
683
  (* -------------------------------------------------------------------------------- *)
684
  let to_sentence ?main_feat graph =
685
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
686
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
687
688
689
690
691
692
693
694
695
696

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
697
698
        " -t-", "-t-";
        "_-_", "-";
699
700
        "_", " ";
        "' ", "'";
701
702
        " ,", ",";
        " .", ".";
703
704
705
706
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
707

bguillaum's avatar
bguillaum committed
708
  (* -------------------------------------------------------------------------------- *)
709
  let to_dep domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
710
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
711
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
712

713
714
715
716
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

bguillaum's avatar
bguillaum committed
717
    (* nodes *)
718
    List.iter
719
      (fun (id, node) ->
720
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
721
        let fs = G_node.get_fs node in
722
723
724
725
        let dep_fs = G_fs.to_dep ~decorated_feat ~position:(G_node.get_position node) ?filter ?main_feat fs in

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
bguillaum's avatar
bguillaum committed
726
          | _ -> "" in
bguillaum's avatar
bguillaum committed
727
728
729
730
731

        bprintf buff "N_%s { %s%s }\n"
          (Gid.to_string id)
          dep_fs
          style
732
733
      ) snodes;
    bprintf buff "} \n";
734

bguillaum's avatar
bguillaum committed
735
    (* edges *)
736
    bprintf buff "[EDGES] { \n";
737

bguillaum's avatar
bguillaum committed
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
    if !Global.debug
    then
      List.iter
        (fun (id, node) ->
          begin
            match G_node.get_prec node with
            | None -> ()
            | Some p -> bprintf buff "N_%s -> N_%s { label=\"__PREC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string p)
          end;
          begin
            match G_node.get_succ node with
            | None -> ()
            | Some s -> bprintf buff "N_%s -> N_%s { label=\"__SUCC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string s)
          end
        ) snodes;
753

754
    Gid_map.iter
755
      (fun gid elt ->
bguillaum's avatar
bguillaum committed
756
757
758
        Massoc_gid.iter
          (fun tar g_edge ->
            let deco = List.mem (gid,g_edge,tar) deco.G_deco.edges in
759
            bprintf buff "N_%s -> N_%s %s\n" (Gid.to_string gid) (Gid.to_string tar) (G_edge.to_dep domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
760
          ) (G_node.get_next elt)
761
      ) graph.map;
bguillaum's avatar
bguillaum committed
762
763

    bprintf buff "} \n";
764
765
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
766
  (* -------------------------------------------------------------------------------- *)
767
  let to_dot domain ?main_feat ?(deco=G_deco.empty) graph =
768
    let buff = Buffer.create 32 in
769

770
    bprintf buff "digraph G {\n";
bguillaum's avatar
bguillaum committed
771
    (* bprintf buff "  rankdir=LR;\n"; *)
772
773
    bprintf buff "  node [shape=Mrecord];\n";

bguillaum's avatar
bguillaum committed
774
    (* nodes *)
775
776
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
777
        let decorated_feat =
778
779
          try List.assoc id deco.G_deco.nodes
          with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
780
781
        bprintf buff "  N_%s [label=<%s>, color=%s]\n"
          (Gid.to_string id)
782
783
784
          (G_fs.to_dot ~decorated_feat ?main_feat (G_node.get_fs node))
          (* TODO: add bgcolor in dot output *)
          (if List.mem_assoc id deco.G_deco.nodes then "red" else "black")
785
      ) graph.map;
bguillaum's avatar
bguillaum committed
786
787

    (* edges *)
788
789
    Gid_map.iter
      (fun id node ->
bguillaum's avatar
bguillaum committed
790
791
792
        Massoc_gid.iter
          (fun tar g_edge ->
            let deco = List.mem (id,g_edge,tar) deco.G_deco.edges in
793
            bprintf buff "  N_%s -> N_%s%s\n" (Gid.to_string id) (Gid.to_string tar) (G_edge.to_dot domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
794
          ) (G_node.get_next node)
795
      ) graph.map;
796

797
798
    bprintf buff "}\n";
    Buffer.contents buff
799
800

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
801
802
803
804
805
806
807
  let list_num test =
    let rec loop n = function
      | [] -> raise Not_found
      | x::_ when test x -> n
      | _::t -> loop (n+1) t
    in loop 0

808
  let to_raw domain graph =
809
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
810
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
811
    let raw_nodes = List.map (fun (gid,node) -> (gid, G_fs.to_raw (G_node.get_fs node))) snodes in