grew_graph.ml 38.5 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11
12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17
18
open Grew_types

pj2m's avatar
pj2m committed
19
20
21
22
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
23
(* ================================================================================ *)
24
module P_deco = struct
bguillaum's avatar
bguillaum committed
25
26
27
28
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
29
30

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
31
end (* module P_deco *)
pj2m's avatar
pj2m committed
32

bguillaum's avatar
bguillaum committed
33
(* ================================================================================ *)
34
35
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
36

37
  let empty = Pid_map.empty
38

39
  let find = Pid_map.find
pj2m's avatar
pj2m committed
40

41
  let pid_name_list t = Pid_map.fold (fun _ node acc -> (P_node.get_name node)::acc) t []
42

43
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
44
  let map_add_edge map id_src label id_tar =
45
    let node_src =
pj2m's avatar
pj2m committed
46
      (* Not found can be raised when adding an edge from pos to neg *)
47
48
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
49
50
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
51

bguillaum's avatar
bguillaum committed
52
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
53
  let build_filter ?domain table (ast_node, loc) =
54
    let pid = Id.build ~loc ast_node.Ast.node_id table in
bguillaum's avatar
bguillaum committed
55
    let fs = P_fs.build ?domain ast_node.Ast.fs in
56
57
    (pid, fs)

bguillaum's avatar
bguillaum committed
58
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
59
  let build ?domain ?pat_vars ?(locals=[||]) (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
60

61
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
62
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
63
    let rec insert (ast_node, loc) = function
bguillaum's avatar
bguillaum committed
64
      | [] -> [P_node.build ?domain ?pat_vars (ast_node, loc)]
65
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
66
        begin
bguillaum's avatar
bguillaum committed
67
          try (node_id, P_node.unif_fs (P_fs.build ?domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
68
69
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
70
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
71

72
    let (named_nodes : (Id.name * P_node.t) list) =
73
      List.fold_left
74
75
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
76

pj2m's avatar
pj2m committed
77
78
79
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

80
81
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
82
83

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
84
85
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
86
      Pid_map.empty node_list in
87

bguillaum's avatar
bguillaum committed
88
    let (map : t) =
pj2m's avatar
pj2m committed
89
      List.fold_left
bguillaum's avatar
bguillaum committed
90
91
92
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
bguillaum's avatar
bguillaum committed
93
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
94
95
96
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
97
              (P_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
98
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
99
100
          )
        ) map_without_edges full_edge_list in
101
    (map, pos_table)
bguillaum's avatar
bguillaum committed
102

103

bguillaum's avatar
bguillaum committed
104
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
105
106
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
107
  type extension = {
bguillaum's avatar
bguillaum committed
108
109
110
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
111

bguillaum's avatar
bguillaum committed
112
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
113
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
bguillaum's avatar
bguillaum committed
114
  let build_extension ?domain ?pat_vars ?(locals=[||]) pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
115

bguillaum's avatar
bguillaum committed
116
    let built_nodes = List.map (P_node.build ?domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
117

118
119
    let (old_nodes, new_nodes) =
      List.partition
120
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
121
        built_nodes in
bguillaum's avatar
bguillaum committed
122

pj2m's avatar
pj2m committed
123
124
125
126
127
128
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
129
130
131
132

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
133
134
135
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
136
137
138

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
139
140
141
142
143
144
145
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
146

147
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
148
      List.fold_left
bguillaum's avatar
bguillaum committed
149
        (fun acc (ast_edge, loc) ->
150
151
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
152
153
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
154
              | Some i -> Pid.Pos i
155
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
156
157
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
158
              | Some i -> Pid.Pos i
159
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
bguillaum's avatar
bguillaum committed
160
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
161
162
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
163
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
164
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
165
166
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

167
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
168
169
170
171
172
173
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
174
      Pid_map.fold
bguillaum's avatar
bguillaum committed
175
176
177
178
179
180
181
182
183
184
185
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
186
187

    let roots =
188
      Pid_map.fold
bguillaum's avatar
bguillaum committed
189
190
191
192
193
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
194

pj2m's avatar
pj2m committed
195
196
    (!tree_prop, roots)

197
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
198
  let roots graph = snd (tree_and_roots graph)
199
end (* module P_graph *)
pj2m's avatar
pj2m committed
200

bguillaum's avatar
bguillaum committed
201
(* ================================================================================ *)
202
203
module G_deco = struct
  type t = {
204
205
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
206
207
208
209
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
210

bguillaum's avatar
bguillaum committed
211
(* ================================================================================ *)
212
module G_graph = struct
213
  type t = {
bguillaum's avatar
bguillaum committed
214
215
    meta: string list;                       (* meta-informations *)
    map: G_node.t Gid_map.t;                 (* node description *)
bguillaum's avatar
bguillaum committed
216
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
217
    highest_index: int;                      (* the next free integer index *)
218
219
  }

bguillaum's avatar
bguillaum committed
220
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; }
221

bguillaum's avatar
bguillaum committed
222
  (* ---------------------------------------------------------------------- *)
223
224
225
226
227
228
229
230
231
232
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

bguillaum's avatar
bguillaum committed
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
  exception Not_a_tree
  type tree = T of (Gid.t * tree list)

  let rec tree_to_string = function
  | T (gid, daughters) ->
    sprintf "%s [%s]"
      (Gid.to_string gid)
      ((String.concat) ";" (List.map tree_to_string daughters))

  let graph_to_tree (g : t) : tree =
    let rec build_sub_tree map forest gid =
      if List.mem_assoc gid forest
      then (map, forest)
      else
        let (new_map, daugthers, new_forest) = Massoc_gid.fold
          (fun (acc_map, sub_trees, acc_forest) gid2 edge ->
            if edge = G_edge.sub
            then
              (* ensure that gid2 is in forest *)
              let (new_acc_map, new_acc_forest) = build_sub_tree acc_map acc_forest gid2 in
              let sub = List.assoc gid2 new_acc_forest in
              ( new_acc_map,
                sub::sub_trees,
                List.remove_assoc gid2 new_acc_forest
              )
            else (acc_map, sub_trees, acc_forest)
        ) (map,[],forest) (G_node.get_next (Gid_map.find gid map)) in
      (Gid_map.remove gid new_map, (gid, T (gid, List.rev daugthers))::new_forest) in

    let rec loop (unused_map, forest) =
      match (Gid_map.is_empty unused_map, forest) with
      | (true, [(_,tree)]) -> tree
      | (true, _) -> raise Not_a_tree
      | _ ->
        (* pick one unused node *)
        let (gid,_) = Gid_map.choose unused_map in
        loop (build_sub_tree unused_map forest gid) in
    loop (g.map, [])

bguillaum's avatar
bguillaum committed
272
  let get_highest g = g.highest_index
273
274
275
276

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
277

bguillaum's avatar
bguillaum committed
278
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
279

280
281
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
282

bguillaum's avatar
bguillaum committed
283
  (* is there an edge e out of node i ? *)
bguillaum's avatar
bguillaum committed
284
  let edge_out ?domain graph node_id label_cst =
285
    let node = Gid_map.find node_id graph.map in
bguillaum's avatar
bguillaum committed
286
    Massoc_gid.exists (fun _ e -> Label_cst.match_ ?domain label_cst e) (G_node.get_next node)
287

bguillaum's avatar
bguillaum committed
288
289
290
291
292
293
  let get_annot_info graph =
    let annot_info =
      Gid_map.fold
        (fun _ node acc ->
          match (G_node.get_annot_info node, acc) with
            | (None,_) -> acc
bguillaum's avatar
bguillaum committed
294
            | (Some f, None) -> Some (f,G_node.get_position node)
bguillaum's avatar
bguillaum committed
295
296
297
298
299
300
            | (Some _, Some _) -> Error.build "[G_node.get_annot_info] Two nodes with annot info"
        ) graph.map None in
    match annot_info with
      | Some x -> x
      | None -> Error.build "[G_node.get_annot_info] No nodes with annot info"

bguillaum's avatar
bguillaum committed
301
  (* -------------------------------------------------------------------------------- *)
302
  let map_add_edge_opt map id_src label id_tar =
303
    let node_src =
304
      (* Not found can be raised when adding an edge from pos to neg *)
305
      try Gid_map.find id_src map with Not_found -> G_node.empty in
306
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
307
308
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
309

310
311
312
313
314
315
316
  (* -------------------------------------------------------------------------------- *)
  let map_add_edge map id_src label id_tar =
    let node_src = Gid_map.find id_src map in
    match G_node.add_edge label id_tar node_src with
      | Some new_node -> Gid_map.add id_src new_node map
      | None -> Log.fbug "[Graph.map_add_edge] duplicate"; exit 2

317
  (* -------------------------------------------------------------------------------- *)
318
  let add_edge graph id_src label id_tar =
319
    match map_add_edge_opt graph.map id_src label id_tar with
320
321
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
322

bguillaum's avatar
bguillaum committed
323
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
324
  let build ?domain ?(grewpy=false) ?(locals=[||]) gr_ast =
325
326
327
328
    let full_node_list =
      if grewpy
      then List.sort (Ast.grewpy_compare) gr_ast.Ast.nodes
      else gr_ast.Ast.nodes
329
    and full_edge_list = gr_ast.Ast.edges in
330

331

bguillaum's avatar
bguillaum committed
332
333
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
334

bguillaum's avatar
bguillaum committed
335
336
337
338
339
      | (ast_node, loc)::tail ->
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
340
341
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some index) tail in
          let succ = if tail = [] then None else Some (index+1) in
342
          let new_node = G_node.build ?domain ?prec ?succ index (ast_node, loc) in
bguillaum's avatar
bguillaum committed
343
            (
344
              Gid_map.add index new_node new_tail,
bguillaum's avatar
bguillaum committed
345
346
              (node_id,index)::table
            ) in
347

bguillaum's avatar
bguillaum committed
348
    let (map_without_edges, table) = loop [] 0 None full_node_list in
349

350
351
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
352
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
353
354
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
bguillaum's avatar
bguillaum committed
355
          let edge = G_edge.build ?domain (ast_edge, loc) in
356
          (match map_add_edge_opt acc i1 edge i2 with
bguillaum's avatar
bguillaum committed
357
358
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
359
              (G_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
360
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
361
362
          )
        ) map_without_edges full_edge_list in
363

bguillaum's avatar
bguillaum committed
364
    {meta=gr_ast.Ast.meta; map=map; fusion = []; highest_index = (List.length full_node_list) -1}
bguillaum's avatar
bguillaum committed
365

bguillaum's avatar
bguillaum committed
366
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
367
  let of_conll ?domain conll =
368

369
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
bguillaum's avatar
bguillaum committed
370

371
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), Conll.Id.to_dot) in
372

373
374
375
376
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
bguillaum's avatar
bguillaum committed
377
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec last) Gid_map.empty
378
379
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
bguillaum's avatar
bguillaum committed
380
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec ~succ:(index+1) line)
381
          (loop (index+1) (Some index) tail) in
382
383

    let map_without_edges = loop 0 None sorted_lines in
384

bguillaum's avatar
bguillaum committed
385
    let map_with_edges =
bguillaum's avatar
bguillaum committed
386
387
      List.fold_left
        (fun acc line ->
388
389
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
390
391
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
392
              let gov_id = Id.gbuild ~loc gov gtable in
bguillaum's avatar
bguillaum committed
393
              let edge = G_edge.make ?domain ~loc dep_lab in
394
              (match map_add_edge_opt acc2 gov_id edge dep_id with
bguillaum's avatar
bguillaum committed
395
396
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
397
                  (G_edge.to_string ?domain edge)
398
                  (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
399
              )
400
            ) acc line.Conll.deps
401
        ) map_without_edges conll.Conll.lines in
bguillaum's avatar
bguillaum committed
402
403
404

      let fusion =
        List.map
405
          (fun {Conll.first; last; fusion; mw_line_num} ->
406
              let loc = Loc.file_opt_line_opt conll.Conll.file mw_line_num in
407
              (
408
                Id.gbuild ~loc (first,None) gtable,
409
                (
410
                  Id.gbuild ~loc (last, None) gtable,
411
412
                  fusion
                )
bguillaum's avatar
bguillaum committed
413
              )
414
          ) conll.Conll.multiwords in
bguillaum's avatar
bguillaum committed
415

bguillaum's avatar
bguillaum committed
416
    {meta = conll.Conll.meta; map=map_with_edges; fusion; highest_index= (List.length sorted_lines) -1 }
bguillaum's avatar
bguillaum committed
417

418
419
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
bguillaum's avatar
bguillaum committed
420
  let of_brown ?domain ?sentid brown =
421
    let units = Str.split (Str.regexp " ") brown in
422
      let conll_lines = List.mapi
423
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
424
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
425
        let pos = String.sub pos 1 ((String.length pos)-2) in
426
        let feats = match (i,sentid) with
427
428
          | (0,Some id) -> [("sentid", id)]
          | _ -> [] in
429
        Conll.build_line ~id:(i+1,None) ~form ~lemma ~xpos:pos ~feats ~deps:([(i, "SUC")]) ()
430
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\") in >>>%s<<<" item brown
Bruno Guillaume's avatar
Bruno Guillaume committed
431
      ) units in
bguillaum's avatar
bguillaum committed
432
    of_conll ?domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
433

434
  (* -------------------------------------------------------------------------------- *)
435
436
437
  let of_pst ?domain pst =
    let cpt = ref 0 in
    let get_pos () = incr cpt; !cpt - 1 in
438

439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
    let leaf_list = ref [] in

    let rec loop nodes = function
    | Ast.Leaf (loc, phon) ->
      let fresh_id = get_pos () in
      let node = G_node.pst_leaf ~loc ?domain phon fresh_id in
      leaf_list := fresh_id :: ! leaf_list;
      (fresh_id, Gid_map.add fresh_id node nodes)

    | Ast.T (loc, cat, daughters) ->
      let fresh_id = get_pos () in
      let new_node = G_node.pst_node ~loc ?domain cat fresh_id in
      let with_mother = Gid_map.add fresh_id new_node nodes in
      let new_nodes = List.fold_left
        (fun map daughter ->
          let (daughter_id, new_map) = loop map daughter in
          map_add_edge new_map fresh_id G_edge.sub daughter_id
        ) with_mother daughters in
      (fresh_id, new_nodes) in

    let (_,map) = loop Gid_map.empty pst in

    let rec prec_loop map = function
    | [] | [_] -> map
    | n1 :: n2 :: tail ->
      let new_map = prec_loop map (n2 :: tail) in

466
467
468
469
470
471
472
      let node1 = Gid_map.find n1 new_map
      and node2 = Gid_map.find n2 new_map in
      new_map
      |> (Gid_map.add n1 (G_node.set_succ n2 node1))
      |> (Gid_map.add n2 (G_node.set_prec n1 node2)) in

    {meta=[]; map=prec_loop map (List.rev !leaf_list); fusion = []; highest_index = !cpt}
bguillaum's avatar
bguillaum committed
473

bguillaum's avatar
bguillaum committed
474
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
475
  let del_edge ?domain ?edge_ident loc graph id_src label id_tar =
476
    let node_src =
477
      try Gid_map.find id_src graph.map
478
      with Not_found ->
479
        match edge_ident with
bguillaum's avatar
bguillaum committed
480
481
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
482
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
bguillaum's avatar
bguillaum committed
483
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string ?domain label)
pj2m's avatar
pj2m committed
484

bguillaum's avatar
bguillaum committed
485
  (* -------------------------------------------------------------------------------- *)
486
  let del_node graph node_id =
487
488
489
490
491
492
493
    let map_wo_node =
      Gid_map.fold
        (fun id value acc ->
          if id = node_id
          then acc
          else Gid_map.add id (G_node.remove_key node_id value) acc
        ) graph.map Gid_map.empty in
494
    let node = Gid_map.find node_id graph.map in
495
496
497
498
    let new_map =
      match (G_node.get_prec node, G_node.get_succ node) with
      | (Some id_prec, Some id_succ) ->
        begin
499
500
          let prec = Gid_map.find id_prec map_wo_node
          and succ = Gid_map.find id_succ map_wo_node in
501
502
503
504
505
506
          map_wo_node
          |> (Gid_map.add id_prec (G_node.set_succ id_succ prec))
          |> (Gid_map.add id_succ (G_node.set_prec id_prec succ))
        end
      | (Some id_prec, None) ->
        begin
507
          let prec = Gid_map.find id_prec map_wo_node in
508
509
510
511
512
          map_wo_node
          |> (Gid_map.add id_prec (G_node.remove_succ prec))
        end
      | (None, Some id_succ) ->
        begin
513
          let succ = Gid_map.find id_succ map_wo_node in
514
515
516
517
518
          map_wo_node
          |> (Gid_map.add id_succ (G_node.remove_prec succ))
        end
      | (None, None) -> map_wo_node in
    { graph with map = new_map }
pj2m's avatar
pj2m committed
519

bguillaum's avatar
bguillaum committed
520
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
521
  let add_neighbour loc ?domain graph node_id label = failwith "no more add_neighbour"
pj2m's avatar
pj2m committed
522

bguillaum's avatar
bguillaum committed
523
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
524
  let insert ?domain id1 id2 graph =
bguillaum's avatar
bguillaum committed
525
526
527
528
529
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
    let pos1 = G_node.get_position node1 in
    let pos2 = G_node.get_position node2 in
    let new_pos= (pos1 +. pos2) /. 2. in
530
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
531
    let map = graph.map
bguillaum's avatar
bguillaum committed
532
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~prec:id1 ~succ:id2 new_pos))
bguillaum's avatar
bguillaum committed
533
534
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
535
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
536
537

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
538
  let append ?domain id graph =
bguillaum's avatar
bguillaum committed
539
540
541
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos +. 1. in
542
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
543
    let map = graph.map
bguillaum's avatar
bguillaum committed
544
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~prec:id new_pos))
bguillaum's avatar
bguillaum committed
545
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
546
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
547
548

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
549
  let prepend ?domain id graph =
bguillaum's avatar
bguillaum committed
550
551
552
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos -. 1. in
553
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
554
    let map = graph.map
bguillaum's avatar
bguillaum committed
555
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~succ:id new_pos))
bguillaum's avatar
bguillaum committed
556
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
557
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
558
559

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
560
  let add_after loc ?domain node_id graph =
bguillaum's avatar
bguillaum committed
561
562
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
bguillaum's avatar
bguillaum committed
563
564
    | Some gid_succ -> insert ?domain node_id gid_succ graph
    | None -> append ?domain node_id graph
bguillaum's avatar
bguillaum committed
565
566

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
567
  let add_before loc ?domain node_id graph =
bguillaum's avatar
bguillaum committed
568
569
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
bguillaum's avatar
bguillaum committed
570
571
    | Some gid_prec -> insert ?domain gid_prec node_id graph
    | None -> prepend ?domain node_id graph
bguillaum's avatar
bguillaum committed
572

bguillaum's avatar
bguillaum committed
573
  (* -------------------------------------------------------------------------------- *)
574
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
bguillaum's avatar
bguillaum committed
575
  let shift_out loc ?domain src_gid tar_gid label_cst graph =
576
    let src_node = Gid_map.find src_gid graph.map in
577
    let tar_node = Gid_map.find tar_gid graph.map in
578

579
580
581
582
583
584
585
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

    (* Error if a loop is created by the shift_out *)
    let src_tar_edges = Massoc_gid.assoc tar_gid src_next in
    let _ =
      try
bguillaum's avatar
bguillaum committed
586
587
        let loop_edge = List.find (fun edge -> Label_cst.match_ ?domain label_cst edge) src_tar_edges in
        Error.run ~loc "The shfit_out command tries to build a loop (with label %s)" (Label.to_string ?domain loop_edge)
588
589
590
591
592
      with Not_found -> () in

    let (new_src_next,new_tar_next) =
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
bguillaum's avatar
bguillaum committed
593
        if Label_cst.match_ ?domain label_cst edge
594
595
596
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
bguillaum's avatar
bguillaum committed
597
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
598
599
600
601

        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
602

603
    { graph with map =
604
605
606
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
607
    }
pj2m's avatar
pj2m committed
608

bguillaum's avatar
bguillaum committed
609
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
610
  let shift_in loc ?domain src_gid tar_gid label_cst graph =
611
    let tar_node = Gid_map.find tar_gid graph.map in
612
    let tar_next = G_node.get_next tar_node in
613

614
615
616
617
    (* Error if a loop is created by the shift_in *)
    let tar_src_edges = Massoc_gid.assoc src_gid tar_next in
    let _ =
      try
bguillaum's avatar
bguillaum committed
618
619
        let loop_edge = List.find (fun edge -> Label_cst.match_ ?domain label_cst edge) tar_src_edges in
        Error.run ~loc "The [shift_in] command tries to build a loop (with label \"%s\")" (Label.to_string ?domain loop_edge)
620
621
622
      with Not_found -> () in

    { graph with map =
623
624
        Gid_map.mapi
          (fun node_id node ->
625
626
627
628
629
630
631
632
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
bguillaum's avatar
bguillaum committed
633
                if Label_cst.match_ ?domain label_cst edge
634
635
                then
                  match List_.usort_insert edge acc_node_tar_edges with
bguillaum's avatar
bguillaum committed
636
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
637
638
639
640
641
642
643
644
645
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
646
647
          ) graph.map
    }
648

bguillaum's avatar
bguillaum committed
649
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
650
  let shift_edges loc ?domain src_gid tar_gid label_cst graph =
651
    graph
bguillaum's avatar
bguillaum committed
652
653
    |> (shift_in loc ?domain src_gid tar_gid label_cst)
    |> (shift_out loc ?domain src_gid tar_gid label_cst)
pj2m's avatar
pj2m committed
654

bguillaum's avatar
bguillaum committed
655
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
656
657
  let merge_node loc ?domain graph src_gid tar_gid =
    let se_graph = shift_edges loc ?domain src_gid tar_gid Label_cst.all graph in
pj2m's avatar
pj2m committed
658

659
660
    let src_node = Gid_map.find src_gid se_graph.map in
    let tar_node = Gid_map.find tar_gid se_graph.map in
661

662
    match G_fs.unif (G_node.get_fs src_node) (G_node.get_fs tar_node) with
bguillaum's avatar
bguillaum committed
663
664
665
      | Some new_fs ->
        Some {graph with map =
            (Gid_map.add
bguillaum's avatar
bguillaum committed
666
               tar_gid
667
               (G_node.set_fs new_fs tar_node)
bguillaum's avatar
bguillaum committed
668
               (Gid_map.remove src_gid se_graph.map)
bguillaum's avatar
bguillaum committed
669
670
671
            )
             }
      | None -> None
bguillaum's avatar
bguillaum committed
672

bguillaum's avatar
bguillaum committed
673
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
674
  let set_feat ?loc ?domain graph node_id feat_name new_value =
675
    let node = Gid_map.find node_id graph.map in
676
677
678
679
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
bguillaum's avatar
bguillaum committed
680
          let new_fs = G_fs.set_feat ?loc ?domain feat_name new_value (G_node.get_fs node) in
681
          (G_node.set_fs new_fs node) in
682
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
683

bguillaum's avatar
bguillaum committed
684
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
685
  let update_feat ?loc ?domain graph tar_id tar_feat_name item_list =
686
687
688
    let strings_to_concat =
      List.map
        (function
689
690
691
          | Concat_item.Feat (node_gid, "position") ->
            let node = Gid_map.find node_gid graph.map in
            sprintf "%g" (G_node.get_position node)
bguillaum's avatar
bguillaum committed
692
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
693
694
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
695
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
696
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
697
            )
bguillaum's avatar
bguillaum committed
698
          | Concat_item.String s -> s
699
700
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
bguillaum's avatar
bguillaum committed
701
    (set_feat ?loc ?domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
702

bguillaum's avatar
bguillaum committed
703
  (* -------------------------------------------------------------------------------- *)
704
  let del_feat graph node_id feat_name =
705
    let node = Gid_map.find node_id graph.map in
706
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
707
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
708

bguillaum's avatar
bguillaum committed
709
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
710
  let to_gr ?domain graph =
711
712
713

    let gr_id id = G_node.get_name id (Gid_map.find id graph.map) in

714
    let buff = Buffer.create 32 in
715

716
717
    bprintf buff "graph {\n";

718
719
    (* meta data *)
    List.iter
720
721
      (fun (s) ->
        bprintf buff "  %s;\n" s
722
723
      ) graph.meta;

bguillaum's avatar
bguillaum committed
724
    (* nodes *)
725
726
727
728
729
730
731
732
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
733

734
735
    List.iter
      (fun (id,node) ->
736
        bprintf buff "  %s %s;\n" (gr_id id) (G_node.to_gr node)
737
      ) sorted_nodes;
738

bguillaum's avatar
bguillaum committed
739
    (* edges *)
740
741
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
742
743
        Massoc_gid.iter
          (fun tar edge ->
744
            bprintf buff "  %s -[%s]-> %s;\n" (gr_id id) (G_edge.to_string ?domain edge) (gr_id tar)
bguillaum's avatar
bguillaum committed
745
          ) (G_node.get_next node)
746
      ) sorted_nodes;
747

748
749
750
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
751
  (* -------------------------------------------------------------------------------- *)
752
  let to_sentence ?main_feat graph =
753
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
754
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
755
756
757
758
759
760
761
762
763
764

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
765
766
        " -t-", "-t-";
        "_-_", "-";
767
768
        "_", " ";
        "' ", "'";
769
770
        " ,", ",";
        " .", ".";
771
772
773
774
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
775

bguillaum's avatar
bguillaum committed
776
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
777
  let to_dep ?domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
778
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
779
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
780

781
782
783
784
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

bguillaum's avatar
bguillaum committed
785
    (* nodes *)
786
    List.iter
787
      (fun (id, node) ->
788
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
789
        let fs = G_node.get_fs node in
790
791
792
793
        let dep_fs = G_fs.to_dep ~decorated_feat ~position:(G_node.get_position node) ?filter ?main_feat fs in

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
794
795
796
          | _ -> match G_fs.get_string_atom "_UD_empty" fs with
            | Some "Yes" -> "; forecolor=purple; subcolor=purple; "
            | _ -> "" in
bguillaum's avatar
bguillaum committed
797
798
799
800
801

        bprintf buff "N_%s { %s%s }\n"
          (Gid.to_string id)