MAJ terminée. Nous sommes passés en version 14.6.2 . Pour consulter les "releases notes" associées c'est ici :

https://about.gitlab.com/releases/2022/01/11/security-release-gitlab-14-6-2-released/
https://about.gitlab.com/releases/2022/01/04/gitlab-14-6-1-released/

grew_graph.ml 38.1 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11
12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17
18
open Grew_types

pj2m's avatar
pj2m committed
19
20
21
22
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
23
(* ================================================================================ *)
24
module P_deco = struct
bguillaum's avatar
bguillaum committed
25
26
27
28
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
29
30

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
31
end (* module P_deco *)
pj2m's avatar
pj2m committed
32

bguillaum's avatar
bguillaum committed
33
(* ================================================================================ *)
34
35
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
36

37
  let empty = Pid_map.empty
38

39
  let find = Pid_map.find
pj2m's avatar
pj2m committed
40

41
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
42
  let map_add_edge map id_src label id_tar =
43
    let node_src =
pj2m's avatar
pj2m committed
44
      (* Not found can be raised when adding an edge from pos to neg *)
45
46
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
47
48
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
49

bguillaum's avatar
bguillaum committed
50
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
51
  let build_filter ?domain table (ast_node, loc) =
52
    let pid = Id.build ~loc ast_node.Ast.node_id table in
bguillaum's avatar
bguillaum committed
53
    let fs = P_fs.build ?domain ast_node.Ast.fs in
54
55
    (pid, fs)

bguillaum's avatar
bguillaum committed
56
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
57
  let build ?domain ?pat_vars ?(locals=[||]) (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
58

59
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
60
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
61
    let rec insert (ast_node, loc) = function
bguillaum's avatar
bguillaum committed
62
      | [] -> [P_node.build ?domain ?pat_vars (ast_node, loc)]
63
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
64
        begin
bguillaum's avatar
bguillaum committed
65
          try (node_id, P_node.unif_fs (P_fs.build ?domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
66
67
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
68
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
69

70
    let (named_nodes : (Id.name * P_node.t) list) =
71
      List.fold_left
72
73
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
74

pj2m's avatar
pj2m committed
75
76
77
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

78
79
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
80
81

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
82
83
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
84
      Pid_map.empty node_list in
85

bguillaum's avatar
bguillaum committed
86
    let (map : t) =
pj2m's avatar
pj2m committed
87
      List.fold_left
bguillaum's avatar
bguillaum committed
88
89
90
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
bguillaum's avatar
bguillaum committed
91
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
92
93
94
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
95
              (P_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
96
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
97
98
          )
        ) map_without_edges full_edge_list in
99
    (map, pos_table)
bguillaum's avatar
bguillaum committed
100

101

bguillaum's avatar
bguillaum committed
102
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
103
104
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
105
  type extension = {
bguillaum's avatar
bguillaum committed
106
107
108
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
109

bguillaum's avatar
bguillaum committed
110
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
111
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
bguillaum's avatar
bguillaum committed
112
  let build_extension ?domain ?pat_vars ?(locals=[||]) pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
113

bguillaum's avatar
bguillaum committed
114
    let built_nodes = List.map (P_node.build ?domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
115

116
117
    let (old_nodes, new_nodes) =
      List.partition
118
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
119
        built_nodes in
bguillaum's avatar
bguillaum committed
120

pj2m's avatar
pj2m committed
121
122
123
124
125
126
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
127
128
129
130

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
131
132
133
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
134
135
136

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
137
138
139
140
141
142
143
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
144

145
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
146
      List.fold_left
bguillaum's avatar
bguillaum committed
147
        (fun acc (ast_edge, loc) ->
148
149
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
150
151
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
152
              | Some i -> Pid.Pos i
153
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
154
155
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
156
              | Some i -> Pid.Pos i
157
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
bguillaum's avatar
bguillaum committed
158
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
159
160
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
161
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
162
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
163
164
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

165
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
166
167
168
169
170
171
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
172
      Pid_map.fold
bguillaum's avatar
bguillaum committed
173
174
175
176
177
178
179
180
181
182
183
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
184
185

    let roots =
186
      Pid_map.fold
bguillaum's avatar
bguillaum committed
187
188
189
190
191
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
192

pj2m's avatar
pj2m committed
193
194
    (!tree_prop, roots)

195
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
196
  let roots graph = snd (tree_and_roots graph)
197
end (* module P_graph *)
pj2m's avatar
pj2m committed
198

bguillaum's avatar
bguillaum committed
199
(* ================================================================================ *)
200
201
module G_deco = struct
  type t = {
202
203
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
204
205
206
207
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
208

bguillaum's avatar
bguillaum committed
209
(* ================================================================================ *)
210
module G_graph = struct
211
  type t = {
bguillaum's avatar
bguillaum committed
212
213
    meta: string list;                       (* meta-informations *)
    map: G_node.t Gid_map.t;                 (* node description *)
bguillaum's avatar
bguillaum committed
214
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
215
    highest_index: int;                      (* the next free integer index *)
216
217
  }

bguillaum's avatar
bguillaum committed
218
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; }
219

bguillaum's avatar
bguillaum committed
220
  (* ---------------------------------------------------------------------- *)
221
222
223
224
225
226
227
228
229
230
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

bguillaum's avatar
bguillaum committed
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
  exception Not_a_tree
  type tree = T of (Gid.t * tree list)

  let rec tree_to_string = function
  | T (gid, daughters) ->
    sprintf "%s [%s]"
      (Gid.to_string gid)
      ((String.concat) ";" (List.map tree_to_string daughters))

  let graph_to_tree (g : t) : tree =
    let rec build_sub_tree map forest gid =
      if List.mem_assoc gid forest
      then (map, forest)
      else
        let (new_map, daugthers, new_forest) = Massoc_gid.fold
          (fun (acc_map, sub_trees, acc_forest) gid2 edge ->
            if edge = G_edge.sub
            then
              (* ensure that gid2 is in forest *)
              let (new_acc_map, new_acc_forest) = build_sub_tree acc_map acc_forest gid2 in
              let sub = List.assoc gid2 new_acc_forest in
              ( new_acc_map,
                sub::sub_trees,
                List.remove_assoc gid2 new_acc_forest
              )
            else (acc_map, sub_trees, acc_forest)
        ) (map,[],forest) (G_node.get_next (Gid_map.find gid map)) in
      (Gid_map.remove gid new_map, (gid, T (gid, List.rev daugthers))::new_forest) in

    let rec loop (unused_map, forest) =
      match (Gid_map.is_empty unused_map, forest) with
      | (true, [(_,tree)]) -> tree
      | (true, _) -> raise Not_a_tree
      | _ ->
        (* pick one unused node *)
        let (gid,_) = Gid_map.choose unused_map in
        loop (build_sub_tree unused_map forest gid) in
    loop (g.map, [])

bguillaum's avatar
bguillaum committed
270
  let get_highest g = g.highest_index
271
272
273
274

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
275

bguillaum's avatar
bguillaum committed
276
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
277

278
279
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
280

bguillaum's avatar
bguillaum committed
281
  (* is there an edge e out of node i ? *)
bguillaum's avatar
bguillaum committed
282
  let edge_out ?domain graph node_id label_cst =
283
    let node = Gid_map.find node_id graph.map in
bguillaum's avatar
bguillaum committed
284
    Massoc_gid.exists (fun _ e -> Label_cst.match_ ?domain label_cst e) (G_node.get_next node)
285

bguillaum's avatar
bguillaum committed
286
287
288
289
290
291
  let get_annot_info graph =
    let annot_info =
      Gid_map.fold
        (fun _ node acc ->
          match (G_node.get_annot_info node, acc) with
            | (None,_) -> acc
bguillaum's avatar
bguillaum committed
292
            | (Some f, None) -> Some (f,G_node.get_position node)
bguillaum's avatar
bguillaum committed
293
294
295
296
297
298
            | (Some _, Some _) -> Error.build "[G_node.get_annot_info] Two nodes with annot info"
        ) graph.map None in
    match annot_info with
      | Some x -> x
      | None -> Error.build "[G_node.get_annot_info] No nodes with annot info"

bguillaum's avatar
bguillaum committed
299
  (* -------------------------------------------------------------------------------- *)
300
  let map_add_edge_opt map id_src label id_tar =
301
    let node_src =
302
      (* Not found can be raised when adding an edge from pos to neg *)
303
      try Gid_map.find id_src map with Not_found -> G_node.empty in
304
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
305
306
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
307

308
309
310
311
312
313
314
  (* -------------------------------------------------------------------------------- *)
  let map_add_edge map id_src label id_tar =
    let node_src = Gid_map.find id_src map in
    match G_node.add_edge label id_tar node_src with
      | Some new_node -> Gid_map.add id_src new_node map
      | None -> Log.fbug "[Graph.map_add_edge] duplicate"; exit 2

315
  (* -------------------------------------------------------------------------------- *)
316
  let add_edge graph id_src label id_tar =
317
    match map_add_edge_opt graph.map id_src label id_tar with
318
319
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
320

bguillaum's avatar
bguillaum committed
321
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
322
  let build ?domain ?(grewpy=false) ?(locals=[||]) gr_ast =
323
324
325
326
    let full_node_list =
      if grewpy
      then List.sort (Ast.grewpy_compare) gr_ast.Ast.nodes
      else gr_ast.Ast.nodes
327
    and full_edge_list = gr_ast.Ast.edges in
328

329

bguillaum's avatar
bguillaum committed
330
331
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
332

bguillaum's avatar
bguillaum committed
333
334
335
336
337
      | (ast_node, loc)::tail ->
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
338
339
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some index) tail in
          let succ = if tail = [] then None else Some (index+1) in
340
          let new_node = G_node.build ?domain ?prec ?succ index (ast_node, loc) in
bguillaum's avatar
bguillaum committed
341
            (
342
              Gid_map.add index new_node new_tail,
bguillaum's avatar
bguillaum committed
343
344
              (node_id,index)::table
            ) in
345

bguillaum's avatar
bguillaum committed
346
    let (map_without_edges, table) = loop [] 0 None full_node_list in
347

348
349
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
350
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
351
352
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
bguillaum's avatar
bguillaum committed
353
          let edge = G_edge.build ?domain (ast_edge, loc) in
354
          (match map_add_edge_opt acc i1 edge i2 with
bguillaum's avatar
bguillaum committed
355
356
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
357
              (G_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
358
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
359
360
          )
        ) map_without_edges full_edge_list in
361

bguillaum's avatar
bguillaum committed
362
    {meta=gr_ast.Ast.meta; map=map; fusion = []; highest_index = (List.length full_node_list) -1}
bguillaum's avatar
bguillaum committed
363

bguillaum's avatar
bguillaum committed
364
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
365
  let of_conll ?domain conll =
366

367
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
bguillaum's avatar
bguillaum committed
368

369
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), string_of_int) in
370

371
372
373
374
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
bguillaum's avatar
bguillaum committed
375
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec last) Gid_map.empty
376
377
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
bguillaum's avatar
bguillaum committed
378
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec ~succ:(index+1) line)
379
          (loop (index+1) (Some index) tail) in
380
381

    let map_without_edges = loop 0 None sorted_lines in
382

bguillaum's avatar
bguillaum committed
383
    let map_with_edges =
bguillaum's avatar
bguillaum committed
384
385
      List.fold_left
        (fun acc line ->
386
387
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
388
389
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
390
              let gov_id = Id.gbuild ~loc gov gtable in
bguillaum's avatar
bguillaum committed
391
              let edge = G_edge.make ?domain ~loc dep_lab in
392
              (match map_add_edge_opt acc2 gov_id edge dep_id with
bguillaum's avatar
bguillaum committed
393
394
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
395
                  (G_edge.to_string ?domain edge)
396
                  (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
397
              )
398
            ) acc line.Conll.deps
399
        ) map_without_edges conll.Conll.lines in
bguillaum's avatar
bguillaum committed
400
401
402

      let fusion =
        List.map
403
404
          (fun {Conll.first; last; fusion; mw_line_num} ->
              let loc = Loc.file_opt_line conll.Conll.file mw_line_num in
405
406
407
408
409
410
              (
                Id.gbuild ~loc first gtable,
                (
                  Id.gbuild ~loc last gtable,
                  fusion
                )
bguillaum's avatar
bguillaum committed
411
              )
412
          ) conll.Conll.multiwords in
bguillaum's avatar
bguillaum committed
413

bguillaum's avatar
bguillaum committed
414
    {meta = conll.Conll.meta; map=map_with_edges; fusion; highest_index= (List.length sorted_lines) -1 }
bguillaum's avatar
bguillaum committed
415

416
417
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
bguillaum's avatar
bguillaum committed
418
  let of_brown ?domain ?sentid brown =
419
    let units = Str.split (Str.regexp " ") brown in
420
      let conll_lines = List.mapi
421
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
422
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
423
        let pos = String.sub pos 1 ((String.length pos)-2) in
424
        let feats = match (i,sentid) with
425
426
427
          | (0,Some id) -> [("sentid", id)]
          | _ -> [] in
        Conll.build_line ~id:(i+1) ~form ~lemma ~xpos:pos ~feats ~deps:([(i, "SUC")]) ()
428
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\") in >>>%s<<<" item brown
429
      ) units in 
bguillaum's avatar
bguillaum committed
430
    of_conll ?domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
431

432
  (* -------------------------------------------------------------------------------- *)
433
434
435
  let of_pst ?domain pst =
    let cpt = ref 0 in
    let get_pos () = incr cpt; !cpt - 1 in
436

437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
    let leaf_list = ref [] in

    let rec loop nodes = function
    | Ast.Leaf (loc, phon) ->
      let fresh_id = get_pos () in
      let node = G_node.pst_leaf ~loc ?domain phon fresh_id in
      leaf_list := fresh_id :: ! leaf_list;
      (fresh_id, Gid_map.add fresh_id node nodes)

    | Ast.T (loc, cat, daughters) ->
      let fresh_id = get_pos () in
      let new_node = G_node.pst_node ~loc ?domain cat fresh_id in
      let with_mother = Gid_map.add fresh_id new_node nodes in
      let new_nodes = List.fold_left
        (fun map daughter ->
          let (daughter_id, new_map) = loop map daughter in
          map_add_edge new_map fresh_id G_edge.sub daughter_id
        ) with_mother daughters in
      (fresh_id, new_nodes) in

    let (_,map) = loop Gid_map.empty pst in

    let rec prec_loop map = function
    | [] | [_] -> map
    | n1 :: n2 :: tail ->
      let new_map = prec_loop map (n2 :: tail) in

464
465
466
467
468
469
470
      let node1 = Gid_map.find n1 new_map
      and node2 = Gid_map.find n2 new_map in
      new_map
      |> (Gid_map.add n1 (G_node.set_succ n2 node1))
      |> (Gid_map.add n2 (G_node.set_prec n1 node2)) in

    {meta=[]; map=prec_loop map (List.rev !leaf_list); fusion = []; highest_index = !cpt}
bguillaum's avatar
bguillaum committed
471

bguillaum's avatar
bguillaum committed
472
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
473
  let del_edge ?domain ?edge_ident loc graph id_src label id_tar =
474
    let node_src =
475
      try Gid_map.find id_src graph.map
476
      with Not_found ->
477
        match edge_ident with
bguillaum's avatar
bguillaum committed
478
479
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
480
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
bguillaum's avatar
bguillaum committed
481
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string ?domain label)
pj2m's avatar
pj2m committed
482

bguillaum's avatar
bguillaum committed
483
  (* -------------------------------------------------------------------------------- *)
484
  let del_node graph node_id =
485
486
487
488
489
490
491
    let map_wo_node =
      Gid_map.fold
        (fun id value acc ->
          if id = node_id
          then acc
          else Gid_map.add id (G_node.remove_key node_id value) acc
        ) graph.map Gid_map.empty in
492
    let node = Gid_map.find node_id graph.map in
493
494
495
496
    let new_map =
      match (G_node.get_prec node, G_node.get_succ node) with
      | (Some id_prec, Some id_succ) ->
        begin
497
498
          let prec = Gid_map.find id_prec map_wo_node
          and succ = Gid_map.find id_succ map_wo_node in
499
500
501
502
503
504
          map_wo_node
          |> (Gid_map.add id_prec (G_node.set_succ id_succ prec))
          |> (Gid_map.add id_succ (G_node.set_prec id_prec succ))
        end
      | (Some id_prec, None) ->
        begin
505
          let prec = Gid_map.find id_prec map_wo_node in
506
507
508
509
510
          map_wo_node
          |> (Gid_map.add id_prec (G_node.remove_succ prec))
        end
      | (None, Some id_succ) ->
        begin
511
          let succ = Gid_map.find id_succ map_wo_node in
512
513
514
515
516
          map_wo_node
          |> (Gid_map.add id_succ (G_node.remove_prec succ))
        end
      | (None, None) -> map_wo_node in
    { graph with map = new_map }
pj2m's avatar
pj2m committed
517

bguillaum's avatar
bguillaum committed
518
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
519
  let add_neighbour loc ?domain graph node_id label = failwith "no more add_neighbour"
pj2m's avatar
pj2m committed
520

bguillaum's avatar
bguillaum committed
521
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
522
  let insert ?domain id1 id2 graph =
bguillaum's avatar
bguillaum committed
523
524
525
526
527
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
    let pos1 = G_node.get_position node1 in
    let pos2 = G_node.get_position node2 in
    let new_pos= (pos1 +. pos2) /. 2. in
528
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
529
    let map = graph.map
bguillaum's avatar
bguillaum committed
530
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~prec:id1 ~succ:id2 new_pos))
bguillaum's avatar
bguillaum committed
531
532
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
533
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
534
535

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
536
  let append ?domain id graph =
bguillaum's avatar
bguillaum committed
537
538
539
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos +. 1. in
540
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
541
    let map = graph.map
bguillaum's avatar
bguillaum committed
542
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~prec:id new_pos))
bguillaum's avatar
bguillaum committed
543
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
544
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
545
546

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
547
  let prepend ?domain id graph =
bguillaum's avatar
bguillaum committed
548
549
550
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos -. 1. in
551
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
552
    let map = graph.map
bguillaum's avatar
bguillaum committed
553
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~succ:id new_pos))
bguillaum's avatar
bguillaum committed
554
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
555
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
556
557

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
558
  let add_after loc ?domain node_id graph =
bguillaum's avatar
bguillaum committed
559
560
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
bguillaum's avatar
bguillaum committed
561
562
    | Some gid_succ -> insert ?domain node_id gid_succ graph
    | None -> append ?domain node_id graph
bguillaum's avatar
bguillaum committed
563
564

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
565
  let add_before loc ?domain node_id graph =
bguillaum's avatar
bguillaum committed
566
567
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
bguillaum's avatar
bguillaum committed
568
569
    | Some gid_prec -> insert ?domain gid_prec node_id graph
    | None -> prepend ?domain node_id graph
bguillaum's avatar
bguillaum committed
570

bguillaum's avatar
bguillaum committed
571
  (* -------------------------------------------------------------------------------- *)
572
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
bguillaum's avatar
bguillaum committed
573
  let shift_out loc ?domain src_gid tar_gid label_cst graph =
574
    let src_node = Gid_map.find src_gid graph.map in
575
    let tar_node = Gid_map.find tar_gid graph.map in
576

577
578
579
580
581
582
583
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

    (* Error if a loop is created by the shift_out *)
    let src_tar_edges = Massoc_gid.assoc tar_gid src_next in
    let _ =
      try
bguillaum's avatar
bguillaum committed
584
585
        let loop_edge = List.find (fun edge -> Label_cst.match_ ?domain label_cst edge) src_tar_edges in
        Error.run ~loc "The shfit_out command tries to build a loop (with label %s)" (Label.to_string ?domain loop_edge)
586
587
588
589
590
      with Not_found -> () in

    let (new_src_next,new_tar_next) =
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
bguillaum's avatar
bguillaum committed
591
        if Label_cst.match_ ?domain label_cst edge
592
593
594
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
bguillaum's avatar
bguillaum committed
595
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
596
597
598
599

        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
600

601
    { graph with map =
602
603
604
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
605
    }
pj2m's avatar
pj2m committed
606

bguillaum's avatar
bguillaum committed
607
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
608
  let shift_in loc ?domain src_gid tar_gid label_cst graph =
609
    let tar_node = Gid_map.find tar_gid graph.map in
610
    let tar_next = G_node.get_next tar_node in
611

612
613
614
615
    (* Error if a loop is created by the shift_in *)
    let tar_src_edges = Massoc_gid.assoc src_gid tar_next in
    let _ =
      try
bguillaum's avatar
bguillaum committed
616
617
        let loop_edge = List.find (fun edge -> Label_cst.match_ ?domain label_cst edge) tar_src_edges in
        Error.run ~loc "The [shift_in] command tries to build a loop (with label \"%s\")" (Label.to_string ?domain loop_edge)
618
619
620
      with Not_found -> () in

    { graph with map =
621
622
        Gid_map.mapi
          (fun node_id node ->
623
624
625
626
627
628
629
630
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
bguillaum's avatar
bguillaum committed
631
                if Label_cst.match_ ?domain label_cst edge
632
633
                then
                  match List_.usort_insert edge acc_node_tar_edges with
bguillaum's avatar
bguillaum committed
634
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
635
636
637
638
639
640
641
642
643
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
644
645
          ) graph.map
    }
646

bguillaum's avatar
bguillaum committed
647
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
648
  let shift_edges loc ?domain src_gid tar_gid label_cst graph =
649
    graph
bguillaum's avatar
bguillaum committed
650
651
    |> (shift_in loc ?domain src_gid tar_gid label_cst)
    |> (shift_out loc ?domain src_gid tar_gid label_cst)
pj2m's avatar
pj2m committed
652

bguillaum's avatar
bguillaum committed
653
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
654
655
  let merge_node loc ?domain graph src_gid tar_gid =
    let se_graph = shift_edges loc ?domain src_gid tar_gid Label_cst.all graph in
pj2m's avatar
pj2m committed
656

657
658
    let src_node = Gid_map.find src_gid se_graph.map in
    let tar_node = Gid_map.find tar_gid se_graph.map in
659

660
    match G_fs.unif (G_node.get_fs src_node) (G_node.get_fs tar_node) with
bguillaum's avatar
bguillaum committed
661
662
663
      | Some new_fs ->
        Some {graph with map =
            (Gid_map.add
bguillaum's avatar
bguillaum committed
664
               tar_gid
665
               (G_node.set_fs new_fs tar_node)
bguillaum's avatar
bguillaum committed
666
               (Gid_map.remove src_gid se_graph.map)
bguillaum's avatar
bguillaum committed
667
668
669
            )
             }
      | None -> None
bguillaum's avatar
bguillaum committed
670

bguillaum's avatar
bguillaum committed
671
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
672
  let set_feat ?loc ?domain graph node_id feat_name new_value =
673
    let node = Gid_map.find node_id graph.map in
674
675
676
677
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
bguillaum's avatar
bguillaum committed
678
          let new_fs = G_fs.set_feat ?loc ?domain feat_name new_value (G_node.get_fs node) in
679
          (G_node.set_fs new_fs node) in
680
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
681

bguillaum's avatar
bguillaum committed
682
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
683
  let update_feat ?loc ?domain graph tar_id tar_feat_name item_list =
684
685
686
    let strings_to_concat =
      List.map
        (function
bguillaum's avatar
bguillaum committed
687
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
688
689
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
690
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
691
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
692
            )
bguillaum's avatar
bguillaum committed
693
          | Concat_item.String s -> s
694
695
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
bguillaum's avatar
bguillaum committed
696
    (set_feat ?loc ?domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
697

bguillaum's avatar
bguillaum committed
698
  (* -------------------------------------------------------------------------------- *)
699
  let del_feat graph node_id feat_name =
700
    let node = Gid_map.find node_id graph.map in
701
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
702
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
703

bguillaum's avatar
bguillaum committed
704
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
705
  let to_gr ?domain graph =
706
707
708

    let gr_id id = G_node.get_name id (Gid_map.find id graph.map) in

709
    let buff = Buffer.create 32 in
710

711
712
    bprintf buff "graph {\n";

713
714
    (* meta data *)
    List.iter
715
716
      (fun (s) ->
        bprintf buff "  %s;\n" s
717
718
      ) graph.meta;

bguillaum's avatar
bguillaum committed
719
    (* nodes *)
720
721
722
723
724
725
726
727
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
728

729
730
    List.iter
      (fun (id,node) ->
731
        bprintf buff "  %s %s;\n" (gr_id id) (G_node.to_gr node)
732
      ) sorted_nodes;
733

bguillaum's avatar
bguillaum committed
734
    (* edges *)
735
736
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
737
738
        Massoc_gid.iter
          (fun tar edge ->
739
            bprintf buff "  %s -[%s]-> %s;\n" (gr_id id) (G_edge.to_string ?domain edge) (gr_id tar)
bguillaum's avatar
bguillaum committed
740
          ) (G_node.get_next node)
741
      ) sorted_nodes;
742

743
744
745
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
746
  (* -------------------------------------------------------------------------------- *)
747
  let to_sentence ?main_feat graph =
748
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
749
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
750
751
752
753
754
755
756
757
758
759

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
760
761
        " -t-", "-t-";
        "_-_", "-";
762
763
        "_", " ";
        "' ", "'";
764
765
        " ,", ",";
        " .", ".";
766
767
768
769
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
770

bguillaum's avatar
bguillaum committed
771
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
772
  let to_dep ?domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
773
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
774
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
775

776
777
778
779
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

bguillaum's avatar
bguillaum committed
780
    (* nodes *)
781
    List.iter
782
      (fun (id, node) ->
783
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
784
        let fs = G_node.get_fs node in
785
786
787
788
        let dep_fs = G_fs.to_dep ~decorated_feat ~position:(G_node.get_position node) ?filter ?main_feat fs in

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
bguillaum's avatar
bguillaum committed
789
          | _ -> "" in
bguillaum's avatar
bguillaum committed
790
791
792
793
794

        bprintf buff "N_%s { %s%s }\n"
          (Gid.to_string id)
          dep_fs
          style
795
796
      ) snodes;
    bprintf buff "} \n";
797

bguillaum's avatar
bguillaum committed
798
    (* edges *)
799
    bprintf buff "[EDGES] { \n";
800

bguillaum's avatar
bguillaum committed
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
    if !Global.debug
    then
      List.iter
        (fun (id, node) ->
          begin
            match G_node.get_prec node with
            | None -> ()
            | Some p -> bprintf buff "N_%s -> N_%s { label=\"__PREC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string p)
          end;
          begin
            match G_node.get_succ