MAJ terminée. Nous sommes passés en version 14.6.2 . Pour consulter les "releases notes" associées c'est ici :

https://about.gitlab.com/releases/2022/01/11/security-release-gitlab-14-6-2-released/
https://about.gitlab.com/releases/2022/01/04/gitlab-14-6-1-released/

grew_graph.ml 38.8 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11
12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17
18
open Grew_types

pj2m's avatar
pj2m committed
19
20
21
22
open Grew_edge
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
23
(* ================================================================================ *)
24
module P_deco = struct
bguillaum's avatar
bguillaum committed
25
26
27
28
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
29
30

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
31
end (* module P_deco *)
pj2m's avatar
pj2m committed
32

bguillaum's avatar
bguillaum committed
33
(* ================================================================================ *)
34
35
module P_graph = struct
  type t = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
36

37
  let empty = Pid_map.empty
38

39
  let find = Pid_map.find
pj2m's avatar
pj2m committed
40

41
  let pid_name_list t = Pid_map.fold (fun _ node acc -> (P_node.get_name node)::acc) t []
42

Bruno Guillaume's avatar
Bruno Guillaume committed
43
44
45
46
47
48
49
50
51
52
53
  let to_json ?domain t =
    `List (
      Pid_map.fold
        (fun pid p_node acc ->
          (`Assoc [
            ("id", `String (Pid.to_string pid));
            ("node", P_node.to_json ?domain p_node)
          ]) :: acc
        ) t []
      )

54
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
55
  let map_add_edge map id_src label id_tar =
56
    let node_src =
pj2m's avatar
pj2m committed
57
      (* Not found can be raised when adding an edge from pos to neg *)
58
59
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
60
61
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
62

bguillaum's avatar
bguillaum committed
63
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
64
  let build_filter ?domain table (ast_node, loc) =
65
    let pid = Id.build ~loc ast_node.Ast.node_id table in
bguillaum's avatar
bguillaum committed
66
    let fs = P_fs.build ?domain ast_node.Ast.fs in
67
68
    (pid, fs)

bguillaum's avatar
bguillaum committed
69
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
70
  let build ?domain ?pat_vars ?(locals=[||]) (full_node_list : Ast.node list) full_edge_list =
bguillaum's avatar
bguillaum committed
71

72
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
73
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
74
    let rec insert (ast_node, loc) = function
bguillaum's avatar
bguillaum committed
75
      | [] -> [P_node.build ?domain ?pat_vars (ast_node, loc)]
76
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
bguillaum's avatar
bguillaum committed
77
        begin
bguillaum's avatar
bguillaum committed
78
          try (node_id, P_node.unif_fs (P_fs.build ?domain ?pat_vars ast_node.Ast.fs) fs) :: tail
bguillaum's avatar
bguillaum committed
79
80
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
81
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
82

83
    let (named_nodes : (Id.name * P_node.t) list) =
84
      List.fold_left
85
86
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
87

pj2m's avatar
pj2m committed
88
89
90
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

91
92
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
93
94

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
95
96
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
bguillaum's avatar
bguillaum committed
97
      Pid_map.empty node_list in
98

bguillaum's avatar
bguillaum committed
99
    let (map : t) =
pj2m's avatar
pj2m committed
100
      List.fold_left
bguillaum's avatar
bguillaum committed
101
102
103
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
bguillaum's avatar
bguillaum committed
104
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
105
106
107
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
108
              (P_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
109
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
110
111
          )
        ) map_without_edges full_edge_list in
112
    (map, pos_table)
bguillaum's avatar
bguillaum committed
113

114

bguillaum's avatar
bguillaum committed
115
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
116
117
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
118
  type extension = {
bguillaum's avatar
bguillaum committed
119
120
121
    ext_map: P_node.t Pid_map.t; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: P_node.t Pid_map.t; (* a partial map for new constraints on old nodes "Old [...]" *)
  }
122

bguillaum's avatar
bguillaum committed
123
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
124
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
bguillaum's avatar
bguillaum committed
125
  let build_extension ?domain ?pat_vars ?(locals=[||]) pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
126

bguillaum's avatar
bguillaum committed
127
    let built_nodes = List.map (P_node.build ?domain ?pat_vars) full_node_list in
pj2m's avatar
pj2m committed
128

129
130
    let (old_nodes, new_nodes) =
      List.partition
131
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
132
        built_nodes in
bguillaum's avatar
bguillaum committed
133

pj2m's avatar
pj2m committed
134
135
136
137
138
139
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
140
141
142
143

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
144
145
146
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
147
148
149

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
150
151
152
153
154
155
156
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
157

158
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
159
      List.fold_left
bguillaum's avatar
bguillaum committed
160
        (fun acc (ast_edge, loc) ->
161
162
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
163
164
          let i1 =
            match Id.build_opt src pos_table with
bguillaum's avatar
bguillaum committed
165
              | Some i -> Pid.Pos i
166
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
167
168
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
169
              | Some i -> Pid.Pos i
170
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
bguillaum's avatar
bguillaum committed
171
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
172
173
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
bguillaum's avatar
bguillaum committed
174
            | None -> Log.fbug "[GRS] [Graph.build_extension] add_edge cannot fail in pattern extension"; exit 2
bguillaum's avatar
bguillaum committed
175
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
176
177
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

178
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
179
180
181
182
183
184
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
185
      Pid_map.fold
bguillaum's avatar
bguillaum committed
186
187
188
189
190
191
192
193
194
195
196
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
        ) graph Pid_set.empty in
pj2m's avatar
pj2m committed
197
198

    let roots =
199
      Pid_map.fold
bguillaum's avatar
bguillaum committed
200
201
202
203
204
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
        ) graph [] in
205

pj2m's avatar
pj2m committed
206
207
    (!tree_prop, roots)

208
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
209
  let roots graph = snd (tree_and_roots graph)
210
end (* module P_graph *)
pj2m's avatar
pj2m committed
211

bguillaum's avatar
bguillaum committed
212
(* ================================================================================ *)
213
214
module G_deco = struct
  type t = {
215
216
    nodes: (Gid.t * (string * string list)) list;  (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    edges: (Gid.t * G_edge.t * Gid.t) list;        (* an edge list *)
217
218
219
220
  }

  let empty = {nodes=[]; edges=[]}
end (* module G_deco *)
221

bguillaum's avatar
bguillaum committed
222
(* ================================================================================ *)
223
module G_graph = struct
224
  type t = {
bguillaum's avatar
bguillaum committed
225
226
    meta: string list;                       (* meta-informations *)
    map: G_node.t Gid_map.t;                 (* node description *)
bguillaum's avatar
bguillaum committed
227
    fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
228
    highest_index: int;                      (* the next free integer index *)
229
230
  }

bguillaum's avatar
bguillaum committed
231
  let empty = {meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; }
232

bguillaum's avatar
bguillaum committed
233
  (* ---------------------------------------------------------------------- *)
234
235
236
237
238
239
240
241
242
243
  let rename mapping graph =
    {graph with map =
        Gid_map.fold
          (fun id node acc ->
            let new_id = try List.assoc id mapping with Not_found -> id in
            let new_node = G_node.rename mapping node in
            Gid_map.add new_id new_node acc
          ) graph.map Gid_map.empty
    }

bguillaum's avatar
bguillaum committed
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
  exception Not_a_tree
  type tree = T of (Gid.t * tree list)

  let rec tree_to_string = function
  | T (gid, daughters) ->
    sprintf "%s [%s]"
      (Gid.to_string gid)
      ((String.concat) ";" (List.map tree_to_string daughters))

  let graph_to_tree (g : t) : tree =
    let rec build_sub_tree map forest gid =
      if List.mem_assoc gid forest
      then (map, forest)
      else
        let (new_map, daugthers, new_forest) = Massoc_gid.fold
          (fun (acc_map, sub_trees, acc_forest) gid2 edge ->
            if edge = G_edge.sub
            then
              (* ensure that gid2 is in forest *)
              let (new_acc_map, new_acc_forest) = build_sub_tree acc_map acc_forest gid2 in
              let sub = List.assoc gid2 new_acc_forest in
              ( new_acc_map,
                sub::sub_trees,
                List.remove_assoc gid2 new_acc_forest
              )
            else (acc_map, sub_trees, acc_forest)
        ) (map,[],forest) (G_node.get_next (Gid_map.find gid map)) in
      (Gid_map.remove gid new_map, (gid, T (gid, List.rev daugthers))::new_forest) in

    let rec loop (unused_map, forest) =
      match (Gid_map.is_empty unused_map, forest) with
      | (true, [(_,tree)]) -> tree
      | (true, _) -> raise Not_a_tree
      | _ ->
        (* pick one unused node *)
        let (gid,_) = Gid_map.choose unused_map in
        loop (build_sub_tree unused_map forest gid) in
    loop (g.map, [])

bguillaum's avatar
bguillaum committed
283
  let get_highest g = g.highest_index
284
285
286
287

  let find node_id graph = Gid_map.find node_id graph.map

  let equals t t' = Gid_map.equal (fun node1 node2 -> node1 = node2) t.map t'.map
288

bguillaum's avatar
bguillaum committed
289
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
290

291
292
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
293

bguillaum's avatar
bguillaum committed
294
  (* is there an edge e out of node i ? *)
bguillaum's avatar
bguillaum committed
295
  let edge_out ?domain graph node_id label_cst =
296
    let node = Gid_map.find node_id graph.map in
bguillaum's avatar
bguillaum committed
297
    Massoc_gid.exists (fun _ e -> Label_cst.match_ ?domain label_cst e) (G_node.get_next node)
298

bguillaum's avatar
bguillaum committed
299
300
301
302
303
304
  let get_annot_info graph =
    let annot_info =
      Gid_map.fold
        (fun _ node acc ->
          match (G_node.get_annot_info node, acc) with
            | (None,_) -> acc
bguillaum's avatar
bguillaum committed
305
            | (Some f, None) -> Some (f,G_node.get_position node)
bguillaum's avatar
bguillaum committed
306
307
308
309
310
311
            | (Some _, Some _) -> Error.build "[G_node.get_annot_info] Two nodes with annot info"
        ) graph.map None in
    match annot_info with
      | Some x -> x
      | None -> Error.build "[G_node.get_annot_info] No nodes with annot info"

bguillaum's avatar
bguillaum committed
312
  (* -------------------------------------------------------------------------------- *)
313
  let map_add_edge_opt map id_src label id_tar =
314
    let node_src =
315
      (* Not found can be raised when adding an edge from pos to neg *)
316
      try Gid_map.find id_src map with Not_found -> G_node.empty in
317
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
318
319
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
320

321
322
323
324
325
326
327
  (* -------------------------------------------------------------------------------- *)
  let map_add_edge map id_src label id_tar =
    let node_src = Gid_map.find id_src map in
    match G_node.add_edge label id_tar node_src with
      | Some new_node -> Gid_map.add id_src new_node map
      | None -> Log.fbug "[Graph.map_add_edge] duplicate"; exit 2

328
  (* -------------------------------------------------------------------------------- *)
329
  let add_edge graph id_src label id_tar =
330
    match map_add_edge_opt graph.map id_src label id_tar with
331
332
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
333

bguillaum's avatar
bguillaum committed
334
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
335
  let build ?domain ?(grewpy=false) ?(locals=[||]) gr_ast =
336
337
338
339
    let full_node_list =
      if grewpy
      then List.sort (Ast.grewpy_compare) gr_ast.Ast.nodes
      else gr_ast.Ast.nodes
340
    and full_edge_list = gr_ast.Ast.edges in
341

342

bguillaum's avatar
bguillaum committed
343
344
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
345

bguillaum's avatar
bguillaum committed
346
347
348
349
350
      | (ast_node, loc)::tail ->
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
351
352
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some index) tail in
          let succ = if tail = [] then None else Some (index+1) in
353
          let new_node = G_node.build ?domain ?prec ?succ index (ast_node, loc) in
bguillaum's avatar
bguillaum committed
354
            (
355
              Gid_map.add index new_node new_tail,
bguillaum's avatar
bguillaum committed
356
357
              (node_id,index)::table
            ) in
358

bguillaum's avatar
bguillaum committed
359
    let (map_without_edges, table) = loop [] 0 None full_node_list in
360

361
362
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
363
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
364
365
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
bguillaum's avatar
bguillaum committed
366
          let edge = G_edge.build ?domain (ast_edge, loc) in
367
          (match map_add_edge_opt acc i1 edge i2 with
bguillaum's avatar
bguillaum committed
368
369
            | Some g -> g
            | None -> Error.build "[GRS] [Graph.build] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
370
              (G_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
371
              (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
372
373
          )
        ) map_without_edges full_edge_list in
374

bguillaum's avatar
bguillaum committed
375
    {meta=gr_ast.Ast.meta; map=map; fusion = []; highest_index = (List.length full_node_list) -1}
bguillaum's avatar
bguillaum committed
376

bguillaum's avatar
bguillaum committed
377
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
378
  let of_conll ?domain conll =
379

380
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
bguillaum's avatar
bguillaum committed
381

382
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), Conll.Id.to_dot) in
383

384
385
386
387
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
bguillaum's avatar
bguillaum committed
388
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec last) Gid_map.empty
389
390
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
bguillaum's avatar
bguillaum committed
391
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec ~succ:(index+1) line)
392
          (loop (index+1) (Some index) tail) in
393
394

    let map_without_edges = loop 0 None sorted_lines in
395

bguillaum's avatar
bguillaum committed
396
    let map_with_edges =
bguillaum's avatar
bguillaum committed
397
398
      List.fold_left
        (fun acc line ->
399
400
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
401
402
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
403
              let gov_id = Id.gbuild ~loc gov gtable in
bguillaum's avatar
bguillaum committed
404
              let edge = G_edge.make ?domain ~loc dep_lab in
405
              (match map_add_edge_opt acc2 gov_id edge dep_id with
bguillaum's avatar
bguillaum committed
406
407
                | Some g -> g
                | None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
bguillaum's avatar
bguillaum committed
408
                  (G_edge.to_string ?domain edge)
409
                  (Loc.to_string loc)
bguillaum's avatar
bguillaum committed
410
              )
411
            ) acc line.Conll.deps
412
        ) map_without_edges conll.Conll.lines in
bguillaum's avatar
bguillaum committed
413
414
415

      let fusion =
        List.map
416
          (fun {Conll.first; last; fusion; mw_line_num} ->
417
              let loc = Loc.file_opt_line_opt conll.Conll.file mw_line_num in
418
              (
419
                Id.gbuild ~loc (first,None) gtable,
420
                (
421
                  Id.gbuild ~loc (last, None) gtable,
422
423
                  fusion
                )
bguillaum's avatar
bguillaum committed
424
              )
425
          ) conll.Conll.multiwords in
bguillaum's avatar
bguillaum committed
426

bguillaum's avatar
bguillaum committed
427
    {meta = conll.Conll.meta; map=map_with_edges; fusion; highest_index= (List.length sorted_lines) -1 }
bguillaum's avatar
bguillaum committed
428

429
430
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
bguillaum's avatar
bguillaum committed
431
  let of_brown ?domain ?sentid brown =
432
    let units = Str.split (Str.regexp " ") brown in
433
      let conll_lines = List.mapi
434
      (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
435
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
436
        let pos = String.sub pos 1 ((String.length pos)-2) in
437
        let feats = match (i,sentid) with
438
439
          | (0,Some id) -> [("sentid", id)]
          | _ -> [] in
440
        Conll.build_line ~id:(i+1,None) ~form ~lemma ~xpos:pos ~feats ~deps:([(i, "SUC")]) ()
441
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\") in >>>%s<<<" item brown
Bruno Guillaume's avatar
Bruno Guillaume committed
442
      ) units in
bguillaum's avatar
bguillaum committed
443
    of_conll ?domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
444

445
  (* -------------------------------------------------------------------------------- *)
446
447
448
  let of_pst ?domain pst =
    let cpt = ref 0 in
    let get_pos () = incr cpt; !cpt - 1 in
449

450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
    let leaf_list = ref [] in

    let rec loop nodes = function
    | Ast.Leaf (loc, phon) ->
      let fresh_id = get_pos () in
      let node = G_node.pst_leaf ~loc ?domain phon fresh_id in
      leaf_list := fresh_id :: ! leaf_list;
      (fresh_id, Gid_map.add fresh_id node nodes)

    | Ast.T (loc, cat, daughters) ->
      let fresh_id = get_pos () in
      let new_node = G_node.pst_node ~loc ?domain cat fresh_id in
      let with_mother = Gid_map.add fresh_id new_node nodes in
      let new_nodes = List.fold_left
        (fun map daughter ->
          let (daughter_id, new_map) = loop map daughter in
          map_add_edge new_map fresh_id G_edge.sub daughter_id
        ) with_mother daughters in
      (fresh_id, new_nodes) in

    let (_,map) = loop Gid_map.empty pst in

    let rec prec_loop map = function
    | [] | [_] -> map
    | n1 :: n2 :: tail ->
      let new_map = prec_loop map (n2 :: tail) in

477
478
479
480
481
482
483
      let node1 = Gid_map.find n1 new_map
      and node2 = Gid_map.find n2 new_map in
      new_map
      |> (Gid_map.add n1 (G_node.set_succ n2 node1))
      |> (Gid_map.add n2 (G_node.set_prec n1 node2)) in

    {meta=[]; map=prec_loop map (List.rev !leaf_list); fusion = []; highest_index = !cpt}
bguillaum's avatar
bguillaum committed
484

bguillaum's avatar
bguillaum committed
485
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
486
  let del_edge ?domain ?edge_ident loc graph id_src label id_tar =
487
    let node_src =
488
      try Gid_map.find id_src graph.map
489
      with Not_found ->
490
        match edge_ident with
bguillaum's avatar
bguillaum committed
491
492
          | None -> Log.fcritical "[RUN] Some edge refers to a dead node, please report"
          | Some id -> Error.run ~loc "[Graph.del_edge] cannot find source node of edge \"%s\"" id in
493
    try {graph with map = Gid_map.add id_src (G_node.remove id_tar label node_src) graph.map}
bguillaum's avatar
bguillaum committed
494
    with Not_found -> Error.run ~loc "[Graph.del_edge] cannot find edge '%s'" (G_edge.to_string ?domain label)
pj2m's avatar
pj2m committed
495

bguillaum's avatar
bguillaum committed
496
  (* -------------------------------------------------------------------------------- *)
497
  let del_node graph node_id =
498
499
500
501
502
503
504
    let map_wo_node =
      Gid_map.fold
        (fun id value acc ->
          if id = node_id
          then acc
          else Gid_map.add id (G_node.remove_key node_id value) acc
        ) graph.map Gid_map.empty in
505
    let node = Gid_map.find node_id graph.map in
506
507
508
509
    let new_map =
      match (G_node.get_prec node, G_node.get_succ node) with
      | (Some id_prec, Some id_succ) ->
        begin
510
511
          let prec = Gid_map.find id_prec map_wo_node
          and succ = Gid_map.find id_succ map_wo_node in
512
513
514
515
516
517
          map_wo_node
          |> (Gid_map.add id_prec (G_node.set_succ id_succ prec))
          |> (Gid_map.add id_succ (G_node.set_prec id_prec succ))
        end
      | (Some id_prec, None) ->
        begin
518
          let prec = Gid_map.find id_prec map_wo_node in
519
520
521
522
523
          map_wo_node
          |> (Gid_map.add id_prec (G_node.remove_succ prec))
        end
      | (None, Some id_succ) ->
        begin
524
          let succ = Gid_map.find id_succ map_wo_node in
525
526
527
528
529
          map_wo_node
          |> (Gid_map.add id_succ (G_node.remove_prec succ))
        end
      | (None, None) -> map_wo_node in
    { graph with map = new_map }
pj2m's avatar
pj2m committed
530

bguillaum's avatar
bguillaum committed
531
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
532
  let add_neighbour loc ?domain graph node_id label = failwith "no more add_neighbour"
pj2m's avatar
pj2m committed
533

bguillaum's avatar
bguillaum committed
534
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
535
  let insert ?domain id1 id2 graph =
bguillaum's avatar
bguillaum committed
536
537
538
539
540
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
    let pos1 = G_node.get_position node1 in
    let pos2 = G_node.get_position node2 in
    let new_pos= (pos1 +. pos2) /. 2. in
541
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
542
    let map = graph.map
bguillaum's avatar
bguillaum committed
543
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~prec:id1 ~succ:id2 new_pos))
bguillaum's avatar
bguillaum committed
544
545
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
546
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
547
548

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
549
  let append ?domain id graph =
bguillaum's avatar
bguillaum committed
550
551
552
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos +. 1. in
553
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
554
    let map = graph.map
bguillaum's avatar
bguillaum committed
555
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~prec:id new_pos))
bguillaum's avatar
bguillaum committed
556
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
557
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
558
559

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
560
  let prepend ?domain id graph =
bguillaum's avatar
bguillaum committed
561
562
563
    let node = Gid_map.find id graph.map in
    let pos = G_node.get_position node in
    let new_pos= pos -. 1. in
564
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
565
    let map = graph.map
bguillaum's avatar
bguillaum committed
566
      |> (Gid_map.add new_gid (G_node.fresh ?domain ~succ:id new_pos))
bguillaum's avatar
bguillaum committed
567
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
568
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
569
570

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
571
  let add_after loc ?domain node_id graph =
bguillaum's avatar
bguillaum committed
572
573
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
bguillaum's avatar
bguillaum committed
574
575
    | Some gid_succ -> insert ?domain node_id gid_succ graph
    | None -> append ?domain node_id graph
bguillaum's avatar
bguillaum committed
576
577

  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
578
  let add_before loc ?domain node_id graph =
bguillaum's avatar
bguillaum committed
579
580
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
bguillaum's avatar
bguillaum committed
581
582
    | Some gid_prec -> insert ?domain gid_prec node_id graph
    | None -> prepend ?domain node_id graph
bguillaum's avatar
bguillaum committed
583

bguillaum's avatar
bguillaum committed
584
  (* -------------------------------------------------------------------------------- *)
585
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
bguillaum's avatar
bguillaum committed
586
  let shift_out loc ?domain src_gid tar_gid label_cst graph =
587
    let src_node = Gid_map.find src_gid graph.map in
588
    let tar_node = Gid_map.find tar_gid graph.map in
589

590
591
592
593
594
595
596
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

    (* Error if a loop is created by the shift_out *)
    let src_tar_edges = Massoc_gid.assoc tar_gid src_next in
    let _ =
      try
bguillaum's avatar
bguillaum committed
597
598
        let loop_edge = List.find (fun edge -> Label_cst.match_ ?domain label_cst edge) src_tar_edges in
        Error.run ~loc "The shfit_out command tries to build a loop (with label %s)" (Label.to_string ?domain loop_edge)
599
600
601
602
603
      with Not_found -> () in

    let (new_src_next,new_tar_next) =
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
bguillaum's avatar
bguillaum committed
604
        if Label_cst.match_ ?domain label_cst edge
605
606
607
        then
          match Massoc_gid.add next_gid edge acc_tar_next with
          | Some new_acc_tar_next -> (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
bguillaum's avatar
bguillaum committed
608
          | None -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
609
610
611
612

        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
613

614
    { graph with map =
615
616
617
      graph.map
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node))
618
    }
pj2m's avatar
pj2m committed
619

bguillaum's avatar
bguillaum committed
620
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
621
  let shift_in loc ?domain src_gid tar_gid label_cst graph =
622
    let tar_node = Gid_map.find tar_gid graph.map in
623
    let tar_next = G_node.get_next tar_node in
624

625
626
627
628
    (* Error if a loop is created by the shift_in *)
    let tar_src_edges = Massoc_gid.assoc src_gid tar_next in
    let _ =
      try
bguillaum's avatar
bguillaum committed
629
630
        let loop_edge = List.find (fun edge -> Label_cst.match_ ?domain label_cst edge) tar_src_edges in
        Error.run ~loc "The [shift_in] command tries to build a loop (with label \"%s\")" (Label.to_string ?domain loop_edge)
631
632
633
      with Not_found -> () in

    { graph with map =
634
635
        Gid_map.mapi
          (fun node_id node ->
636
637
638
639
640
641
642
643
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
bguillaum's avatar
bguillaum committed
644
                if Label_cst.match_ ?domain label_cst edge
645
646
                then
                  match List_.usort_insert edge acc_node_tar_edges with
bguillaum's avatar
bguillaum committed
647
                  | None -> Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (Label.to_string ?domain edge)
648
649
650
651
652
653
654
655
656
                  | Some l -> (List_.usort_remove edge acc_node_src_edges, l)
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
657
658
          ) graph.map
    }
659

bguillaum's avatar
bguillaum committed
660
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
661
  let shift_edges loc ?domain src_gid tar_gid label_cst graph =
662
    graph
bguillaum's avatar
bguillaum committed
663
664
    |> (shift_in loc ?domain src_gid tar_gid label_cst)
    |> (shift_out loc ?domain src_gid tar_gid label_cst)
pj2m's avatar
pj2m committed
665

bguillaum's avatar
bguillaum committed
666
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
667
668
  let merge_node loc ?domain graph src_gid tar_gid =
    let se_graph = shift_edges loc ?domain src_gid tar_gid Label_cst.all graph in
pj2m's avatar
pj2m committed
669

670
671
    let src_node = Gid_map.find src_gid se_graph.map in
    let tar_node = Gid_map.find tar_gid se_graph.map in
672

673
    match G_fs.unif (G_node.get_fs src_node) (G_node.get_fs tar_node) with
bguillaum's avatar
bguillaum committed
674
675
676
      | Some new_fs ->
        Some {graph with map =
            (Gid_map.add
bguillaum's avatar
bguillaum committed
677
               tar_gid
678
               (G_node.set_fs new_fs tar_node)
bguillaum's avatar
bguillaum committed
679
               (Gid_map.remove src_gid se_graph.map)
bguillaum's avatar
bguillaum committed
680
681
682
            )
             }
      | None -> None
bguillaum's avatar
bguillaum committed
683

bguillaum's avatar
bguillaum committed
684
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
685
  let set_feat ?loc ?domain graph node_id feat_name new_value =
686
    let node = Gid_map.find node_id graph.map in
687
688
689
690
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
bguillaum's avatar
bguillaum committed
691
          let new_fs = G_fs.set_feat ?loc ?domain feat_name new_value (G_node.get_fs node) in
692
          (G_node.set_fs new_fs node) in
693
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
694

bguillaum's avatar
bguillaum committed
695
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
696
  let update_feat ?loc ?domain graph tar_id tar_feat_name item_list =
697
698
699
    let strings_to_concat =
      List.map
        (function
700
701
702
          | Concat_item.Feat (node_gid, "position") ->
            let node = Gid_map.find node_gid graph.map in
            sprintf "%g" (G_node.get_position node)
bguillaum's avatar
bguillaum committed
703
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
704
705
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
706
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
707
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
708
            )
bguillaum's avatar
bguillaum committed
709
          | Concat_item.String s -> s
710
711
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
bguillaum's avatar
bguillaum committed
712
    (set_feat ?loc ?domain graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
713

bguillaum's avatar
bguillaum committed
714
  (* -------------------------------------------------------------------------------- *)
715
  let del_feat graph node_id feat_name =
716
    let node = Gid_map.find node_id graph.map in
717
    let new_fs = G_fs.del_feat feat_name (G_node.get_fs node) in
718
    { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
719

bguillaum's avatar
bguillaum committed
720
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
721
  let to_gr ?domain graph =
722
723
724

    let gr_id id = G_node.get_name id (Gid_map.find id graph.map) in

725
    let buff = Buffer.create 32 in
726

727
728
    bprintf buff "graph {\n";

729
730
    (* meta data *)
    List.iter
731
732
      (fun (s) ->
        bprintf buff "  %s;\n" s
733
734
      ) graph.meta;

bguillaum's avatar
bguillaum committed
735
    (* nodes *)
736
737
738
739
740
741
742
743
    let nodes = Gid_map.fold
      (fun id node acc ->
        if G_node.is_conll_root node
        then acc
        else (id,node)::acc
      ) graph.map [] in

    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
744

745
746
    List.iter
      (fun (id,node) ->
747
        bprintf buff "  %s %s;\n" (gr_id id) (G_node.to_gr node)
748
      ) sorted_nodes;
749

bguillaum's avatar
bguillaum committed
750
    (* edges *)
751
752
    List.iter
      (fun (id,node) ->
bguillaum's avatar
bguillaum committed
753
754
        Massoc_gid.iter
          (fun tar edge ->
755
            bprintf buff "  %s -[%s]-> %s;\n" (gr_id id) (G_edge.to_string ?domain edge) (gr_id tar)
bguillaum's avatar
bguillaum committed
756
          ) (G_node.get_next node)
757
      ) sorted_nodes;
758

759
760
761
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
762
  (* -------------------------------------------------------------------------------- *)
763
  let to_sentence ?main_feat graph =
764
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
765
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
766
767
768
769
770
771
772
773
774
775

    let words = List.map
      (fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
      ) snodes in
    List.fold_left
      (fun acc (regexp,repl) ->
        Str.global_replace (Str.regexp_string regexp) repl acc
      )
      (String.concat " " words)
      [
bguillaum's avatar
bguillaum committed
776
777
        " -t-", "-t-";
        "_-_", "-";
778
779
        "_", " ";
        "' ", "'";
780
781
        " ,", ",";
        " .", ".";
782
783
784
785
        "( ", "(";
        " )", ")";
        "\\\"", "\"";
      ]
786

bguillaum's avatar
bguillaum committed
787
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
788
  let to_dep ?domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
bguillaum's avatar
bguillaum committed
789
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
790
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
791

792
793
794
795
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

bguillaum's avatar
bguillaum committed
796
    (* nodes *)
797
    List.iter
798
      (fun (id, node) ->
799
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
800
        let fs = G_node.get_fs node in
801
802
803
804
        let dep_fs = G_fs.to_dep ~decorated_feat ~