grew_graph.ml 59.6 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
Bruno Guillaume's avatar
Bruno Guillaume committed
4
(*    Copyright 2011-2018 Inria, Université de Lorraine                           *)
bguillaum's avatar
bguillaum committed
5
(*                                                                                *)
Bruno Guillaume's avatar
Bruno Guillaume committed
6
(*    Webpage: http://grew.fr                                                     *)
bguillaum's avatar
bguillaum committed
7 8 9 10
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11 12
open Printf
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

15
open Grew_base
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17 18
open Grew_types

pj2m's avatar
pj2m committed
19
open Grew_edge
20
open Grew_domain
pj2m's avatar
pj2m committed
21 22 23
open Grew_fs
open Grew_node

bguillaum's avatar
bguillaum committed
24
(* ================================================================================ *)
25
module P_deco = struct
bguillaum's avatar
bguillaum committed
26 27 28 29
  type t = {
    nodes: Pid.t list;
    edges: (Pid.t * P_edge.t * Pid.t) list;
  }
pj2m's avatar
pj2m committed
30 31

  let empty = {nodes=[]; edges=[]}
bguillaum's avatar
bguillaum committed
32
end (* module P_deco *)
pj2m's avatar
pj2m committed
33

bguillaum's avatar
bguillaum committed
34
(* ================================================================================ *)
35
module P_graph = struct
36
  type map = P_node.t Pid_map.t
pj2m's avatar
pj2m committed
37

38 39 40 41
  type t = {
    map: map;
    pivot: Pid.t option;
  }
42

43
  let empty = { map = Pid_map.empty; pivot = None }
pj2m's avatar
pj2m committed
44

45 46 47
  let find pid t = Pid_map.find pid t.map

  let pid_name_list t = Pid_map.fold (fun _ node acc -> (P_node.get_name node)::acc) t.map []
48

49 50 51 52 53 54 55 56
  let to_json ?domain t =
    `List (
      Pid_map.fold
        (fun pid p_node acc ->
          (`Assoc [
            ("id", `String (Pid.to_string pid));
            ("node", P_node.to_json ?domain p_node)
          ]) :: acc
57
        ) t.map []
58 59
      )

60
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
61
  let map_add_edge map id_src label id_tar =
62
    let node_src =
pj2m's avatar
pj2m committed
63
      (* Not found can be raised when adding an edge from pos to neg *)
64 65
      try Pid_map.find id_src map with Not_found -> P_node.empty in
    match P_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
66 67
      | None -> None
      | Some new_node -> Some (Pid_map.add id_src new_node map)
pj2m's avatar
pj2m committed
68

bguillaum's avatar
bguillaum committed
69
  (* -------------------------------------------------------------------------------- *)
70 71 72
  let build ?domain lexicons pivot basic_ast =
      let (full_node_list : Ast.node list) = basic_ast.Ast.pat_nodes
      and full_edge_list = basic_ast.Ast.pat_edges in
bguillaum's avatar
bguillaum committed
73

74
    (* NB: insert searches for a previous node with the Same name and uses unification rather than constraint *)
75
    (* NB: insertion of new node at the end of the list: not efficient but graph building is not the hard part. *)
bguillaum's avatar
bguillaum committed
76
    let rec insert (ast_node, loc) = function
77
      | [] -> [P_node.build ?domain lexicons (ast_node, loc)]
78
      | (node_id,fs)::tail when ast_node.Ast.node_id = node_id ->
79
        begin
80
          try (node_id, P_node.unif_fs (P_fs.build ?domain lexicons ast_node.Ast.fs) fs) :: tail
81 82
          with Error.Build (msg,_) -> raise (Error.Build (msg,Some loc))
        end
83
      | head :: tail -> head :: (insert (ast_node, loc) tail) in
bguillaum's avatar
bguillaum committed
84

85
    let (named_nodes : (Id.name * P_node.t) list) =
86
      List.fold_left
87 88
        (fun acc ast_node -> insert ast_node acc)
        [] full_node_list in
89

pj2m's avatar
pj2m committed
90 91 92
    let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
    let (sorted_ids, node_list) = List.split sorted_nodes in

93 94
    (* [pos_table] contains the sorted list of node ids *)
    let pos_table = Array.of_list sorted_ids in
95 96

    (* the nodes, in the same order *)
bguillaum's avatar
bguillaum committed
97 98
    let map_without_edges = List_.foldi_left
      (fun i acc elt -> Pid_map.add (Pid.Pos i) elt acc)
99
      Pid_map.empty node_list in
100

101
    let (map : map) =
pj2m's avatar
pj2m committed
102
      List.fold_left
bguillaum's avatar
bguillaum committed
103 104 105
        (fun acc (ast_edge, loc) ->
          let i1 = Id.build ~loc ast_edge.Ast.src pos_table in
          let i2 = Id.build ~loc ast_edge.Ast.tar pos_table in
bguillaum's avatar
bguillaum committed
106
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
107 108
          (match map_add_edge acc (Pid.Pos i1) edge (Pid.Pos i2) with
            | Some g -> g
Bruno Guillaume's avatar
Bruno Guillaume committed
109
            | None -> Error.build ~loc "[Graph.build] try to build a graph with twice the same edge %s"
bguillaum's avatar
bguillaum committed
110
              (P_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
111 112
          )
        ) map_without_edges full_edge_list in
113 114
        let pivot = CCOpt.map (fun x -> Pid.Pos (Id.build x pos_table)) pivot in
    ({map; pivot}, pos_table)
bguillaum's avatar
bguillaum committed
115

116

bguillaum's avatar
bguillaum committed
117
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
118 119
  (* a type for extension of graph (a former graph exists):
     in grew the former is a positive basic and an extension is a negative basic ("without") *)
120
  type extension = {
121 122
    ext_map: map; (* node description for new nodes and for edge "Old -> New"  *)
    old_map: map; (* a partial map for new constraints on old nodes "Old [...]" *)
bguillaum's avatar
bguillaum committed
123
  }
124

bguillaum's avatar
bguillaum committed
125
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
126
  (* It may raise [P_fs.Fail_unif] in case of contradiction on constraints *)
127
  let build_extension ?domain lexicons pos_table full_node_list full_edge_list =
pj2m's avatar
pj2m committed
128

129
    let built_nodes = List.map (P_node.build ?domain lexicons) full_node_list in
pj2m's avatar
pj2m committed
130

131 132
    let (old_nodes, new_nodes) =
      List.partition
133
        (function (id,_) when Array_.dicho_mem id pos_table -> true | _ -> false)
134
        built_nodes in
bguillaum's avatar
bguillaum committed
135

pj2m's avatar
pj2m committed
136 137 138 139 140 141
    let new_sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) new_nodes in

    let (new_sorted_ids, new_node_list) = List.split new_sorted_nodes in

    (* table contains the sorted list of node ids *)
    let new_table = Array.of_list new_sorted_ids in
142 143 144 145

    (* the nodes, in the same order stored with index -1, -2, ... -N *)
    let ext_map_without_edges =
      List_.foldi_left
bguillaum's avatar
bguillaum committed
146 147 148
        (fun i acc elt -> Pid_map.add (Pid.Neg i) elt acc)
        Pid_map.empty
        new_node_list in
149 150 151

    let old_map_without_edges =
      List.fold_left
bguillaum's avatar
bguillaum committed
152 153 154 155 156 157 158
        (fun acc (id,node) ->
          let pid_pos = Pid.Pos (Array_.dicho_find id pos_table) in
          try
            let old = Pid_map.find pid_pos acc in
            Pid_map.add pid_pos (P_node.unif_fs (P_node.get_fs node) old) acc
          with Not_found -> Pid_map.add pid_pos node acc
        ) Pid_map.empty old_nodes in
pj2m's avatar
pj2m committed
159

160
    let ext_map_with_all_edges =
pj2m's avatar
pj2m committed
161
      List.fold_left
bguillaum's avatar
bguillaum committed
162
        (fun acc (ast_edge, loc) ->
163 164
          let src = ast_edge.Ast.src
          and tar = ast_edge.Ast.tar in
bguillaum's avatar
bguillaum committed
165 166
          let i1 =
            match Id.build_opt src pos_table with
167
              | Some i -> Pid.Pos i
168
              | None -> Pid.Neg (Id.build ~loc src new_table) in
bguillaum's avatar
bguillaum committed
169 170
          let i2 =
            match Id.build_opt tar pos_table with
bguillaum's avatar
bguillaum committed
171
              | Some i -> Pid.Pos i
172
              | None -> Pid.Neg (Id.build ~loc tar new_table) in
bguillaum's avatar
bguillaum committed
173
          let edge = P_edge.build ?domain (ast_edge, loc) in
bguillaum's avatar
bguillaum committed
174 175
          match map_add_edge acc i1 edge i2 with
            | Some map -> map
176
            | None -> Error.bug "[Graph.build_extension] add_edge cannot fail in pattern extension"
bguillaum's avatar
bguillaum committed
177
        ) ext_map_without_edges full_edge_list in
pj2m's avatar
pj2m committed
178 179
    ({ext_map = ext_map_with_all_edges; old_map = old_map_without_edges}, new_table)

180
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
181 182 183 184 185 186
  (* [tree_and_roots t] returns:
     - a boolean which is true iff the each node has at most one in-edge
     - the list of "roots" (i.e. nodes without in-edge *)
  let tree_and_roots graph =
    let tree_prop = ref true in
    let not_root =
187
      Pid_map.fold
bguillaum's avatar
bguillaum committed
188 189 190 191 192 193 194 195 196 197
        (fun _ node acc ->
          Massoc_pid.fold
            (fun acc2 tar _ ->
              if !tree_prop
              then
                if Pid_set.mem tar acc2
                then (tree_prop := false; acc2)
                else Pid_set.add tar acc2
              else Pid_set.add tar acc2
            ) acc (P_node.get_next node)
198
        ) graph.map Pid_set.empty in
pj2m's avatar
pj2m committed
199 200

    let roots =
201
      Pid_map.fold
bguillaum's avatar
bguillaum committed
202 203 204 205
        (fun id _ acc ->
          if Pid_set.mem id not_root
          then acc
          else id::acc
206
        ) graph.map [] in
207

pj2m's avatar
pj2m committed
208 209
    (!tree_prop, roots)

210
  (* -------------------------------------------------------------------------------- *)
pj2m's avatar
pj2m committed
211
  let roots graph = snd (tree_and_roots graph)
212
end (* module P_graph *)
pj2m's avatar
pj2m committed
213

bguillaum's avatar
bguillaum committed
214
(* ================================================================================ *)
215
module G_deco = struct
216 217 218
  (* value is (f, Some g) for combined request "f=v/g=u" and (j, None) else *)
  type highlighted_feat = string * string option

219
  type t = {
220 221 222 223 224
    (* a list of (node, (pattern_id, features of nodes implied in the step)) *)
    nodes: (Gid.t * (string * highlighted_feat list)) list;
    (* an edge list *)
    edges: (Gid.t * G_edge.t * Gid.t) list;
    pivot: Gid.t option;
225 226
  }

227
  let empty = {nodes=[]; edges=[]; pivot=None;}
228
end (* module G_deco *)
229

bguillaum's avatar
bguillaum committed
230
(* ================================================================================ *)
231
module G_graph = struct
232 233 234 235 236 237 238
  type fusion_item = {
    first: Gid.t;
    last: Gid.t;
    word: string;
    efs: (string * string) list;
  }

Bruno Guillaume's avatar
Bruno Guillaume committed
239 240 241 242 243
  let shift_fusion_item n fusion_item = { fusion_item with
    first = fusion_item.first + n;
    last = fusion_item.last + n;
  }

244
  type t = {
245
    domain: Domain.t option;
246 247
    meta: string list;            (* meta-informations *)
    map: G_node.t Gid_map.t;      (* node description *)
248
    fusion: fusion_item list;     (* the list of fusion word considered in UD conll *)
249
    highest_index: int;           (* the next free integer index *)
250
    rules: int String_map.t;
251 252
  }

253 254 255 256 257 258
  let shift user_id n graph =
    { graph with
      fusion = List.map (shift_fusion_item n) graph.fusion;
      map = Gid_map.map_key_value (fun i -> i+n) (fun node -> G_node.shift user_id n node) graph.map;
      highest_index = graph.highest_index + n;
    }
Bruno Guillaume's avatar
Bruno Guillaume committed
259

260
  let empty = { domain=None; meta=[]; map=Gid_map.empty; fusion=[]; highest_index=0; rules=String_map.empty; }
261

Bruno Guillaume's avatar
Bruno Guillaume committed
262 263
  let is_empty t = Gid_map.is_empty t.map

264
  let get_domain t = t.domain
265

266 267
  let find node_id graph = Gid_map.find node_id graph.map

268
  let equals t t' = Gid_map.equal (=) t.map t'.map
269

bguillaum's avatar
bguillaum committed
270
  let node_exists fct t = Gid_map.exists (fun _ node -> fct node) t.map
271

272 273
  let fold_gid fct t init =
    Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
274

275 276 277 278 279 280
  let push_rule rule_name t =
    if !Global.track_rules
    then
      let old = try String_map.find rule_name t.rules with Not_found -> 0 in
      { t with rules = String_map.add rule_name (old+1) t.rules }
    else t
281

282 283 284 285 286
  let string_rules t =
    String_map.fold
      (fun k v acc ->
        sprintf "%s:%d; %s" k v acc
      ) t.rules ""
287

bguillaum's avatar
bguillaum committed
288
  (* is there an edge e out of node i ? *)
289 290
  let edge_out graph node_id label_cst =
    let domain = get_domain graph in
291
    let node = Gid_map.find node_id graph.map in
bguillaum's avatar
bguillaum committed
292
    Massoc_gid.exists (fun _ e -> Label_cst.match_ ?domain label_cst e) (G_node.get_next node)
293

bguillaum's avatar
bguillaum committed
294
  (* -------------------------------------------------------------------------------- *)
295
  let map_add_edge_opt map id_src label id_tar =
296
    let node_src =
297
      (* Not found can be raised when adding an edge from pos to neg *)
298
      try Gid_map.find id_src map with Not_found -> G_node.empty in
299
    match G_node.add_edge label id_tar node_src with
bguillaum's avatar
bguillaum committed
300 301
      | None -> None
      | Some new_node -> Some (Gid_map.add id_src new_node map)
302

303 304 305 306 307
  (* -------------------------------------------------------------------------------- *)
  let map_add_edge map id_src label id_tar =
    let node_src = Gid_map.find id_src map in
    match G_node.add_edge label id_tar node_src with
      | Some new_node -> Gid_map.add id_src new_node map
308
      | None -> Error.bug "[Graph.map_add_edge] duplicate edge"
309

310
  (* -------------------------------------------------------------------------------- *)
311
  let add_edge graph id_src label id_tar =
312
    match map_add_edge_opt graph.map id_src label id_tar with
313 314
      | Some new_map -> Some {graph with map = new_map }
      | None -> None
315

bguillaum's avatar
bguillaum committed
316
  (* -------------------------------------------------------------------------------- *)
317
  let build ?domain gr_ast =
318 319 320 321 322 323 324 325 326 327

    let (ordered_nodes, unordered_nodes) = List.fold_left
      (fun (orderd_acc, unordered_acc) (node,loc) ->
        match Id.get_pos node.Ast.node_id with
        | Some p -> ((p,(node,loc)) :: orderd_acc, unordered_acc)
        | None -> (orderd_acc, (node,loc) :: unordered_acc)
      ) ([],[]) gr_ast.Ast.nodes in

    let sorted_nodes = List.sort (fun (p1,_) (p2,_) -> Pervasives.compare p1 p2) ordered_nodes in

bguillaum's avatar
bguillaum committed
328 329
    let rec loop already_bound index prec = function
      | [] -> (Gid_map.empty,[])
330
      | (position, (ast_node, loc))::tail ->
bguillaum's avatar
bguillaum committed
331 332 333 334
        let node_id = ast_node.Ast.node_id in
        if List.mem node_id already_bound
        then Error.build ~loc "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
        else
335 336
          let (new_tail, table) = loop (node_id :: already_bound) (index+1) (Some index) tail in
          let succ = if tail = [] then None else Some (index+1) in
337
          let new_node = G_node.build ?domain ?prec ?succ ~position (ast_node, loc) in
bguillaum's avatar
bguillaum committed
338
            (
339
              Gid_map.add index new_node new_tail,
bguillaum's avatar
bguillaum committed
340 341
              (node_id,index)::table
            ) in
342

343 344 345 346 347 348 349 350 351 352 353 354 355 356
    let (map_with_ordered_nodes, table_ordered) = loop [] 0 None sorted_nodes in

    let (map_without_edges, table, final_index) =
      List.fold_left
        (fun (acc_map, acc_table, acc_index) (ast_node,loc) ->
          let node_id = ast_node.Ast.node_id in
          let new_node = G_node.build ?domain (ast_node,loc) in
          (
            Gid_map.add acc_index new_node acc_map,
            (node_id,acc_index)::acc_table,
            acc_index + 1
          )
      ) (map_with_ordered_nodes, table_ordered, List.length sorted_nodes) unordered_nodes in

357

358 359
    let map =
      List.fold_left
bguillaum's avatar
bguillaum committed
360
        (fun acc (ast_edge, loc) ->
bguillaum's avatar
bguillaum committed
361 362
          let i1 = List.assoc ast_edge.Ast.src table in
          let i2 = List.assoc ast_edge.Ast.tar table in
bguillaum's avatar
bguillaum committed
363
          let edge = G_edge.build ?domain (ast_edge, loc) in
364
          (match map_add_edge_opt acc i1 edge i2 with
bguillaum's avatar
bguillaum committed
365
            | Some g -> g
Bruno Guillaume's avatar
Bruno Guillaume committed
366
            | None -> Error.build ~loc "[Graph.build] try to build a graph with twice the same edge %s"
bguillaum's avatar
bguillaum committed
367
              (G_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
368
          )
369
        ) map_without_edges gr_ast.Ast.edges in
370

371 372 373 374
    {
      domain;
      meta=gr_ast.Ast.meta;
      map;
375
      fusion = [];
376
      highest_index = final_index - 1;
377
      rules = String_map.empty;
378
    }
379

380 381
  (* -------------------------------------------------------------------------------- *)
  let of_json = function
Bruno Guillaume's avatar
Bruno Guillaume committed
382
  | `Assoc (l : (string * Yojson.Basic.t) list) ->
383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
    let (ast_node_list, ast_edge_list) = List.fold_left
      (fun (acc_node, acc_edge) -> function
        | (id, `List [`Assoc feat_json_list; `List succ]) ->
          let fs = List.map (function
            | (feat_name, `String value) -> ({Ast.name= feat_name; kind = Ast.Equality [value]}, Loc.empty)
            | _ -> Error.build "[Graph.of_json] not an valid feature structure"
          ) feat_json_list in
          let new_edges = List.map
            (function
              | `List [`String rel; `String tar] -> ({Ast.edge_id=None; edge_label_cst=Ast.Pos_list [rel]; src=id; tar},Loc.empty)
              | _ -> Error.build "[Graph.of_json] not an valid succ list"
            ) succ in
          (
            ({ Ast.node_id=id; position=None; fs}, Loc.empty) :: acc_node,
            new_edges @ acc_edge
          )
        | _ -> Error.build "[Graph.of_json] not an assoc list"
      ) ([],[]) l in
      let graph_ast = { Ast.meta=[]; nodes=ast_node_list; edges=ast_edge_list}
402
      in build graph_ast
403 404
  | _ -> Error.build "[Graph.of_json] not an assoc list"

bguillaum's avatar
bguillaum committed
405
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
406
  let of_conll ?domain conll =
407
    let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
408

409
    (* [gtable] maps *)
410
    let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), Conll.Id.to_dot) in
411

412 413 414 415
    let rec loop index prec = function
      | [] -> Gid_map.empty
      | [last] ->
        let loc = Loc.file_opt_line conll.Conll.file last.Conll.line_num in
bguillaum's avatar
bguillaum committed
416
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec last) Gid_map.empty
417 418
      | line::tail ->
        let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
bguillaum's avatar
bguillaum committed
419
        Gid_map.add index (G_node.of_conll ?domain ~loc ?prec ~succ:(index+1) line)
420
          (loop (index+1) (Some index) tail) in
421 422

    let map_without_edges = loop 0 None sorted_lines in
423

424
    let map_with_edges =
bguillaum's avatar
bguillaum committed
425 426
      List.fold_left
        (fun acc line ->
427 428
          let loc = Loc.file_opt_line conll.Conll.file line.Conll.line_num in
          let dep_id = Id.gbuild ~loc line.Conll.id gtable in
429 430
          List.fold_left
            (fun acc2 (gov, dep_lab) ->
431
              let gov_id = Id.gbuild ~loc gov gtable in
432
              let edge = G_edge.from_string ?domain ~loc dep_lab in
433
              (match map_add_edge_opt acc2 gov_id edge dep_id with
bguillaum's avatar
bguillaum committed
434
                | Some g -> g
Bruno Guillaume's avatar
Bruno Guillaume committed
435
                | None -> Error.build ~loc "[Graph.of_conll] try to build a graph with twice the same edge %s"
bguillaum's avatar
bguillaum committed
436
                  (G_edge.to_string ?domain edge)
bguillaum's avatar
bguillaum committed
437
              )
438
            ) acc line.Conll.deps
439
        ) map_without_edges conll.Conll.lines in
440

441 442 443 444 445 446 447 448 449 450 451 452 453 454
      let fusion =
        List.map
          (fun {Conll.first; last; fusion; mw_line_num; mw_efs} ->
              let loc = Loc.file_opt_line_opt conll.Conll.file mw_line_num in
              (
                {
                  first = Id.gbuild ~loc (first,None) gtable;
                  last = Id.gbuild ~loc (last, None) gtable;
                  word = fusion;
                  efs = mw_efs;
                }
              )
          ) conll.Conll.multiwords in

455 456 457
      let (map_with_nl_nodes, free_index) =
        Conll_types.Int_map.fold
          (fun key mwe (acc, free_index) ->
458 459 460 461 462 463
            let kind = match mwe.Mwe.kind with Mwe.Ne -> "NE" | Mwe.Mwe -> "MWE" in
            let fs1 = G_fs.set_feat ?domain "kind" kind G_fs.empty in
            let fs2 = match mwe.Mwe.label with None -> fs1 | Some p -> G_fs.set_feat ?domain "label" p fs1 in
            let fs3 = match mwe.Mwe.mwepos with None -> fs2 | Some p -> G_fs.set_feat ?domain "mwepos" p fs2 in
            let fs4 = match mwe.Mwe.criterion with None -> fs3 | Some c -> G_fs.set_feat ?domain "criterion" c fs3 in

464
            let new_node = G_node.set_fs fs4 G_node.empty in
465

466 467 468
            (* add a new node *)
            let new_map_1 = (Gid_map.add free_index new_node acc) in
            (* add a link to the first component *)
469
            let new_map_2 = map_add_edge new_map_1 free_index (G_edge.from_string ?domain kind) (Id.gbuild (fst mwe.Mwe.first) gtable) in
470 471
            let new_map_3 = match snd mwe.Mwe.first with
            | None -> new_map_2
472
            | Some i -> map_add_edge new_map_2 free_index (G_edge.from_string ?domain (sprintf "%d" i)) (Id.gbuild (fst mwe.Mwe.first) gtable) in
473

474
            (* add a link to each other component *)
475 476
            let new_map_4 =
              Id_with_proj_set.fold (
477
                fun item acc2 ->
478
                  let tmp = map_add_edge acc2 free_index (G_edge.from_string ?domain kind) (Id.gbuild (fst item) gtable) in
479 480
                  match snd item with
                  | None -> tmp
481
                  | Some i -> map_add_edge tmp free_index (G_edge.from_string ?domain (sprintf "%d" i)) (Id.gbuild (fst item) gtable)
482
              ) mwe.Mwe.items new_map_3 in
483

484
            (new_map_4, free_index+1)
485 486
          ) conll.Conll.mwes (map_with_edges, List.length sorted_lines) in

487 488 489
    {
      domain;
      meta = conll.Conll.meta;
490
      map = map_with_nl_nodes;
491
      fusion;
492
      highest_index = free_index -1;
493
      rules = String_map.empty;
494
    }
bguillaum's avatar
bguillaum committed
495

496 497
  (* -------------------------------------------------------------------------------- *)
  (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
498 499 500

  let re = Str.regexp "/\\(ADJ\\|ADJWH\\|ADV\\|ADVWH\\|CC\\|CLO\\|CLR\\|CLS\\|CS\\|DET\\|DETWH\\|ET\\|I\\|NC\\|NPP\\|P\\|P\\+D\\|P\\+PRO\\|PONCT\\|PREF\\|PRO\\|PROREL\\|PROWH\\|V\\|VIMP\\|VINF\\|VPP\\|VPR\\|VS\\)/"

bguillaum's avatar
bguillaum committed
501
  let of_brown ?domain ?sentid brown =
502
    let units = Str.split (Str.regexp " ") brown in
503
      let conll_lines = List.mapi
504
      (fun i item -> match Str.full_split re item with
505
        | [Str.Text form; Str.Delim pos; Str.Text lemma] ->
506
        let pos = String.sub pos 1 ((String.length pos)-2) in
507
        Conll.build_line ~id:(i+1,None) ~form ~lemma ~xpos:pos ~feats:[] ~deps:([((i,None), "SUC")]) ()
508
        | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\") in >>>%s<<<" item brown
509
      ) units in
510
      let meta = match sentid with Some id -> ["# sent_id = "^id] | None -> [] in
511
    of_conll ?domain { Conll.file=None; meta; lines=conll_lines; multiwords=[]; mwes=Conll_types.Int_map.empty; }
512

513
  (* -------------------------------------------------------------------------------- *)
514 515 516
  let of_pst ?domain pst =
    let cpt = ref 0 in
    let get_pos () = incr cpt; !cpt - 1 in
517

518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
    let leaf_list = ref [] in

    let rec loop nodes = function
    | Ast.Leaf (loc, phon) ->
      let fresh_id = get_pos () in
      let node = G_node.pst_leaf ~loc ?domain phon fresh_id in
      leaf_list := fresh_id :: ! leaf_list;
      (fresh_id, Gid_map.add fresh_id node nodes)

    | Ast.T (loc, cat, daughters) ->
      let fresh_id = get_pos () in
      let new_node = G_node.pst_node ~loc ?domain cat fresh_id in
      let with_mother = Gid_map.add fresh_id new_node nodes in
      let new_nodes = List.fold_left
        (fun map daughter ->
          let (daughter_id, new_map) = loop map daughter in
          map_add_edge new_map fresh_id G_edge.sub daughter_id
        ) with_mother daughters in
      (fresh_id, new_nodes) in

    let (_,map) = loop Gid_map.empty pst in

    let rec prec_loop map = function
    | [] | [_] -> map
    | n1 :: n2 :: tail ->
      let new_map = prec_loop map (n2 :: tail) in

545 546 547 548 549 550
      let node1 = Gid_map.find n1 new_map
      and node2 = Gid_map.find n2 new_map in
      new_map
      |> (Gid_map.add n1 (G_node.set_succ n2 node1))
      |> (Gid_map.add n2 (G_node.set_prec n1 node2)) in

551 552 553 554
    {
      domain;
      meta=[];
      map=prec_loop map (List.rev !leaf_list);
555
      fusion = [];
556
      highest_index = !cpt;
557
      rules = String_map.empty;
558
    }
bguillaum's avatar
bguillaum committed
559

Bruno Guillaume's avatar
Bruno Guillaume committed
560 561 562 563 564 565 566
  let update_edge_feature ?loc edge_id feat_name new_value (src_gid,edge,tar_gid) graph =
    match Gid_map.find_opt src_gid graph.map with
    | None -> Error.run ?loc "[Graph.update_edge_feature] cannot find source node of edge \"%s\"" edge_id
    | Some src_node ->
      match G_node.update_edge tar_gid edge feat_name new_value src_node with
      | Some (new_node, new_edge) -> Some ({graph with map = Gid_map.add src_gid new_node graph.map}, new_edge)
      | None -> None
567

568 569 570 571 572 573 574 575
  let del_edge_feature ?loc edge_id feat_name (src_gid,edge,tar_gid) graph =
    match Gid_map.find_opt src_gid graph.map with
    | None -> Error.run ?loc "[Graph.del_edge_feature] cannot find source node of edge \"%s\"" edge_id
    | Some src_node ->
      match G_node.del_edge_feature tar_gid edge feat_name src_node with
      | Some (new_node, new_edge) -> Some ({graph with map = Gid_map.add src_gid new_node graph.map}, new_edge)
      | None -> None

bguillaum's avatar
bguillaum committed
576
  (* -------------------------------------------------------------------------------- *)
Bruno Guillaume's avatar
Bruno Guillaume committed
577 578
  let del_edge ?loc src_gid label tar_gid graph =
    match Gid_map.find_opt src_gid graph.map with
579
    | None -> Error.bug ?loc "[Graph.del_edge] Some edge refers to a dead node"
Bruno Guillaume's avatar
Bruno Guillaume committed
580 581 582 583 584
    | Some src_node ->
      match G_node.remove_edge tar_gid label src_node with
      | None -> None
      | Some new_node -> Some {graph with map = Gid_map.add src_gid new_node graph.map}

pj2m's avatar
pj2m committed
585

bguillaum's avatar
bguillaum committed
586
  (* -------------------------------------------------------------------------------- *)
587
  let del_node graph node_id =
588 589 590 591 592 593 594
    let map_wo_node =
      Gid_map.fold
        (fun id value acc ->
          if id = node_id
          then acc
          else Gid_map.add id (G_node.remove_key node_id value) acc
        ) graph.map Gid_map.empty in
Bruno Guillaume's avatar
Bruno Guillaume committed
595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
    try
      let node = Gid_map.find node_id graph.map in
      let new_map =
        match (G_node.get_prec node, G_node.get_succ node) with
        | (Some id_prec, Some id_succ) ->
          begin
            let prec = Gid_map.find id_prec map_wo_node
            and succ = Gid_map.find id_succ map_wo_node in
            map_wo_node
            |> (Gid_map.add id_prec (G_node.set_succ id_succ prec))
            |> (Gid_map.add id_succ (G_node.set_prec id_prec succ))
          end
        | (Some id_prec, None) ->
          begin
            let prec = Gid_map.find id_prec map_wo_node in
            map_wo_node
            |> (Gid_map.add id_prec (G_node.remove_succ prec))
          end
        | (None, Some id_succ) ->
          begin
            let succ = Gid_map.find id_succ map_wo_node in
            map_wo_node
            |> (Gid_map.add id_succ (G_node.remove_prec succ))
          end
        | (None, None) -> map_wo_node in
      Some { graph with map = new_map }
    with Not_found -> None
pj2m's avatar
pj2m committed
622

bguillaum's avatar
bguillaum committed
623
  (* -------------------------------------------------------------------------------- *)
624
  let insert id1 id2 graph =
bguillaum's avatar
bguillaum committed
625 626
    let node1 = Gid_map.find id1 graph.map in
    let node2 = Gid_map.find id2 graph.map in
627
    let new_pos = match (G_node.get_position node1, G_node.get_position node2) with
628
    | (Some pos1, Some pos2) -> (pos1 +. pos2) /. 2.
629
    | _ -> Error.run "Try to insert into non ordered nodes" in
630
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
631
    let map = graph.map
632
      |> (Gid_map.add new_gid (G_node.fresh ~prec:id1 ~succ:id2 new_pos))
bguillaum's avatar
bguillaum committed
633 634
      |> (Gid_map.add id1 (G_node.set_succ new_gid node1))
      |> (Gid_map.add id2 (G_node.set_prec new_gid node2)) in
635
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
636 637

  (* -------------------------------------------------------------------------------- *)
638
  let append id graph =
bguillaum's avatar
bguillaum committed
639
    let node = Gid_map.find id graph.map in
640
    let new_pos = match G_node.get_position node with
641
    | Some pos -> pos +. 1.
642
    | _ -> Error.run "Try to append into non ordered nodes" in
643
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
644
    let map = graph.map
645
      |> (Gid_map.add new_gid (G_node.fresh ~prec:id new_pos))
bguillaum's avatar
bguillaum committed
646
      |> (Gid_map.add id (G_node.set_succ new_gid node)) in
647
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
648 649

  (* -------------------------------------------------------------------------------- *)
650
  let prepend id graph =
bguillaum's avatar
bguillaum committed
651
    let node = Gid_map.find id graph.map in
652
    let new_pos = match G_node.get_position node with
653
    | Some pos -> pos /. 2. (* build a smaller position but still > 0 *)
654
    | _ -> Error.run "Try to prepend into non ordered nodes" in
655
    let new_gid = graph.highest_index + 1 in
bguillaum's avatar
bguillaum committed
656
    let map = graph.map
657
      |> (Gid_map.add new_gid (G_node.fresh ~succ:id new_pos))
bguillaum's avatar
bguillaum committed
658
      |> (Gid_map.add id (G_node.set_prec new_gid node)) in
659
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
660 661

  (* -------------------------------------------------------------------------------- *)
662
  let add_after node_id graph =
bguillaum's avatar
bguillaum committed
663 664
    let node = Gid_map.find node_id graph.map in
    match G_node.get_succ node with
665 666
    | Some gid_succ -> insert node_id gid_succ graph
    | None -> append node_id graph
bguillaum's avatar
bguillaum committed
667 668

  (* -------------------------------------------------------------------------------- *)
669
  let add_before node_id graph =
bguillaum's avatar
bguillaum committed
670 671
    let node = Gid_map.find node_id graph.map in
    match G_node.get_prec node with
672 673 674 675 676 677
    | Some gid_prec -> insert gid_prec node_id graph
    | None -> prepend node_id graph

  (* -------------------------------------------------------------------------------- *)
  let add_unordered graph =
    let new_gid = graph.highest_index + 1 in
678
    let map = Gid_map.add new_gid G_node.empty graph.map in
679
    (new_gid, { graph with map; highest_index = new_gid })
bguillaum's avatar
bguillaum committed
680

bguillaum's avatar
bguillaum committed
681
  (* -------------------------------------------------------------------------------- *)
682
  (* move out-edges (which respect cst [labels,neg]) from id_src are moved to out-edges out off node id_tar *)
Bruno Guillaume's avatar
Bruno Guillaume committed
683
  let shift_out loc src_gid tar_gid is_gid_local label_cst graph =
684
    let domain = get_domain graph in
685 686
    let del_edges = ref [] and add_edges = ref [] in

687
    let src_node = Gid_map.find src_gid graph.map in
688
    let tar_node = Gid_map.find tar_gid graph.map in
689

690 691 692
    let src_next = G_node.get_next src_node in
    let tar_next = G_node.get_next tar_node in

693
    let (new_src_next, new_tar_next) =
694 695
    Massoc_gid.fold
      (fun (acc_src_next,acc_tar_next) next_gid edge ->
696
        if Label_cst.match_ ?domain label_cst edge && not (is_gid_local next_gid)
697
        then
Bruno Guillaume's avatar
Bruno Guillaume committed
698
          match Massoc_gid.add_opt next_gid edge acc_tar_next with
699
          | None when !Global.safe_commands -> Error.run ~loc "The [shift_out] command tries to build a duplicate edge (with label \"%s\")" (G_edge.to_string ?domain edge)
700 701 702 703 704 705 706
          | None ->
            del_edges := (src_gid,edge,next_gid) :: !del_edges;
            (Massoc_gid.remove next_gid edge acc_src_next, acc_tar_next)
          | Some new_acc_tar_next ->
            del_edges := (src_gid,edge,next_gid) :: !del_edges;
            add_edges := (tar_gid,edge,next_gid) :: !add_edges;
            (Massoc_gid.remove next_gid edge acc_src_next, new_acc_tar_next)
707 708 709
        else (acc_src_next,acc_tar_next)
      )
      (src_next, tar_next) src_next in
710

711
    let new_map = graph.map
712
      |> (Gid_map.add src_gid (G_node.set_next new_src_next src_node))
713 714 715 716 717
      |> (Gid_map.add tar_gid (G_node.set_next new_tar_next tar_node)) in
    ( { graph with map = new_map },
      !del_edges,
      !add_edges
    )
pj2m's avatar
pj2m committed
718

bguillaum's avatar
bguillaum committed
719
  (* -------------------------------------------------------------------------------- *)
Bruno Guillaume's avatar
Bruno Guillaume committed
720
  let shift_in loc src_gid tar_gid is_gid_local label_cst graph =
721
    let domain = get_domain graph in
722 723
    let del_edges = ref [] and add_edges = ref [] in
    let new_map =
724 725
      Gid_map.mapi
        (fun node_id node ->
726
          if is_gid_local node_id (* shift does not move pattern edges *)
727 728
          then node
          else
729 730 731 732 733 734 735 736
            let node_next = G_node.get_next node in
            match Massoc_gid.assoc src_gid node_next with
            | [] -> node (* no edges from node to src *)
            | node_src_edges ->
              let node_tar_edges = Massoc_gid.assoc tar_gid node_next in
              let (new_node_src_edges, new_node_tar_edges) =
              List.fold_left
              (fun (acc_node_src_edges,acc_node_tar_edges) edge ->
bguillaum's avatar
bguillaum committed
737
                if Label_cst.match_ ?domain label_cst edge
738 739
                then
                  match List_.usort_insert edge acc_node_tar_edges with
Bruno Guillaume's avatar
Bruno Guillaume committed
740
                  | None when !Global.safe_commands ->
741
                    Error.run ~loc "The [shift_in] command tries to build a duplicate edge (with label \"%s\")" (G_edge.to_string ?domain edge)
742 743 744 745 746 747 748
                  | None ->
                    del_edges := (node_id,edge,src_gid) :: !del_edges;
                    (List_.usort_remove edge acc_node_src_edges, acc_node_tar_edges)
                  | Some l ->
                    del_edges := (node_id,edge,src_gid) :: !del_edges;
                    add_edges := (node_id,edge,tar_gid) :: !add_edges;
                    (List_.usort_remove edge acc_node_src_edges, l)
749 750 751 752 753 754 755 756
                else (acc_node_src_edges,acc_node_tar_edges)
              )
              (node_src_edges, node_tar_edges) node_src_edges in
              let new_next =
                node_next
                |> (Massoc_gid.replace src_gid new_node_src_edges)
                |> (Massoc_gid.replace tar_gid new_node_tar_edges) in
              G_node.set_next new_next node
757 758 759 760 761
          ) graph.map in
    ( { graph with map = new_map },
      !del_edges,
      !add_edges
    )
762

bguillaum's avatar
bguillaum committed
763
  (* -------------------------------------------------------------------------------- *)
Bruno Guillaume's avatar
Bruno Guillaume committed
764 765 766
  let shift_edges loc src_gid tar_gid is_gid_local label_cst graph =
    let (g1,de1,ae1) = shift_out loc src_gid tar_gid is_gid_local label_cst graph in
    let (g2,de2,ae2) = shift_in loc src_gid tar_gid is_gid_local label_cst g1 in
767
    (g2, de1 @ de2, ae1 @ ae2)
pj2m's avatar
pj2m committed
768

bguillaum's avatar
bguillaum committed
769
  (* -------------------------------------------------------------------------------- *)
770 771
  let set_feat ?loc graph node_id feat_name new_value =
    let domain = get_domain graph in
772
    let node = Gid_map.find node_id graph.map in
773 774 775 776
    let new_node =
      match feat_name with
        | "position" -> G_node.set_position (float_of_string new_value) node
        | _ ->
bguillaum's avatar
bguillaum committed
777
          let new_fs = G_fs.set_feat ?loc ?domain feat_name new_value (G_node.get_fs node) in
778
          (G_node.set_fs new_fs node) in
779
    { graph with map = Gid_map.add node_id new_node graph.map }
bguillaum's avatar
bguillaum committed
780

bguillaum's avatar
bguillaum committed
781
  (* -------------------------------------------------------------------------------- *)
782
  let update_feat ?loc graph tar_id tar_feat_name item_list =
783 784 785
    let strings_to_concat =
      List.map
        (function
786 787
          | Concat_item.Feat (node_gid, "position") ->
            let node = Gid_map.find node_gid graph.map in
788 789
            begin
              match G_node.get_position node with
790
              | Some p -> sprintf "%g" p
791 792
              | _ -> Error.run ?loc "Try to read position of an unordered node"
            end
bguillaum's avatar
bguillaum committed
793
          | Concat_item.Feat (node_gid, feat_name) ->
bguillaum's avatar
bguillaum committed
794 795
            let node = Gid_map.find node_gid graph.map in
            (match G_fs.get_string_atom feat_name (G_node.get_fs node) with
bguillaum's avatar
bguillaum committed
796
              | Some atom -> atom
bguillaum's avatar
bguillaum committed
797
              | None -> Error.run ?loc "Cannot update_feat, some feature (named \"%s\") is not defined" feat_name
bguillaum's avatar
bguillaum committed
798
            )
bguillaum's avatar
bguillaum committed
799
          | Concat_item.String s -> s
800 801
        ) item_list in
    let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
802
    (set_feat ?loc graph tar_id tar_feat_name new_feature_value, new_feature_value)
bguillaum's avatar
bguillaum committed
803

bguillaum's avatar
bguillaum committed
804
  (* -------------------------------------------------------------------------------- *)
805
  let del_feat graph node_id feat_name =
806
    let node = Gid_map.find node_id graph.map in
807 808 809
    match G_fs.del_feat feat_name (G_node.get_fs node) with
      | Some new_fs -> Some { graph with map = Gid_map.add node_id (G_node.set_fs new_fs node) graph.map }
      | None -> None
810

811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830
  (* -------------------------------------------------------------------------------- *)
  let to_json graph =
    let domain = get_domain graph in

    let gr_id id = G_node.get_name id (Gid_map.find id graph.map) in

    let nodes = Gid_map.fold
      (fun id node acc ->
        let node_id = gr_id id
        and fs = G_node.get_fs node
        and succ =
        Massoc_gid.fold
          (fun acc tar edge ->
            (`List [`String (G_edge.to_string ?domain edge); `String (gr_id tar)]) :: acc
          ) [] (G_node.get_next node) in
         (node_id,`List [G_fs.to_json fs; `List succ])::acc
      ) graph.map [] in

    `Assoc nodes

bguillaum's avatar
bguillaum committed
831
  (* -------------------------------------------------------------------------------- *)
832 833
  let to_gr graph =
    let domain = get_domain graph in
834 835 836
    let buff = Buffer.create 32 in
    bprintf buff "graph {\n";

837
    (* meta data *)
Bruno Guillaume's avatar
Bruno Guillaume committed
838
    List.iter (bprintf buff "  %s;\n") graph.meta;
839

Bruno Guillaume's avatar
Bruno Guillaume committed
840 841
    (* node_list *)
    let nodes = Gid_map.fold (fun gid node acc -> (gid,node)::acc) graph.map [] in
842
    let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
843
    List.iter
Bruno Guillaume's avatar
Bruno Guillaume committed
844 845
      (fun (gid,node) ->
        bprintf buff "  N_%d %s;\n" gid (G_node.to_gr node)
846
      ) sorted_nodes;
847

848
    (* edges *)
849
    List.iter
Bruno Guillaume's avatar
Bruno Guillaume committed
850
      (fun (src_gid,node) ->
bguillaum's avatar
bguillaum committed
851
        Massoc_gid.iter
Bruno Guillaume's avatar
Bruno Guillaume committed
852 853
          (fun tar_gid edge ->
            bprintf buff "  N_%d -[%s]-> N_%d;\n" src_gid (G_edge.to_string ?domain edge) tar_gid
bguillaum's avatar
bguillaum committed
854
          ) (G_node.get_next node)
855
      ) sorted_nodes;
856

857 858 859
    bprintf buff "}\n";
    Buffer.contents buff

bguillaum's avatar
bguillaum committed
860
  (* -------------------------------------------------------------------------------- *)
861 862 863 864 865 866 867 868 869 870 871
  let fusion_item_space_after fi =
    try if List.assoc "SpaceAfter" fi.efs = "No" then "" else " "
    with Not_found -> " "

  let space_after gnode =
    match G_fs.get_string_atom "_MISC_SpaceAfter" (G_node.get_fs gnode) with
    | Some "No" -> ""
    | _ -> " "

  let esc s = Str.global_replace (Str.regexp "<") "&lt;" s

872 873
  let to_sentence ?(only_pivot=false) ?main_feat ?(deco=G_deco.empty) graph =
    let high_list = match (only_pivot, deco.pivot) with
874
      | (true, None) -> []
875 876
      | (true, Some i) -> [i,("pivot", [])]
      | (false, _) -> deco.nodes in
877

878
    let is_highlighted_gid gid = List.mem_assoc gid high_list in
879 880 881 882 883 884

    let inside fusion_item gid =
      let first = Gid_map.find fusion_item.first graph.map in
      let last = Gid_map.find fusion_item.last graph.map in
      let node = Gid_map.find gid graph.map in
      match (G_node.get_position first, G_node.get_position node, G_node.get_position last) with
885
      | (Some f, Some n, Some l) when f <= n && n <= l -> true
886 887 888
      | _ -> false in

    let is_highlighted_fusion_item fusion_item =
889
      List.exists (fun (gid,_) -> inside fusion_item gid) high_list in
890

891
    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
892
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
893

894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
    let rec loop skip = function
    | [] -> ""
    | (gid, gnode)::gtail when skip = None ->
      begin
        match List.find_opt (fun fusion_item -> fusion_item.first=gid) graph.fusion with
        | Some fusion_item ->
          (if is_highlighted_fusion_item fusion_item
            then sprintf "<span class=\"highlight\">%s</span>" (esc fusion_item.word)
            else (esc fusion_item.word))
          ^ (fusion_item_space_after fusion_item)
          ^ (loop (Some fusion_item.last) gtail)
        | None ->
          match G_fs.to_word (G_node.get_fs gnode) with
          | None -> (loop None gtail)
          | Some text ->
          (if is_highlighted_gid gid
            then sprintf "<span class=\"highlight\">%s</span>" (esc text)
            else esc (text))
          ^ (space_after gnode)
          ^ (loop None gtail)
      end
    | (gid, gnode)::gtail when skip = Some gid -> loop None gtail
    | (gid, gnode)::gtail -> loop skip gtail in

    Sentence.fr_clean_spaces (loop None snodes)
919

Bruno Guillaume's avatar
Bruno Guillaume committed
920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942

  let start_dur gnode =
    let fs = G_node.get_fs gnode in
    match (G_fs.get_string_atom "_start" fs, G_fs.get_string_atom "_stop" fs) with (Some _start, Some _stop) ->
      let start = float_of_string _start
      and stop = float_of_string _stop in
    (start, stop -. start)
    | _ -> (-1., -1.)


  let to_orfeo ?(deco=G_deco.empty) graph =
    let is_highlighted_gid gid = List.mem_assoc gid deco.nodes in

    let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in

    let buff = Buffer.create 32 in
    CCList.iteri (fun i (gid, gnode) ->
      match G_fs.to_word (G_node.get_fs gnode) with
        | None -> ()
        | Some word ->
      let (start, dur) = start_dur gnode in
      Printf.bprintf buff
Bruno Guillaume's avatar
Bruno Guillaume committed
943
        "<span id=\"tok%d\" data-dur=\"%g\" data-begin=\"%g\" tabindex=\"0\" data-index=\"%d\" %s>%s </span>"
Bruno Guillaume's avatar
Bruno Guillaume committed
944
        i dur start i
Bruno Guillaume's avatar
Bruno Guillaume committed
945 946 947 948 949 950
        (match i, is_highlighted_gid gid with
          | (1, true) -> "class=\"speaking highlight\""
          | (1, false) -> "class=\"speaking\""
          | (_, true) -> "class=\"highlight\""
          | (_, false) -> ""
        )
Bruno Guillaume's avatar
Bruno Guillaume committed
951 952 953 954 955
        word
    ) snodes;
  Buffer.contents buff


bguillaum's avatar
bguillaum committed
956
  (* -------------------------------------------------------------------------------- *)
957
  let is_non_lexical_node node =
958
    let fs = G_node.get_fs node in G_fs.get_string_atom "kind" fs <> None
959

960 961 962
  let to_dep ?filter ?main_feat ?(deco=G_deco.empty) graph =
    let domain = get_domain graph in

963 964 965 966 967 968 969 970
    (* split lexical // non-lexical nodes *)
    let (nodes, nl_nodes) = Gid_map.fold
      (fun id elt (acc1, acc2) ->
        if is_non_lexical_node elt
        then (acc1, (id,elt)::acc2)
        else ((id,elt)::acc1, acc2)
      ) graph.map ([],[]) in

971
    let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
bguillaum's avatar
bguillaum committed
972

973 974 975 976 977 978 979 980 981 982 983 984 985
    let insert (mid,mwe) nodes =
      let next_ids = Massoc_gid.fold (fun acc gid _ -> gid::acc) [] (G_node.get_next mwe) in
      let rec loop = function
      | [] -> [(mid,mwe)]
      | (h,n)::t when List.mem h next_ids -> (mid,mwe)::(h,n)::t
      | h::t -> h :: (loop t) in
      loop nodes in

    let all_nodes = List.fold_left (
      fun acc mwe -> insert mwe acc
      ) snodes nl_nodes
      in

986 987 988 989
    let buff = Buffer.create 32 in
    bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
    bprintf buff "[WORDS] { \n";

990
    (* nodes *)
991
    List.iter
992
      (fun (id, node) ->
993
        let decorated_feat = try List.assoc id deco.G_deco.nodes with Not_found -> ("",[]) in
bguillaum's avatar
bguillaum committed
994
        let fs = G_node.get_fs node in
995
        let pos = G_node.get_position node in
996
        let dep_fs = G_fs.to_dep ~decorated_feat ?position:pos ?filter ?main_feat fs in
997 998 999

        let style = match G_fs.get_string_atom "void" fs with
          | Some "y" -> "; forecolor=red; subcolor=red; "
1000 1001 1002
          | _ -> match G_fs.get_string_atom "_UD_empty" fs with
            | Some "Yes" -> "; forecolor=purple; subcolor=purple; "
            | _ -> "" in
bguillaum's avatar
bguillaum committed
1003 1004 1005 1006 1007

        bprintf buff "N_%s { %s%s }\n"
          (Gid.to_string id)
          dep_fs
          style
1008
      ) all_nodes;
1009
    bprintf buff "} \n";
1010

1011
    (* edges *)
1012
    bprintf buff "[EDGES] { \n";
1013

bguillaum's avatar
bguillaum committed
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
    if !Global.debug
    then
      List.iter
        (fun (id, node) ->
          begin
            match G_node.get_prec node with
            | None -> ()
            | Some p -> bprintf buff "N_%s -> N_%s { label=\"__PREC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string p)
          end;
          begin
            match G_node.get_succ node with
            | None -> ()
            | Some s -> bprintf buff "N_%s -> N_%s { label=\"__SUCC__\"; bottom; style=dot; color=lightblue; forecolor=lightblue; }\n" (Gid.to_string id) (Gid.to_string s)
          end
        ) snodes;
1029

1030
    Gid_map.iter
1031
      (fun gid elt ->
bguillaum's avatar
bguillaum committed
1032 1033
        Massoc_gid.iter
          (fun tar g_edge ->
1034 1035
            let deco = List.mem (gid,g_edge,tar) deco.G_deco.edges in
            bprintf buff "N_%s -> N_%s %s\n" (Gid.to_string gid) (Gid.to_string tar) (G_edge.to_dep ?domain ~deco g_edge)
bguillaum's avatar
bguillaum committed
1036
          ) (G_node.get_next elt)
1037
      ) graph.map;
1038 1039

    bprintf buff "} \n";
1040 1041
    Buffer.contents buff

1042
  (* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
1043 1044 1045 1046 1047 1048 1049
  let list_num test =
    let rec loop n = function
      | [] -> raise Not_found
      | x::_ when test x -> n
      | _::t -> loop (n+1) t
    in loop 0

1050
  (* -------------------------------------------------------------------------------- *)
1051 1052
  exception Skip