grew_corpus.ml 6.22 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
(***********************************************************************)
(*    Grew - a Graph Rewriting tool dedicated to NLP applications      *)
(*                                                                     *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                *)
(*                                                                     *)
Bruno Guillaume's avatar
New URL    
Bruno Guillaume committed
6
(*    Webpage: http://grew.fr                                          *)
bguillaum's avatar
bguillaum committed
7
8
9
10
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info") *)
(*    Authors: see AUTHORS file                                        *)
(***********************************************************************)

bguillaum's avatar
bguillaum committed
11
open Printf
12
open Log
13
open Conll
bguillaum's avatar
bguillaum committed
14

15
16
open Libgrew

bguillaum's avatar
bguillaum committed
17
18
19
20
open Grew_utils
open Grew_args

(* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
21

Bruno Guillaume's avatar
Bruno Guillaume committed
22
23
24
25
26
let fail msg =
  Log.fmessage "-------------------------------------";
  Log.fmessage "%s" msg;
  Log.fmessage "-------------------------------------";
  exit 2
bguillaum's avatar
bguillaum committed
27

28
29
30
let handle fct () =
  try fct ()
  with
31
    | Conll_error json ->            fail (Yojson.Basic.pretty_to_string json)
32
33
34
    | Libgrew.Error msg ->           fail msg
    | Corpus.File_not_found file ->  fail (sprintf "File not found: \"%s\"" file)
    | Corpus.Fail msg ->             fail msg
Bruno Guillaume's avatar
Bruno Guillaume committed
35
    | Sys_error msg ->               fail (sprintf "System error: %s" msg)
36
37
    | Libgrew.Bug msg ->             fail (sprintf "Libgrew.bug, please report: %s" msg)
    | exc ->                         fail (sprintf "Uncaught exception, please report: %s" (Printexc.to_string exc))
bguillaum's avatar
bguillaum committed
38

Bruno Guillaume's avatar
Bruno Guillaume committed
39

Bruno Guillaume's avatar
Bruno Guillaume committed
40
41
42
(* -------------------------------------------------------------------------------- *)
let transform () =
  handle (fun () ->
43
44
45
46
    let grs = match !Grew_args.grs with
    | None -> Grs.empty
    | Some file -> Grs.load file in

47
48
49
50
51
52
53
54
    let domain = Grs.domain grs in

    let graph_array = Corpus.input ?domain () in
    let len = Array.length graph_array in

    let out_ch = match !Grew_args.output_file with
      | Some output_file -> open_out output_file
      | None -> stdout in
Bruno Guillaume's avatar
Bruno Guillaume committed
55
56
57
58
59
60
61
62
63
64
65

    let out_graph ?new_sent_id graph = match (!Grew_args.output, new_sent_id) with
    | (Grew_args.Conll,None) -> fprintf out_ch "%s\n" (Graph.to_conll_string graph)
    | (Grew_args.Conll,Some nsi) -> fprintf out_ch "%s\n" (graph |> Graph.to_conll |> Conll.set_sentid nsi |> Conll.to_string)
    | (Grew_args.Cupt, None) -> fprintf out_ch "%s\n" (Graph.to_conll_string ~cupt:true graph)
    | (Grew_args.Cupt,Some nsi) -> fprintf out_ch "%s\n" (graph |> Graph.to_conll |> Conll.set_sentid nsi |> Conll.to_string ~cupt:true)
    | (Grew_args.Gr, None) -> fprintf out_ch "%s\n" (Graph.to_gr graph)
    | (Grew_args.Gr, Some nsi) -> fprintf out_ch "# sent_id = %s\n%s\n" nsi (Graph.to_gr graph)
    | (Grew_args.Dot, None) -> fprintf out_ch "%s\n" (Graph.to_dot graph)
    | (Grew_args.Dot, Some nsi) -> fprintf out_ch "# sent_id = %s\n%s\n" nsi (Graph.to_dot graph) in

66
67
68
69
    Array.iteri
      (fun index (id, gr) ->
        Counter.print index len id;
        match Rewrite.simple_rewrite ~gr ~grs ~strat:!Grew_args.strat with
Bruno Guillaume's avatar
Bruno Guillaume committed
70
        | [one] -> out_graph one
71
72
73
74
75
76
77
        | l ->
          List.iteri (fun i gr ->
            let conll = Graph.to_conll gr in
            let conll_new_id = Conll.set_sentid (sprintf "%s_%d" id i) conll in
            fprintf out_ch "%s\n" (Conll.to_string conll_new_id)
          ) l
      ) graph_array;
Bruno Guillaume's avatar
Bruno Guillaume committed
78
    Counter.finish ();
79
80
    match !Grew_args.output_file with
      | Some output_file -> close_out out_ch
Bruno Guillaume's avatar
Bruno Guillaume committed
81
      | None -> ()
Bruno Guillaume's avatar
Bruno Guillaume committed
82
  ) ()
Bruno Guillaume's avatar
Bruno Guillaume committed
83

bguillaum's avatar
bguillaum committed
84
(* -------------------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
85
86
  let grep () = handle
    (fun () ->
87
88
89
      match !Grew_args.pattern with
      | None -> Log.message "No pattern file specified: use -pattern option"; exit 1;
      | Some pattern_file ->
90

91
92
93
      let domain = match !Grew_args.grs with
      | None -> None
      | Some file -> Grs.domain (Grs.load file) in
94

95
96
      let pattern = Pattern.load ?domain pattern_file in

97
      (* get the array of graphs to explore *)
98
      let graph_array = Corpus.input ?domain () in
99

100
101
102
103
104
105
106
      (match !Grew_args.dep_dir with
      | None -> ()
      | Some d -> ignore (Sys.command (sprintf "mkdir -p %s" d)));

      (* printf "%s\n" (String.concat "_" (Pattern.pid_name_list pattern)); *)
      let pattern_ids = Pattern.pid_name_list pattern in

107
108
109
110
111
112
      let final_json =
        Array.fold_left
          (fun acc (name,graph) ->
            let matchings = Graph.search_pattern ?domain pattern graph in
              List.fold_left
                (fun acc2 matching ->
Bruno Guillaume's avatar
Fix #5    
Bruno Guillaume committed
113
114
                  let assoc_nodes = Matching.nodes pattern graph matching in
                  let graph_node_names = List.map snd assoc_nodes in
115
116
117
118
119
120
121
122
123
                  let deco = Deco.build pattern matching in

                  (* write the dep file if needed *)
                  let dep_file =
                    match !Grew_args.dep_dir with
                    | None -> None
                    | Some dir ->
                      let id = sprintf "%s__%s"
                        name
Bruno Guillaume's avatar
Fix #5    
Bruno Guillaume committed
124
                        (String.concat "_" (List.map2 (sprintf "%s:%s") pattern_ids graph_node_names)) in
125
126
127
128
129
130
131
                      let dep = Graph.to_dep ~deco graph in
                      let filename = Filename.concat dir (sprintf "%s.dep" id) in
                      let out_ch = open_out filename in
                      fprintf out_ch "%s" dep;
                      close_out out_ch;
                    Some filename in

132
133
                  let json_matching = Matching.to_json pattern graph matching in

134
135
136
137
138
139
140
141
142
143
144
145
146
147
                  let opt_list = [
                    Some ("sent_id", `String name);
                    Some ("matching", json_matching);
                    (
                      if !Grew_args.html
                      then Some ("html", `String (Graph.to_sentence ~deco graph))
                      else None
                    );
                    (
                      match dep_file with
                      | None -> None
                      | Some f -> Some ("dep_file", `String f)
                    )
                  ] in
Bruno Guillaume's avatar
Bruno Guillaume committed
148
149
                    let json = `Assoc (CCList.filter_map (fun x -> x) opt_list) in
                    json :: acc2
150
151
                ) acc matchings
          ) [] graph_array in
152
      Printf.printf "%s\n" (Yojson.Basic.pretty_to_string (`List final_json))
153
    ) ()