grew_types.ml 6.42 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

open Log
open Printf

open Grew_base

type feature_name = string (* cat, num, ... *)
type feature_atom = string (* V, N, inf, ... *)
type feature_value = string (* V, 4, "free text", ... *)
type suffix = string

bguillaum's avatar
bguillaum committed
21 22 23 24 25 26 27 28 29 30 31
type value = String of string | Float of float

let string_of_value = function
  | String s -> Str.global_replace (Str.regexp "\"") "\\\""
    (Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" s)
  | Float i -> String_.of_float i

let conll_string_of_value = function
  | String s -> s
  | Float i -> String_.of_float i

32 33
type disjunction = value list

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
(* ================================================================================ *)
module Pid = struct
  (* type t = int *)
  type t = Pos of int | Neg of int

  let compare = Pervasives.compare

  let to_id = function
    | Pos i -> sprintf "p_%d" i
    | Neg i -> sprintf "n_%d" i

  let to_string = function
    | Pos i -> sprintf "Pos %d" i
    | Neg i -> sprintf "Neg %d" i
end (* module Pid *)

(* ================================================================================ *)
module Pid_map =
  struct
    include Map.Make (Pid)

    exception True

    let exists fct map =
      try
        iter
          (fun key value ->
            if fct key value
            then raise True
          ) map;
        false
      with True -> true

    (* union of two maps*)
    let union_map m m' = fold (fun k v m'' -> (add k v m'')) m m'
end (* module Pid_map *)

(* ================================================================================ *)
module Pid_set = Set.Make (Pid)

(* ================================================================================ *)
module Gid = struct
76
  type t = int
77

78
  let compare = Pervasives.compare
79

80
  let to_string i = sprintf "%d" i
81 82 83 84 85 86 87 88 89 90 91
end (* module Gid *)

(* ================================================================================ *)
module Gid_map = Map.Make (Gid)

(* ================================================================================ *)
module Massoc_gid = Massoc_make (Gid)

(* ================================================================================ *)
module Massoc_pid = Massoc_make (Pid)

92 93


94 95 96 97 98 99 100 101
(* ================================================================================ *)
(* This module defines a type for lexical parameter (i.e. one line in a lexical file) *)
module Lex_par = struct

  type item = string list * string list (* first list: pattern parameters $id , second list command parameters @id *)

  type t = item list

bguillaum's avatar
bguillaum committed
102
  let size = List.length
103 104
  let append = List.append

bguillaum's avatar
bguillaum committed
105 106 107 108
  let signature = function
    | [] -> Error.bug "[Lex_par.signature] empty data"
    | (pp,cp)::_ -> (List.length pp,List.length cp)

109 110 111 112 113 114 115 116 117
  let dump t =
    printf "[Lex_par.dump] --> size = %d\n" (List.length t);
    List.iter (fun (pp,cp) ->
      printf "%s##%s\n"
        (String.concat "#" pp)
        (String.concat "#" cp)
    ) t

  let parse_line ?loc nb_p nb_c line =
bguillaum's avatar
bguillaum committed
118
    let line = String_.rm_peripheral_white line in
119 120 121
    if line = "" || line.[0] = '%'
    then None
    else
122
      let line = Str.global_replace (Str.regexp "\\\\%") "%" line in
123 124 125 126
      match Str.split (Str.regexp "##") line with
        | [args] when nb_c = 0 ->
          (match Str.split (Str.regexp "#") args with
            | l when List.length l = nb_p -> Some (l,[])
bguillaum's avatar
bguillaum committed
127
            | _ -> Error.build ?loc
128 129 130 131 132
              "Illegal lexical parameter line: \"%s\" doesn't contain %d args"
              line nb_p)
        | [args; values] ->
          (match (Str.split (Str.regexp "#") args, Str.split (Str.regexp "#") values) with
            | (lp,lc) when List.length lp = nb_p && List.length lc = nb_c -> Some (lp,lc)
bguillaum's avatar
bguillaum committed
133
            | _ -> Error.build ?loc
134 135
              "Illegal lexical parameter line: \"%s\" doesn't contain %d args and %d values"
              line nb_p nb_c)
bguillaum's avatar
bguillaum committed
136
        | _ -> Error.build ?loc "Illegal param line: '%s'" line
137

bguillaum's avatar
bguillaum committed
138 139 140 141
  let from_lines ?loc nb_p nb_c lines =
    match List_.opt_map (parse_line ?loc nb_p nb_c) lines with
    | [] -> Error.build ?loc "Empty lexical parameter list"
    | l -> l
142 143 144 145 146 147 148 149

  let load ?loc dir nb_p nb_c file =
    try
      let full_file =
        if Filename.is_relative file
        then Filename.concat dir file
        else file in
      let lines = File.read full_file in
bguillaum's avatar
bguillaum committed
150 151 152
      match List_.opt_mapi (fun i line -> parse_line ~loc:(Loc.file_line full_file i) nb_p nb_c line) lines with
      | [] -> Error.build ?loc "Empty lexical parameter file '%s'" file
      | l -> l
153 154
    with Sys_error _ -> Error.build ?loc "External lexical file '%s' not found" file

bguillaum's avatar
bguillaum committed
155
  let select index atom t =
156 157 158 159
    match
      List_.opt_map
        (fun (p_par, c_par) ->
          let par = List.nth p_par index in
bguillaum's avatar
bguillaum committed
160
          if atom = par
161
          then Some (p_par, c_par)
bguillaum's avatar
bguillaum committed
162
          else None
163 164 165 166 167 168
        ) t
    with
    | [] -> None
    | t -> Some t

  let get_param_value index = function
169
    | [] -> Error.bug "[Lex_par.get_param_value] empty parameter"
170 171 172 173 174 175 176 177 178 179 180 181 182 183
    | (params,_)::_ -> List.nth params index

  let get_command_value index = function
    | [(_,one)] -> List.nth one index
    | [] -> Error.bug "[Lex_par.get_command_value] empty parameter"
    | (_,[sing])::tail when index=0 ->
        Printf.sprintf "%s/%s"
          sing
          (List_.to_string
             (function
               | (_,[s]) -> s
               | _ -> Error.bug "[Lex_par.get_command_value] inconsistent param"
             ) "/" tail
          )
184 185 186 187
    | (left,_)::_ ->
      Error.run "Lexical parameter are not functional, input parameter%s: %s"
        (if (List.length left) > 1 then "s" else "")
        (String.concat ", " left)
188
end (* module Lex_par *)
bguillaum's avatar
bguillaum committed
189 190 191 192 193 194 195 196

(* ================================================================================ *)
module Concat_item = struct
  type t =
    | Feat of (Gid.t * feature_name)
    | String of string
end (* module Concat_item *)