Commit b95b2db7 authored by bguillaum's avatar bguillaum

Handling constituents, to be continued...

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@9090 7838e531-6607-4d57-9587-6c381814729c
parent aaf9d5d6
......@@ -348,3 +348,32 @@ module Ast = struct
let empty_grs = { domain = None; modules = []; strategies= [] }
end (* module Ast *)
(* ================================================================================ *)
module Constituent = struct
type t =
| Leaf of (string * string) (* cat, phon *)
| T of (string * t list)
let to_gr t =
let rec loop gorn nodes edges = function
| Leaf (cat,phon) ->
let fs = [{Ast.name="cat"; kind= Ast.Equality [cat]}, (Loc.empty); {Ast.name="phon"; kind= Ast.Equality [phon]}, (Loc.empty)] in
let node = ({Ast.node_id = gorn; position = None; fs}, Loc.empty) in
(gorn, node :: nodes, edges)
| T (cat, daugthers) ->
let (nodes',edges') = List_.foldi_left
(fun i (acc_nodes, acc_edges) daugther ->
let (id, new_acc_nodes, new_acc_edges) = loop (gorn^(string_of_int i)) acc_nodes acc_edges daugther in
let new_edge = ({Ast.edge_id= None; src = gorn; edge_label_cst= Ast.Pos_list ["_"]; tar= id}, Loc.empty) in
(new_acc_nodes, new_edge :: new_acc_edges)
) (nodes, edges) daugthers in
let fs = [{Ast.name="cat"; kind= Ast.Equality [cat]}, (Loc.empty)] in
let new_nodes = ({Ast.node_id = gorn; position = None; fs}, Loc.empty) :: nodes' in
(gorn, new_nodes, edges') in
let (_,nodes,edges) = loop "N" [] [] t in
{Ast.nodes; edges; meta=[]}
end (* module Constituent *)
......@@ -211,3 +211,12 @@ module Ast : sig
val empty_grs: grs
end (* module Ast *)
module Constituent : sig
type t =
| Leaf of (string * string) (* cat, phon *)
| T of (string * t list)
val to_gr: t -> Ast.gr
end
......@@ -21,6 +21,7 @@ module Int_map = Map.Make (struct type t = int let compare = Pervasives.compare
module Loc = struct
type t = string * int
let empty = ("Not a file", -1)
let file_line f l = (f,l)
let file_opt_line fo l = match fo with
| Some f -> file_line f l
......
......@@ -47,6 +47,8 @@ end
module Loc: sig
type t
val empty: t
val file_line: string -> int -> t
val file_opt_line: string option -> int -> t
val file: string -> t
......
......@@ -383,6 +383,11 @@ module G_graph = struct
) units in
of_conll ?domain { Conll.file=None; meta=[]; lines=conll_lines; multiwords=[] }
(* -------------------------------------------------------------------------------- *)
let of_const domain const =
let gr = Constituent.to_gr const in
build domain gr
(* -------------------------------------------------------------------------------- *)
let opt_att atts name =
try Some (List.assoc name atts)
......
......@@ -112,6 +112,8 @@ module G_graph: sig
It supposes that "SUC" is defined in current relations *)
val of_brown: ?domain:Domain.t -> ?sentid: string -> string -> t
val of_const: ?domain:Domain.t -> Constituent.t -> t
(* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *)
(* Update functions *)
(* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *)
......
......@@ -233,3 +233,14 @@ and standard target = parse
| eof { EOF }
| _ as c { raise (Error (sprintf "unexpected character '%c'" c)) }
| _ as c { raise (Error (sprintf "At line %d: unexpected character '%c'" (lexbuf.Lexing.lex_start_p.Lexing.pos_lnum) c)) }
and const = parse
| [' ' '\t'] { const lexbuf }
| '\n' { incr Global.current_line; const lexbuf}
| '(' { printf ">>>LPAREN<<<\n%!"; LPAREN }
| ')' { printf ">>>RPAREN<<<\n%!"; RPAREN }
| "SENT" { printf ">>>SENT<<<\n%!"; SENT }
| [^'(' ')' ' ']+ as id { printf "ID=>>>%s<<<\n%!" id; ID id }
......@@ -112,6 +112,17 @@ module Loader = struct
gr
with Sys_error msg -> Error.parse ~loc:(Loc.file file) "[Grew_loader.pattern] %s" msg
(* ------------------------------------------------------------------------------------------*)
let constituent file =
try
Global.init file;
let in_ch = open_in file in
let lexbuf = Lexing.from_channel in_ch in
let graph = parse_handle file (Grew_parser.constituent Grew_lexer.const) lexbuf in
close_in in_ch;
graph
with Sys_error msg -> Error.parse ~loc:(Loc.file file) "[Grew_loader.constituent] %s" msg
end (* module Loader *)
......@@ -121,7 +132,17 @@ module Parser = struct
try
Global.init "from_string";
let lexbuf = Lexing.from_string gr_string in
let gr = parse_handle "from_string" (Grew_parser.gr Grew_lexer.global) lexbuf in
let gr = parse_handle "Not a file" (Grew_parser.gr Grew_lexer.global) lexbuf in
gr
with Sys_error msg -> Error.parse "[Grew_parser.gr] %s" msg
(* ------------------------------------------------------------------------------------------*)
let constituent s =
try
Global.init "Not a file";
let lexbuf = Lexing.from_string s in
let graph = parse_handle "Not a file" (Grew_parser.constituent Grew_lexer.const) lexbuf in
graph
with Sys_error msg -> Error.parse "[Grew_parser.constituent] %s" msg
end
......@@ -20,8 +20,12 @@ module Loader: sig
val gr: string -> Ast.gr
val pattern: string -> Ast.pattern
val constituent: string -> Constituent.t
end
module Parser : sig
val gr: string -> Ast.gr
val constituent: string -> Constituent.t
end
\ No newline at end of file
......@@ -101,6 +101,8 @@ let localize t = (t,get_loc ())
%token <string> AROBAS_ID /* @id */
%token <string> COLOR /* @#89abCD */
%token SENT /* SENT */
%token <string> ID /* the general notion of id */
/* %token <Grew_ast.Ast.complex_id> COMPLEX_ID*/
......@@ -119,6 +121,10 @@ let localize t = (t,get_loc ())
%start <Grew_ast.Ast.pattern> pattern
%start <Grew_ast.Ast.domain> domain
/* parsing of the string representation of the constituent representation of Sequoia */
/* EX: "( (SENT (NP (NC Amélioration) (PP (P de) (NP (DET la) (NC sécurité))))))" */
%start <Grew_ast.Constituent.t> constituent
%left SEMIC
%left PLUS
%nonassoc STAR
......@@ -724,4 +730,15 @@ op_seq:
/*=============================================================================================*/
pattern:
| p=pos_item n=list(neg_item) EOF { Ast.complete_pattern {Ast.pat_pos=p; pat_negs=n} }
/*=============================================================================================*/
/* Constituent tree (à la Sequoia) */
/*=============================================================================================*/
constituent:
| LPAREN t=const_tree RPAREN { t }
const_tree:
| LPAREN pos=ID ff=ID RPAREN { Grew_ast.Constituent.Leaf (pos,ff) }
| LPAREN cat=ID list=nonempty_list (const_tree) RPAREN { Grew_ast.Constituent.T (cat, list) }
%%
......@@ -162,7 +162,9 @@ module Label_domain = struct
| Dash -> ["style=dashed"]
| Solid when deco -> ["style=dotted"]
| Solid -> []) in
sprintf "[label=\"%s\", %s]" style.text (String.concat ", " dot_items)
match style.text with
| "_" -> sprintf "[%s]" (String.concat ", " dot_items)
| _ -> sprintf "[label=\"%s\", %s]" style.text (String.concat ", " dot_items)
end
......
......@@ -119,6 +119,16 @@ type t = Grew_graph.G_graph.t
Grew_graph.G_graph.of_brown ?domain brown
) ()
let load_const ?domain file =
if not (Sys.file_exists file)
then raise (File_not_found file)
else
handle ~name:"load_const" ~file
(fun () ->
let const_ast = Grew_loader.Loader.constituent file in
Grew_graph.G_graph.of_const ?domain const_ast
) ()
let load ?domain file =
handle ~name:"Graph.load_graph" ~file
(fun () ->
......@@ -126,12 +136,13 @@ type t = Grew_graph.G_graph.t
| Some ".gr" -> load_gr ?domain file
| Some ".conll" -> load_conll ?domain file
| Some ".br" | Some ".melt" -> load_brown ?domain file
| Some ".cst" -> load_const ?domain file
| _ ->
Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
let rec loop = function
| [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
| load_fct :: tail -> try load_fct ?domain file with _ -> loop tail in
loop [load_gr; load_conll; load_brown]
loop [load_gr; load_conll; load_brown; load_const]
) ()
let of_gr ?domain ?(grewpy=false) gr_string =
......@@ -140,6 +151,13 @@ type t = Grew_graph.G_graph.t
let of_conll ?domain conll =
handle ~name:"Graph.of_conll" (fun () -> Grew_graph.G_graph.of_conll ?domain conll) ()
let of_const ?domain const =
handle ~name:"of_const"
(fun () ->
let const_ast = Grew_loader.Parser.constituent const in
(Grew_graph.G_graph.of_const ?domain const_ast)
) ()
let of_brown ?domain ?sentid brown =
handle ~name:"Graph.of_brown" (fun () -> Grew_graph.G_graph.of_brown ?domain ?sentid brown) ()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment