grew_parser.mly 33.4 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
Bruno Guillaume's avatar
Bruno Guillaume committed
4
(*    Copyright 2011-2018 Inria, Université de Lorraine                           *)
bguillaum's avatar
bguillaum committed
5
(*                                                                                *)
Bruno Guillaume's avatar
Bruno Guillaume committed
6
(*    Webpage: http://grew.fr                                                     *)
bguillaum's avatar
bguillaum committed
7 8 9
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)
10

bguillaum's avatar
bguillaum committed
11
%{
bguillaum's avatar
bguillaum committed
12
open Grew_base
bguillaum's avatar
bguillaum committed
13
open Grew_types
bguillaum's avatar
bguillaum committed
14
open Grew_ast
pj2m's avatar
pj2m committed
15

16
(* Some intermediate sum types used in sub-functions when building the ast *)
17
(* One element of a pattern block, as produced by the [pat_item] rule;
   the [pn_item] rule later sorts these into nodes, edges and constraints. *)
type pat_item =
  | Pat_node of Ast.node    (* a node declaration *)
  | Pat_edge of Ast.edge    (* an edge declaration *)
  | Pat_const of Ast.const  (* a constraint *)
pj2m's avatar
pj2m committed
21 22

(* One element of a graph description, as produced by the [gr_item] rule;
   the [gr] rule later sorts these into metadata, nodes and edges. *)
type graph_item =
  | Graph_meta of string    (* a metadata line *)
  | Graph_node of Ast.node  (* a node declaration *)
  | Graph_edge of Ast.edge  (* an edge declaration *)
pj2m's avatar
pj2m committed
26

bguillaum's avatar
bguillaum committed
27
(* One operand of a '<' or '>' constraint: either an identifier
   (simple or pointed) or a float constant. *)
type ineq_item =
  | Ineq_sofi of Ast.simple_or_pointed
  | Ineq_float of float

Bruno Guillaume's avatar
Bruno Guillaume committed
31
(* Location currently reported by [Global.get_loc]. *)
let get_loc = fun () -> Global.get_loc ()
pj2m's avatar
pj2m committed
32 33 34
(* Pair a value with the location returned by [get_loc]. *)
let localize t =
  let loc = get_loc () in
  (t, loc)
%}

35 36
%token DUMMY

37 38 39 40 41 42 43 44 45
%token LACC                        /* { */
%token RACC                        /* } */
%token LBRACKET                    /* [ */
%token RBRACKET                    /* ] */
%token LPAREN                      /* ( */
%token RPAREN                      /* ) */
%token DDOT                        /* : */
%token COMA                        /* , */
%token SEMIC                       /* ; */
bguillaum's avatar
bguillaum committed
46
%token SHARP                       /* # */
bguillaum's avatar
bguillaum committed
47
%token PLUS                        /* + */
48 49
%token EQUAL                       /* = */
%token DISEQUAL                    /* <> */
bguillaum's avatar
bguillaum committed
50
%token BANG                        /* ! */
Bruno Guillaume's avatar
Bruno Guillaume committed
51
%token SLASH                       /* / */
52
%token STAR                        /* * */
bguillaum's avatar
bguillaum committed
53 54 55 56
%token LT                          /* < */
%token GT                          /* > */
%token LE                          /* <= or ≤ */
%token GE                          /* >= or ≥ */
bguillaum's avatar
bguillaum committed
57 58
%token LPREC                       /* << */
%token LSUCC                       /* >> */
bguillaum's avatar
bguillaum committed
59

bguillaum's avatar
bguillaum committed
60 61 62
%token BEFORE                      /* :< */
%token AFTER                       /* :> */

63
%token PIPE                        /* | */
64

65
%token EDGE                        /* -> */
66 67 68
%token LTR_EDGE_LEFT               /* -[ */
%token LTR_EDGE_LEFT_NEG           /* -[^ */
%token LTR_EDGE_RIGHT              /* ]-> */
69 70 71 72 73

%token ARROW                       /* ==> */
%token ARROW_LEFT                  /* =[ */
%token ARROW_LEFT_NEG              /* =[^ */
%token ARROW_RIGHT                 /* ]=> */
74

bguillaum's avatar
bguillaum committed
75
%token INCL                        /* include */
76
%token IMPORT                      /* import */
77
%token FEATURES                    /* features */
Bruno Guillaume's avatar
Bruno Guillaume committed
78
%token FROM                        /* from */
79
%token LABELS                      /* labels */
80
%token PATTERN                     /* pattern */
81 82
%token WITHOUT                     /* without */
%token COMMANDS                    /* commands */
83 84
%token STRAT                       /* strat */
%token PACKAGE                     /* package */
85 86 87 88 89
%token RULE                        /* rule */
%token GRAPH                       /* graph */

%token DEL_EDGE                    /* del_edge */
%token ADD_EDGE                    /* add_edge */
90 91
%token SHIFT_IN                    /* shift_in */
%token SHIFT_OUT                   /* shift_out */
92 93 94 95 96
%token SHIFT                       /* shift */
%token DEL_NODE                    /* del_node */
%token ADD_NODE                    /* add_node */
%token DEL_FEAT                    /* del_feat */

97 98 99 100 101
%token PICK                        /* Pick */
%token ALT                         /* Alt */
%token SEQ                         /* Seq */
%token ITER                        /* Iter */
%token IF                          /* If */
Bruno Guillaume's avatar
Bruno Guillaume committed
102
%token ONF                         /* Onf */
103 104
%token EMPTY                       /* Empty */
%token TRY                         /* Try */
105

106 107
%token <string> AROBAS_ID          /* @id */
%token <string> COLOR              /* @#89abCD */
bguillaum's avatar
bguillaum committed
108

109 110 111
%token <string> ID   /* the general notion of id */

/* %token <Grew_ast.Ast.complex_id>   COMPLEX_ID*/
112

113 114 115 116
%token <string>                STRING
%token <string>                REGEXP
%token <float>                 FLOAT
%token <string list>           COMMENT
Bruno Guillaume's avatar
Bruno Guillaume committed
117
%token <string * (int *string) list>  LEX_PAR
pj2m's avatar
pj2m committed
118

119
%token EOF                         /* end of file */
pj2m's avatar
pj2m committed
120

bguillaum's avatar
bguillaum committed
121
%start <Grew_ast.Ast.gr> gr
122
%start <Grew_ast.Ast.pattern> pattern
123
%start <Grew_ast.Ast.domain> domain
124

Bruno Guillaume's avatar
Bruno Guillaume committed
125 126
%start <Grew_ast.Ast.grs> new_grs
%start <Grew_ast.Ast.strat> strat_alone
127

128 129
/* parsing of the string representation of the constituent representation of Sequoia */
/* EX: "( (SENT (NP (NC Amélioration) (PP (P de) (NP (DET la) (NC sécurité))))))"    */
130
%start <Grew_ast.Ast.pst> phrase_structure_tree
131

pj2m's avatar
pj2m committed
132 133
%%

bguillaum's avatar
bguillaum committed
134 135 136
/* Possibly empty list of X separated by `separator`; a trailing separator is accepted. */
%public separated_list_final_opt(separator,X):
    | /* empty */
        { [] }
    | item=X
        { [item] }
    | item=X; separator; rest=separated_list_final_opt(separator,X)
        { item :: rest }
bguillaum's avatar
bguillaum committed
138 139 140 141

/* Non-empty list of X separated by `separator`; a trailing separator is accepted. */
%public separated_nonempty_list_final_opt(separator,X):
    | item=X
        { [item] }
    | item=X; separator
        { [item] }
    | item=X; separator; rest=separated_nonempty_list_final_opt(separator,X)
        { item :: rest }
143

pj2m's avatar
pj2m committed
144
/*=============================================================================================*/
145
/*  BASIC DEFINITIONS                                                                          */
pj2m's avatar
pj2m committed
146
/*=============================================================================================*/
147

148
/* An ID token interpreted through Ast.parse_label_ident. */
label_ident:
        | raw=ID
            { Ast.parse_label_ident raw }

/* An ID token interpreted through Ast.parse_pattern_label_ident. */
pattern_label_ident:
        | raw=ID
            { Ast.parse_pattern_label_ident raw }
153

bguillaum's avatar
bguillaum committed
154
/* An ID token interpreted through Ast.parse_simple_ident. */
simple_id:
        | raw=ID
            { Ast.parse_simple_ident raw }
bguillaum's avatar
bguillaum committed
156

157
/* Same as simple_id, paired with the current location. */
simple_id_with_loc:
        | raw=ID
            { let ident = Ast.parse_simple_ident raw in
              localize ident }

160 161 162 163 164 165
/* An ID token interpreted through Ast.parse_node_ident. */
node_id:
        | raw=ID
            { Ast.parse_node_ident raw }

/* Same as node_id, paired with the current location. */
node_id_with_loc:
        | raw=ID
            { let ident = Ast.parse_node_ident raw in
              localize ident }

166 167
/* An ID token interpreted through Ast.parse_feature_ident. */
feature_ident :
        | raw=ID
            { Ast.parse_feature_ident raw }
bguillaum's avatar
bguillaum committed
168

169 170
/* Same as feature_ident, paired with the current location. */
feature_ident_with_loc :
        | raw=ID
            { let ident = Ast.parse_feature_ident raw in
              localize ident }
171

172
/* A feature value as a string: an identifier, a quoted string,
   or a float rendered with the "%g" format. */
feature_value:
        | ident=ID
            { Ast.parse_simple_ident ident }
        | text=STRING
            { text }
        | num=FLOAT
            { Printf.sprintf "%g" num }
pj2m's avatar
pj2m committed
176

177 178 179 180 181
/* A feature value inside a pattern: identifiers go through
   Ast.parse_simple_or_pointed; strings and floats are always Ast.Simple. */
pattern_feature_value:
        | ident=ID
            { Ast.parse_simple_or_pointed ident }
        | text=STRING
            { Ast.Simple text }
        | num=FLOAT
            { let rendered = Printf.sprintf "%g" num in
              Ast.Simple rendered }

bguillaum's avatar
bguillaum committed
182
/* Operand of a '<' or '>' constraint: an identifier or a float constant. */
ineq_value:
        | ident=ID
            { let sofi = Ast.parse_simple_or_pointed ident in
              Ineq_sofi sofi }
        | num=FLOAT
            { Ineq_float num }

/* Same as ineq_value, paired with the current location. */
ineq_value_with_loc:
        | ident=ID
            { let sofi = Ast.parse_simple_or_pointed ident in
              localize (Ineq_sofi sofi) }
        | num=FLOAT
            { localize (Ineq_float num) }

190 191 192
/*=============================================================================================*/
/*  GREW GRAPH                                                                                 */
/*=============================================================================================*/
193 194
/* Entry point for a graph: `graph { item; item; ... }` followed by end of file. */
gr:
        | GRAPH LACC items=separated_list_final_opt(SEMIC,gr_item) RACC EOF
            {
              (* Sort the parsed items by kind before handing them to Ast.complete_graph. *)
              let meta =
                List_.opt_map (function Graph_meta m -> Some m | Graph_node _ | Graph_edge _ -> None) items in
              let nodes =
                List_.opt_map (function Graph_node n -> Some n | Graph_meta _ | Graph_edge _ -> None) items in
              let edges =
                List_.opt_map (function Graph_edge e -> Some e | Graph_meta _ | Graph_node _ -> None) items in
              Ast.complete_graph { Ast.meta; nodes; edges }
            }
bguillaum's avatar
bguillaum committed
203

pj2m's avatar
pj2m committed
204
/* One item of a graph description: a metadata line, a node or a labelled edge. */
gr_item:
        /*   sentence = "Jean dort."   */
        | key=simple_id EQUAL text=feature_value
            { Graph_meta (Printf.sprintf "%s = %s" key text) }

        /*   B (1) [phon="pense", lemma="penser", cat=v, mood=ind ]   */
        /*   B [phon="pense", lemma="penser", cat=v, mood=ind ]   */
        | node_loc=node_id_with_loc position=option(delimited(LPAREN, FLOAT ,RPAREN)) fs=delimited(LBRACKET,separated_list_final_opt(COMA,node_features),RBRACKET)
            {
              let (node_id, loc) = node_loc in
              Graph_node ({ Ast.node_id; position; fs }, loc)
            }

        /*   A   */
        | node_loc=node_id_with_loc
            {
              let (node_id, loc) = node_loc in
              (* No feature structure given: use the one built by Ast.default_fs. *)
              let fs = Ast.default_fs ~loc node_id in
              Graph_node ({ Ast.node_id; position = None; fs }, loc)
            }

        /*   A -[x]-> B   */
        | src_loc=node_id_with_loc label=delimited(LTR_EDGE_LEFT,label_ident,LTR_EDGE_RIGHT) tar=node_id
            {
              let (src, loc) = src_loc in
              Graph_edge ({ Ast.edge_id = None; src; edge_label_cst = Ast.Pos_list [label]; tar }, loc)
            }
pj2m's avatar
pj2m committed
222

223 224 225 226
/*=============================================================================================*/
/*  DOMAIN DEFINITION                                                                          */
/*=============================================================================================*/
/* Entry point for a domain: an optional DUMMY token, the features block,
   the labels block, then end of file. */
domain:
        | option(DUMMY) feats=feature_group labs=labels EOF
            { { Ast.feature_domain = feats; label_domain = labs } }

pj2m's avatar
pj2m committed
234 235 236
/*=============================================================================================*/
/* FEATURES DOMAIN DEFINITION                                                                  */
/*=============================================================================================*/
237
/* The `features { ... }` block of a domain. */
feature_group:
        | FEATURES decls=features
            { decls }

240
/* Braced, non-empty, semicolon-separated list of feature declarations. */
features:
        | LACC decls=separated_nonempty_list_final_opt(SEMIC,feature) RACC
            { decls }
bguillaum's avatar
bguillaum committed
242

243
/* One feature declaration of the domain: numeric, closed or open. */
feature:
        /*   pos: #   */
        /*   m: ind,inf,part,subj,imp   */
        | name=feature_name DDOT values=feature_values
            {
              (* The single value "#" (as produced for SHARP by feature_values) marks a numeric feature. *)
              match values with
              | ["#"] -> Ast.Num name
              | _ -> Ast.build_closed name values
            }

        /*   phon: *   */
        | name=feature_name DDOT STAR
            { Ast.Open name }
256

pj2m's avatar
pj2m committed
257
/* A feature name: the ID token passed through Ast.to_uname. */
feature_name:
        | raw=ID
            { Ast.to_uname raw }
pj2m's avatar
pj2m committed
259

260
/* Values of a declared feature: either `#` (returned as ["#"])
   or a non-empty comma-separated list of values. */
feature_values:
        | SHARP
            { ["#"] }
        | values=separated_nonempty_list(COMA,feature_value)
            { values }
pj2m's avatar
pj2m committed
263 264 265 266

/*=============================================================================================*/
/* GLOBAL LABELS DEFINITION                                                                    */
/*=============================================================================================*/
267
/* The `labels { ... }` block of a domain. */
labels:
        /*   labels { OBJ, SUBJ, DE_OBJ, ANT }   */
        | LABELS decls=delimited(LACC,separated_nonempty_list_final_opt(COMA,label),RACC)
            { decls }

271
/* One label declaration: an identifier followed by display items,
   or a quoted string with no display item. */
label:
        | name=label_ident styles=list(display)
            { (name, styles) }
        | name=STRING
            { (name, []) }
pj2m's avatar
pj2m committed
274

275
/* A display item attached to a label: an @id or a colour token. */
display:
        | at_id=AROBAS_ID
            { at_id }
        | colour=COLOR
            { colour }
pj2m's avatar
pj2m committed
278 279 280 281

/*=============================================================================================*/
/* RULES DEFINITION                                                                            */
/*=============================================================================================*/
282
/* A rewriting rule: optional doc comment, name, optional external lexicons,
   positive pattern, negative patterns, commands, then inline lexicons. */
rule:
        | doc_opt=option(COMMENT) RULE name_loc=simple_id_with_loc ext_lex=option(external_lexicons) LACC pos=pos_item negs=list(neg_item) cmds=commands RACC trailing=list(final_lexicon)
            {
              let (rule_id, rule_loc) = name_loc in
              (* External lexicons, when present, come before the inline ones. *)
              let declared = match ext_lex with
                | Some l -> l
                | None -> [] in
              let rule_doc = match doc_opt with
                | Some lines -> lines
                | None -> [] in
              { Ast.rule_id;
                pattern = Ast.complete_pattern { Ast.pat_pos = pos; Ast.pat_negs = negs };
                commands = cmds;
                lexicon_info = declared @ trailing;
                rule_doc;
                rule_loc;
                rule_dir = None;
              }
            }
297

Bruno Guillaume's avatar
Bruno Guillaume committed
298 299
/* Parenthesised, non-empty, comma-separated list of external lexicon declarations. */
external_lexicons:
        | LPAREN decls=separated_nonempty_list_final_opt(COMA, external_lexicon) RPAREN
            { decls }
300

Bruno Guillaume's avatar
Bruno Guillaume committed
301 302
/* `name from "file"`: a lexicon name bound to a file. */
external_lexicon:
        | name=simple_id FROM path=STRING
            { (name, Ast.File path) }
bguillaum's avatar
bguillaum committed
303

Bruno Guillaume's avatar
Bruno Guillaume committed
304 305
/* An inline lexicon: the LEX_PAR token carries its name and its lines. */
final_lexicon:
        | lex=LEX_PAR
            { let (name, lines) = lex in
              (name, Ast.Final lines) }
bguillaum's avatar
bguillaum committed
306

pj2m's avatar
pj2m committed
307
/* The positive part of a rule: `pattern { ... }`. */
pos_item:
        | PATTERN body=pn_item
            { body }
pj2m's avatar
pj2m committed
309 310

/* A negative part of a rule: `without { ... }`. */
neg_item:
        | WITHOUT body=pn_item
            { body }

/* Braced, semicolon-separated list of pattern items, sorted by kind. */
pn_item:
        | items=delimited(LACC,separated_list_final_opt(SEMIC,pat_item),RACC)
            {
              let pat_nodes =
                List_.opt_map (function Pat_node n -> Some n | Pat_edge _ | Pat_const _ -> None) items in
              let pat_edges =
                List_.opt_map (function Pat_edge e -> Some e | Pat_node _ | Pat_const _ -> None) items in
              let pat_const =
                List_.opt_map (function Pat_const c -> Some c | Pat_node _ | Pat_edge _ -> None) items in
              { Ast.pat_nodes; pat_edges; pat_const }
            }
pj2m's avatar
pj2m committed
322 323

/*=============================================================================================*/
324
/* PATTERN DEFINITION                                                                            */
pj2m's avatar
pj2m committed
325 326
/*=============================================================================================*/
pat_item:
        /*   R [cat=V, lemma=$lemma]   */
        | node_loc=simple_id_with_loc fs=delimited(LBRACKET,separated_list_final_opt(COMA,node_features),RBRACKET)
            { let (node_id, loc) = node_loc in
              Pat_node ({ Ast.node_id; position = None; fs }, loc) }

        /* ---- named edges ---- */

        /*   e: A -> B   */
        | edge_loc=simple_id_with_loc DDOT src=simple_id EDGE tar=simple_id
            { let (edge, loc) = edge_loc in
              Pat_edge ({ Ast.edge_id = Some edge; src; edge_label_cst = Ast.Neg_list []; tar }, loc) }

        /*   e: A -[X|Y]-> B   */
        | edge_loc=simple_id_with_loc DDOT src=simple_id allowed=delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) tar=simple_id
            { let (edge, loc) = edge_loc in
              Pat_edge ({ Ast.edge_id = Some edge; src; edge_label_cst = Ast.Pos_list allowed; tar }, loc) }

        /*   e: A -[^X|Y]-> B   */
        | edge_loc=simple_id_with_loc DDOT src=simple_id forbidden=delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) tar=simple_id
            { let (edge, loc) = edge_loc in
              Pat_edge ({ Ast.edge_id = Some edge; src; edge_label_cst = Ast.Neg_list forbidden; tar }, loc) }

        /*   e: A -[re"regexp"]-> B   */
        | edge_loc=simple_id_with_loc DDOT src=simple_id LTR_EDGE_LEFT re=REGEXP LTR_EDGE_RIGHT tar=simple_id
            { let (edge, loc) = edge_loc in
              Pat_edge ({ Ast.edge_id = Some edge; src; edge_label_cst = Ast.Regexp re; tar }, loc) }

        /* ---- anonymous edges and in/out constraints, no label restriction ---- */

        /*   A -> B   */
        | src_loc=simple_id_with_loc EDGE tar=simple_id
            { let (src, loc) = src_loc in
              Pat_edge ({ Ast.edge_id = None; src; edge_label_cst = Ast.Neg_list []; tar }, loc) }

        /*   A -> *   */
        | src_loc=simple_id_with_loc EDGE STAR
            { let (src, loc) = src_loc in
              Pat_const (Ast.Cst_out (src, Ast.Neg_list []), loc) }

        /*   * -> B   */
        | STAR EDGE tar_loc=simple_id_with_loc
            { let (tar, loc) = tar_loc in
              Pat_const (Ast.Cst_in (tar, Ast.Neg_list []), loc) }

        /* ---- positive label list ---- */

        /*   A -[X|Y]-> B   */
        | src_loc=simple_id_with_loc allowed=delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) tar=simple_id
            { let (src, loc) = src_loc in
              Pat_edge ({ Ast.edge_id = None; src; edge_label_cst = Ast.Pos_list allowed; tar }, loc) }

        /*   A -[X|Y]-> *   */
        | src_loc=simple_id_with_loc allowed=delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) STAR
            { let (src, loc) = src_loc in
              Pat_const (Ast.Cst_out (src, Ast.Pos_list allowed), loc) }

        /*   * -[X|Y]-> B   */
        | STAR allowed=delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) tar_loc=simple_id_with_loc
            { let (tar, loc) = tar_loc in
              Pat_const (Ast.Cst_in (tar, Ast.Pos_list allowed), loc) }

        /* ---- negative label list and regexp ---- */

        /*   A -[^X|Y]-> B   */
        | src_loc=simple_id_with_loc forbidden=delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) tar=simple_id
            { let (src, loc) = src_loc in
              Pat_edge ({ Ast.edge_id = None; src; edge_label_cst = Ast.Neg_list forbidden; tar }, loc) }

        /*   A -[re"regexp"]-> B   */
        | src_loc=simple_id_with_loc LTR_EDGE_LEFT re=REGEXP LTR_EDGE_RIGHT tar=simple_id
            { let (src, loc) = src_loc in
              Pat_edge ({ Ast.edge_id = None; src; edge_label_cst = Ast.Regexp re; tar }, loc) }

        /*   A -[^X|Y]-> *   */
        | src_loc=simple_id_with_loc forbidden=delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) STAR
            { let (src, loc) = src_loc in
              Pat_const (Ast.Cst_out (src, Ast.Neg_list forbidden), loc) }

        /*   * -[^X|Y]-> B   */
        | STAR forbidden=delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) tar_loc=simple_id_with_loc
            { let (tar, loc) = tar_loc in
              Pat_const (Ast.Cst_in (tar, Ast.Neg_list forbidden), loc) }
pj2m's avatar
pj2m committed
386

387
        /*   X.cat = Y.cat   */
Bruno Guillaume's avatar
Bruno Guillaume committed
388
        /*   X.cat = value   */
389
        /*   X.cat = lex.value   */
bguillaum's avatar
bguillaum committed
390
        | lhs_loc=feature_ident_with_loc EQUAL rhs=ID
            {
              let (lhs, loc) = lhs_loc in
              (* A plain identifier is a constant; a pointed one refers to a lexicon or another feature. *)
              let cst =
                match Ast.parse_simple_or_pointed rhs with
                | Ast.Simple value -> Ast.Feature_eq_cst (lhs, value)
                | Ast.Pointed (s1, s2) -> Ast.Feature_eq_lex_or_fs (lhs, (s1, Ast.to_uname s2)) in
              Pat_const (cst, loc)
            }

        /*   X.cat = "value"   */
        | lhs_loc=feature_ident_with_loc EQUAL text=STRING
            { let (lhs, loc) = lhs_loc in
              Pat_const (Ast.Feature_eq_cst (lhs, text), loc) }

        /*   X.cat = 12.34   */
        | lhs_loc=feature_ident_with_loc EQUAL num=FLOAT
            { let (lhs, loc) = lhs_loc in
              Pat_const (Ast.Feature_eq_float (lhs, num), loc) }

        /*   X.cat <> Y.cat   */
        /*   X.cat <> value   */
        /*   X.cat <> lex.value   */
        | lhs_loc=feature_ident_with_loc DISEQUAL rhs=ID
            {
              let (lhs, loc) = lhs_loc in
              let cst =
                match Ast.parse_simple_or_pointed rhs with
                | Ast.Simple value -> Ast.Feature_diff_cst (lhs, value)
                | Ast.Pointed (s1, s2) -> Ast.Feature_diff_lex_or_fs (lhs, (s1, Ast.to_uname s2)) in
              Pat_const (cst, loc)
            }

        /*   X.cat <> "value"   */
        | lhs_loc=feature_ident_with_loc DISEQUAL text=STRING
            { let (lhs, loc) = lhs_loc in
              Pat_const (Ast.Feature_diff_cst (lhs, text), loc) }

        /*   X.cat <> 12.34   */
        | lhs_loc=feature_ident_with_loc DISEQUAL num=FLOAT
            { let (lhs, loc) = lhs_loc in
              Pat_const (Ast.Feature_diff_float (lhs, num), loc) }

        /*   X.cat = re"regexp"   */
        | lhs_loc=feature_ident_with_loc EQUAL re=REGEXP
            { let (lhs, loc) = lhs_loc in
              Pat_const (Ast.Feature_eq_regexp (lhs, re), loc) }
bguillaum's avatar
bguillaum committed
431

bguillaum's avatar
bguillaum committed
432 433 434
        /*   Strict '<' between two operands; its meaning depends on the operand kinds   */
        | id1_loc=ineq_value_with_loc LT id2=ineq_value
            { let (id1,loc)=id1_loc in
              match (id1, id2) with
              (*   X.feat < Y.feat   *)
              | (Ineq_sofi (Ast.Pointed (n1, f1)), Ineq_sofi (Ast.Pointed (n2, f2))) ->
                Pat_const (Ast.Features_ineq (Ast.Lt, (n1,f1), (n2,f2)), loc)

              (*   X.feat < 12.34   *)
              | (Ineq_sofi (Ast.Pointed (n1, f1)), Ineq_float num) ->
                Pat_const (Ast.Feature_ineq_cst (Ast.Lt, (n1,f1), num), loc)

              (*   12.34 < Y.feat   is stored with the feature first, hence Gt   *)
              | (Ineq_float num, Ineq_sofi (Ast.Pointed (n1, f1))) ->
                Pat_const (Ast.Feature_ineq_cst (Ast.Gt, (n1,f1), num), loc)

              (*   X < Y   on two plain identifiers: immediate precedence   *)
              | (Ineq_sofi (Ast.Simple n1), Ineq_sofi (Ast.Simple n2)) ->
                Pat_const (Ast.Immediate_prec (n1,n2), loc)

              (* TODO : axe lex_field *)

              (*  __ERRORS__   *)
              (* Two float constants are rejected, so the message must say "cannot". *)
              | (Ineq_float _, Ineq_float _) -> Error.build "the '<' symbol cannot be used with 2 constants"
              (* Every remaining combination mixes a plain identifier with a feature or a constant. *)
              | _ -> Error.build "the '<' symbol can be used with 2 nodes or with 2 features but not in a mix inequality"
            }
bguillaum's avatar
bguillaum committed
457

bguillaum's avatar
bguillaum committed
458 459 460
        /*   Strict '>' between two operands; its meaning depends on the operand kinds   */
        | id1_loc=ineq_value_with_loc GT id2=ineq_value
            { let (id1,loc)=id1_loc in
              match (id1, id2) with
              (*   X.feat > Y.feat   *)
              | (Ineq_sofi (Ast.Pointed (n1, f1)), Ineq_sofi (Ast.Pointed (n2, f2))) ->
                Pat_const (Ast.Features_ineq (Ast.Gt, (n1,f1), (n2,f2)), loc)

              (*   X.feat > 12.34   *)
              | (Ineq_sofi (Ast.Pointed (n1, f1)), Ineq_float num) ->
                Pat_const (Ast.Feature_ineq_cst (Ast.Gt, (n1,f1), num), loc)

              (*   12.34 > Y.feat   is stored with the feature first, hence Lt   *)
              | (Ineq_float num, Ineq_sofi (Ast.Pointed (n1, f1))) ->
                Pat_const (Ast.Feature_ineq_cst (Ast.Lt, (n1,f1), num), loc)

              (*   X > Y   on two plain identifiers: immediate precedence with swapped operands   *)
              | (Ineq_sofi (Ast.Simple n1), Ineq_sofi (Ast.Simple n2)) ->
                Pat_const (Ast.Immediate_prec (n2,n1), loc)

              (* TODO : axe lex_field *)

              (*  __ERRORS__   *)
              (* Two float constants are rejected, so the message must say "cannot". *)
              | (Ineq_float _, Ineq_float _) -> Error.build "the '>' symbol cannot be used with 2 constants"
              (* Every remaining combination mixes a plain identifier with a feature or a constant. *)
              | _ -> Error.build "the '>' symbol can be used with 2 nodes or with 2 features but not in a mix inequality"
            }
bguillaum's avatar
bguillaum committed
483

484
        /*   X.position <= Y.position   */
bguillaum's avatar
bguillaum committed
485
        | lhs_loc=feature_ident_with_loc LE rhs=feature_ident
            { let (lhs, loc) = lhs_loc in
              Pat_const (Ast.Features_ineq (Ast.Le, lhs, rhs), loc) }

        /*   X.position >= Y.position   */
        | lhs_loc=feature_ident_with_loc GE rhs=feature_ident
            { let (lhs, loc) = lhs_loc in
              Pat_const (Ast.Features_ineq (Ast.Ge, lhs, rhs), loc) }

        /*   X.feat >= 12.34   (also written   12.34 <= X.feat)   */
        | feat_loc=feature_ident_with_loc GE bound=FLOAT
        | bound=FLOAT LE feat_loc=feature_ident_with_loc
            { let (feat, loc) = feat_loc in
              Pat_const (Ast.Feature_ineq_cst (Ast.Ge, feat, bound), loc) }

        /*   X.feat <= 12.34   (also written   12.34 >= X.feat)   */
        | feat_loc=feature_ident_with_loc LE bound=FLOAT
        | bound=FLOAT GE feat_loc=feature_ident_with_loc
            { let (feat, loc) = feat_loc in
              Pat_const (Ast.Feature_ineq_cst (Ast.Le, feat, bound), loc) }

        /*   A << B   */
        | left_loc=simple_id_with_loc LPREC right=simple_id
            { let (left, loc) = left_loc in
              Pat_const (Ast.Large_prec (left, right), loc) }

        /*   A >> B   (stored as B preceding A)   */
        | left_loc=simple_id_with_loc LSUCC right=simple_id
            { let (left, loc) = left_loc in
              Pat_const (Ast.Large_prec (right, left), loc) }
bguillaum committed
509

510 511
/* One feature constraint inside the [...] of a node; the feature name always goes through Ast.to_uname. */
node_features:
        /*   cat = n|v|adj   */
        | name_loc=simple_id_with_loc EQUAL values=separated_nonempty_list(PIPE,pattern_feature_value)
            {
              let (raw_name, loc) = name_loc in
              let name = Ast.to_uname raw_name in
              let kind =
                match values with
                (* a lone "*" value is treated like `cat = *` *)
                | [Ast.Simple "*"] -> Ast.Disequality []
                | [Ast.Pointed (lex,fn)] -> Ast.Equal_lex (lex,fn)
                | alternatives ->
                  let plain = function
                    | Ast.Simple v -> v
                    | Ast.Pointed (lex,fn) -> Error.build "Lexical reference '%s.%s' cannot be used in a disjunction" lex fn in
                  Ast.Equality (List.map plain alternatives) in
              ({ Ast.kind; name }, loc)
            }

        /*   cat = *   */
        | name_loc=simple_id_with_loc EQUAL STAR
            {
              let (raw_name, loc) = name_loc in
              let name = Ast.to_uname raw_name in
              ({ Ast.kind = Ast.Disequality []; name }, loc)
            }

        /*   cat   */
        | name_loc=simple_id_with_loc
            {
              let (raw_name, loc) = name_loc in
              let name = Ast.to_uname raw_name in
              ({ Ast.kind = Ast.Disequality []; name }, loc)
            }

        /*    cat<>n|v|adj   */
        | name_loc=simple_id_with_loc DISEQUAL values=separated_nonempty_list(PIPE,pattern_feature_value)
            {
              let (raw_name, loc) = name_loc in
              let name = Ast.to_uname raw_name in
              let kind =
                match values with
                | [Ast.Pointed (lex,fn)] -> Ast.Disequal_lex (lex,fn)
                | alternatives ->
                  let plain = function
                    | Ast.Simple v -> v
                    | Ast.Pointed (lex,fn) -> Error.build "Lexical reference '%s.%s' cannot be used in a disjunction" lex fn in
                  Ast.Disequality (List.map plain alternatives) in
              ({ Ast.kind; name }, loc)
            }

        /*   !lemma   */
        | BANG name_loc=simple_id_with_loc
            {
              let (raw_name, loc) = name_loc in
              let name = Ast.to_uname raw_name in
              ({ Ast.kind = Ast.Absent; name }, loc)
            }

        /*   mwepos=ADV/upos=ADV   */
        | name_loc=simple_id_with_loc EQUAL fv1=feature_value SLASH name2=simple_id EQUAL fv2=feature_value
            {
              let (raw_name, loc) = name_loc in
              let name = Ast.to_uname raw_name in
              ({ Ast.kind = Ast.Else (fv1,name2,fv2); name }, loc)
            }


pj2m's avatar
pj2m committed
563 564 565 566
/*=============================================================================================*/
/* COMMANDS DEFINITION                                                                         */
/*=============================================================================================*/
/* The `commands` section of a rule: the COMMANDS keyword followed by a brace-delimited
   (LACC ... RACC) sequence of `command` items separated by SEMIC.
   The semantic value is the list of parsed commands, unchanged. */
commands:
        | COMMANDS cmds=delimited(LACC,separated_nonempty_list_final_opt(SEMIC,command),RACC) { cmds }
568

pj2m's avatar
pj2m committed
569
/* One command of a `commands` section.
   Every alternative returns a pair (Ast command, loc) where loc is the location
   attached to the first identifier of the command (the one parsed with a
   `..._with_loc` nonterminal). */
command:
        /*   del_edge e   */
        /* Edge deletion, the edge being designated by an identifier. */
        | DEL_EDGE edge_loc=simple_id_with_loc
            { let (edge_id, loc) = edge_loc in
              (Ast.Del_edge_name edge_id, loc) }

        /*   del_edge m -[x]-> n   */
        /* Edge deletion, the edge being given explicitly as source, label and target. */
        | DEL_EDGE source=simple_id_with_loc lab=delimited(LTR_EDGE_LEFT,label_ident,LTR_EDGE_RIGHT) target=simple_id
            { let (src, loc) = source in
              (Ast.Del_edge_expl (src, target, lab), loc) }

        /*   add_edge m -[x]-> n   */
        | ADD_EDGE source=simple_id_with_loc lab=delimited(LTR_EDGE_LEFT,label_ident,LTR_EDGE_RIGHT) target=simple_id
            { let (src, loc) = source in
              (Ast.Add_edge (src, target, lab), loc) }

        /*   add_edge e: m -> n   */
        /* Note the argument order of the constructor: source, target, then the edge identifier. */
        | ADD_EDGE edge_loc=simple_id_with_loc DDOT source=simple_id EDGE target=simple_id
            { let (edge_id, loc) = edge_loc in
              (Ast.Add_edge_expl (source, target, edge_id), loc) }

        /*   shift_in m ==> n   */
        /* No label constraint is written: encoded as an empty Neg_list
           (presumably meaning that no label is excluded). */
        | SHIFT_IN source=simple_id_with_loc ARROW target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_in (src, target, Ast.Neg_list []), loc) }

        /*   shift_in m =[x*|y]=> n   */
        | SHIFT_IN source=simple_id_with_loc
          labs=delimited(ARROW_LEFT,separated_nonempty_list(PIPE,pattern_label_ident),ARROW_RIGHT)
          target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_in (src, target, Ast.Pos_list labs), loc) }

        /*   shift_in m =[^x*|y]=> n   */
        | SHIFT_IN source=simple_id_with_loc
          labs=delimited(ARROW_LEFT_NEG,separated_nonempty_list(PIPE,pattern_label_ident),ARROW_RIGHT)
          target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_in (src, target, Ast.Neg_list labs), loc) }

        /*   shift_out m ==> n   */
        /* Same encoding as for shift_in: no written constraint gives an empty Neg_list. */
        | SHIFT_OUT source=simple_id_with_loc ARROW target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_out (src, target, Ast.Neg_list []), loc) }

        /*   shift_out m =[x*|y]=> n   */
        | SHIFT_OUT source=simple_id_with_loc
          labs=delimited(ARROW_LEFT,separated_nonempty_list(PIPE,pattern_label_ident),ARROW_RIGHT)
          target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_out (src, target, Ast.Pos_list labs), loc) }

        /*   shift_out m =[^x*|y]=> n   */
        | SHIFT_OUT source=simple_id_with_loc
          labs=delimited(ARROW_LEFT_NEG,separated_nonempty_list(PIPE,pattern_label_ident),ARROW_RIGHT)
          target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_out (src, target, Ast.Neg_list labs), loc) }

        /*   shift m ==> n   */
        /* Same encoding as for shift_in: no written constraint gives an empty Neg_list. */
        | SHIFT source=simple_id_with_loc ARROW target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_edge (src, target, Ast.Neg_list []), loc) }

        /*   shift m =[x*|y]=> n   */
        | SHIFT source=simple_id_with_loc
          labs=delimited(ARROW_LEFT,separated_nonempty_list(PIPE,pattern_label_ident),ARROW_RIGHT)
          target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_edge (src, target, Ast.Pos_list labs), loc) }

        /*   shift m =[^x*|y]=> n   */
        | SHIFT source=simple_id_with_loc
          labs=delimited(ARROW_LEFT_NEG,separated_nonempty_list(PIPE,pattern_label_ident),ARROW_RIGHT)
          target=simple_id
            { let (src, loc) = source in
              (Ast.Shift_edge (src, target, Ast.Neg_list labs), loc) }

        /*   del_node n   */
        | DEL_NODE node_loc=simple_id_with_loc
            { let (node_id, loc) = node_loc in
              (Ast.Del_node node_id, loc) }

        /*   add_node n   */
        | ADD_NODE fresh_loc=simple_id_with_loc
            { let (fresh_id, loc) = fresh_loc in
              (Ast.New_node fresh_id, loc) }

        /*   add_node n :< m   */
        /* New node n, positioned relative to the existing node m (BEFORE token). */
        | ADD_NODE fresh_loc=simple_id_with_loc BEFORE anchor=simple_id
            { let (fresh_id, loc) = fresh_loc in
              (Ast.New_before (fresh_id, anchor), loc) }

        /*   add_node n :> m   */
        /* New node n, positioned relative to the existing node m (AFTER token). */
        | ADD_NODE fresh_loc=simple_id_with_loc AFTER anchor=simple_id
            { let (fresh_id, loc) = fresh_loc in
              (Ast.New_after (fresh_id, anchor), loc) }

        /*   del_feat m.cat   */
        | DEL_FEAT feat_loc=feature_ident_with_loc
            { let (feat_id, loc) = feat_loc in
              (Ast.Del_feat feat_id, loc) }

        /*   m.cat = n.x + "_" + nn.y   */
        /* Feature update: the right-hand side is a non-empty, PLUS-separated list of concat_item. */
        | feat_loc=feature_ident_with_loc EQUAL parts=separated_nonempty_list(PLUS, concat_item)
            { let (feat_id, loc) = feat_loc in
              (Ast.Update_feat (feat_id, parts), loc) }
657 658

/* One operand on the right-hand side of a feature update command
   (e.g. each of  n.x , "_" , nn.y  in  m.cat = n.x + "_" + nn.y ). */
concat_item:
        /* Bare identifier: Ast.parse_simple_or_pointed decides whether it is a
           simple value (kept as a string) or a pointed one "s1.s2"; in the latter
           case the second component is passed through Ast.to_uname. */
        | gi=ID
          {
            let item_of_ident = function
              | Ast.Simple value -> Ast.String_item value
              | Ast.Pointed (s1, s2) -> Ast.Qfn_or_lex_item (s1, Ast.to_uname s2)
            in
            item_of_ident (Ast.parse_simple_or_pointed gi)
          }
        /* Quoted string: used as is. */
        | s=STRING         { Ast.String_item s }
        /* Float literal: stored as text, rendered with the %g conversion
           (NOTE: %g keeps 6 significant digits by default, so longer literals are rounded). */
        | f=FLOAT          { Ast.String_item (Printf.sprintf "%g" f) }


/*=============================================================================================*/
/* ISOLATED PATTERN (grep mode)                                                                */
/*=============================================================================================*/
/* A pattern given on its own: one pos_item, any number of neg_item, then EOF.
   The record built from them is handed to Ast.complete_pattern, whose result
   is the semantic value. */
pattern:
        | pos=pos_item negs=list(neg_item) EOF
            { Ast.complete_pattern {Ast.pat_pos=pos; pat_negs=negs} }

/*=============================================================================================*/
/* Constituent tree (à la Sequoia)                                                             */
/*=============================================================================================*/
phrase_structure_tree:
        | LPAREN t=pst RPAREN  { t }
681