Commit e9609532 authored by POTTIER Francois

Hacked SentenceLexer and SentenceParser. Unsatisfactory for now.

parent 0ddb894b
......@@ -6,6 +6,7 @@
graph unreachable states, too
Add --compile-errors and check that every error is covered at
least once and at most once.
clean up sentenceLexer/sentenceParser, do proper segmentation
* Dans les avantages de Menhir versus ocamlyacc (dans la doc et
sur la page Web), ajouter le back-end Coq, l'API incrémentale
......
......@@ -9,10 +9,10 @@
(* Updates the line counter, which is used in some error messages.
   Two variants of the header and of the [pos_lnum] update appear below:
   one takes only [lexbuf] and advances the line number by 1; the other
   takes an extra argument [n] and advances the line number by [n].
   In both variants, [pos_bol] (the offset of the beginning of the current
   line) is set to [pos.pos_cnum], the current offset.
   NOTE(review): this presumes the call is made right after the line
   break(s) have been consumed -- confirm at each call site. *)
let update_loc lexbuf =
let update_loc lexbuf n =
let pos = lexbuf.lex_curr_p in
lexbuf.lex_curr_p <- { pos with
pos_lnum = pos.pos_lnum + 1;
pos_lnum = pos.pos_lnum + n;
pos_bol = pos.pos_cnum;
}
......@@ -33,6 +33,10 @@ let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] (* '\'' forbidden *)
let comment = '#' [^'\010''\013']* newline
let skip = newline whitespace* newline
rule lex = parse
| (lowercase identchar *) as lid
{ try
......@@ -44,20 +48,57 @@ rule lex = parse
with Not_found ->
error1 lexbuf (Printf.sprintf "\"%s\" is not a known non-terminal symbol." lid)
}
(* An identifier that begins with an uppercase letter is considered a
terminal symbol. *)
| (uppercase identchar *) as uid
{ try
TERMINAL (Terminal.lookup uid)
with Not_found ->
error1 lexbuf (Printf.sprintf "\"%s\" is not a known terminal symbol." uid)
}
(* Whitespace is ignored. *)
| whitespace
{ lex lexbuf }
{ lexbuf.lex_start_p <- lexbuf.lex_curr_p; lex lexbuf }
(* The end of a line is translated to [EOL]. *)
| newline
{ update_loc lexbuf; EOL }
{ update_loc lexbuf 1; EOL }
(* A comment is ignored. *)
| comment
{ update_loc lexbuf 1; lexbuf.lex_start_p <- lexbuf.lex_curr_p; lex lexbuf }
(* The end of file is translated to [EOF]. *)
| eof
{ EOF }
(* A colon. *)
| ':'
{ COLON }
| _
{ error1 lexbuf "unexpected character(s)." }
{ error1 lexbuf "unexpected character." }
(* [block buffer] appends raw text to [buffer] and returns the accumulated
   text once a blank line or the end of the input is reached. No branch
   reports an error, so [block] cannot fail. *)
and block buffer = parse
  (* A line break, optional whitespace, then another line break: this is a
     blank line, which closes the block. Only the first line break is kept
     in the text; the line counter moves forward by two. *)
  | (newline as first_break) whitespace* newline
      { update_loc lexbuf 2;
        Buffer.add_string buffer first_break;
        Buffer.contents buffer }
  (* The end of the input closes the block as well. *)
  | eof
      { Buffer.contents buffer }
  (* Any other line break is part of the text.
     (Comments have no special syntax inside a block.) *)
  | newline as line_break
      { update_loc lexbuf 1;
        Buffer.add_string buffer line_break;
        block buffer lexbuf }
  (* So is any other character. *)
  | _ as ch
      { Buffer.add_char buffer ch;
        block buffer lexbuf }
(* [skip] discards leading whitespace, line breaks and comments, and stops
   in front of the first character that is none of these (or at the end of
   the input). It returns [()]. No branch reports an error, so [skip]
   cannot fail. *)
and skip = parse
  (* Whitespace is discarded. [lex_start_p] is moved forward so that the
     discarded text is not counted as part of the next token's extent. *)
  | whitespace
      { lexbuf.lex_start_p <- lexbuf.lex_curr_p; skip lexbuf }
  (* A line break, or a comment (which extends through its line break), is
     discarded, and the line counter is advanced by one. *)
  | newline
  | comment
      { update_loc lexbuf 1; lexbuf.lex_start_p <- lexbuf.lex_curr_p; skip lexbuf }
  (* Anything else causes us to stop. The empty pattern [""] is used rather
     than [_]: [_] would consume the character in front of us, and the
     first character of whatever follows would be lost to the next lexer
     call. The empty pattern matches without consuming any input. Because
     ocamllex prefers the longest match, this branch is selected only when
     none of the branches above applies. *)
  | ""
  | eof
      { () }
/* This parser is used to read the sentences provided on the standard input
channel when [--interpret] is enabled. */
/* A sentence is a pair of an optional non-terminal start symbol and a list
of terminal symbols. */
/* It is used also to read a [.messages] file. This is two parsers in one. */
/* ------------------------------------------------------------------------ */
/* Tokens. */
%token COLON EOF EOL
%token<Grammar.Terminal.t> TERMINAL
%token<Grammar.Nonterminal.t> NONTERMINAL
/* A block of text, preceded with '=' and terminated with a blank line. */
%token<string> BLOCK
/* ------------------------------------------------------------------------ */
/* Types. */
%{
(* Type abbreviations used in the [%type] declarations and semantic actions
   of this grammar. *)
open Grammar
(* A list of terminal symbols. *)
type terminals = Terminal.t list
(* A sentence: an optional non-terminal start symbol, paired with a list of
   terminal symbols. *)
type sentence = Nonterminal.t option * terminals
(* A sentence paired with positions in the input. *)
type located_sentence = Positions.positions * sentence
(* The text attached to an entry; it is the contents of a [BLOCK] token. *)
type message = string
(* An entry: a list of located sentences, paired with a message. *)
type entry = located_sentence list * message
(* The contents of a file: a list of entries. *)
type file = entry list
%}
%type <(Grammar.Nonterminal.t option * Grammar.Terminal.t list) option> sentence
%start sentence
%type <terminals> terminals
%type <sentence> sentence
%type <located_sentence> located_sentence
%type <entry> entry
%type <file> entries
/* %start <sentence option> optional_sentence */
%type <(Grammar.Nonterminal.t option * Grammar.Terminal.t list) option> optional_sentence
%start optional_sentence
/* %start <file> file */
%type <((Positions.positions * (Grammar.Nonterminal.t option * Grammar.Terminal.t list)) list * string) list> file
%start file
/* %start<located_sentence list> entry1 */
%type <(Positions.positions * (Grammar.Nonterminal.t option * Grammar.Terminal.t list)) list> entry1
%start entry1
%%
sentence:
/* ------------------------------------------------------------------------ */
/* A file consists of a list of entries followed by the end of the input.
   Its semantic value is the list of entries. */
file:
  entries EOF
    { $1 }
/* A possibly empty list of entries, built by consing each entry in front
   of the entries that follow it. */
entries:
  /* epsilon */
    { [] }
| entry entries
    { $1 :: $2 }
/* An entry pairs the located sentences recognized by [entry1] with the
   block of text that follows them. */
entry:
  entry1 BLOCK
    { ($1, $2) }

/* [entry1] is one located sentence followed by a possibly empty list of
   further located sentences. As a bit of a hack, a bare end-of-file is
   accepted as well and yields the empty list. */
entry1:
  located_sentence located_sentences
    { $1 :: $2 }
| EOF
    { [] }
/* A possibly empty list of located sentences. */
located_sentences:
  /* epsilon */
    { [] }
| located_sentence located_sentences
    { $1 :: $2 }
/* A located sentence is a non-empty sentence together with the positions
   of its first and last symbols. (Empty sentences are excluded because
   blank lines serve as delimiters.) */
located_sentence:
  nonempty_sentence
    { let start_pos = Parsing.symbol_start_pos ()
      and end_pos = Parsing.symbol_end_pos () in
      (Positions.two start_pos end_pos, $1) }
/* An optional sentence: the end of the input yields [None], and a sentence
   yields [Some] of that sentence. */
optional_sentence:
  EOF
    { None }
| sentence
    { Some $1 }
/* A sentence is a pair of an optional non-terminal start symbol and a list
of terminal symbols. It is terminated by a newline. */
sentence:
| NONTERMINAL COLON terminals EOL
{ Some (Some $1, $3) }
{ Some $1, $3 }
| terminals EOL
{ Some (None, $1) }
{ None, $1 }
/* A non-empty sentence has the same shape as a sentence (an optional
   non-terminal start symbol and a list of terminal symbols), except that
   the list of terminal symbols must contain at least one symbol: a
   TERMINAL is required in front of [terminals]. It is terminated by a
   newline. */
nonempty_sentence:
| NONTERMINAL COLON TERMINAL terminals EOL
{ Some $1, $3 :: $4 }
| TERMINAL terminals EOL
{ None, $1 :: $2 }
/* A list of terminal symbols. */
terminals:
|
{ [] }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment