Commit cca55abd authored by POTTIER Francois's avatar POTTIER Francois
parents 59cfddc9 0a0f45f1
...@@ -2,9 +2,6 @@ ...@@ -2,9 +2,6 @@
* Generate default printers for terminal and nonterminal. * Generate default printers for terminal and nonterminal.
* [Printers] could use [print_element_as_symbol] as the default
value of [print_element].
* IncrementalEngine: document [lr1state], [element], [view]. * IncrementalEngine: document [lr1state], [element], [view].
Document the Inspection interface. Document the Inspection interface.
......
DEMOS := calc calc-two calc-param calc-incremental DEMOS := calc calc-two calc-param calc-incremental calc-inspection
.PHONY: all clean .PHONY: all clean
......
...@@ -9,7 +9,7 @@ endif ...@@ -9,7 +9,7 @@ endif
# We assume that menhirLib has been installed in such a # We assume that menhirLib has been installed in such a
# way that ocamlfind knows about it. # way that ocamlfind knows about it.
MENHIRFLAGS := --table --inspection MENHIRFLAGS := --table
# -- infer is automatically added by ocamlbuild. # -- infer is automatically added by ocamlbuild.
OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib
......
(* Introduce a short name for the incremental parser API. *) (* A short name for the incremental parser API. *)
module I = module I =
Parser.MenhirInterpreter Parser.MenhirInterpreter
(* A custom symbol printer. *) (* The loop which drives the parser. At each iteration, we analyze a
result produced by the parser, and act in an appropriate manner. *)
let print_symbol symbol = (* [lexbuf] is the lexing buffer. [result] is the last result produced
let open I in by the parser. *)
match symbol with
| X (T T_TIMES) ->
"*"
| X (T T_RPAREN) ->
")"
| X (T T_PLUS) ->
"+"
| X (T T_MINUS) ->
"-"
| X (T T_LPAREN) ->
"("
| X (T T_INT) ->
"INT"
| X (N N_expr) ->
"expr"
| X (N N_main) ->
"main"
| X (T T_EOL) ->
"EOL"
| X (T T_DIV) ->
"/"
| X (T T_error) ->
"error"
module P = let rec loop lexbuf (result : int I.result) =
Printers.Make(I) (struct
let arrow = " -> "
let dot = "."
let space = " "
let print_symbol = print_symbol
end)
(* A custom element printer. *)
let print_element e : string =
match e with
| I.Element (s, v, _, _) ->
let open I in
match incoming_symbol s with
| T T_TIMES ->
"*"
| T T_RPAREN ->
")"
| T T_PLUS ->
"+"
| T T_MINUS ->
"-"
| T T_LPAREN ->
"("
| T T_INT ->
string_of_int v
| N N_expr ->
string_of_int v
| N N_main ->
string_of_int v
| T T_EOL ->
""
| T T_DIV ->
"/"
| T T_error ->
"error"
(* Debugging. *)
let dump env =
Printf.fprintf stderr "Stack height: %d\n%!" (I.length (I.view env));
Printf.fprintf stderr "Stack view:\n%s\n%!" (P.print_env print_element env);
begin match Lazy.force (I.view env) with
| I.Nil ->
()
| I.Cons (I.Element (current, _, _, _), _) ->
Printf.fprintf stderr "Current state: %d\n%!" (Obj.magic current);
let items = I.items current in
Printf.fprintf stderr "#Items: %d\n%!" (List.length items);
List.iter (fun item ->
Printf.fprintf stderr "%s\n%!" (P.print_item item)
) items
end;
print_newline()
(* Define the loop which drives the parser. At each iteration,
we analyze a result produced by the parser, and act in an
appropriate manner. *)
let rec loop linebuf (result : int I.result) =
match result with match result with
| I.InputNeeded env -> | I.InputNeeded env ->
dump env;
(* The parser needs a token. Request one from the lexer, (* The parser needs a token. Request one from the lexer,
and offer it to the parser, which will produce a new and offer it to the parser, which will produce a new
result. Then, repeat. *) result. Then, repeat. *)
let token = Lexer.token linebuf in let token = Lexer.token lexbuf in
let startp = linebuf.Lexing.lex_start_p let startp = lexbuf.Lexing.lex_start_p
and endp = linebuf.Lexing.lex_curr_p in and endp = lexbuf.Lexing.lex_curr_p in
let result = I.offer result (token, startp, endp) in let result = I.offer result (token, startp, endp) in
loop linebuf result loop lexbuf result
| I.AboutToReduce (env, prod) -> | I.AboutToReduce (env, prod) ->
dump env;
let result = I.resume result in let result = I.resume result in
loop linebuf result loop lexbuf result
| I.HandlingError env -> | I.HandlingError env ->
(* The parser has suspended itself because of a syntax error. Stop. *) (* The parser has suspended itself because of a syntax error. Stop. *)
Printf.fprintf stderr Printf.fprintf stderr
"At offset %d: syntax error.\n%!" "At offset %d: syntax error.\n%!"
(Lexing.lexeme_start linebuf) (Lexing.lexeme_start lexbuf)
| I.Accepted v -> | I.Accepted v ->
(* The parser has succeeded and produced a semantic value. Print it. *) (* The parser has succeeded and produced a semantic value. Print it. *)
Printf.printf "%d\n%!" v Printf.printf "%d\n%!" v
...@@ -123,9 +39,9 @@ let rec loop linebuf (result : int I.result) = ...@@ -123,9 +39,9 @@ let rec loop linebuf (result : int I.result) =
(* Initialize the lexer, and catch any exception raised by the lexer. *) (* Initialize the lexer, and catch any exception raised by the lexer. *)
let process (line : string) = let process (line : string) =
let linebuf = Lexing.from_string line in let lexbuf = Lexing.from_string line in
try try
loop linebuf (Parser.Incremental.main()) loop lexbuf (Parser.Incremental.main())
with with
| Lexer.Error msg -> | Lexer.Error msg ->
Printf.fprintf stderr "%s%!" msg Printf.fprintf stderr "%s%!" msg
......
.PHONY: all clean test
# Find Menhir.
ifndef MENHIR
MENHIR := $(shell ../find-menhir.sh)
endif
# We use the table back-end, and link against menhirLib.
# We assume that menhirLib has been installed in such a
# way that ocamlfind knows about it.
MENHIRFLAGS := --table --inspection -v -la 2
# -- infer is automatically added by ocamlbuild.
OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib
MAIN := calc
all:
$(OCAMLBUILD) $(MAIN).native
clean:
rm -f *~
$(OCAMLBUILD) -clean
test: all
@echo "The following command should print 42:"
echo "(1 + 2 * 10) * 2" | ./$(MAIN).native
module Make
(I : MenhirLib.IncrementalEngine.EVERYTHING)
(User : sig
val print: string -> unit
val print_symbol: I.xsymbol -> unit
val print_element: (I.element -> unit) option
end)
= struct
let arrow = " -> "
let dot = "."
let space = " "
let newline = "\n"
open User
(* Printing a list of symbols. An optional dot is printed at offset
[i] into the list [symbols], if this offset lies between [0] and
the length of the list (included). *)
let rec print_symbols i symbols =
if i = 0 then begin
print dot;
print space;
print_symbols (-1) symbols
end
else begin
match symbols with
| [] ->
()
| symbol :: symbols ->
print_symbol symbol;
print space;
print_symbols (i - 1) symbols
end
(* Printing an element as a symbol. *)
let print_element_as_symbol element =
match element with
| I.Element (s, _, _, _) ->
print_symbol (I.X (I.incoming_symbol s))
(* Some of the functions that follow need an element printer. They use
[print_element] if provided by the user; otherwise they use
[print_element_as_symbol]. *)
let print_element =
match print_element with
| Some print_element ->
print_element
| None ->
print_element_as_symbol
(* Printing a stack as a list of symbols. *)
let print_stack stack =
I.foldr (fun element () ->
print_element element;
print space
) stack ()
(* Printing an item. *)
let print_item (prod, i) =
print_symbol (I.lhs prod);
print arrow;
print_symbols i (I.rhs prod);
print newline
(* Printing a production (without a dot). *)
let print_production prod =
print_item (prod, -1)
(* Printing the current LR(1) state. *)
let print_current_state env =
print "Current LR(1) state: ";
match Lazy.force (I.stack env) with
| I.Nil ->
print "<some initial state>";
print newline
| I.Cons (I.Element (current, _, _, _), _) ->
print (string_of_int (Obj.magic current)); (* TEMPORARY safe conversion needed *)
print newline;
List.iter print_item (I.items current)
end
module Make
(I : MenhirLib.IncrementalEngine.EVERYTHING)
(User : sig
(* [print s] is supposed to send the string [s] to some output channel. *)
val print: string -> unit
(* [print_symbol s] is supposed to print a representation of the symbol [s]. *)
val print_symbol: I.xsymbol -> unit
(* [print_element e] is supposed to print a representation of the element [e].
This function is optional; if it is not provided, [print_element_as_symbol]
(defined below) is used instead. *)
val print_element: (I.element -> unit) option
end)
: sig
(* Printing an element as a symbol. This prints just the symbol
that this element represents; nothing more. *)
val print_element_as_symbol: I.element -> unit
(* Printing a stack as a list of elements. This function needs an element
printer. It uses [print_element] if provided by the user; otherwise
it uses [print_element_as_symbol]. *)
val print_stack: I.stack -> unit
(* Printing an item. (Ending with a newline.) *)
val print_item: I.item -> unit
(* Printing a production. (Ending with a newline.) *)
val print_production: I.production -> unit
(* Printing the current LR(1) state. The current state is first displayed
as a number; then the list of its LR(0) items is printed. (Ending with
a newline.) *)
val print_current_state: I.env -> unit
end
This variant of the calc demo uses Menhir with the --table and --inspection
options. It illustrates how to inspect the intermediate states produced by
the parser.
(* A short name for the incremental parser API. *)
module I =
Parser.MenhirInterpreter
(* A custom symbol printer. *)
let print_symbol symbol =
let open I in
match symbol with
| X (T T_TIMES) ->
"*"
| X (T T_RPAREN) ->
")"
| X (T T_PLUS) ->
"+"
| X (T T_MINUS) ->
"-"
| X (T T_LPAREN) ->
"("
| X (T T_INT) ->
"INT"
| X (N N_expr) ->
"expr"
| X (N N_main) ->
"main"
| X (T T_EOL) ->
"EOL"
| X (T T_DIV) ->
"/"
| X (T T_error) ->
"error"
(* A custom element printer. *)
let print_element e : string =
match e with
| I.Element (s, v, _, _) ->
let open I in
match incoming_symbol s with
| T T_TIMES ->
"*"
| T T_RPAREN ->
")"
| T T_PLUS ->
"+"
| T T_MINUS ->
"-"
| T T_LPAREN ->
"("
| T T_INT ->
string_of_int v
| N N_expr ->
string_of_int v
| N N_main ->
string_of_int v
| T T_EOL ->
""
| T T_DIV ->
"/"
| T T_error ->
"error"
(* TEMPORARY comment *)
module P =
Printers.Make(I) (struct
let print s = Printf.fprintf stderr "%s" s
let print_symbol s = print (print_symbol s)
let print_element = Some (fun s -> print (print_element s))
end)
(* Debugging. *)
let dump env =
P.print_stack (I.stack env);
Printf.fprintf stderr "\n%!";
P.print_current_state env;
Printf.fprintf stderr "\n%!"
(* The loop which drives the parser. At each iteration, we analyze a
result produced by the parser, and act in an appropriate manner. *)
(* [lexbuf] is the lexing buffer. [result] is the last result produced
by the parser. *)
let rec loop lexbuf (result : int I.result) =
match result with
| I.InputNeeded env ->
dump env;
(* The parser needs a token. Request one from the lexer,
and offer it to the parser, which will produce a new
result. Then, repeat. *)
let token = Lexer.token lexbuf in
let startp = lexbuf.Lexing.lex_start_p
and endp = lexbuf.Lexing.lex_curr_p in
let result = I.offer result (token, startp, endp) in
loop lexbuf result
| I.AboutToReduce (env, prod) ->
dump env;
let result = I.resume result in
loop lexbuf result
| I.HandlingError env ->
(* The parser has suspended itself because of a syntax error. Stop. *)
Printf.fprintf stderr
"At offset %d: syntax error.\n%!"
(Lexing.lexeme_start lexbuf)
| I.Accepted v ->
(* The parser has succeeded and produced a semantic value. Print it. *)
Printf.printf "%d\n%!" v
| I.Rejected ->
(* The parser rejects this input. This cannot happen, here, because
we stop as soon as the parser reports [HandlingError]. *)
assert false
(* Initialize the lexer, and catch any exception raised by the lexer. *)
let process (line : string) =
let lexbuf = Lexing.from_string line in
try
loop lexbuf (Parser.Incremental.main())
with
| Lexer.Error msg ->
Printf.fprintf stderr "%s%!" msg
(* The rest of the code is as in the [calc] demo. *)
let process (optional_line : string option) =
match optional_line with
| None ->
()
| Some line ->
process line
let rec repeat channel =
(* Attempt to read one line. *)
let optional_line, continue = Lexer.line channel in
process optional_line;
if continue then
repeat channel
let () =
repeat (Lexing.from_channel stdin)
{
open Parser
exception Error of string
}
(* This rule looks for a single line, terminated with '\n' or eof.
It returns a pair of an optional string (the line that was found)
and a Boolean flag (false if eof was reached). *)
rule line = parse
| ([^'\n']* '\n') as line
(* Normal case: one line, no eof. *)
{ Some line, true }
| eof
(* Normal case: no data, eof. *)
{ None, false }
| ([^'\n']+ as line) eof
(* Special case: some data but missing '\n', then eof.
Consider this as the last line, and add the missing '\n'. *)
{ Some (line ^ "\n"), false }
(* This rule analyzes a single line and turns it into a stream of
tokens. *)
and token = parse
| [' ' '\t']
{ token lexbuf }
| '\n'
{ EOL }
| ['0'-'9']+ as i
{ INT (int_of_string i) }
| '+'
{ PLUS }
| '-'
{ MINUS }
| '*'
{ TIMES }
| '/'
{ DIV }
| '('
{ LPAREN }
| ')'
{ RPAREN }
| _
{ raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) }
%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL
%left PLUS MINUS /* lowest precedence */
%left TIMES DIV /* medium precedence */
%nonassoc UMINUS /* highest precedence */
%start <int> main
%%
main:
| e = expr EOL
{ e }
expr:
| i = INT
{ i }
| LPAREN e = expr RPAREN
{ e }
| e1 = expr PLUS e2 = expr
{ e1 + e2 }
| e1 = expr MINUS e2 = expr
{ e1 - e2 }
| e1 = expr TIMES e2 = expr
{ e1 * e2 }
| e1 = expr DIV e2 = expr
{ e1 / e2 }
| MINUS e = expr %prec UMINUS
{ - e }
*.dvi
*.aux *.aux
*.bbl *.bbl
*.blg *.blg
......
...@@ -405,7 +405,10 @@ A declaration of the form: ...@@ -405,7 +405,10 @@ A declaration of the form:
\dparameter \ocamlparam \dparameter \ocamlparam
\end{quote} \end{quote}
causes the entire parser to become parameterized over the \ocaml module causes the entire parser to become parameterized over the \ocaml module
\nt{uid}, that is, to become an \ocaml functor. If a single specification file \nt{uid}, that is, to become an \ocaml functor. The directory
\distrib{demos/calc-param} contains a demo that illustrates the use of this switch.
If a single specification file
contains multiple \dparameter declarations, their order is preserved, so that contains multiple \dparameter declarations, their order is preserved, so that
the module name \nt{uid} introduced by one declaration is effectively in scope the module name \nt{uid} introduced by one declaration is effectively in scope
in the declarations that follow. When two \dparameter declarations originate in the declarations that follow. When two \dparameter declarations originate
...@@ -1822,6 +1825,9 @@ access to the lexer. Instead, when the parser needs the next token, it stops ...@@ -1822,6 +1825,9 @@ access to the lexer. Instead, when the parser needs the next token, it stops
and returns its current state to the user. The user is then responsible for and returns its current state to the user. The user is then responsible for
obtaining this token (typically by invoking the lexer) and resuming the parser obtaining this token (typically by invoking the lexer) and resuming the parser
from that state. from that state.
%
The directory \distrib{demos/calc-incremental} contains a demo that
illustrates the use of the incremental API.
This API is ``incremental'' in the sense that the user has access to a This API is ``incremental'' in the sense that the user has access to a
sequence of the intermediate states of the parser. Assuming that semantic sequence of the intermediate states of the parser. Assuming that semantic
...@@ -1946,9 +1952,12 @@ At this time, because the type \verb+env+ is opaque, the state of the parser ...@@ -1946,9 +1952,12 @@ At this time, because the type \verb+env+ is opaque, the state of the parser
cannot be inspected by the user. We plan to offer an inspection API in the cannot be inspected by the user. We plan to offer an inspection API in the
near future. near future.
% TEMPORARY view % TEMPORARY stack
% TEMPORARY symbol % TEMPORARY symbol
% TEMPORARY document the inspection API
% The directory \distrib{demos/calc-inspection} contains a demo that illustrates the use of the inspection API.
% --------------------------------------------------------------------------------------------------------------------- % ---------------------------------------------------------------------------------------------------------------------
\section{Coq back-end} \section{Coq back-end}
...@@ -2069,8 +2078,7 @@ grammar), then (given sufficient fuel) it is accepted by the parser. ...@@ -2069,8 +2078,7 @@ grammar), then (given sufficient fuel) it is accepted by the parser.
These results imply that the grammar is unambiguous: for every input, there is These results imply that the grammar is unambiguous: for every input, there is
at most one valid interpretation. This is proved by another generated theorem, at most one valid interpretation. This is proved by another generated theorem,
named \verb+unambiguous+. named \verb+Parser.unambiguous+.
% TEMPORARY clarifier où est ce théorème et comment il s'appelle
% jh: Pas besoin de prouver la terminaison pour avoir la non-ambiguïté, car % jh: Pas besoin de prouver la terminaison pour avoir la non-ambiguïté, car
% les cas de non-terminaison ne concernent que les entrées invalides. % les cas de non-terminaison ne concernent que les entrées invalides.
......
...@@ -66,15 +66,6 @@ module type INCREMENTAL_ENGINE = sig ...@@ -66,15 +66,6 @@ module type INCREMENTAL_ENGINE = sig
type 'a lr1state type 'a lr1state
(* An element is a pair of a non-initial state [s] and a semantic value [v]
associated with the incoming symbol of this state. The idea is, the value
[v] was pushed onto the stack just before the state [s] was entered. Thus,
for some type ['a], the type [s] has type ['a lr1state] and the value [v]
has type ['a]. In other words, the type [element] is an existential type. *)