Commit cca55abd authored by POTTIER Francois's avatar POTTIER Francois
parents 59cfddc9 0a0f45f1
......@@ -2,9 +2,6 @@
* Generate default printers for terminal and nonterminal.
* [Printers] could use [print_element_as_symbol] as the default
value of [print_element].
* IncrementalEngine: document [lr1state], [element], [view].
Document the Inspection interface.
......
DEMOS := calc calc-two calc-param calc-incremental
DEMOS := calc calc-two calc-param calc-incremental calc-inspection
.PHONY: all clean
......
......@@ -9,7 +9,7 @@ endif
# We assume that menhirLib has been installed in such a
# way that ocamlfind knows about it.
MENHIRFLAGS := --table --inspection
MENHIRFLAGS := --table
# -- infer is automatically added by ocamlbuild.
OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib
......
(* Introduce a short name for the incremental parser API. *)
(* A short name for the incremental parser API. *)
module I =
Parser.MenhirInterpreter
(* A custom symbol printer. *)
(* The loop which drives the parser. At each iteration, we analyze a
result produced by the parser, and act in an appropriate manner. *)
let print_symbol symbol =
let open I in
match symbol with
| X (T T_TIMES) ->
"*"
| X (T T_RPAREN) ->
")"
| X (T T_PLUS) ->
"+"
| X (T T_MINUS) ->
"-"
| X (T T_LPAREN) ->
"("
| X (T T_INT) ->
"INT"
| X (N N_expr) ->
"expr"
| X (N N_main) ->
"main"
| X (T T_EOL) ->
"EOL"
| X (T T_DIV) ->
"/"
| X (T T_error) ->
"error"
(* [lexbuf] is the lexing buffer. [result] is the last result produced
by the parser. *)
module P =
Printers.Make(I) (struct
let arrow = " -> "
let dot = "."
let space = " "
let print_symbol = print_symbol
end)
(* A custom element printer. *)
let print_element e : string =
match e with
| I.Element (s, v, _, _) ->
let open I in
match incoming_symbol s with
| T T_TIMES ->
"*"
| T T_RPAREN ->
")"
| T T_PLUS ->
"+"
| T T_MINUS ->
"-"
| T T_LPAREN ->
"("
| T T_INT ->
string_of_int v
| N N_expr ->
string_of_int v
| N N_main ->
string_of_int v
| T T_EOL ->
""
| T T_DIV ->
"/"
| T T_error ->
"error"
(* Debugging. *)
let dump env =
Printf.fprintf stderr "Stack height: %d\n%!" (I.length (I.view env));
Printf.fprintf stderr "Stack view:\n%s\n%!" (P.print_env print_element env);
begin match Lazy.force (I.view env) with
| I.Nil ->
()
| I.Cons (I.Element (current, _, _, _), _) ->
Printf.fprintf stderr "Current state: %d\n%!" (Obj.magic current);
let items = I.items current in
Printf.fprintf stderr "#Items: %d\n%!" (List.length items);
List.iter (fun item ->
Printf.fprintf stderr "%s\n%!" (P.print_item item)
) items
end;
print_newline()
(* Define the loop which drives the parser. At each iteration,
we analyze a result produced by the parser, and act in an
appropriate manner. *)
let rec loop linebuf (result : int I.result) =
let rec loop lexbuf (result : int I.result) =
match result with
| I.InputNeeded env ->
dump env;
(* The parser needs a token. Request one from the lexer,
and offer it to the parser, which will produce a new
result. Then, repeat. *)
let token = Lexer.token linebuf in
let startp = linebuf.Lexing.lex_start_p
and endp = linebuf.Lexing.lex_curr_p in
let token = Lexer.token lexbuf in
let startp = lexbuf.Lexing.lex_start_p
and endp = lexbuf.Lexing.lex_curr_p in
let result = I.offer result (token, startp, endp) in
loop linebuf result
loop lexbuf result
| I.AboutToReduce (env, prod) ->
dump env;
let result = I.resume result in
loop linebuf result
loop lexbuf result
| I.HandlingError env ->
(* The parser has suspended itself because of a syntax error. Stop. *)
Printf.fprintf stderr
"At offset %d: syntax error.\n%!"
(Lexing.lexeme_start linebuf)
(Lexing.lexeme_start lexbuf)
| I.Accepted v ->
(* The parser has succeeded and produced a semantic value. Print it. *)
Printf.printf "%d\n%!" v
......@@ -123,9 +39,9 @@ let rec loop linebuf (result : int I.result) =
(* Initialize the lexer, and catch any exception raised by the lexer. *)
let process (line : string) =
let linebuf = Lexing.from_string line in
let lexbuf = Lexing.from_string line in
try
loop linebuf (Parser.Incremental.main())
loop lexbuf (Parser.Incremental.main())
with
| Lexer.Error msg ->
Printf.fprintf stderr "%s%!" msg
......
.PHONY: all clean test
# Find Menhir.
ifndef MENHIR
MENHIR := $(shell ../find-menhir.sh)
endif
# We use the table back-end, and link against menhirLib.
# We assume that menhirLib has been installed in such a
# way that ocamlfind knows about it.
MENHIRFLAGS := --table --inspection -v -la 2
# -- infer is automatically added by ocamlbuild.
OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib
MAIN := calc
all:
$(OCAMLBUILD) $(MAIN).native
clean:
rm -f *~
$(OCAMLBUILD) -clean
test: all
@echo "The following command should print 42:"
echo "(1 + 2 * 10) * 2" | ./$(MAIN).native
module Make
(I : MenhirLib.IncrementalEngine.EVERYTHING)
(User : sig
val print: string -> unit
val print_symbol: I.xsymbol -> unit
val print_element: (I.element -> unit) option
end)
= struct
let arrow = " -> "
let dot = "."
let space = " "
let newline = "\n"
open User
(* Printing a list of symbols. An optional dot is printed at offset
[i] into the list [symbols], if this offset lies between [0] and
the length of the list (included). *)
let rec print_symbols i symbols =
if i = 0 then begin
print dot;
print space;
print_symbols (-1) symbols
end
else begin
match symbols with
| [] ->
()
| symbol :: symbols ->
print_symbol symbol;
print space;
print_symbols (i - 1) symbols
end
(* Printing an element as a symbol. *)
let print_element_as_symbol element =
match element with
| I.Element (s, _, _, _) ->
print_symbol (I.X (I.incoming_symbol s))
(* Some of the functions that follow need an element printer. They use
[print_element] if provided by the user; otherwise they use
[print_element_as_symbol]. *)
let print_element =
match print_element with
| Some print_element ->
print_element
| None ->
print_element_as_symbol
(* Printing a stack as a list of symbols. *)
let print_stack stack =
I.foldr (fun element () ->
print_element element;
print space
) stack ()
(* Printing an item. *)
let print_item (prod, i) =
print_symbol (I.lhs prod);
print arrow;
print_symbols i (I.rhs prod);
print newline
(* Printing a production (without a dot). *)
let print_production prod =
print_item (prod, -1)
(* Printing the current LR(1) state. *)
let print_current_state env =
print "Current LR(1) state: ";
match Lazy.force (I.stack env) with
| I.Nil ->
print "<some initial state>";
print newline
| I.Cons (I.Element (current, _, _, _), _) ->
print (string_of_int (Obj.magic current)); (* TEMPORARY safe conversion needed *)
print newline;
List.iter print_item (I.items current)
end
module Make
(I : MenhirLib.IncrementalEngine.EVERYTHING)
(User : sig
(* [print s] is supposed to send the string [s] to some output channel. *)
val print: string -> unit
(* [print_symbol s] is supposed to print a representation of the symbol [s]. *)
val print_symbol: I.xsymbol -> unit
(* [print_element e] is supposed to print a representation of the element [e].
This function is optional; if it is not provided, [print_element_as_symbol]
(defined below) is used instead. *)
val print_element: (I.element -> unit) option
end)
: sig
(* Printing an element as a symbol. This prints just the symbol
that this element represents; nothing more. *)
val print_element_as_symbol: I.element -> unit
(* Printing a stack as a list of elements. This function needs an element
printer. It uses [print_element] if provided by the user; otherwise
it uses [print_element_as_symbol]. *)
val print_stack: I.stack -> unit
(* Printing an item. (Ending with a newline.) *)
val print_item: I.item -> unit
(* Printing a production. (Ending with a newline.) *)
val print_production: I.production -> unit
(* Printing the current LR(1) state. The current state is first displayed
as a number; then the list of its LR(0) items is printed. (Ending with
a newline.) *)
val print_current_state: I.env -> unit
end
This variant of the calc demo uses Menhir with the --table and --inspection
options. It illustrates how to inspect the intermediate states produced by
the parser.
(* A short name for the incremental parser API. *)
module I =
Parser.MenhirInterpreter
(* A custom symbol printer. *)
let print_symbol symbol =
let open I in
match symbol with
| X (T T_TIMES) ->
"*"
| X (T T_RPAREN) ->
")"
| X (T T_PLUS) ->
"+"
| X (T T_MINUS) ->
"-"
| X (T T_LPAREN) ->
"("
| X (T T_INT) ->
"INT"
| X (N N_expr) ->
"expr"
| X (N N_main) ->
"main"
| X (T T_EOL) ->
"EOL"
| X (T T_DIV) ->
"/"
| X (T T_error) ->
"error"
(* A custom element printer. *)
let print_element e : string =
match e with
| I.Element (s, v, _, _) ->
let open I in
match incoming_symbol s with
| T T_TIMES ->
"*"
| T T_RPAREN ->
")"
| T T_PLUS ->
"+"
| T T_MINUS ->
"-"
| T T_LPAREN ->
"("
| T T_INT ->
string_of_int v
| N N_expr ->
string_of_int v
| N N_main ->
string_of_int v
| T T_EOL ->
""
| T T_DIV ->
"/"
| T T_error ->
"error"
(* TEMPORARY comment *)
module P =
Printers.Make(I) (struct
let print s = Printf.fprintf stderr "%s" s
let print_symbol s = print (print_symbol s)
let print_element = Some (fun s -> print (print_element s))
end)
(* Debugging. *)
let dump env =
P.print_stack (I.stack env);
Printf.fprintf stderr "\n%!";
P.print_current_state env;
Printf.fprintf stderr "\n%!"
(* The loop which drives the parser. At each iteration, we analyze a
result produced by the parser, and act in an appropriate manner. *)
(* [lexbuf] is the lexing buffer. [result] is the last result produced
by the parser. *)
let rec loop lexbuf (result : int I.result) =
match result with
| I.InputNeeded env ->
dump env;
(* The parser needs a token. Request one from the lexer,
and offer it to the parser, which will produce a new
result. Then, repeat. *)
let token = Lexer.token lexbuf in
let startp = lexbuf.Lexing.lex_start_p
and endp = lexbuf.Lexing.lex_curr_p in
let result = I.offer result (token, startp, endp) in
loop lexbuf result
| I.AboutToReduce (env, prod) ->
dump env;
let result = I.resume result in
loop lexbuf result
| I.HandlingError env ->
(* The parser has suspended itself because of a syntax error. Stop. *)
Printf.fprintf stderr
"At offset %d: syntax error.\n%!"
(Lexing.lexeme_start lexbuf)
| I.Accepted v ->
(* The parser has succeeded and produced a semantic value. Print it. *)
Printf.printf "%d\n%!" v
| I.Rejected ->
(* The parser rejects this input. This cannot happen, here, because
we stop as soon as the parser reports [HandlingError]. *)
assert false
(* Initialize the lexer, and catch any exception raised by the lexer. *)
let process (line : string) =
let lexbuf = Lexing.from_string line in
try
loop lexbuf (Parser.Incremental.main())
with
| Lexer.Error msg ->
Printf.fprintf stderr "%s%!" msg
(* The rest of the code is as in the [calc] demo. *)
let process (optional_line : string option) =
match optional_line with
| None ->
()
| Some line ->
process line
let rec repeat channel =
(* Attempt to read one line. *)
let optional_line, continue = Lexer.line channel in
process optional_line;
if continue then
repeat channel
let () =
repeat (Lexing.from_channel stdin)
{
open Parser
exception Error of string
}
(* This rule looks for a single line, terminated with '\n' or eof.
It returns a pair of an optional string (the line that was found)
and a Boolean flag (false if eof was reached). *)
rule line = parse
| ([^'\n']* '\n') as line
(* Normal case: one line, no eof. *)
{ Some line, true }
| eof
(* Normal case: no data, eof. *)
{ None, false }
| ([^'\n']+ as line) eof
(* Special case: some data but missing '\n', then eof.
Consider this as the last line, and add the missing '\n'. *)
{ Some (line ^ "\n"), false }
(* This rule analyzes a single line and turns it into a stream of
tokens. *)
and token = parse
| [' ' '\t']
{ token lexbuf }
| '\n'
{ EOL }
| ['0'-'9']+ as i
{ INT (int_of_string i) }
| '+'
{ PLUS }
| '-'
{ MINUS }
| '*'
{ TIMES }
| '/'
{ DIV }
| '('
{ LPAREN }
| ')'
{ RPAREN }
| _
{ raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) }
%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL
%left PLUS MINUS /* lowest precedence */
%left TIMES DIV /* medium precedence */
%nonassoc UMINUS /* highest precedence */
%start <int> main
%%
main:
| e = expr EOL
{ e }
expr:
| i = INT
{ i }
| LPAREN e = expr RPAREN
{ e }
| e1 = expr PLUS e2 = expr
{ e1 + e2 }
| e1 = expr MINUS e2 = expr
{ e1 - e2 }
| e1 = expr TIMES e2 = expr
{ e1 * e2 }
| e1 = expr DIV e2 = expr
{ e1 / e2 }
| MINUS e = expr %prec UMINUS
{ - e }
*.dvi
*.aux
*.bbl
*.blg
......
......@@ -405,7 +405,10 @@ A declaration of the form:
\dparameter \ocamlparam
\end{quote}
causes the entire parser to become parameterized over the \ocaml module
\nt{uid}, that is, to become an \ocaml functor. If a single specification file
\nt{uid}, that is, to become an \ocaml functor. The directory
\distrib{demos/calc-param} contains a demo that illustrates the use of this switch.
If a single specification file
contains multiple \dparameter declarations, their order is preserved, so that
the module name \nt{uid} introduced by one declaration is effectively in scope
in the declarations that follow. When two \dparameter declarations originate
......@@ -1822,6 +1825,9 @@ access to the lexer. Instead, when the parser needs the next token, it stops
and returns its current state to the user. The user is then responsible for
obtaining this token (typically by invoking the lexer) and resuming the parser
from that state.
%
The directory \distrib{demos/calc-incremental} contains a demo that
illustrates the use of the incremental API.
This API is ``incremental'' in the sense that the user has access to a
sequence of the intermediate states of the parser. Assuming that semantic
......@@ -1946,9 +1952,12 @@ At this time, because the type \verb+env+ is opaque, the state of the parser
cannot be inspected by the user. We plan to offer an inspection API in the
near future.
% TEMPORARY view
% TEMPORARY stack
% TEMPORARY symbol
% TEMPORARY document the inspection API
% The directory \distrib{demos/calc-inspection} contains a demo that illustrates the use of the inspection API.
% ---------------------------------------------------------------------------------------------------------------------
\section{Coq back-end}
......@@ -2069,8 +2078,7 @@ grammar), then (given sufficient fuel) it is accepted by the parser.
These results imply that the grammar is unambiguous: for every input, there is
at most one valid interpretation. This is proved by another generated theorem,
named \verb+unambiguous+.
% TEMPORARY clarifier où est ce théorème et comment il s'appelle
named \verb+Parser.unambiguous+.
% jh: Pas besoin de prouver la terminaison pour avoir la non-ambiguïté, car
% les cas de non-terminaison ne concernent que les entrées invalides.
......
......@@ -66,15 +66,6 @@ module type INCREMENTAL_ENGINE = sig
type 'a lr1state
(* An element is a pair of a non-initial state [s] and a semantic value [v]
associated with the incoming symbol of this state. The idea is, the value
[v] was pushed onto the stack just before the state [s] was entered. Thus,
for some type ['a], the type [s] has type ['a lr1state] and the value [v]
has type ['a]. In other words, the type [element] is an existential type. *)
type element =
| Element: 'a lr1state * 'a * Lexing.position * Lexing.position -> element
(* A stream is a list whose elements are produced on demand. *)
type 'a stream =
......@@ -92,9 +83,27 @@ module type INCREMENTAL_ENGINE = sig
val foldr: ('a -> 'b -> 'b) -> 'a stream -> 'b -> 'b
(* We offer a view of the parser's state as a stream of elements. *)
(* An element is a pair of a non-initial state [s] and a semantic value [v]
associated with the incoming symbol of this state. The idea is, the value
[v] was pushed onto the stack just before the state [s] was entered. Thus,
for some type ['a], the type [s] has type ['a lr1state] and the value [v]
has type ['a]. In other words, the type [element] is an existential type. *)
type element =
| Element: 'a lr1state * 'a * Lexing.position * Lexing.position -> element
(* The parser's stack is (or, more precisely, can be viewed as) a stream of
elements. *)
type stack =
element stream
(* The parser's stack, a stream of elements, can be examined. This stream is
empty if the parser is in an initial state; otherwise, it is non-empty.
The LR(1) automaton's current state is the one found in the top element
of the stack. *)
val view: env -> element stream
val stack: env -> stack
end
......
......@@ -531,11 +531,14 @@ module Make (T : TABLE) = struct
type element =
| Element: 'a lr1state * 'a * Lexing.position * Lexing.position -> element
type stack =
element stream
(* If [current] is the current state and [cell] is the top stack cell,
then [view cell current] is a view of the parser's state as a stream
then [stack cell current] is a view of the parser's state as a stream
of elements. *)
let rec view cell current : element stream =
let rec