Commit ee15cc4d authored by POTTIER Francois's avatar POTTIER Francois

Merge branch 'francois' of git+ssh://scm.gforge.inria.fr//gitroot/menhir/menhir into francois

parents 69d71231 3b779b8f
......@@ -81,6 +81,7 @@ LIBFILES := \
src/infiniteArray.{ml,mli} \
src/packedIntArray.{ml,mli} \
src/rowDisplacement.{ml,mli} \
src/IncrementalEngine.ml \
src/engineTypes.ml \
src/engine.{ml,mli} \
src/tableFormat.ml \
......
......@@ -4,6 +4,7 @@ In the following, "the Library" refers to the following files:
src/infiniteArray.{ml,mli}
src/packedIntArray.{ml,mli}
src/rowDisplacement.{ml,mli}
src/IncrementalEngine.ml
src/engineTypes.ml
src/engine.{ml,mli}
src/tableFormat.ml
......
* Place INCREMENTAL_ENGINE in a separate file, so as to reduce confusion
and allow including this file in the documentation
* Add an incremental version of the calc demo
* Test incremental interface in conjunction with %parameter
* Include comments in the generated .mli file (Interface)?
* Document the incremental interface
......@@ -17,8 +10,9 @@
change String to Bytes where appropriate
replace let/unless with match/exception in a few places
* Update the demos to use ocamlbuild; remove Makefile.shared;
remove ocamldep.wrapper? remove OMakefiles
* Is it possible to update calc-two and calc-param to use ocamlbuild?
(they need --external-tokens and --only-tokens)
Then, one could remove Makefile.shared and ocamldep.wrapper.
* engine.ml: initial call to [run env true] should be [run env false]
if the initial state has a default reduction on #
......
DEMOS := calc calc-two calc-param
DEMOS := calc calc-two calc-param calc-incremental
.PHONY: all clean
......
# This OMakefile is designed for projects that use Objective Caml,
# ocamllex, menhir, and alphaCaml.
# This OMakefile is meant to be included within a host OMakefile
# that defines the following variables:
#
# GENERATED : a list of the source (.ml and .mli) files
# that are generated (by invoking ocamllex
# or menhir)
#
# MODULES : a list of the modules (without extension)
# that should be linked into the executable
# program. Order is significant.
#
# EXECUTABLE : the base name of the executables that should
# be produced. Suffixes $(BSUFFIX) and $(OSUFFIX)
# will be added to distinguish the bytecode and
# native code versions.
# -------------------------------------------------------------------------
# The host OMakefile can override the following default settings.
# Menhir.
if $(not $(defined MENHIR))
MENHIR = menhir
export
# Parser generation flags.
if $(not $(defined MENHIR_FLAGS))
MENHIR_FLAGS = --infer -v
export
# Include directives for compilation and for linking.
if $(not $(defined PREFIXED_OCAMLINCLUDES))
PREFIXED_OCAMLINCLUDES =
export
# Bytecode compilation flags.
if $(not $(defined BFLAGS))
BFLAGS = -dtypes -g
export
# Native code compilation flags.
if $(not $(defined OFLAGS))
OFLAGS = -dtypes
export
# Menhir-suggested compilation flags.
if $(not $(defined SUGG_FLAGS))
SUGG_FLAGS = $(shell $(MENHIR) $(MENHIR_FLAGS) --suggest-comp-flags 2>/dev/null)
export
# Bytecode link-time flags.
if $(not $(defined BLNKFLAGS))
BLNKFLAGS = -g
export
# Menhir-suggested bytecode link-time flags.
if $(not $(defined SUGG_BLNKFLAGS))
SUGG_BLNKFLAGS = $(shell $(MENHIR) $(MENHIR_FLAGS) --suggest-link-flags-byte 2>/dev/null)
export
# Native code link-time flags.
if $(not $(defined OLNKFLAGS))
OLNKFLAGS =
export
# Menhir-suggested native code link-time flags.
if $(not $(defined SUGG_OLNKFLAGS))
SUGG_OLNKFLAGS = $(shell $(MENHIR) $(MENHIR_FLAGS) --suggest-link-flags-opt 2>/dev/null)
export
# Lexer generation flags.
if $(not $(defined LGFLAGS))
LGFLAGS =
export
# Suffix appended to the name of the bytecode executable.
if $(not $(defined BSUFFIX))
BSUFFIX = .byte
export
# Suffix appended to the name of the native code executable.
if $(not $(defined OSUFFIX))
OSUFFIX =
export
# Access paths for the tools.
if $(not $(defined OCAML))
OCAML = ocaml
export
if $(not $(defined OCAMLC))
if $(which ocamlfind)
OCAMLC = ocamlfind ocamlc
export
elseif $(which ocamlc.opt)
OCAMLC = ocamlc.opt
export
else
OCAMLC = ocamlc
export
export
if $(not $(defined OCAMLOPT))
if $(which ocamlfind)
OCAMLOPT = ocamlfind ocamlopt
export
elseif $(which ocamlopt.opt)
OCAMLOPT = ocamlopt.opt
export
else
OCAMLOPT = ocamlopt
export
export
if $(not $(defined OCAMLDEP))
if $(which ocamlfind)
OCAMLDEP = ocamlfind ocamldep
export
elseif $(which ocamldep.opt)
OCAMLDEP = ocamldep.opt
export
else
OCAMLDEP = ocamldep
export
export
if $(not $(defined OCAMLDEPWRAPPER))
OCAMLDEPWRAPPER = ./ocamldep.wrapper
export
if $(not $(defined OCAMLLEX))
OCAMLLEX = ocamllex
export
if $(not $(defined ALPHACAML))
ALPHACAML = alphaCaml
export
# ----------------------------------------------------------------
# Define an ocamldep wrapper that creates fake generated files so that
# ocamldep can see that these files exist (or are supposed to exist).
# This is required to work around ocamldep's brokenness.
WrapScanner(command) =
$(OCAML) $(OCAMLDEPWRAPPER) $(GENERATED) - $(command)
# ----------------------------------------------------------------
# Dependencies.
.SCANNER: %.cmi: %.mli
WrapScanner($(OCAMLDEP) $<)
.SCANNER: %.cmx %.cmo %.o: %.ml
WrapScanner($(OCAMLDEP) $<)
# ----------------------------------------------------------------
# Compilation.
%.cmi: %.mli
$(OCAMLC) $(PREFIXED_OCAMLINCLUDES) $(BFLAGS) $(SUGG_FLAGS) -c $<
%.cmo: %.ml
$(OCAMLC) $(PREFIXED_OCAMLINCLUDES) $(BFLAGS) $(SUGG_FLAGS) -c $<
%.cmx %.o: %.ml
$(OCAMLOPT) $(PREFIXED_OCAMLINCLUDES) $(OFLAGS) $(SUGG_FLAGS) -c $<
%.ml: %.mll
$(OCAMLLEX) $(LGFLAGS) $<
%.ml %.mli: %.mla
$(ALPHACAML) $<
# ----------------------------------------------------------------
# Linking.
$(EXECUTABLE)$(OSUFFIX): $(addsuffix .cmx, $(MODULES))
$(OCAMLOPT) -o $@ $(PREFIXED_OCAMLINCLUDES) $(OLNKFLAGS) $(SUGG_FLAGS) $(SUGG_OLNKFLAGS) $+
$(EXECUTABLE)$(BSUFFIX): $(addsuffix .cmo, $(MODULES))
$(OCAMLC) -o $@ $(PREFIXED_OCAMLINCLUDES) $(BLNKFLAGS) $(SUGG_FLAGS) $(SUGG_BLNKFLAGS) $+
# ----------------------------------------------------------------
# Menhir: multiple file projects.
# MenhirMulti(target, sources, options) declares how to produce the files
# $(target).ml and $(target).mli by running Menhir on $(sources), passing
# "--base $(target)" together with the extra command line $(options).
# It also declares a scanner rule for these two files: their dependencies
# are obtained by running Menhir in --depend mode through WrapScanner.
MenhirMulti(target, sources, options) =
TARGETS = $(file $(target).ml $(target).mli)
SOURCES = $(file $(sources))
$(TARGETS): $(SOURCES)
$(MENHIR) --ocamlc "$(OCAMLC) $(PREFIXED_OCAMLINCLUDES) $(BFLAGS)" $(MENHIR_FLAGS) --base $(target) $(options) $(SOURCES)
.SCANNER: $(TARGETS): $(SOURCES)
WrapScanner($(MENHIR) --ocamldep "$(OCAMLDEP)" --depend --base $(target) $(options) $(SOURCES))
# Menhir: single file projects.
# MenhirMono(target, options) is MenhirMulti where the only source file
# is $(target).mly.
MenhirMono(target, options) =
MenhirMulti($(target), $(target).mly, $(options))
# Menhir: automatic single file projects.
# This causes every .mly file to be viewed as a single file project.
# For each file that matches *.mly, MenhirMono is called on the file name
# without its suffix, with an empty list of options.
MenhirAuto() =
foreach (target, $(glob *.mly))
MenhirMono($(removesuffix $(target)), $(EMPTY))
# ----------------------------------------------------------------
.PHONY: clean
clean:
/bin/rm -f $(EXECUTABLE)$(BSUFFIX) $(EXECUTABLE)$(OSUFFIX) $(GENERATED)
/bin/rm -f *.cmi *.cmx *.cmo *.o *~ .*~ *.automaton *.conflicts *.annot
# The targets below are commands, not files.
.PHONY: all clean test
# Find Menhir.
# A value of MENHIR supplied by the caller takes precedence over the script.
ifndef MENHIR
MENHIR := $(shell ../find-menhir.sh)
endif
# We use the table back-end, and link against menhirLib.
# We assume that menhirLib has been installed in such a
# way that ocamlfind knows about it.
MENHIRFLAGS := --infer --table
# The ocamlbuild command line, shared by the targets below.
OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)" -package menhirLib
# Base name of the executable: the targets build and run $(MAIN).native.
MAIN := calc
# Build the native-code executable.
all:
$(OCAMLBUILD) $(MAIN).native
# Remove editor backup files, then let ocamlbuild remove what it has built.
clean:
rm -f *~
$(OCAMLBUILD) -clean
# Build the executable, then feed it one sample expression on standard input.
test: all
@echo "The following command should print 42:"
echo "(1 + 2 * 10) * 2" | ./$(MAIN).native
This variant of the calc demo uses Menhir with the --table option.
It also demonstrates how to use the incremental parser interface.
(* [I] abbreviates [Parser.MenhirInterpreter], the module through which the
   code below reaches the incremental parser API ([I.result], [I.offer]). *)
module I = Parser.MenhirInterpreter
(* [loop linebuf result] drives the parser until it stops. Each call inspects
   one [result] handed back by the parser and reacts to it; only the
   [InputNeeded] case goes around the loop again. *)
let rec loop linebuf (result : int I.result) =
  match result with
  | I.Accepted value ->
      (* Success: the parser delivers a semantic value, which is printed. *)
      Printf.printf "%d\n%!" value
  | I.Rejected ->
      (* Unreachable here: this loop gives up as soon as it sees
         [HandlingError], so the parser never gets to reject the input. *)
      assert false
  | I.HandlingError _ ->
      (* A syntax error made the parser suspend itself. Report the offset
         at which the current lexeme starts, and stop. *)
      Printf.eprintf
        "At offset %d: syntax error.\n%!"
        (Lexing.lexeme_start linebuf)
  | I.InputNeeded env ->
      (* The parser wants one more token. Read it first; only then pick up
         the start and end positions that [linebuf] holds for it. *)
      let token = Lexer.token linebuf in
      let { Lexing.lex_start_p = startp; lex_curr_p = endp; _ } = linebuf in
      (* Hand the token to the parser, and examine the result it returns. *)
      loop linebuf (I.offer env (token, startp, endp))
(* [process line] runs the parser over one line of text. A lexical error,
   signalled by [Lexer.Error], is printed on [stderr] instead of escaping. *)
let process (line : string) =
  let linebuf = Lexing.from_string line in
  try loop linebuf (Parser.main_incremental ())
  with Lexer.Error msg -> Printf.eprintf "%s%!" msg
(* The rest of the code is as in the [calc] demo. *)
(* Lift [process] to optional lines: [None] means that there is nothing to
   evaluate. This definition shadows the previous [process]. *)
let process (optional_line : string option) =
  match optional_line with
  | Some line -> process line
  | None -> ()
(* [repeat channel] asks [Lexer.line] for one line at a time and processes
   it, and starts over for as long as the flag returned with the line is
   [true]. *)
let rec repeat channel =
  match Lexer.line channel with
  | optional_line, true ->
      process optional_line;
      repeat channel
  | optional_line, false ->
      (* Last round: process what was read, if anything, and stop. *)
      process optional_line
(* Entry point: run the read-and-process loop over standard input. *)
let () =
  let channel = Lexing.from_channel stdin in
  repeat channel
{
(* The actions of the [token] rule below build values such as [EOL] and
   [INT]; presumably these are the token constructors that the generated
   [Parser] module defines -- confirm against parser.mli. *)
open Parser
(* Raised by the [token] rule when it meets a character that none of its
   patterns accepts. The argument is the error message. *)
exception Error of string
}
(* [line] extracts the next line of input. Its result is a pair: the line
   that was read, if any, always ending in '\n'; and a Boolean flag, which
   is [false] once eof has been reached. *)

rule line = parse
| ([^'\n']* '\n') as text
    (* Ordinary line, properly terminated; eof has not been reached. *)
    { Some text, true }
| eof
    (* Nothing left to read. *)
    { None, false }
| ([^'\n']+ as text) eof
    (* Final line whose '\n' is missing: return it with a '\n' appended. *)
    { Some (text ^ "\n"), false }
(* [token] is the lexer proper: each call consumes the next lexeme of the
   buffer that holds one line and returns the corresponding token. Every
   lexical error is signalled by raising [Error] with a message that ends
   in '\n'. *)

and token = parse
| [' ' '\t']
    (* Blanks carry no meaning: skip them and look for the next token. *)
    { token lexbuf }
| '\n'
    { EOL }
| ['0'-'9']+ as digits
    (* [int_of_string] raises [Failure] when the literal does not fit in an
       [int]. Convert that into [Error], so that an oversized number is
       reported like any other lexical error instead of [Failure] escaping
       from the lexer. *)
    { try INT (int_of_string digits)
      with Failure _ ->
        let offset = Lexing.lexeme_start lexbuf in
        raise (Error (Printf.sprintf "At offset %d: integer literal out of range.\n" offset)) }
| '+'
    { PLUS }
| '-'
    { MINUS }
| '*'
    { TIMES }
| '/'
    { DIV }
| '('
    { LPAREN }
| ')'
    { RPAREN }
| _
    (* Any other character is rejected. *)
    { let offset = Lexing.lexeme_start lexbuf in
      raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" offset)) }
%token <int> INT
%token PLUS MINUS TIMES DIV
%token LPAREN RPAREN
%token EOL
%left PLUS MINUS /* lowest precedence */
%left TIMES DIV /* medium precedence */
%nonassoc UMINUS /* highest precedence */
%start <int> main
%%
(* Start symbol: one expression followed by the end-of-line token. The
   semantic value is that of the expression. *)
main:
| value = expr EOL
    { value }
(* Integer expressions, evaluated on the fly. The productions for the
   binary operators are ambiguous by themselves; the %left and %nonassoc
   declarations at the top of the file, together with the %prec annotation
   on unary minus, settle how they nest. *)
expr:
| n = INT
    { n }
| LPAREN inner = expr RPAREN
    { inner }
| lhs = expr PLUS rhs = expr
    { lhs + rhs }
| lhs = expr MINUS rhs = expr
    { lhs - rhs }
| lhs = expr TIMES rhs = expr
    { lhs * rhs }
| lhs = expr DIV rhs = expr
    { lhs / rhs }
| MINUS operand = expr %prec UMINUS
    { - operand }
# Find Menhir.
ifndef MENHIR
MENHIR := $(shell ../find-menhir.sh)
endif
# Add --table on the next line to use Menhir's table-based back-end.
PGFLAGS := --infer
GENERATED := parser.ml parser.mli lexer.ml tokens.ml tokens.mli
......
GENERATED = parser.ml parser.mli lexer.ml
MODULES = parser lexer calc
EXECUTABLE = calc
OCAMLDEPWRAPPER = ../ocamldep.wrapper
include ../OMakefile.shared
MenhirAuto()
.DEFAULT: $(EXECUTABLE)$(OSUFFIX)
#
# Load the standard configuration.
#
open build/Common
#
# The command-line variables are defined *after* the
# standard configuration has been loaded.
#
DefineCommandVars()
#
# Include the OMakefile in this directory.
#
.SUBDIRS: .
# Find Menhir.
ifndef MENHIR
MENHIR := $(shell ../find-menhir.sh)
endif
# Add --table on the next line to use Menhir's table-based back-end.
PGFLAGS := --infer
GENERATED := tokens.ml tokens.mli algebraic.ml algebraic.mli reverse.ml reverse.mli lexer.ml
......
GENERATED = tokens.ml tokens.mli algebraic.ml algebraic.mli reverse.ml reverse.mli lexer.ml
MODULES = algebraic reverse lexer calc
EXECUTABLE = calc
OCAMLDEPWRAPPER = ../ocamldep.wrapper
include ../OMakefile.shared
MenhirMono(tokens,--only-tokens)
MenhirMulti(algebraic,tokens.mly algebraic.mly,--external-tokens Tokens)
MenhirMulti(reverse,tokens.mly reverse.mly,--external-tokens Tokens)
.DEFAULT: $(EXECUTABLE)$(OSUFFIX)
#
# Load the standard configuration.
#
open build/Common
#
# The command-line variables are defined *after* the
# standard configuration has been loaded.
#
DefineCommandVars()
#
# Include the OMakefile in this directory.
#
.SUBDIRS: .
calc
lexer.ml
parser.ml
parser.mli
*.cmi
*.cmo
*.cmx
*.o
*.d
calc.native
_build
# Add --table on the next line to use Menhir's table-based back-end.
PGFLAGS := --infer
GENERATED := parser.ml parser.mli lexer.ml
MODULES := parser lexer calc
EXECUTABLE := calc
OCAMLDEPWRAPPER := ../ocamldep.wrapper
include ../Makefile.shared
include ../Makefile.auto
# The targets below are commands, not files.
.PHONY: all clean test
# Find Menhir.
# A value of MENHIR supplied by the caller takes precedence over the script.
ifndef MENHIR
MENHIR := $(shell ../find-menhir.sh)
endif
# Flags passed to Menhir through ocamlbuild's -menhir option.
MENHIRFLAGS := --infer
# The ocamlbuild command line, shared by the targets below.
OCAMLBUILD := ocamlbuild -use-ocamlfind -use-menhir -menhir "$(MENHIR) $(MENHIRFLAGS)"
# Base name of the executable: the targets build and run $(MAIN).native.
MAIN := calc
# Build the native-code executable.
all:
$(OCAMLBUILD) $(MAIN).native
# Remove editor backup files, then let ocamlbuild remove what it has built.
clean:
rm -f *~
$(OCAMLBUILD) -clean
# Build the executable, then feed it one sample expression on standard input.
test: all
@echo "The following command should print 42:"
echo "(1 + 2 * 10) * 2" | ./$(MAIN).native
GENERATED = parser.ml parser.mli lexer.ml
MODULES = parser lexer calc
EXECUTABLE = calc
OCAMLDEPWRAPPER = ../ocamldep.wrapper
include ../OMakefile.shared
MenhirAuto()
.DEFAULT: $(EXECUTABLE)$(OSUFFIX)
#
# Load the standard configuration.
#
open build/Common
#
# The command-line variables are defined *after* the
# standard configuration has been loaded.
#
DefineCommandVars()
#
# Include the OMakefile in this directory.
#
.SUBDIRS: .
#!/bin/sh

# This script tries to find the Menhir executable.
# This is useful because we would like the demos
# to work regardless of whether Menhir has been
# installed already.

# A normal user does not need this script. One can
# assume that Menhir has been installed.

# On success, the command to run is printed on standard output and the
# exit status is 0. On failure, standard output is left empty, a message
# is printed on standard error, and the exit status is 1.

# First attempt: find Menhir in the PATH.
# "command -v" is the POSIX way of asking whether a command exists.

if command -v menhir >/dev/null 2>&1 ; then
  echo menhir
  exit 0
fi

# Second attempt: find Menhir in the src directory
# of the Menhir distribution.

# This loop assumes that we are somewhere within
# the Menhir distribution, so by going up, we will
# end up at the root of the distribution.
# If that assumption is wrong, the loop stops at the root of the file
# system, where "cd .." no longer moves, instead of running forever.

while ! [ -d src ] && [ "$(pwd)" != / ] ; do
  cd .. || break
done

LOCAL=src/_stage1/menhir.native
if [ -e "$LOCAL" ] ; then
  echo "$(pwd)/$LOCAL"
  exit 0
fi

# The message goes to standard error, so that a caller which captures our
# output, such as $(shell ...) in a Makefile, does not take it for a command.

echo "Error: could not find Menhir." >&2
exit 1
......@@ -1903,9 +1903,10 @@ factoring out an error matrix, as suggested by Dencker, D
Heuft~\cite{dencker-84}.
The type-theoretic tricks that triggered our interest in LR
parsers~\cite{pottier-regis-gianas-typed-lr} are not implemented in \menhir,
because the \ocaml compiler does not currently offer generalized algebraic
data types (GADTs).
parsers~\cite{pottier-regis-gianas-typed-lr} are not implemented in \menhir.
In the beginning, we did not implement them because the \ocaml compiler did
not at the time offer generalized algebraic data types (GADTs). Today, \ocaml
has GADTs, but, as the saying goes, ``if it ain't broke, don't fix it''.
% ---------------------------------------------------------------------------------------------------------------------
......
......@@ -60,7 +60,7 @@ let () =
Random.init seed
let () =
let tks : token stream = produce 5000000 in
let tks : token stream = produce 10000000 in
let tks = fresh (map wrap tks) in
if !dry_run then begin
let _ = find (fun _ -> false) tks in
......
(* This signature describes the incremental LR engine. *)
(* In this mode, the user controls the lexer, and the parser suspends
itself when it needs to read a new token. *)
module type INCREMENTAL_ENGINE = sig
type token
(* The type ['a result] represents an intermediate or final result of the
parser. An intermediate result can be thought of as a suspension: it
records the parser's current state, and allows parsing to be resumed.
The parameter ['a] is the type of the final semantic value that will
be produced if the parser succeeds. *)
(* [InputNeeded] is an intermediate result, which means that the parser
wishes to read one token before continuing. [HandlingError] is also
an intermediate result, which means that the parser has detected and
is trying to handle an error. It does not need more input at this
point; it is suspending itself only in order to give the user an
opportunity to handle this error in a different manner, if desired.
[Accepted] and [Rejected] are final results. *)
(* The type [('a, 'pc) env] is shared by [InputNeeded] and [HandlingError].
As above, the parameter ['a] is the type of the final semantic value.
The phantom type parameter ['pc] is instantiated with [input_needed]
or [handling_error], as appropriate. This prevents the user from
calling [offer] when she should call [handle], or vice-versa. *)
type input_needed
type handling_error
type ('a, 'pc) env
type 'a result =
| InputNeeded of ('a, input_needed) env
| HandlingError of ('a, handling_error) env
| Accepted of 'a
| Rejected
(* [offer] allows the user to resume the parser after it has suspended
itself with a result of the form [InputNeeded env]. [offer]
expects [env] as well as a new token and produces a new result. It
does not raise any exception. *)