Commit 18b91208 authored by POTTIER Francois's avatar POTTIER Francois
parents 727969d5 bb5a11f1
......@@ -79,11 +79,14 @@ DISTRIBUTED_FILES := AUTHORS CHANGES INSTALLATION LICENSE Makefile demos
LIBFILES := \
src/standard.mly \
src/infiniteArray.{ml,mli} \
src/LinearizedArray.{ml,mli} \
src/packedIntArray.{ml,mli} \
src/rowDisplacement.{ml,mli} \
src/IncrementalEngine.ml \
src/engineTypes.ml \
src/engine.{ml,mli} \
src/inspectionTableFormat.ml \
src/inspectionTableInterpreter.{ml,mli} \
src/tableFormat.ml \
src/tableInterpreter.{ml,mli} \
src/convert.{ml,mli}
......
......@@ -2,11 +2,14 @@ In the following, "the Library" refers to the following files:
src/standard.mly
src/infiniteArray.{ml,mli}
src/LinearizedArray.{ml,mli}
src/packedIntArray.{ml,mli}
src/rowDisplacement.{ml,mli}
src/IncrementalEngine.ml
src/engineTypes.ml
src/engine.{ml,mli}
src/inspectionTableFormat.ml
src/inspectionTableInterpreter.{ml,mli}
src/tableFormat.ml
src/tableInterpreter.{ml,mli}
src/convert.{ml,mli}
......
......@@ -22,8 +22,10 @@
* Suite des patchs de Frédéric Bour.
API d'inspection complète.
Expose nullable/first?
Librairie(s) pour la gestion des erreurs.
Librairie pour la complétion du parsing.
Need a way of feeding the parser a nonterminal symbol.
Peut-on faire un noyau de Merlin indépendant de la grammaire?
* Meilleurs messages d'erreur de syntaxe.
......@@ -80,11 +82,6 @@
utiliser un type abstrait d'intervalles, avec un traitement particulier
de l'intervalle vide. (Voir mon message du 15/09/2011.)
* BUG: le prologue fourni par le programmeur peut cacher notre exception
Error; il suffirait de bouger la ligne "let _eRR = Error" avant le prologue
modifier les deux back-ends si besoin
ajouter un nouveau champ nonrecvaldefs_to_be_printed_before_prologue...
* BUG: message de Valentin Gatien-Baron du 09/01/2010: le bug de --explain
est-il bien le bug connu? peut-on le corriger? ne suffirait-il pas de
passer sous silence les conflits qui ont lieu dans une partie inaccessible
......@@ -101,9 +98,6 @@
syntaxe (magic e : _ -> _). Difficile, en fait, car on applique
souvent magic à toute la pile.
* BUG (Jonathan Kimmit): reproduire le prologue après la définition du
type token -- c'est fait non?
* Implémenter un test de détection de boucles. Si la grammaire contient une
boucle, elle est infiniment ambiguë. Pour le test, calculer tous les
nullables, ajouter une production unité A -> B dès qu'il existe une
......@@ -195,9 +189,6 @@
all conflicts are covered. Allow the user to submit a sentence and
run the parser over it.
* dans sample-grammars, il y a des grammaires qui produisent des
warnings, mais le Makefile ne vérifie pas que ce sont les bons
* changer la licence et y inclure la documentation (distribuée avec
son source)
......@@ -245,27 +236,3 @@
dès que la librairie standard est utilisée (et pas seulement lorsque
%inline est utilisé).
* Le mot-clef $syntaxerror ne fonctionne pas de façon satisfaisante:
lorsque l'action sémantique est évaluée, on a déjà lu le lookahead
token, et l'appel à $syntaxerror oblitère celui-ci en le remplaçant
par error. On voudrait plutôt insérer le token error devant le
lookahead token? Autre problème: en nous ramenant à un état
antérieur de l'automate, $syntaxerror peut très bien provoquer
une non-terminaison (certes on consomme un token, mais le flot
de tokens est infini si le lexer reconnaît eof).
* Would it make sense to allow %start or %type declarations to
concern parameterized nonterminals (or applications thereof)?
* The code that checks well-formedness should emit as many
error messages as possible and die only at the end.
* Autoriser %functorparam et %functionparam (cf. requête de Skaller)
car ces derniers sont parfois plus flexibles. Ou bien garder seulement
%parameter et déclarer le foncteur Make comme récursif, pour pouvoir
l'appeler depuis une action sémantique.
* Les actions semantiques ont-elles officiellement le droit de s'appeler
(ou d'appeler le foncteur Make) recursivement? Si oui, est-ce que ca
fonctionne avec --infer?
module Make
(I : MenhirLib.IncrementalEngine.INSPECTION)
(User : sig
val arrow: string (* should include space on both sides *)
val dot: string
val space: string
val print_symbol: I.xsymbol -> string
end)
= struct
open User
let out =
Buffer.add_string
(* Printing a list of symbols. An optional dot is printed at offset
[i] into the list [symbols], if this offset lies between [0] and
the length of the list (included). *)
let rec buffer_symbols b i symbols =
if i = 0 then begin
out b dot;
out b space;
buffer_symbols b (-1) symbols
end
else begin
match symbols with
| [] ->
()
| symbol :: symbols ->
out b (print_symbol symbol);
out b space;
buffer_symbols b (i - 1) symbols
end
(* Printing an item. *)
let buffer_item b (prod, i) =
out b (print_symbol (I.lhs prod));
out b arrow;
buffer_symbols b i (I.rhs prod)
(* Printing a production (without a dot). *)
let buffer_production b prod =
buffer_item b (prod, -1)
let with_buffer f x =
let b = Buffer.create 128 in
f b x;
Buffer.contents b
let print_item =
with_buffer buffer_item
let print_production =
with_buffer buffer_production
end
module Make
(I : MenhirLib.IncrementalEngine.INSPECTION)
(User : sig
val arrow: string (* should include space on both sides *)
val dot: string
val space: string
val print_symbol: I.xsymbol -> string
end)
: sig
(* Printing an item. *)
val buffer_item: Buffer.t -> I.item -> unit
val print_item: I.item -> string
(* Printing a production. *)
val buffer_production: Buffer.t -> I.production -> unit
val print_production: I.production -> string
end
......@@ -3,6 +3,15 @@
module I =
Parser.MenhirInterpreter
(* TEMPORARY *)
module Essai = (I : sig
include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE
include MenhirLib.IncrementalEngine.INSPECTION
with type 'a lr1state := 'a lr1state
with type production := production
end)
(* The length of a stream. *)
let rec length xs =
......@@ -27,13 +36,54 @@ let rec foldr f xs accu =
let height env =
length (I.view env)
(* Printing a symbol. *)
let print_symbol symbol =
let open I in
match symbol with
| X (T T_TIMES) ->
"*"
| X (T T_RPAREN) ->
")"
| X (T T_PLUS) ->
"+"
| X (T T_MINUS) ->
"-"
| X (T T_LPAREN) ->
"("
| X (T T_INT) ->
"INT"
| X (N N_expr) ->
"expr"
| X (N N_main) ->
"main"
| X (T T_EOL) ->
"EOL"
| X (T T_DIV) ->
"/"
| X (T T_error) ->
"error"
module P =
Printers.Make(I) (struct
let arrow = " -> "
let dot = "."
let space = " "
let print_symbol = print_symbol
end)
(* Printing an element. *)
let print_element e =
match e with
| I.Element (s, v, _, _) ->
print_symbol (I.X (I.incoming_symbol s))
let print_element e : string =
match e with
| I.Element (s, v, _, _) ->
let open Parser.Inspection in
match symbol s with
let open I in
match incoming_symbol s with
| T T_TIMES ->
"*"
| T T_RPAREN ->
......@@ -54,6 +104,8 @@ let print_element e : string =
""
| T T_DIV ->
"/"
| T T_error ->
"error"
(* Printing a stack. *)
......@@ -65,6 +117,24 @@ let print env : string =
) (I.view env) ();
Buffer.contents b
(* Debugging. *)
let dump env =
Printf.fprintf stderr "Stack height: %d\n%!" (height env);
Printf.fprintf stderr "Stack view:\n%s\n%!" (print env);
begin match Lazy.force (I.view env) with
| I.Nil ->
()
| I.Cons (I.Element (current, _, _, _), _) ->
Printf.fprintf stderr "Current state: %d\n%!" (Obj.magic current);
let items = I.items current in
Printf.fprintf stderr "#Items: %d\n%!" (List.length items);
List.iter (fun item ->
Printf.fprintf stderr "%s\n%!" (P.print_item item)
) items
end;
print_newline()
(* Define the loop which drives the parser. At each iteration,
we analyze a result produced by the parser, and act in an
appropriate manner. *)
......@@ -72,25 +142,7 @@ let print env : string =
let rec loop linebuf (result : int I.result) =
match result with
| I.InputNeeded env ->
(* TEMPORARY *)
if true then begin
Printf.fprintf stderr "Stack height: %d\n%!" (height env);
Printf.fprintf stderr "Stack view:\n%s\n%!" (print env);
begin match Lazy.force (I.view env) with
| I.Nil ->
()
| I.Cons (I.Element (current, _, _, _), _) ->
Printf.fprintf stderr "Current state: %d\n%!" (Obj.magic current);
let items : (I.production * int) list = Parser.Inspection.items current in
Printf.fprintf stderr "#Items: %d\n%!" (List.length items);
List.iter (fun (prod, index) ->
let _lhs : Parser.Inspection.xsymbol = Parser.Inspection.lhs prod in
let _rhs : Parser.Inspection.xsymbol list = Parser.Inspection.rhs prod in
(* TEMPORARY print item *)
()
) items
end
end;
dump env;
(* The parser needs a token. Request one from the lexer,
and offer it to the parser, which will produce a new
result. Then, repeat. *)
......@@ -99,7 +151,8 @@ let rec loop linebuf (result : int I.result) =
and endp = linebuf.Lexing.lex_curr_p in
let result = I.offer result (token, startp, endp) in
loop linebuf result
| I.AboutToReduce _ ->
| I.AboutToReduce (env, prod) ->
dump env;
let result = I.resume result in
loop linebuf result
| I.HandlingError env ->
......
......@@ -84,31 +84,91 @@ module type INCREMENTAL_ENGINE = sig
| Nil
| Cons of 'a * 'a stream
(* We offer a read-only view of the parser's state as a stream of elements. *)
(* We offer a view of the parser's state as a stream of elements. *)
val view: env -> element stream
end
(* TEMPORARY comment/document *)
(* This signature is a fragment of the inspection API that is made available
to the user when [--inspection] is used. This fragment contains type
definitions for symbols. *)
module type SYMBOLS = sig
(* The type ['a terminal] represents a terminal symbol. The type ['a
nonterminal] represents a nonterminal symbol. In both cases, the index
['a] represents the type of the semantic values associated with this
symbol. The concrete definitions of these types are generated. *)
type 'a terminal
type 'a nonterminal
(* The type ['a symbol] represents a terminal or nonterminal symbol. It is
the disjoint union of the types ['a terminal] and ['a nonterminal]. *)
type 'a symbol =
| T : 'a terminal -> 'a symbol
| N : 'a nonterminal -> 'a symbol
(* The type [xsymbol] is an existentially quantified version of the type
['a symbol]. This type is useful in situations where the index ['a]
is not statically known. *)
type xsymbol =
| X : 'a symbol -> xsymbol
end
(* This signature describes the inspection API that is made available to the
user when [--inspection] is used. *)
module type INSPECTION = sig
(* The types of symbols are described above. *)
include SYMBOLS
(* The type ['a lr1state] is meant to be the same as in [INCREMENTAL_ENGINE]. *)
type 'a lr1state
(* The type [production] is meant to be the same as in [INCREMENTAL_ENGINE].
It represents a production of the grammar. A production can be examined
via the functions [lhs] and [rhs] below. *)
type production
type 'a symbol
(* An LR(0) item is a pair of a production [prod] and a valid index [i] into
this production. That is, if the length of [rhs prod] is [n], then [i] is
comprised between 0 and [n], inclusive. *)
type xsymbol
type item =
production * int
val symbol: 'a lr1state -> 'a symbol
(* [incoming_symbol s] is the incoming symbol of the state [s], that is,
the symbol that the parser must recognize before (has recognized when)
it enters the state [s]. This function gives access to the semantic
value [v] stored in a stack element [Element (s, v, _, _)]. Indeed,
by case analysis on the symbol [incoming_symbol s], one discovers the
type ['a] of the value [v]. *)
val incoming_symbol: 'a lr1state -> 'a symbol
(* [lhs prod] is the left-hand side of the production [prod]. This is
always a non-terminal symbol. *)
val lhs: production -> xsymbol
(* [rhs prod] is the right-hand side of the production [prod]. This is
a (possibly empty) sequence of (terminal or nonterminal) symbols. *)
val rhs: production -> xsymbol list
val items: 'a lr1state -> (production * int) list
(* [items s] is the set of the LR(0) items in the LR(0) core of the LR(1)
state [s]. This set is presented as a list, in an arbitrary order. *)
val items: 'a lr1state -> item list
end
(* The [entry] array contains offsets into the [data] array. It has [n+1]
elements if the original (unencoded) array has [n] elements. The value
of [entry.(n)] is the length of the [data] array. This convention is
natural and allows avoiding a special case. *)
type 'a t =
(* data: *) 'a array *
(* entry: *) int array
let make (a : 'a array array) : 'a t =
let n = Array.length a in
(* Build the entry array. *)
let size = ref 0 in
let entry = Array.init (n + 1) (fun i ->
let s = !size in
if i < n then
size := s + Array.length a.(i);
s
) in
assert (entry.(n) = !size);
(* Build the data array. *)
let i = ref 0
and j = ref 0 in
let data = Array.init !size (fun _ ->
while !j = Array.length a.(!i) do
i := !i + 1;
j := 0;
done;
let x = a.(!i).(!j) in
j := !j + 1;
x
) in
data, entry
let length ((_, entry) : 'a t) : int =
Array.length entry
let row_length ((_, entry) : 'a t) i : int =
entry.(i + 1) - entry.(i)
let row_length_via get_entry i =
get_entry (i + 1) - get_entry i
let read ((data, entry) as la : 'a t) i j : 'a =
assert (0 <= j && j < row_length la i);
data.(entry.(i) + j)
let read_via get_data get_entry i j =
assert (0 <= j && j < row_length_via get_entry i);
get_data (get_entry i + j)
let write ((data, entry) as la : 'a t) i j (v : 'a) : unit =
assert (0 <= j && j < row_length la i);
data.(entry.(i) + j) <- v
let rec read_interval_via get_data i j =
if i = j then
[]
else
get_data i :: read_interval_via get_data (i + 1) j
let read_row_via get_data get_entry i =
read_interval_via get_data (get_entry i) (get_entry (i + 1))
let read_row ((data, entry) : 'a t) i : 'a list =
read_row_via (Array.get data) (Array.get entry) i
(* An array of arrays (of possibly different lengths!) can be ``linearized'',
i.e., encoded as a data array (by concatenating all of the little arrays)
and an entry array (which contains offsets into the data array). *)
type 'a t =
(* data: *) 'a array *
(* entry: *) int array
(* [make a] turns the array of arrays [a] into a linearized array. *)
val make: 'a array array -> 'a t
(* [read la i j] reads the linearized array [la] at indices [i] and [j].
Thus, [read (make a) i j] is equivalent to [a.(i).(j)]. *)
val read: 'a t -> int -> int -> 'a
(* [write la i j v] writes the value [v] into the linearized array [la]
at indices [i] and [j]. *)
val write: 'a t -> int -> int -> 'a -> unit
(* [length la] is the number of rows of the array [la]. Thus, [length (make
a)] is equivalent to [Array.length a]. *)
val length: 'a t -> int
(* [row_length la i] is the length of the row at index [i] in the linearized
array [la]. Thus, [row_length (make a) i] is equivalent to [Array.length
a.(i)]. *)
val row_length: 'a t -> int -> int
(* [read_row la i] reads the row at index [i], producing a list. Thus,
[read_row (make a) i] is equivalent to [Array.to_list a.(i)]. *)
val read_row: 'a t -> int -> 'a list
(* The following variants read the linearized array via accessors
[get_data : int -> 'a] and [get_entry : int -> int]. *)
val row_length_via:
(* get_entry: *) (int -> int) ->
(* i: *) int ->
int
val read_via:
(* get_data: *) (int -> 'a) ->
(* get_entry: *) (int -> int) ->
(* i: *) int ->
(* j: *) int ->
'a
val read_row_via:
(* get_data: *) (int -> 'a) ->
(* get_entry: *) (int -> int) ->
(* i: *) int ->
'a list
......@@ -20,11 +20,14 @@
# and GNUmakefile in the toplevel directory, and update the file
# menhirLib.mlpack in this directory.
<infiniteArray.cmx>: for-pack(MenhirLib)
<LinearizedArray.cmx>: for-pack(MenhirLib)
<packedIntArray.cmx>: for-pack(MenhirLib)
<rowDisplacement.cmx>: for-pack(MenhirLib)
<IncrementalEngine.cmx>: for-pack(MenhirLib)
<engineTypes.cmx>: for-pack(MenhirLib)
<engine.cmx>: for-pack(MenhirLib)
<inspectionTableFormat.cmx>: for-pack(MenhirLib)
<inspectionTableInterpreter.cmx>: for-pack(MenhirLib)
<tableFormat.cmx>: for-pack(MenhirLib)
<tableInterpreter.cmx>: for-pack(MenhirLib)
<convert.cmx>: for-pack(MenhirLib)
......@@ -1623,6 +1623,8 @@ let program =
SIExcDefs [ excdef ] ::
SIValDefs (false, [ excvaldef ]) ::
interface_to_structure (
tokentypedef grammar
) @
......@@ -1631,8 +1633,6 @@ let program =
SIStretch grammar.preludes ::
SIValDefs (false, [ excvaldef ]) ::
SIValDefs (true,
ProductionMap.fold (fun _ s defs ->
entrydef s :: defs
......
......@@ -62,8 +62,8 @@ module Make (T : TABLE) = struct
(* The following recursive group of functions are tail recursive, produce a
result of type [semantic_value result], and cannot raise an exception. A
semantic action can raise [Accept] or [Error], but these exceptions are
immediately caught within [reduce]. *)
semantic action can raise [Error], but this exception is immediately
caught within [reduce]. *)
let rec run env please_discard : semantic_value result =
......@@ -185,14 +185,23 @@ module Make (T : TABLE) = struct
(* The function [announce_reduce] stops the parser and returns a result
which allows the parser to be resumed by calling [reduce]. *)
(* Only ordinary productions are exposed to the user. Start productions
are not exposed to the user. Reducing a start production simply leads
to the successful termination of the parser. *)
and announce_reduce env (prod : production) =
AboutToReduce (env, prod)
if T.is_start prod then
accept env prod
else
AboutToReduce (env, prod)
(* The function [reduce] takes care of reductions. It is invoked by
[resume] after an [AboutToReduce] event has been produced. *)
(* Here, the lookahead token CAN be [error]. *)
(* The production [prod] CANNOT be a start production. *)
and reduce env (prod : production) =
(* Log a reduction event. *)
......@@ -202,14 +211,11 @@ module Make (T : TABLE) = struct
(* Invoke the semantic action. The semantic action is responsible for
truncating the stack and pushing a new cell onto the stack, which
contains a new semantic value. It can raise [Accept] or [Error]. *)
contains a new semantic value. It can raise [Error]. *)
(* If the semantic action terminates normally, it returns a new stack,
which becomes the current stack. *)
(* If the semantic action raises [Accept], we catch it and produce an
[Accepted] result. *)
(* If the semantic action raises [Error], we catch it and initiate error
handling. *)
......@@ -230,12 +236,18 @@ module Make (T : TABLE) = struct
let env = { env with stack; current } in
run env false
| exception Accept v ->
Accepted v
| exception Error ->
initiate env
and accept env prod =
(* Log an accept event. *)
if log then
Log.reduce_or_accept prod;
(* Extract the semantic value out of the stack. *)
let v = env.stack.semv in
(* Finish. *)
Accepted v
(* --------------------------------------------------------------------------- *)
(* The following functions deal with errors. *)
......@@ -375,7 +387,7 @@ module Make (T : TABLE) = struct
(* In reality, [offer] and [resume] accept an argument of type
[semantic_value result] and produce a result of the same type. The choice
of [semantic_value] is forced by the fact that this is the parameter of
the exception [Accept]. *)
the result [Accepted]. *)
(* We change this as follows. *)
......
......@@ -189,6 +189,10 @@ module type TABLE = sig
val goto: state -> production -> state
(* [is_start prod] tells whether the production [prod] is a start production. *)
val is_start: production -> bool
(* By convention, a semantic action is responsible for:
1. fetching whatever semantic values and positions it needs off the stack;
......@@ -210,15 +214,8 @@ module type TABLE = sig
semantic actions would be variadic: not all semantic actions would have
the same number of arguments. The rest follows rather naturally. *)
(* If production [prod] is an accepting production, then the semantic action
is responsible for raising exception [Accept], instead of returning
normally. This convention allows us to not distinguish between regular
productions and accepting productions. All we have to do is catch that
exception at top level. *)
(* Semantic actions are allowed to raise [Error]. *)
exception Accept of semantic_value
exception Error
type semantic_action =
......
......@@ -576,6 +576,9 @@ module Production = struct
let map f =
Misc.mapi n f
let amap f =
Array.init n f
let iterx f =
for prod = start to n - 1 do
f prod
......@@ -584,6 +587,9 @@ module Production = struct
let foldx f accu =
Misc.foldij start n f accu
let mapx f =
Misc.mapij start n f
(* Printing a production. *)
let print prod =
......
......@@ -294,11 +294,13 @@ module Production : sig
val iter: (index -> unit) -> unit
val fold: (index -> 'a -> 'a) -> 'a -> 'a
val map: (index -> 'a) -> 'a list
val amap: (index -> 'a) -> 'a array
(* Iteration over all productions, except the start productions. *)
val iterx: (index -> unit) -> unit
val foldx: (index -> 'a -> 'a) -> 'a -> 'a
val mapx: (index -> 'a) -> 'a list