Commit 3aee11b9 authored by POTTIER Francois's avatar POTTIER Francois

Cleanup of the handling of keywords in the lexer.

parent 94de7b51
...@@ -10,12 +10,6 @@ type t = { ...@@ -10,12 +10,6 @@ type t = {
be several files. *) be several files. *)
filenames: string list; filenames: string list;
(* A list of keywords that appear in this semantic action, with their
positions. This list is maintained only up to the well-formedness check in
[PartialGrammar.check_keywords]. Thereafter, it is no longer used. So, the
keyword-renaming functions do not bother to update it. *)
pkeywords : keyword Positions.located list;
(* The set of keywords that appear in this semantic action. They can be thought (* The set of keywords that appear in this semantic action. They can be thought
of as free variables that refer to positions. They must be renamed during of as free variables that refer to positions. They must be renamed during
inlining. *) inlining. *)
...@@ -25,17 +19,11 @@ type t = { ...@@ -25,17 +19,11 @@ type t = {
(* Creation. *) (* Creation. *)
let pkeywords_to_keywords pkeywords = let from_stretch s = {
KeywordSet.of_list (List.map Positions.value pkeywords) expr = IL.ETextual s;
filenames = [ s.Stretch.stretch_filename ];
let from_stretch s = keywords = KeywordSet.of_list s.Stretch.stretch_keywords
let pkeywords = s.Stretch.stretch_keywords in }
{
expr = IL.ETextual s;
filenames = [ s.Stretch.stretch_filename ];
pkeywords = pkeywords;
keywords = pkeywords_to_keywords pkeywords;
}
(* Defining a keyword in terms of other keywords. *) (* Defining a keyword in terms of other keywords. *)
...@@ -57,7 +45,6 @@ let compose x a1 a2 = ...@@ -57,7 +45,6 @@ let compose x a1 a2 =
expr = IL.ELet ([ IL.PVar x, a1.expr ], a2.expr); expr = IL.ELet ([ IL.PVar x, a1.expr ], a2.expr);
keywords = KeywordSet.union a1.keywords a2.keywords; keywords = KeywordSet.union a1.keywords a2.keywords;
filenames = a1.filenames @ a2.filenames; filenames = a1.filenames @ a2.filenames;
pkeywords = [] (* don't bother; already checked *)
} }
(* Substitutions, represented as association lists. (* Substitutions, represented as association lists.
...@@ -136,7 +123,6 @@ let rename f phi a = ...@@ -136,7 +123,6 @@ let rename f phi a =
{ {
expr = expr; expr = expr;
filenames = a.filenames; filenames = a.filenames;
pkeywords = []; (* don't bother *)
keywords = keywords; keywords = keywords;
} }
...@@ -149,9 +135,6 @@ let filenames action = ...@@ -149,9 +135,6 @@ let filenames action =
let keywords action = let keywords action =
action.keywords action.keywords
let pkeywords action =
action.pkeywords
let print f action = let print f action =
let module P = Printer.Make (struct let f = f let module P = Printer.Make (struct let f = f
let locate_stretches = None let locate_stretches = None
......
...@@ -47,9 +47,6 @@ val to_il_expr: t -> IL.expr ...@@ -47,9 +47,6 @@ val to_il_expr: t -> IL.expr
the standard library. *) the standard library. *)
val filenames: t -> string list val filenames: t -> string list
(** [pkeywords a] returns a list of all keyword occurrences in [a]. *)
val pkeywords: t -> keyword Positions.located list
(** [keywords a] is the set of keywords used in the semantic action [a]. *) (** [keywords a] is the set of keywords used in the semantic action [a]. *)
val keywords: t -> KeywordSet.t val keywords: t -> KeywordSet.t
......
...@@ -3,6 +3,17 @@ ...@@ -3,6 +3,17 @@
open Lexing open Lexing
open Parser open Parser
open Positions open Positions
open Keyword
(* ------------------------------------------------------------------------ *)
(* Short-hands. *)
let error1 pos =
Error.error (Positions.one pos)
let error2 lexbuf =
Error.error (Positions.two lexbuf.lex_start_p lexbuf.lex_curr_p)
(* ------------------------------------------------------------------------ *) (* ------------------------------------------------------------------------ *)
...@@ -38,99 +49,178 @@ let overwrite content offset c1 c2 = ...@@ -38,99 +49,178 @@ let overwrite content offset c1 c2 =
(* Keyword recognition and construction. *) (* Keyword recognition and construction. *)
type parsed_subject = (* A monster is a spot where we have identified a keyword in concrete syntax.
| PLeft We describe a monster as an object with the following methods: *)
| PRightDollar of int
| PRightNamed of string type monster = {
type parsed_keyword = (* The position of the monster. *)
| PDollar of int pos: Positions.t;
| PPosition of parsed_subject * Keyword.where * Keyword.flavor
| PSyntaxError (* This method is passed an array of (optional) names for the producers,
that is, the elements of the production's right-hand side. It may
(* Check that only allowed indices are used in semantic actions. *) perform some checks and is allowed to fail. *)
let check_producers_indices (producers : string option array) pkeywords = check: string option array -> unit;
List.iter (fun pkeyword ->
match Positions.value pkeyword with (* This method transforms the keyword (in place) into a conventional
| PPosition (PRightDollar 0, Keyword.WhereEnd, _) -> OCaml identifier. This is done by replacing '$', '(', and ')' with
(* As a special case, [$endpos($0)] is allowed. *) '_'. Bloody. The arguments are [ofs1] and [content]. [ofs1] is the
() offset where [content] begins in the source file. *)
| PDollar idx transform: int -> bytes -> unit;
| PPosition (PRightDollar idx, _, _) ->
if not (0 <= idx - 1 && idx - 1 < Array.length producers) then (* This is the keyword, in abstract syntax. *)
Error.error [ Positions.position pkeyword ] keyword: keyword option;
"$%d refers to a nonexistent symbol." idx
}
(* ------------------------------------------------------------------------ *)
(* The [$syntaxerror] monster. *)
let syntaxerror pos : monster =
let check _ = ()
and transform ofs1 content =
(* [$syntaxerror] is replaced with
[(raise _eRR)]. Same length. *)
let pos = start_of_position pos in
let ofs = pos.pos_cnum - ofs1 in
let source = "(raise _eRR)" in
Bytes.blit_string source 0 content ofs (String.length source)
and keyword =
Some SyntaxError
in
{ pos; check; transform; keyword }
(* ------------------------------------------------------------------------ *)
(* We check that every [$i] is within range. Also, we forbid using [$i]
when a producer has been given a name; this is bad style and may be
a mistake. (Plus, this simplies our life, as we rewrite [$i] to [_i],
and we would have to rewrite it to a different identifier otherwise.) *)
let check_dollar pos i producers =
if not (0 <= i - 1 && i - 1 < Array.length producers) then
Error.error [pos] "$%d refers to a nonexistent symbol." i
else
producers.(i - 1) |> Option.iter (fun x ->
Error.error [pos] "please do not say: $%d. Instead, say: %s." i x
)
(* We check that every reference to a producer [x] in a position keyword,
such as [$startpos(x)], exists. *)
let check_producer pos x producers =
if not (List.mem (Some x) (Array.to_list producers)) then
Error.error [pos] "%s refers to a nonexistent symbol." x
(* ------------------------------------------------------------------------ *)
(* The [$i] monster. *)
let dollar pos i : monster =
let check = check_dollar pos i
and transform ofs1 content =
(* [$i] is replaced with [_i]. Thus, it is no longer a keyword. *)
let pos = start_of_position pos in
let ofs = pos.pos_cnum - ofs1 in
overwrite content ofs '$' '_'
and keyword =
None
in
{ pos; check; transform; keyword }
(* ------------------------------------------------------------------------ *)
(* The position-keyword monster. The most horrible of all. *)
let position pos
(where : string)
(flavor : string)
(i : string option) (x : string option)
=
let none _ = () in
let where, ofslpar (* offset of the opening parenthesis, if there is one *) =
match where with
| "start" -> WhereStart, 9
| "end" -> WhereEnd, 7
| _ -> assert false
and flavor =
match flavor with
| "pos" -> FlavorPosition
| "ofs" -> FlavorOffset
| _ -> assert false
in
let subject, check =
match i, x with
| Some i, None ->
let ii = int_of_string i in (* cannot fail *)
if ii = 0 && where = WhereEnd then
(* [$endpos($0)] *)
Before, none
else else
producers.(idx - 1) |> Option.iter (fun x -> (* [$startpos($i)] is rewritten to [$startpos(_i)]. *)
Error.error [ Positions.position pkeyword ] RightNamed ("_" ^ i), check_dollar pos ii
"please do not say: $%d. Instead, say: %s." idx x | None, Some x ->
) (* [$startpos(x)] *)
| _ -> RightNamed x, check_producer pos x
() | None, None ->
) pkeywords (* [$startpos] *)
Left, none
(* In-place transformation of keywords. We turn our keywords into | Some _, Some _ ->
valid OCaml identifiers by replacing '$', '(', and ')' with '_'. assert false
Bloody. *) in
let transform ofs1 content =
let transform_keywords ofs1 (pkeywords : parsed_keyword located list) (content : bytes) =
List.iter (function { value = keyword; position = pos } ->
let pos = start_of_position pos in let pos = start_of_position pos in
let ofs = pos.pos_cnum - ofs1 in let ofs = pos.pos_cnum - ofs1 in
overwrite content ofs '$' '_'; overwrite content ofs '$' '_';
match keyword with let ofslpar = ofs + ofslpar in
| PDollar _ match i, x with
| PPosition (PLeft, _, _) -> | None, Some x ->
() overwrite content ofslpar '(' '_';
| PSyntaxError -> overwrite content (ofslpar + 1 + String.length x) ')' '_'
(* $syntaxerror is replaced with | Some i, None ->
(raise _eRR) *)
let source = "(raise _eRR)" in
Bytes.blit_string source 0 content ofs (String.length source)
| PPosition (subject, where, _) ->
let ofslpar =
match where with
| Keyword.WhereStart ->
ofs + 9
| Keyword.WhereEnd ->
ofs + 7
in
overwrite content ofslpar '(' '_'; overwrite content ofslpar '(' '_';
match subject with overwrite content (ofslpar + 1) '$' '_';
| PLeft -> overwrite content (ofslpar + 2 + String.length i) ')' '_'
assert false | _, _ ->
| PRightDollar i -> ()
overwrite content (ofslpar + 1) '$' '_'; in
overwrite content (ofslpar + 2 + String.length (string_of_int i)) ')' '_' let keyword =
| PRightNamed id -> Some (Position (subject, where, flavor))
overwrite content (ofslpar + 1 + String.length id) ')' '_' in
) pkeywords { pos; check; transform; keyword }
(* In an OCaml header, there should be no keywords. This is just a sanity check. *) (* ------------------------------------------------------------------------ *)
let no_keywords pkeywords = (* In an OCaml header, there should be no monsters. This is just a sanity
match pkeywords with check. *)
let no_monsters monsters =
match monsters with
| [] -> | [] ->
() ()
| { value = _; position = pos } :: _ -> | monster :: _ ->
Error.error [pos] "a Menhir keyword cannot be used in an OCaml header." Error.error [monster.pos]
"a Menhir keyword cannot be used in an OCaml header."
(* ------------------------------------------------------------------------ *)
(* Creates a stretch. *) (* Creates a stretch. *)
let mk_stretch pos1 pos2 parenthesize pkeywords = let mk_stretch pos1 pos2 parenthesize monsters =
(* Read the specified chunk of the file. *) (* Read the specified chunk of the file. *)
let ofs1 = pos1.pos_cnum let ofs1 = pos1.pos_cnum
and ofs2 = pos2.pos_cnum in and ofs2 = pos2.pos_cnum in
let raw_content : string = chunk ofs1 ofs2 in let raw_content : string = chunk ofs1 ofs2 in
(* Transform the keywords, if there are any. (This explicit test (* Transform the monsters, if there are any. (This explicit test
allows saving one string copy and keeping just one live copy.) *) allows saving one string copy and keeping just one live copy.) *)
let content : string = let content : string =
match pkeywords with match monsters with
| [] -> | [] ->
raw_content raw_content
| _ :: _ -> | _ :: _ ->
let content : bytes = Bytes.of_string raw_content in let content : bytes = Bytes.of_string raw_content in
transform_keywords ofs1 pkeywords content; List.iter (fun monster -> monster.transform ofs1 content) monsters;
Bytes.unsafe_to_string content Bytes.unsafe_to_string content
in in
(* Add whitespace so that the column numbers match those of the source file. (* Add whitespace so that the column numbers match those of the source file.
...@@ -142,64 +232,14 @@ let mk_stretch pos1 pos2 parenthesize pkeywords = ...@@ -142,64 +232,14 @@ let mk_stretch pos1 pos2 parenthesize pkeywords =
else else
(String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content (String.make (pos1.pos_cnum - pos1.pos_bol) ' ') ^ content
in in
(* After parsing, every occurrence [$i] is replaced by [_i] in Stretch.({
semantic actions. *) stretch_filename = Error.get_filename();
let rewritten_pkeywords = Keyword.( stretch_linenum = pos1.pos_lnum;
let rewrite_index i = stretch_linecount = pos2.pos_lnum - pos1.pos_lnum;
"_" ^ string_of_int i stretch_content = content;
in stretch_raw_content = raw_content;
let rewrite_subject = function stretch_keywords = Misc.map_opt (fun monster -> monster.keyword) monsters
| PLeft -> Left })
| PRightDollar 0 -> Before
| PRightDollar i -> RightNamed (rewrite_index i)
| PRightNamed n -> RightNamed n
in
Misc.map_opt (fun pk ->
let position = Positions.position pk in
match Positions.value pk with
| PDollar _ -> None
| PPosition (s, w, f) -> Some (Positions.with_pos position (Position (rewrite_subject s, w, f)))
| PSyntaxError -> Some (Positions.with_pos position SyntaxError)
) pkeywords
) in
{
Stretch.stretch_filename = Error.get_filename();
Stretch.stretch_linenum = pos1.pos_lnum;
Stretch.stretch_linecount = pos2.pos_lnum - pos1.pos_lnum;
Stretch.stretch_content = content;
Stretch.stretch_raw_content = raw_content;
Stretch.stretch_keywords = rewritten_pkeywords
}
(* Translates the family of position-related keywords to abstract
syntax. *)
let mk_keyword lexbuf w f n id =
let where =
match w with
| Some _ ->
Keyword.WhereStart
| None ->
Keyword.WhereEnd
and flavor =
match f with
| Some _ ->
Keyword.FlavorPosition
| None ->
Keyword.FlavorOffset
and subject =
match n, id with
| Some n, None ->
PRightDollar (int_of_string n)
| None, Some id ->
PRightNamed id
| None, None ->
PLeft
| Some _, Some _ ->
assert false
in
let keyword = PPosition (subject, where, flavor) in
with_cpos lexbuf keyword
(* ------------------------------------------------------------------------ *) (* ------------------------------------------------------------------------ *)
...@@ -267,16 +307,6 @@ let reserved = ...@@ -267,16 +307,6 @@ let reserved =
]; ];
table table
(* ------------------------------------------------------------------------ *)
(* Short-hands. *)
let error1 pos =
Error.error (Positions.one pos)
let error2 lexbuf =
Error.error (Positions.two lexbuf.lex_start_p lexbuf.lex_curr_p)
} }
(* ------------------------------------------------------------------------ *) (* ------------------------------------------------------------------------ *)
...@@ -295,9 +325,9 @@ let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' ' ...@@ -295,9 +325,9 @@ let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '
let poskeyword = let poskeyword =
'$' '$'
(("start" as w) | "end") (("start" | "end") as where)
(("pos" as f) | "ofs") (("pos" | "ofs") as flavor)
( '(' ( '$' (['0'-'9']+ as n) | ((lowercase identchar*) as id)) ')')? ( '(' ( '$' (['0'-'9']+ as i) | ((lowercase identchar*) as x)) ')')?
let previouserror = let previouserror =
"$previouserror" "$previouserror"
...@@ -384,19 +414,19 @@ rule main = parse ...@@ -384,19 +414,19 @@ rule main = parse
| "%{" | "%{"
{ savestart lexbuf (fun lexbuf -> { savestart lexbuf (fun lexbuf ->
let openingpos = lexeme_end_p lexbuf in let openingpos = lexeme_end_p lexbuf in
let closingpos, pkeywords = action true openingpos [] lexbuf in let closingpos, monsters = action true openingpos [] lexbuf in
no_keywords pkeywords; no_monsters monsters;
HEADER (mk_stretch openingpos closingpos false []) HEADER (mk_stretch openingpos closingpos false [])
) } ) }
| "{" | "{"
{ savestart lexbuf (fun lexbuf -> { savestart lexbuf (fun lexbuf ->
let openingpos = lexeme_end_p lexbuf in let openingpos = lexeme_end_p lexbuf in
let closingpos, pkeywords = action false openingpos [] lexbuf in let closingpos, monsters = action false openingpos [] lexbuf in
ACTION ( ACTION (
fun (producers : string option array) -> fun (producers : string option array) ->
let stretch = mk_stretch openingpos closingpos true pkeywords in List.iter (fun monster -> monster.check producers) monsters;
check_producers_indices producers pkeywords; let stretch = mk_stretch openingpos closingpos true monsters in
Action.from_stretch stretch Action.from_stretch stretch
) )
) } ) }
| eof | eof
...@@ -441,93 +471,92 @@ and ocamltype openingpos = parse ...@@ -441,93 +471,92 @@ and ocamltype openingpos = parse
(* ------------------------------------------------------------------------ *) (* ------------------------------------------------------------------------ *)
(* Collect O'Caml code delimited by curly brackets. Any occurrences of (* Collect O'Caml code delimited by curly brackets. The monsters that are
the special ``$i'' identifiers are recorded in the accumulating encountered along the way are accumulated in the list [monsters]. Nested
parameter [pkeywords]. Nested curly brackets must be properly curly brackets must be properly counted. Nested parentheses are also kept
counted. Nested parentheses are also kept track of, so as to better track of, so as to better report errors when they are not balanced. *)
report errors when they are not balanced. *)
and action percent openingpos pkeywords = parse and action percent openingpos monsters = parse
| '{' | '{'
{ let _, pkeywords = action false (lexeme_end_p lexbuf) pkeywords lexbuf in { let _, monsters = action false (lexeme_end_p lexbuf) monsters lexbuf in
action percent openingpos pkeywords lexbuf } action percent openingpos monsters lexbuf }
| ("}" | "%}") as delimiter | ("}" | "%}") as delimiter
{ match percent, delimiter with { match percent, delimiter with
| true, "%}" | true, "%}"
| false, "}" -> | false, "}" ->
(* This is the delimiter we were instructed to look for. *) (* This is the delimiter we were instructed to look for. *)
lexeme_start_p lexbuf, pkeywords lexeme_start_p lexbuf, monsters
| _, _ -> | _, _ ->
(* This is not it. *) (* This is not it. *)
error1 openingpos "unbalanced opening brace." error1 openingpos "unbalanced opening brace."
} }
| '(' | '('
{ let _, pkeywords = parentheses (lexeme_end_p lexbuf) pkeywords lexbuf in { let _, monsters = parentheses (lexeme_end_p lexbuf) monsters lexbuf in
action percent openingpos pkeywords lexbuf } action percent openingpos monsters lexbuf }
| '$' (['0'-'9']+ as n) | '$' (['0'-'9']+ as i)
{ let pkeyword = with_cpos lexbuf (PDollar (int_of_string n)) in { let monster = dollar (cpos lexbuf) (int_of_string i) in
action percent openingpos (pkeyword :: pkeywords) lexbuf } action percent openingpos (monster :: monsters) lexbuf }
| poskeyword | poskeyword
{ let pkeyword = mk_keyword lexbuf w f n id in { let monster = position (cpos lexbuf) where flavor i x in
action percent openingpos (pkeyword :: pkeywords) lexbuf } action percent openingpos (monster :: monsters) lexbuf }
| previouserror | previouserror
{ error2 lexbuf "$previouserror is no longer supported." } { error2 lexbuf "$previouserror is no longer supported." }
| syntaxerror | syntaxerror
{ let pkeyword = with_cpos lexbuf PSyntaxError in { let monster = syntaxerror (cpos lexbuf) in
action percent openingpos (pkeyword :: pkeywords) lexbuf } action percent openingpos (monster :: monsters) lexbuf }
| '"' | '"'
{ string (lexeme_start_p lexbuf) lexbuf; { string (lexeme_start_p lexbuf) lexbuf;
action percent openingpos pkeywords lexbuf } action percent openingpos monsters lexbuf }
| "'" | "'"
{ char lexbuf; { char lexbuf;
action percent openingpos pkeywords lexbuf } action percent openingpos monsters lexbuf }
| "(*" | "(*"
{ ocamlcomment (lexeme_start_p lexbuf) lexbuf; { ocamlcomment (lexeme_start_p lexbuf) lexbuf;
action percent openingpos pkeywords lexbuf } action percent openingpos monsters lexbuf }
| newline | newline
{ new_line lexbuf; { new_line lexbuf;
action percent openingpos pkeywords lexbuf } action percent openingpos monsters lexbuf }
| ')' | ')'
| eof | eof
{ error1 openingpos "unbalanced opening brace." } { error1 openingpos "unbalanced opening brace." }
| _ | _
{ action percent openingpos pkeywords lexbuf } { action percent openingpos monsters lexbuf }
(* ------------------------------------------------------------------------ *) (* ------------------------------------------------------------------------ *)
and parentheses openingpos pkeywords = parse and parentheses openingpos monsters = parse
| '(' | '('
{ let _, pkeywords = parentheses (lexeme_end_p lexbuf) pkeywords lexbuf in { let _, monsters = parentheses (lexeme_end_p lexbuf) monsters lexbuf in
parentheses openingpos pkeywords lexbuf } parentheses openingpos monsters lexbuf }
| ')' | ')'
{ lexeme_start_p lexbuf, pkeywords } { lexeme_start_p lexbuf, monsters }
| '{' | '{'
{ let _, pkeywords = action false (lexeme_end_p lexbuf) pkeywords lexbuf in