Commit a4611cf8 authored by POTTIER Francois's avatar POTTIER Francois

Always generate a definition of the [token] type, either as ADT or as alias.

This fixes the bug where --table and --external-tokens did not work together.
parent 6ce61536
2015/10/05:
Fixed a bug where inconsistent OCaml code was generated when --table
and --external-tokens were used together. (Reported by Darin Morrison.)
2015/10/05:
In --infer mode, leave the .ml file around (instead of removing it) if
ocamlc fails, so we have a chance to understand what's wrong.
......
......@@ -13,41 +13,63 @@ open CodeBits
let tctoken =
"token"
let ttoken =
TypApp (tctoken, [])
(* This is the conventional name of the token GADT, which describes
the tokens. Same setup as above. *)
let tctokengadt =
"terminal"
let ttokengadt a =
TypApp (tctokengadt, [ a ])
(* This is the conventional name of the data constructors of
the token GADT. *)
let ttokengadtdata token =
"T_" ^ token
(* This is the definition of the type of tokens. (Regardless of
[Settings.token_type_mode], which is examined below.) *)
(* This is the definition of the type of tokens. It is defined as an algebraic
data type, unless [--external-tokens M] is set, in which case it is defined
as an abbreviation for the type [M.token]. *)
let tokentypedef grammar =
let datadefs =
List.map (fun (token, typo) -> {
dataname = token;
datavalparams = (match typo with None -> [] | Some t -> [ TypTextual t ]);
datatypeparams = None
}) (typed_tokens grammar)
let typerhs =
match Settings.token_type_mode with
| Settings.TokenTypeOnly
| Settings.TokenTypeAndCode ->
(* Algebraic data type. *)
TDefSum (
List.map (fun (tok, typo) -> {
dataname = tok;
datavalparams = (match typo with None -> [] | Some t -> [ TypTextual t ]);
datatypeparams = None
}) (typed_tokens grammar)
)
| Settings.CodeOnly m ->
(* Type abbreviation. *)
TAbbrev (TypApp (m ^ "." ^ tctoken, []))
in
[
IIComment "The type of tokens.";
IITypeDecls [{
typename = tctoken;
typeparams = [];
typerhs = TDefSum datadefs;
typerhs;
typeconstraint = None
}]
]
(* This is the definition of the token GADT. Here, the data
constructors have no value argument, but have a type index. *)
(* This is the definition of the token GADT. Here, the data constructors have
no value argument, but have a type index. *)
(* The token GADT is produced only when [Settings.inspection] is true. Thus,
when [Settings.inspection] is false, we remain compatible with old versions
......@@ -58,34 +80,56 @@ let tokentypedef grammar =
[error] token (because this GADT must describe all of the tokens that are
allowed to appear in a production). *)
(* It is defined as a generalized algebraic data type, unless
[--external-tokens M] is set, in which case it is defined as an
abbreviation for the type ['a M.tokengadt]. *)
let tokengadtdef grammar =
assert Settings.inspection;
let errordata = {
dataname = ttokengadtdata "error";
datavalparams = [];
datatypeparams = Some [ tunit ]
(* the [error] token has a semantic value of type [unit] *)
} in
let datadefs =
(* The ordering of this list matters. We want the data constructors
to respect the internal ordering (as determined by [typed_tokens]
in [UnparameterizedSyntax]) of the terminal symbols. This may be
exploited in the table back-end to allow an unsafe conversion
of a data constructor to an integer code. See [t2i] in
[InspectionTableInterpreter]. *)
errordata ::
List.map (fun (token, typo) -> {
dataname = ttokengadtdata token;
datavalparams = [];
datatypeparams = Some [ match typo with None -> tunit | Some t -> TypTextual t ]
}) (typed_tokens grammar)
let param, typerhs =
match Settings.token_type_mode with
| Settings.TokenTypeOnly
| Settings.TokenTypeAndCode ->
(* Generalized algebraic data type. *)
let param = "_" in
param,
TDefSum (
(* The ordering of this list matters. We want the data constructors
to respect the internal ordering (as determined by [typed_tokens]
in [UnparameterizedSyntax]) of the terminal symbols. This may be
exploited in the table back-end to allow an unsafe conversion
of a data constructor to an integer code. See [t2i] in
[InspectionTableInterpreter]. *)
{
dataname = ttokengadtdata "error";
datavalparams = [];
datatypeparams = Some [ tunit ]
(* the [error] token has a semantic value of type [unit] *)
} ::
List.map (fun (token, typo) -> {
dataname = ttokengadtdata token;
datavalparams = [];
datatypeparams = Some [ match typo with None -> tunit | Some t -> TypTextual t ]
}) (typed_tokens grammar)
)
| Settings.CodeOnly m ->
(* Type abbreviation. *)
let param = "a" in
param,
TAbbrev (TypApp (m ^ "." ^ tctokengadt, [ TypVar param ]))
in
[
IIComment "The indexed type of terminal symbols.";
IITypeDecls [{
typename = tctokengadt;
typeparams = [ "_" ];
typerhs = TDefSum datadefs;
typeparams = [ param ];
typerhs;
typeconstraint = None
}]
]
......@@ -134,29 +178,10 @@ let produce_tokentypes grammar =
| Settings.TokenTypeAndCode ->
()
(* Define [tokentypedef] and [tokengadtdef], [tokenprefix]. *)
let tokentypedef grammar =
match Settings.token_type_mode with
| Settings.CodeOnly _ ->
[]
| Settings.TokenTypeAndCode ->
tokentypedef grammar
| Settings.TokenTypeOnly ->
(* This should not happen, as [produce_tokentype] should
have been called first. *)
assert false
let tokengadtdef grammar =
match Settings.token_type_mode with
| Settings.CodeOnly _ ->
[]
| Settings.TokenTypeAndCode ->
tokengadtdef grammar
| Settings.TokenTypeOnly ->
(* This should not happen, as [produce_tokentype] should
have been called first. *)
assert false
(* The token type and the token GADTs can be referred to via a short
(unqualified) name, regardless of how they have been defined (either
directly or as an abbreviation). However, their data constructors must
be qualified if [--external-tokens] is set. *)
let tokenprefix id =
match Settings.token_type_mode with
......@@ -167,24 +192,9 @@ let tokenprefix id =
| Settings.TokenTypeOnly ->
id (* irrelevant, really *)
(* Redefine the name of the [token] type to take a possible
prefix into account. *)
let tctoken =
tokenprefix tctoken
let ttoken =
TypApp (tctoken, [])
let tokendata =
tokenprefix
let tctokengadt =
tokenprefix tctokengadt
let ttokengadt a =
TypApp (tctokengadt, [ a ])
let tokengadtdata token =
tokenprefix (ttokengadtdata token)
......@@ -11,12 +11,12 @@
(* In this module, we also deal with [--only-tokens] and [--external-tokens].
If [--only-tokens] is specified on the command line, [produce_tokentypes]
emits the type definition(s) and exit. If [--external-tokens] is specified,
then the list [tokentypedefs] is empty, and [tokenprefix] produces a
nontrivial prefix. *)
emits the type definition(s) and exit. If [--external-tokens M] is set,
then the token type and the token GADT are defined as abbreviations for
[M.token] and ['a M.terminal]. *)
(* The conventional name of the [token] type, for use by the code
generators. If [--external-tokens] is set, this type is qualified. *)
generators. *)
val ttoken: IL.typ
......@@ -28,8 +28,7 @@ val tokendata: string -> string
(* The conventional name of the [terminal] type, a.k.a. the token GADT. This
is an indexed type (i.e., it has one type parameter). Its data constructors
carry zero value arguments. If [--external-tokens] is set, this type is
qualified. *)
carry zero value arguments. *)
val tctokengadt: string
val ttokengadt: IL.typ -> IL.typ
......@@ -40,10 +39,7 @@ val ttokengadt: IL.typ -> IL.typ
val tokengadtdata: string -> string
(* The definitions of the token type and of the token GADT, for use by the
code generators. Each of these lists may define zero or one type. Indeed,
both lists are empty when [--external-tokens] is set. Otherwise, only the
type [token] is defined always, and the type [terminal] is defined only in
[--inspection] mode. *)
code generators. Each of these lists defines zero or one type. *)
val tokentypedef: UnparameterizedSyntax.grammar -> IL.interface
val tokengadtdef: UnparameterizedSyntax.grammar -> IL.interface
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment