tokenType.ml 6.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
(******************************************************************************)
(*                                                                            *)
(*                                   Menhir                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*              Yann Régis-Gianas, PPS, Université Paris Diderot              *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under the  *)
(*  terms of the GNU General Public License version 2, as described in the    *)
(*  file LICENSE.                                                             *)
(*                                                                            *)
(******************************************************************************)

14 15 16 17
(* This module deals with a few details regarding the definition of
   the [token] type. In particular, if [--only-tokens] was specified,
   it emits the type definition and exits. *)

18
open BasicSyntax
19 20 21 22 23 24 25 26 27 28
open IL
open CodeBits

(* This is the conventional name of the [token] type, with no
   prefix. A prefix is possibly appended to it below, where
   [tctoken] is redefined before being exported. *)

let tctoken =
  "token"

29 30 31
let ttoken =
  TypApp (tctoken, [])

32 33 34 35 36 37
(* This is the conventional name of the token GADT, which describes
   the tokens. Same setup as above. *)

let tctokengadt =
  "terminal"

38 39 40
let ttokengadt a =
  TypApp (tctokengadt, [ a ])

41 42 43 44 45 46
(* This is the conventional name of the data constructors of
   the token GADT. *)

let ttokengadtdata token =
  "T_" ^ token

47 48 49
(* This is the definition of the type of tokens. It is defined as an algebraic
   data type, unless [--external-tokens M] is set, in which case it is defined
   as an abbreviation for the type [M.token]. *)
50

51
let tokentypedef grammar =
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
  let typerhs =
    match Settings.token_type_mode with
    | Settings.TokenTypeOnly
    | Settings.TokenTypeAndCode ->

        (* Algebraic data type. *)

        TDefSum (
          List.map (fun (tok, typo) -> {
            dataname = tok;
            datavalparams = (match typo with None -> [] | Some t -> [ TypTextual t ]);
            datatypeparams = None
          }) (typed_tokens grammar)
        )

    | Settings.CodeOnly m ->

        (* Type abbreviation. *)

        TAbbrev (TypApp (m ^ "." ^ tctoken, []))

73
  in
74 75 76 77 78
  [
    IIComment "The type of tokens.";
    IITypeDecls [{
      typename = tctoken;
      typeparams = [];
79
      typerhs;
80 81 82
      typeconstraint = None
    }]
  ]
83

84 85
(* This is the definition of the token GADT. Here, the data constructors have
   no value argument, but have a type index. *)
86

87 88 89
(* The token GADT is produced only when [Settings.inspection] is true. Thus,
   when [Settings.inspection] is false, we remain compatible with old versions
   of OCaml, without GADTs. *)
90

91 92 93 94 95
(* Although the [token] type does not include the [error] token (because this
   token is never produced by the lexer), the token GADT must include the
   [error] token (because this GADT must describe all of the tokens that are
   allowed to appear in a production). *)

96 97 98 99
(* It is defined as a generalized algebraic data type, unless
   [--external-tokens M] is set, in which case it is defined as an
   abbreviation for the type ['a M.tokengadt]. *)

100
let tokengadtdef grammar =
101
  assert Settings.inspection;
102 103 104 105 106 107 108 109 110 111 112 113
  let param, typerhs =
    match Settings.token_type_mode with
    | Settings.TokenTypeOnly
    | Settings.TokenTypeAndCode ->

        (* Generalized algebraic data type. *)

        let param = "_" in
        param,
        TDefSum (
          (* The ordering of this list matters. We want the data constructors
             to respect the internal ordering (as determined by [typed_tokens]
114
             in [BasicSyntax]) of the terminal symbols. This may be
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
             exploited in the table back-end to allow an unsafe conversion
             of a data constructor to an integer code. See [t2i] in
             [InspectionTableInterpreter]. *)
          {
            dataname = ttokengadtdata "error";
            datavalparams = [];
            datatypeparams = Some [ tunit ]
              (* the [error] token has a semantic value of type [unit] *)
          } ::
          List.map (fun (token, typo) -> {
            dataname = ttokengadtdata token;
            datavalparams = [];
            datatypeparams = Some [ match typo with None -> tunit | Some t -> TypTextual t ]
          }) (typed_tokens grammar)
        )

    | Settings.CodeOnly m ->

        (* Type abbreviation. *)

        let param = "a" in
        param,
        TAbbrev (TypApp (m ^ "." ^ tctokengadt, [ TypVar param ]))

139 140 141 142 143
  in
  [
    IIComment "The indexed type of terminal symbols.";
    IITypeDecls [{
      typename = tctokengadt;
144 145
      typeparams = [ param ];
      typerhs;
146 147 148
      typeconstraint = None
    }]
  ]
149

150 151
(* If we were asked to only produce a type definition, then
   do so and stop. *)
152

153
let produce_tokentypes grammar =
154 155 156 157
  match Settings.token_type_mode with
  | Settings.TokenTypeOnly ->

      (* Create both an .mli file and an .ml file. This is made
158 159
         necessary by the fact that the two can be different
         when there are functor parameters. *)
160

161 162
      let i =
        tokentypedef grammar @
163
        listiflazy Settings.inspection (fun () ->
164 165
          tokengadtdef grammar
        )
166
      in
167

168 169
      let module P =
        Printer.Make (struct
170
                        let f = open_out (Settings.base ^ ".mli")
171 172
                        let locate_stretches = None
                      end)
173
      in
174
      P.interface [
175
        IIFunctor (grammar.parameters, i)
176
      ];
177 178
      let module P =
        Printer.Make (struct
179
                        let f = open_out (Settings.base ^ ".ml")
180 181
                        let locate_stretches = None
                      end)
182
      in
183
      P.program [
184 185 186
        SIFunctor (grammar.parameters,
          interface_to_structure i
        )
187
      ];
188
      exit 0
189 190 191 192 193

  | Settings.CodeOnly _
  | Settings.TokenTypeAndCode ->
      ()

194 195 196 197
(* The token type and the token GADTs can be referred to via a short
   (unqualified) name, regardless of how they have been defined (either
   directly or as an abbreviation). However, their data constructors must
   be qualified if [--external-tokens] is set. *)
198 199 200

let tokenprefix id =
  match Settings.token_type_mode with
201
  | Settings.CodeOnly m ->
202
      m ^ "." ^ id
203
  | Settings.TokenTypeAndCode ->
204 205 206
      id
  | Settings.TokenTypeOnly ->
      id (* irrelevant, really *)
207

208 209 210
let tokendata =
  tokenprefix

211 212 213
let tokengadtdata token =
  tokenprefix (ttokengadtdata token)