tokenType.ml 5.54 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
(* This module deals with a few details regarding the definition of
   the [token] type. In particular, if [--only-tokens] was specified,
   it emits the type definition and exits. *)

open UnparameterizedSyntax
open IL
open CodeBits

(* This is the conventional name of the [token] type, with no
   prefix. A prefix is possibly appended to it below, where
   [tctoken] is redefined before being exported. *)

let tctoken =
  "token"

16 17 18
let ttoken =
  TypApp (tctoken, [])

19 20 21 22 23 24
(* This is the conventional name of the token GADT, which describes
   the tokens. Same setup as above. *)

let tctokengadt =
  "terminal"

25 26 27
let ttokengadt a =
  TypApp (tctokengadt, [ a ])

28 29 30 31 32 33
(* This is the conventional name of the data constructors of
   the token GADT. *)

let ttokengadtdata token =
  "T_" ^ token

34 35 36
(* This is the definition of the type of tokens. It is defined as an algebraic
   data type, unless [--external-tokens M] is set, in which case it is defined
   as an abbreviation for the type [M.token]. *)
37

38
let tokentypedef grammar =
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
  let typerhs =
    match Settings.token_type_mode with
    | Settings.TokenTypeOnly
    | Settings.TokenTypeAndCode ->

        (* Algebraic data type. *)

        TDefSum (
          List.map (fun (tok, typo) -> {
            dataname = tok;
            datavalparams = (match typo with None -> [] | Some t -> [ TypTextual t ]);
            datatypeparams = None
          }) (typed_tokens grammar)
        )

    | Settings.CodeOnly m ->

        (* Type abbreviation. *)

        TAbbrev (TypApp (m ^ "." ^ tctoken, []))

60
  in
61 62 63 64 65
  [
    IIComment "The type of tokens.";
    IITypeDecls [{
      typename = tctoken;
      typeparams = [];
66
      typerhs;
67 68 69
      typeconstraint = None
    }]
  ]
70

71 72
(* This is the definition of the token GADT. Here, the data constructors have
   no value argument, but have a type index. *)
73

74 75 76
(* The token GADT is produced only when [Settings.inspection] is true. Thus,
   when [Settings.inspection] is false, we remain compatible with old versions
   of OCaml, without GADTs. *)
77

78 79 80 81 82
(* Although the [token] type does not include the [error] token (because this
   token is never produced by the lexer), the token GADT must include the
   [error] token (because this GADT must describe all of the tokens that are
   allowed to appear in a production). *)

83 84 85 86
(* It is defined as a generalized algebraic data type, unless
   [--external-tokens M] is set, in which case it is defined as an
   abbreviation for the type ['a M.tokengadt]. *)

87
let tokengadtdef grammar =
88
  assert Settings.inspection;
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
  let param, typerhs =
    match Settings.token_type_mode with
    | Settings.TokenTypeOnly
    | Settings.TokenTypeAndCode ->

        (* Generalized algebraic data type. *)

        let param = "_" in
        param,
        TDefSum (
          (* The ordering of this list matters. We want the data constructors
             to respect the internal ordering (as determined by [typed_tokens]
             in [UnparameterizedSyntax]) of the terminal symbols. This may be
             exploited in the table back-end to allow an unsafe conversion
             of a data constructor to an integer code. See [t2i] in
             [InspectionTableInterpreter]. *)
          {
            dataname = ttokengadtdata "error";
            datavalparams = [];
            datatypeparams = Some [ tunit ]
              (* the [error] token has a semantic value of type [unit] *)
          } ::
          List.map (fun (token, typo) -> {
            dataname = ttokengadtdata token;
            datavalparams = [];
            datatypeparams = Some [ match typo with None -> tunit | Some t -> TypTextual t ]
          }) (typed_tokens grammar)
        )

    | Settings.CodeOnly m ->

        (* Type abbreviation. *)

        let param = "a" in
        param,
        TAbbrev (TypApp (m ^ "." ^ tctokengadt, [ TypVar param ]))

126 127 128 129 130
  in
  [
    IIComment "The indexed type of terminal symbols.";
    IITypeDecls [{
      typename = tctokengadt;
131 132
      typeparams = [ param ];
      typerhs;
133 134 135
      typeconstraint = None
    }]
  ]
136

137 138
(* If we were asked to only produce a type definition, then
   do so and stop. *)
139

140
let produce_tokentypes grammar =
141 142 143 144 145 146 147
  match Settings.token_type_mode with
  | Settings.TokenTypeOnly ->

      (* Create both an .mli file and an .ml file. This is made
	 necessary by the fact that the two can be different
	 when there are functor parameters. *)

148 149
      let i =
        tokentypedef grammar @
150
        listiflazy Settings.inspection (fun () ->
151 152
          tokengadtdef grammar
        )
153
      in
154

155 156 157 158 159 160
      let module P = 
	Printer.Make (struct 
			let f = open_out (Settings.base ^ ".mli")
			let locate_stretches = None 
		      end) 
      in
161
      P.interface [
162
        IIFunctor (grammar.parameters, i)
163
      ];
164 165 166 167 168 169
      let module P = 
	Printer.Make (struct 
			let f = open_out (Settings.base ^ ".ml")
			let locate_stretches = None 
		      end) 
      in
170
      P.program [
171 172 173
        SIFunctor (grammar.parameters,
          interface_to_structure i
        )
174
      ];
175
      exit 0
176 177 178 179 180

  | Settings.CodeOnly _
  | Settings.TokenTypeAndCode ->
      ()

181 182 183 184
(* The token type and the token GADTs can be referred to via a short
   (unqualified) name, regardless of how they have been defined (either
   directly or as an abbreviation). However, their data constructors must
   be qualified if [--external-tokens] is set. *)
185 186 187

let tokenprefix id =
  match Settings.token_type_mode with
188
  | Settings.CodeOnly m ->
189
      m ^ "." ^ id
190
  | Settings.TokenTypeAndCode ->
191 192 193
      id
  | Settings.TokenTypeOnly ->
      id (* irrelevant, really *)
194

195 196 197
let tokendata =
  tokenprefix

POTTIER Francois's avatar
POTTIER Francois committed
198 199 200
let tokengadtdata token =
  tokenprefix (ttokengadtdata token)