(* cmly_format.ml *)
(* This module defines the data that is stored in .cmly files. In short, a
   .cmly file contains a value of type [grammar], defined below. *)

(* The type definitions in this module are used by [Cmly_write], which writes
   a .cmly file, and by [Cmly_read], which reads a .cmly file. They should not
   be used anywhere else. *)

(* All entities (terminal symbols, nonterminal symbols, and so on) are
   represented as integers. These integers serve as indices into arrays. This
   enables simple and efficient hashing, comparison, indexing, etc. *)

(* Integer identifiers for the grammar's entities. Each one is a plain [int]
   that serves as an index into an array. *)

(* Identifies a terminal symbol. *)
type terminal = int

(* Identifies a nonterminal symbol. *)
type nonterminal = int

(* Identifies a production. *)
type production = int

(* Identifies an LR(0) state. *)
type lr0 = int

(* Identifies an LR(1) state. *)
type lr1 = int

(* An OCaml type, stored in textual form as a string. *)
type ocamltype   = string

(* An OCaml expression, stored in textual form as a string. *)
type ocamlexpr   = string

(* A range of source text, delimited by a start position and an end position,
   both expressed as [Lexing.position] values. *)
type range = {
  r_start: Lexing.position;
  r_end: Lexing.position;
}

(* An attribute consists of a label and a payload, both of which are strings,
   together with the source range recorded in [a_position]. *)
type attribute = {
  a_label: string;
  a_payload: string;
  a_position: range;
}

(* A list of attributes. *)
type attributes = attribute list

(* The description of a terminal symbol: its name, its kind (one of four
   polymorphic variant tags), an optional OCaml type (in textual form), and
   its attributes. *)
type terminal_def = {
  t_name: string;
  t_kind: [`REGULAR | `ERROR | `EOF | `PSEUDO];
  t_type: ocamltype option;
  t_attributes: attributes;
}

(* The description of a nonterminal symbol: its name, its kind (regular or
   start), its mangled name, an optional OCaml type (in textual form), a list
   of source ranges, a nullability flag, a list of terminal symbols
   ([n_first]; presumably the FIRST set of this symbol -- to be confirmed
   against the writer), and its attributes. *)
type nonterminal_def = {
  n_name: string;
  n_kind: [`REGULAR | `START];
  n_mangled_name: string;
  n_type: ocamltype option;
  n_positions: range list;
  n_nullable: bool;
  n_first: terminal list;
  n_attributes: attributes;
}

(* A symbol is either a terminal symbol [T] or a nonterminal symbol [N]. *)
type symbol = T of terminal | N of nonterminal

(* An identifier is represented as a string. *)
type identifier =
  string

(* An action consists of an OCaml expression (in textual form) and a list of
   keywords, whose type is defined in the module [Keyword]. *)
type action = {
  a_expr: ocamlexpr;
  a_keywords: Keyword.keyword list;
}

(* A producer is a triple of a symbol, an identifier, and attributes. *)
type producer_def = symbol * identifier * attributes

(* The description of a production: its kind (regular or start), its
   left-hand side (a nonterminal symbol), its right-hand side (an array of
   producers), a list of source ranges, an optional action, and its
   attributes. *)
type production_def = {
  p_kind: [`REGULAR | `START];
  p_lhs: nonterminal;
  p_rhs: producer_def array;
  p_positions: range list;
  p_action: action option;
  p_attributes: attributes;
}

(* The description of an LR(0) state: an optional incoming symbol and a list
   of pairs of a production and an integer (presumably LR(0) items, where the
   integer is a position within the production -- to be confirmed). *)
type lr0_state_def = {
  lr0_incoming : symbol option;
  lr0_items    : (production * int) list;
}

(* The description of an LR(1) state: the identifier of an LR(0) state, a list
   of transitions (each one a pair of a symbol and an LR(1) state), and a list
   of reductions (each one a pair of a terminal symbol and a list of
   productions). *)
type lr1_state_def = {
  lr1_lr0         : lr0;
  lr1_transitions : (symbol * lr1) list;
  lr1_reductions  : (terminal * production list) list;
}

(* The value that is stored in a .cmly file. It contains a base name, lists of
   preludes and postludes (as strings), one array of definitions per kind of
   entity (terminal symbols, nonterminal symbols, productions, LR(0) states,
   LR(1) states), a list of entry points (each one a triple of a nonterminal
   symbol, a production, and an LR(1) state), the grammar's attributes, and a
   list of parameters (as strings). Because entities are represented as
   integers that serve as indices into arrays, the arrays below are presumably
   the ones indexed by the corresponding integer types. *)
type grammar = {
  g_basename     : string;
  g_preludes     : string list;
  g_postludes    : string list;
  g_terminals    : terminal_def    array;
  g_nonterminals : nonterminal_def array;
  g_productions  : production_def  array;
  g_lr0_states   : lr0_state_def   array;
  g_lr1_states   : lr1_state_def   array;
  g_entry_points : (nonterminal * production * lr1) list;
  g_attributes   : attributes;
  g_parameters   : string list;
}