Commit 0190551f authored by POTTIER Francois's avatar POTTIER Francois

Add the new modules which read and write .cmly files.

parent c0d5c888
(* The following signatures describe the API offered by the functor
[Cfmly_read.Read]. This functor reads in a .cmly file and gives
access to the description of the grammar and automaton contained
in this file. *)
(* The module type [INDEXED] describes a type [t] whose elements are
in a bijection with an integer interval of the form [0..count). *)
module type INDEXED = sig
type t
val count : int
val of_int : int -> t
val to_int : t -> int
val iter : (t -> unit) -> unit
val fold : (t -> 'a -> 'a) -> 'a -> 'a
val tabulate : (t -> 'a) -> t -> 'a
end
(* The module type [GRAMMAR] describes the grammar and automaton. *)
module type GRAMMAR = sig
type terminal = private int
type nonterminal = private int
type production = private int
type lr0 = private int
type lr1 = private int
type item = production * int
type attribute =
string Positions.located * Stretch.t
type attributes =
attribute list
module Grammar : sig
val basename : string
val entry_points : (production * lr1) list
val attributes : attributes
val parameters : Stretch.t list
end
module Terminal : sig
include INDEXED with type t = terminal
val name : t -> string
val kind : t -> [`REGULAR | `ERROR | `EOF | `PSEUDO]
val typ : t -> Stretch.ocamltype option
val attributes : t -> attributes
end
module Nonterminal : sig
include INDEXED with type t = nonterminal
val name : t -> string
val mangled_name : t -> string
val kind : t -> [`REGULAR | `START]
val typ : t -> Stretch.ocamltype option
val positions : t -> Positions.t list
val is_nullable : t -> bool
val first : t -> terminal list
val attributes : t -> attributes
end
type symbol =
| T of terminal
| N of nonterminal
val symbol_name : ?mangled:bool -> symbol -> string
type identifier = string
module Action : sig
type t
val expr : t -> IL.expr
val keywords : t -> Keyword.keyword list
val filenames : t -> string list
end
module Production : sig
include INDEXED with type t = production
val kind : t -> [`REGULAR | `START]
val lhs : t -> nonterminal
val rhs : t -> (symbol * identifier * attributes) array
val positions : t -> Positions.t list
val action : t -> Action.t option
val attributes : t -> attributes
end
module Lr0 : sig
include INDEXED with type t = lr0
val incoming : t -> symbol option
val items : t -> item list
end
module Lr1 : sig
include INDEXED with type t = lr1
val lr0 : t -> lr0
val transitions : t -> (symbol * t) list
val reductions : t -> (terminal * production list) list
end
module Print : sig
open Format
val terminal : formatter -> terminal -> unit
val nonterminal : formatter -> nonterminal -> unit
val symbol : formatter -> symbol -> unit
val mangled_nonterminal : formatter -> nonterminal -> unit
val mangled_symbol : formatter -> symbol -> unit
val production : formatter -> production -> unit
val item : formatter -> item -> unit
val itemset : formatter -> item list -> unit
val annot_item : string list -> formatter -> item -> unit
val annot_itemset : string list list -> formatter -> item list -> unit
end
end
(* This module defines the data that is stored in .cmly files. In short, a
.cmly file contains a value of type [grammar], defined below. *)
(* The type definitions in this module are used by [Cmly_write], which writes
a .cmly file, and by [Cmly_read], which reads a .cmly file. They should not
be used anywhere else. *)
(* All entities (terminal symbols, nonterminal symbols, and so on) are
represented as integers. These integers serve as indices into arrays. This
enables simple and efficient hashing, comparison, indexing, etc. *)
type terminal = int
type nonterminal = int
type production = int
type lr0 = int
type lr1 = int
type attribute =
string Positions.located * Stretch.t
type attributes =
attribute list
type terminal_def = {
t_name: string;
t_kind: [`REGULAR | `ERROR | `EOF | `PSEUDO];
t_type: Stretch.ocamltype option;
t_attributes: attributes;
}
type nonterminal_def = {
n_name: string;
n_kind: [`REGULAR | `START];
n_mangled_name: string;
n_type: Stretch.ocamltype option;
n_positions: Positions.t list;
n_is_nullable: bool;
n_first: terminal list;
n_attributes: attributes;
}
type symbol =
| T of terminal
| N of nonterminal
type identifier = string
type action = {
a_expr: IL.expr;
a_keywords: Keyword.keyword list;
a_filenames: string list;
}
type producer_def =
symbol * identifier * attributes
type production_def = {
p_kind: [`REGULAR | `START];
p_lhs: nonterminal;
p_rhs: producer_def array;
p_positions: Positions.t list;
p_action: action option;
p_attributes: attributes;
}
type lr0_state_def = {
lr0_incoming: symbol option;
lr0_items: (production * int) list;
}
type lr1_state_def = {
lr1_lr0: lr0;
lr1_transitions: (symbol * lr1) list;
lr1_reductions: (terminal * production list) list;
}
type grammar = {
g_basename : string;
g_terminals : terminal_def array;
g_nonterminals : nonterminal_def array;
g_productions : production_def array;
g_lr0_states : lr0_state_def array;
g_lr1_states : lr1_state_def array;
g_entry_points : (production * lr1) list;
g_attributes : attributes;
g_parameters : Stretch.t list;
}
open Cmly_format
open Cmly_api
(* ------------------------------------------------------------------------ *)
(* Reading a .cmly file. *)
exception Error of string
let read (ic : in_channel) : grammar =
(* .cmly file format: version string ++ grammar *)
try
let m = really_input_string ic (String.length Version.version) in
if m <> Version.version then
raise (Error (Printf.sprintf "Invalid magic number in .cmly file.\n\
Expecting %S, but got %S." Version.version m))
else
(input_value ic : grammar)
with
| End_of_file (* [really_input_string], [input_value] *)
| Failure _ -> (* [input_value] *)
raise (Error (Printf.sprintf "Invalid or damaged .cmly file."))
let read (filename : string) : grammar =
let ic = open_in_bin filename in
IO.try_finally
(fun () -> read ic)
(fun () -> close_in_noerr ic)
(* ------------------------------------------------------------------------ *)
(* Packaging the interval [0..count) as a module of type [INDEXED]. *)
module Index (P : sig
val name: string (* for error messages only *)
val count: int
end)
: INDEXED with type t = int
= struct
type t = int
let count = P.count
let of_int n =
if 0 <= n && n < count then n
else invalid_arg (P.name ^ ".of_int: index out of bounds")
let to_int n = n
let iter f =
for i = 0 to count - 1 do
f i
done
let fold f x =
let r = ref x in
for i = 0 to count - 1 do
r := f i !r
done;
!r
let tabulate f =
let a = Array.init count f in
Array.get a
end
(* ------------------------------------------------------------------------ *)
(* Packaging a data structure of type [Cmly_format.grammar] as a module
of type [Cmly_api.GRAMMAR]. *)
module Make (G : sig val grammar : grammar end) : GRAMMAR = struct
open G
type terminal = int
type nonterminal = int
type production = int
type lr0 = int
type lr1 = int
type item = production * int
type attribute =
string Positions.located * Stretch.t
type attributes =
attribute list
module Grammar = struct
let basename = grammar.g_basename
let entry_points = grammar.g_entry_points
let attributes = grammar.g_attributes
let parameters = grammar.g_parameters
end
module Terminal = struct
let table = grammar.g_terminals
let name i = table.(i).t_name
let kind i = table.(i).t_kind
let typ i = table.(i).t_type
let attributes i = table.(i).t_attributes
include Index(struct
let name = "Terminal"
let count = Array.length table
end)
end
module Nonterminal = struct
let table = grammar.g_nonterminals
let name i = table.(i).n_name
let mangled_name i = table.(i).n_mangled_name
let kind i = table.(i).n_kind
let typ i = table.(i).n_type
let positions i = table.(i).n_positions
let is_nullable i = table.(i).n_is_nullable
let first i = table.(i).n_first
let attributes i = table.(i).n_attributes
include Index(struct
let name = "Nonterminal"
let count = Array.length table
end)
end
type symbol = Cmly_format.symbol =
| T of terminal
| N of nonterminal
let symbol_name ?(mangled=false) = function
| T t ->
Terminal.name t
| N n ->
if mangled then Nonterminal.mangled_name n
else Nonterminal.name n
type identifier = string
module Action = struct
type t = action
let expr t = t.a_expr
let keywords t = t.a_keywords
let filenames t = t.a_filenames
end
module Production = struct
let table = grammar.g_productions
let kind i = table.(i).p_kind
let lhs i = table.(i).p_lhs
let rhs i = table.(i).p_rhs
let positions i = table.(i).p_positions
let action i = table.(i).p_action
let attributes i = table.(i).p_attributes
include Index(struct
let name = "Production"
let count = Array.length table
end)
end
module Lr0 = struct
let table = grammar.g_lr0_states
let incoming i = table.(i).lr0_incoming
let items i = table.(i).lr0_items
include Index(struct
let name = "Lr0"
let count = Array.length table
end)
end
module Lr1 = struct
let table = grammar.g_lr1_states
let lr0 i = table.(i).lr1_lr0
let transitions i = table.(i).lr1_transitions
let reductions i = table.(i).lr1_reductions
include Index(struct
let name = "Lr1"
let count = Array.length table
end)
end
module Print = struct
let terminal ppf t =
Format.pp_print_string ppf (Terminal.name t)
let nonterminal ppf t =
Format.pp_print_string ppf (Nonterminal.name t)
let symbol ppf = function
| T t -> terminal ppf t
| N n -> nonterminal ppf n
let mangled_nonterminal ppf t =
Format.pp_print_string ppf (Nonterminal.name t)
let mangled_symbol ppf = function
| T t -> terminal ppf t
| N n -> mangled_nonterminal ppf n
let rec lengths l acc = function
| [] ->
if l = -1 then []
else l :: lengths (-1) [] acc
| [] :: rows ->
lengths l acc rows
| (col :: cols) :: rows ->
lengths (max l (String.length col)) (cols :: acc) rows
let rec adjust_length lengths cols =
match lengths, cols with
| l :: ls, c :: cs ->
let pad = l - String.length c in
let c =
if pad = 0 then c
else c ^ String.make pad ' '
in
c :: adjust_length ls cs
| _, [] -> []
| [], _ -> assert false
let align_tabular rows =
let lengths = lengths (-1) [] rows in
List.map (adjust_length lengths) rows
let print_line ppf = function
| [] -> ()
| x :: xs ->
Format.fprintf ppf "%s" x;
List.iter (Format.fprintf ppf " %s") xs
let print_table ppf table =
let table = align_tabular table in
List.iter (Format.fprintf ppf "%a\n" print_line) table
let annot_itemset annots ppf items =
let last_lhs = ref (-1) in
let prepare (p, pos) annot =
let rhs =
Array.map (fun (sym, id, _) ->
if id <> "" && id.[0] <> '_' then
"(" ^ id ^ " = " ^ symbol_name sym ^ ")"
else symbol_name sym
) (Production.rhs p)
in
if pos >= 0 && pos < Array.length rhs then
rhs.(pos) <- ". " ^ rhs.(pos)
else if pos > 0 && pos = Array.length rhs then
rhs.(pos - 1) <- rhs.(pos - 1) ^ " .";
let lhs = Production.lhs p in
let rhs = Array.to_list rhs in
let rhs =
if !last_lhs = lhs then
"" :: " |" :: rhs
else begin
last_lhs := lhs;
Nonterminal.name lhs :: "::=" :: rhs
end
in
if annot = [] then
[rhs]
else
[rhs; ("" :: "" :: annot)]
in
let rec prepare_all xs ys =
match xs, ys with
| [], _ ->
[]
| (x :: xs), (y :: ys) ->
let z = prepare x y in
z :: prepare_all xs ys
| (x :: xs), [] ->
let z = prepare x [] in
z :: prepare_all xs []
in
print_table ppf (List.concat (prepare_all items annots))
let itemset ppf t =
annot_itemset [] ppf t
let annot_item annot ppf item =
annot_itemset [annot] ppf [item]
let item ppf t =
annot_item [] ppf t
let production ppf t =
item ppf (t, -1)
end
end
module Read (X : sig val filename : string end) =
Make (struct let grammar = read X.filename end)
(* The functor [Read] reads a .cmly file. If the file is unreadable,
the exception [Error] is raised. Otherwise, the functor builds a
module of type [Cmly_api.GRAMMAR], which gives access to a description
of the grammar and automaton. *)
exception Error of string
module Read (X : sig val filename : string end) : Cmly_api.GRAMMAR
open Grammar
open Cmly_format
let terminal (t : Terminal.t) : terminal_def =
{
t_kind = (
if Terminal.equal t Terminal.error then
`ERROR
else if
(match Terminal.eof with
| None -> false
| Some eof -> Terminal.equal t eof) then
`EOF
else if Terminal.pseudo t then
`PSEUDO
else
`REGULAR
);
t_name = Terminal.print t;
t_type = Terminal.ocamltype t;
t_attributes = Terminal.attributes t;
}
let nonterminal (nt : Nonterminal.t) : nonterminal_def =
let is_start = Nonterminal.is_start nt in
{
n_kind = if is_start then `START else `REGULAR;
n_name = Nonterminal.print false nt;
n_mangled_name = Nonterminal.print true nt;
n_type = if is_start then None else Nonterminal.ocamltype nt;
n_positions = if is_start then [] else Nonterminal.positions nt;
n_is_nullable = Analysis.nullable nt;
n_first = List.map Terminal.t2i (TerminalSet.elements (Analysis.first nt));
n_attributes = if is_start then [] else Nonterminal.attributes nt;
}
let symbol (sym : Symbol.t) : symbol =
match sym with
| Symbol.N n -> N (Nonterminal.n2i n)
| Symbol.T t -> T (Terminal.t2i t)
let action (a : Action.t) : action =
{
a_expr = Action.to_il_expr a;
a_keywords = Keyword.KeywordSet.elements (Action.keywords a);
a_filenames = Action.filenames a;
}
let rhs (prod : Production.index) : producer_def array =
match Production.classify prod with
| Some n ->
[| (N (Nonterminal.n2i n), "", []) |]
| None ->
Array.mapi (fun i sym ->
let id = (Production.identifiers prod).(i) in
let attributes = (Production.rhs_attributes prod).(i) in
symbol sym, id, attributes
) (Production.rhs prod)
let production (prod : Production.index) : production_def =
{
p_kind = if Production.is_start prod then `START else `REGULAR;
p_lhs = Nonterminal.n2i (Production.nt prod);
p_rhs = rhs prod;
p_positions = Production.positions prod;
p_action = if Production.is_start prod then None
else Some (action (Production.action prod));
p_attributes = Production.lhs_attributes prod;
}
let item (i : Item.t) : production * int =
let p, i = Item.export i in
(Production.p2i p, i)
let itemset (is : Item.Set.t) : (production * int) list =
List.map item (Item.Set.elements is)
let lr0_state (node : Lr0.node) : lr0_state_def =
{
lr0_incoming = Option.map symbol (Lr0.incoming_symbol node);
lr0_items = itemset (Lr0.items node)
}
let transition (sym, node) : symbol * lr1 =
(symbol sym, Lr1.number node)
let lr1_state (node : Lr1.node) : lr1_state_def =
{
lr1_lr0 = Lr0.core (Lr1.state node);
lr1_transitions =
List.map transition (SymbolMap.bindings (Lr1.transitions node));
lr1_reductions =
let add t ps rs = (Terminal.t2i t, List.map Production.p2i ps) :: rs in
TerminalMap.fold_rev add (Lr1.reductions node) []
}
let entry_point prod node xs : (production * lr1) list =
(Production.p2i prod, Lr1.number node) :: xs
let encode () : grammar =
{
g_basename = Settings.base;
g_terminals = Terminal.init terminal;
g_nonterminals = Nonterminal.init nonterminal;
g_productions = Production.init production;
g_lr0_states = Array.init Lr0.n lr0_state;
g_lr1_states = Array.of_list (Lr1.map lr1_state);
g_entry_points = ProductionMap.fold entry_point Lr1.entry [];
g_attributes = Analysis.attributes;
g_parameters = Front.grammar.UnparameterizedSyntax.parameters;
}
let write oc t =
(* .cmly file format: version string ++ grammar *)
output_string oc Version.version;
output_value oc (t : grammar)
let write filename =
let oc = open_out filename in
write oc (encode());
close_out oc
(* [write filename] queries the module [Grammar] for information about the
grammar and queries the modules [Lr0] and [Lr1] for information about the
automaton. It writes this information to the .cmly file [filename]. *)
val write: string -> unit
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment