Commit 2a374271 authored by Andrei Paskevich's avatar Andrei Paskevich

Doc: language reference - lexical conventions

parent 62a4a52a
......@@ -1857,7 +1857,7 @@ ifeq (@enable_doc@,yes)
doc: doc/manual.pdf doc/html/index.html
BNF = qualid label constant operator term type formula theory theory2 \
BNF = ident qualid tag constant operator term type formula theory theory2 \
why_file spec expr expr2 module whyml_file term_old_at
BNFTEX = $(BNF:%=doc/%_bnf.tex)
......
......@@ -54,12 +54,12 @@ and syntax = parse
| ";" ([^ '\n']* as s) '\n' [' ''\t']* '|' {
print_string "& \\textrm{";
print_string s;
print_string "} \\alt{}";
print_string "} \\alt";
syntax lexbuf }
| ";" ([^ '\n']* as s) '\n' [' ''\t']* '\\' [' ''\t']* '\n' {
print_string "& \\textrm{";
print_string s;
print_string "} \\sep{}";
print_string "} \\sep";
syntax lexbuf }
| ";" ([^ '\n']* as s) "%\n" {
print_string "& \\textrm{";
......@@ -69,7 +69,7 @@ and syntax = parse
| ";" ([^ '\n']* as s) '\n' {
print_string "& \\textrm{";
print_string s;
print_string "} \\newl{}";
print_string "} \\newl";
syntax lexbuf }
| "@" {
print_string "}";
......@@ -84,7 +84,7 @@ and syntax = parse
indoublequote lexbuf }
| "below" { print_string "\\below"; syntax lexbuf }
| "epsilon" { print_string "\\emptystring"; syntax lexbuf }
| ['A'-'Z''a'-'z''-'] + {
| ['A'-'Z''a'-'z'] ['A'-'Z''a'-'z''-']* {
print_string "\\nonterm{";
print_string (lexeme lexbuf);
print_string"}";
......@@ -95,16 +95,18 @@ and syntax = parse
| ['_' '^'] _ {
print_string (lexeme lexbuf);
syntax lexbuf }
| "*" { print_string "\\repetstar{}"; syntax lexbuf }
| "+" { print_string "\\repetplus{}"; syntax lexbuf }
| "?" { print_string "\\repetone{}"; syntax lexbuf }
| "(" { print_string "\\lparen{}"; syntax lexbuf }
| ")" { print_string "\\rparen{}"; syntax lexbuf }
| "::=" { print_string "\\is{}"; syntax lexbuf }
| "|" { print_string "\\orelse{}"; syntax lexbuf }
| "\\" { print_string "\\sep{}"; syntax lexbuf }
| '-' { print_string "\\interval"; syntax lexbuf }
| "*" { print_string "\\repetstar"; syntax lexbuf }
| "+" { print_string "\\repetplus"; syntax lexbuf }
| "?" { print_string "\\repetone"; syntax lexbuf }
| "(" { print_string "\\lparen"; syntax lexbuf }
| ")" { print_string "\\rparen"; syntax lexbuf }
| "::=" { print_string "\\is"; syntax lexbuf }
| "|" { print_string "\\orelse"; syntax lexbuf }
| "\\" { print_string "\\sep"; syntax lexbuf }
| "{" { print_string "\\begin{notimplementedenv}"; check_implementation_note lexbuf }
| "}" { print_string "\\end{notimplementedenv}"; syntax lexbuf }
| [' ''\t'] { syntax lexbuf }
| _ {
print_char (lexeme_char lexbuf 0);
syntax lexbuf }
......
\begin{syntax}
digit ::= "0" - "9" ;
digit ::= "0" - "9"
\
hex-digit ::= digit | "a" - "f" | "A" - "F" ;
\
oct-digit ::= "0" - "7"
\
bin-digit ::= "0" | "1"
\
integer ::= digit (digit | "_")* ; decimal
| ("0x" | "0X") hex-digit (hex-digit | "_")* ; hexadecimal
| ("0o" | "0O") oct-digit (oct-digit | "_")* ; octal
| ("0b" | "0B") bin-digit (bin-digit | "_")* ; binary
hex-digit ::= "0" - "9" | "a" - "f" | "A" - "F"
\
real ::= digit+ exponent ; decimal
| digit+ "." digit* exponent? ;
| digit* "." digit+ exponent? ;
| ("0x" | "0X") hex-real h-exponent ; hexadecimal
oct-digit ::= "0" - "7"
\
hex-real ::= hex-digit+ ;
| hex-digit+ "." hex-digit* ;
| hex-digit* "." hex-digit+ ;
\
exponent ::= ("e" | "E") ("-" | "+")? digit+ ;
bin-digit ::= "0" | "1"
\
h-exponent ::= ("p" | "P") ("-" | "+")? digit+ ; %
\end{syntax}
\ No newline at end of file
integer ::= digit (digit | "_")* ;
| ("0x" | "0X") hex-digit (hex-digit | "_")* ;
| ("0o" | "0O") oct-digit (oct-digit | "_")* ;
| ("0b" | "0B") bin-digit (bin-digit | "_")* ;
\
real ::= digit+ exponent ;
| digit+ "." digit* exponent? ;
| digit* "." digit+ exponent? ;
| ("0x" | "0X") hex-digit+ h-exponent ;
| ("0x" | "0X") hex-digit+ "." hex-digit* h-exponent? ;
| ("0x" | "0X") hex-digit* "." hex-digit+ h-exponent? ;
\
exponent ::= ("e" | "E") ("-" | "+")? digit+
\
h-exponent ::= ("p" | "P") ("-" | "+")? digit+ %
\end{syntax}
\begin{syntax}
suffix ::= "a" - "z" | "A" - "Z" | "0" - "9" | "'" | "_"
\
lident ::= ("a" - "z") suffix* | "_" suffix+
\
uident ::= ("A" - "Z") suffix* %
\end{syntax}
\begin{syntax}
label ::= string ;
| "#" filename digit+ digit+ digit+ "#" ;
\
filename ::= string ;%
\end{syntax}
......@@ -19,13 +19,14 @@
\newcommand{\indexttbs}[1]{\index{#1@\protect\keywordbs{#1}}}
\newif\ifspace
\newif\ifnewentry
\newcommand{\addspace}{\ifspace \: \spacefalse \fi}
\newcommand{\addspace}{\ifspace\:\:\spacefalse\fi}
\newcommand{\term}[2]{\addspace\mbox{\lstinline|#1|%
\ifthenelse{\equal{#2}{}}{}{\indexttbase{#2}{#1}}}\spacetrue}
\newcommand{\nonterm}[2]{%
\ifthenelse{\equal{#2}{}}%
{\addspace\mbox{\textsl{#1}\ifnewentry\index{#1@\textsl{#1}}\fi}\spacetrue}%
{\addspace\mbox{\textsl{#1}\footnote{#2}\ifnewentry\index{grammar entries!\textsl{#1}}\fi}\spacetrue}}
\newcommand{\interval}{\addspace--\spacetrue}
\newcommand{\repetstar}{$^*$\spacetrue}
\newcommand{\repetplus}{$^+$\spacetrue}
\newcommand{\repetone}{$^?$\spacetrue}
......
......@@ -21,7 +21,7 @@ The following commands are available:
\whyml input files.
\item[\texttt{ide}] provides a graphical interface to display goals
and to run provers and transformations on them.
\item[\texttt{prove}] reads \why and \whyml input files and calls
\item[\texttt{prove}] reads \whyml input files and calls
provers, on the command-line.
\item[\texttt{realize}] generates interactive proof skeletons for
\why input files.
......@@ -29,8 +29,8 @@ The following commands are available:
for regression test purposes.
\item[\texttt{session}] dumps various informations from a proof
session, and possibly modifies the session.
\item[\texttt{wc}] gives some token statistics about \why and \whyml
source codes.
\item[\texttt{wc}] gives some token statistics about \whyml
source files.
\end{description}
All these commands are also available as standalone executable files,
......@@ -141,7 +141,7 @@ can be obtained by the option \verb|--list-prover-ids|.
\label{sec:why3ref}
\why is primarily used to call provers on goals contained in an
input file. By default, such a file must be written either in \why language
input file. By default, such a file must be written in \whyml language
(extension \texttt{.mlw}).
However, a dynamically loaded
plugin can register a parser for some other format of logical problems,
......@@ -303,7 +303,7 @@ Generally, proof attempts are marked obsolete just after
the start of the user interface. Indeed, when you load a session in order to
modify it (not with \texttt{why3session info} for instance), \why
rebuilds the goals to prove by using the information provided in the
session. If you modify the original file (\texttt{.why}, \texttt{.mlw}) or if the
session. If you modify the original file (\texttt{.mlw}) or if the
transformations have changed (new version of \why), \why will detect
that. Since the provers might answer differently on these new
proof obligations, the corresponding proof attempts are marked obsolete.
......@@ -1096,7 +1096,7 @@ realize the given theories. See also Section~\ref{sec:realizations}.
\section{The \texttt{wc} Command}
\label{sec:why3wc}
\why can give some token statistics about \why and \whyml source codes.
\why can give some token statistics about \whyml source files.
\index{wc@\texttt{wc}}
%%% Local Variables:
......
\begin{syntax}
op-char-1 ::= "=" | "<" | ">" | "~" ;
\
op-char-2 ::= "+" | "-";
op-char-2 ::= "+" | "-" ;
\
op-char-3 ::= "*" | "/" | "%" ;
op-char-3 ::= "*" | "/" | "\" | "%" ;
\
op-char-4 ::= "!" | "$" | "&" | "?" | "@" | "^" | "." | ":" | "|" | "#" ;
\
op-char ::= op-char-1 | op-char-2 | op-char-3 | op-char-4 ;
op-char-1234 ::= op-char-1 | op-char-2 | op-char-3 | op-char-4 ;
\
infix-op-1 ::= op-char* op-char-1 op-char* ;
op-char-234 ::= op-char-2 | op-char-3 | op-char-4 ;
\
infix-op ::= op-char+ ;
op-char-34 ::= op-char-3 | op-char-4 ;
\
prefix-op ::= op-char+ ;
infix-op-1 ::= op-char-234* op-char-1 op-char-1234* ;
\
bang-op ::= "!" op-char-4* | "?" op-char-4* ;%
\end{syntax}
\ No newline at end of file
infix-op-2 ::= op-char-34* op-char-2 op-char-234* ;
\
infix-op-3 ::= op-char-4* op-char-3 op-char-34* ;
\
infix-op-4 ::= op-char-4+ ;
\
infix-op ::= infix-op-1 | infix-op-2 | infix-op-3 | infix-op-4 ;
\
prefix-op ::= op-char-1234+ ;
\
bang-op ::= ("!" | "?") op-char-4* ; %
\end{syntax}
\begin{syntax}
lalpha ::= "a"-"z" | "_"
lident-ext ::= lident | "(" op-char-1234+ ")"
\
ualpha ::= "A"-"Z"
qualid ::= (uident ".")* (lident-ext | uident)
\
alpha ::= lalpha | ualpha
lqualid ::= (uident ".")* lident
\
suffix ::= (alpha | digit | "'")*
\
lident ::= lalpha suffix
\
uident ::= ualpha suffix
\
ident ::= lident | uident
\
lqualid ::= lident | uqualid "." lident
\
uqualid ::= uident | uqualid "." uident
\
qualid ::= ident | uqualid "." ident
\
digit-or-us ::= "0"-"9" | "_"
\
alpha-no-us ::= "a"-"z" | "A"-"Z"
\
suffix-nq ::= (alpha-no-us | "'"* digit-or-us)* "'"*
\
lident-nq ::= lalpha suffix-nq
\
uident-nq ::= ualpha suffix-nq
\
ident-nq ::= lident-nq | uident-nq ;%
uqualid ::= (uident ".")* uident %
\end{syntax}
......@@ -2,83 +2,93 @@
\label{chap:syntaxref}
%HEVEA\cutname{syntaxref.html}
This chapter gives the grammar and semantics for \why and \whyml input files.
In this chapter, we describe the syntax and semantics of \whyml.
\section{Lexical Conventions}
Lexical conventions are common to \why and \whyml.
% TODO: blanks
\subsection{Comments}
%Lexical conventions of \whyml are similar to those of OCaml.
%
Blank characters are space, horizontal tab, carriage return,
and line feed. Blanks separate lexemes but are otherwise ignored.
%
Comments are enclosed by \texttt{(*} and \texttt{*)} and can be nested.
\subsection{Strings}
Note that \texttt{(*)} does not start a comment.
Strings are enclosed in double quotes (\verb!"!). Double quotes can be
escaped in strings using the backslash character (\verb!\!).
The other special sequences are \verb!\n! for line feed and \verb!\t!
for horizontal tab.
In the following, strings are referred to with the non-terminal
\nonterm{string}{}.
\subsection{Identifiers}
Identifiers are non-empty sequences of characters among letters,
digits, the underscore character and the quote character. They cannot
start with a digit or a quote.
\begin{center}\framebox{\input{./qualid_bnf.tex}}\end{center}
The syntax distinguishes identifiers that start with a lowercase or an
uppercase letter (resp. \nonterm{lident}{} and \nonterm{uident}{}), and
similarly, lowercase and uppercase qualified identifiers.
The restricted classes of identifiers denoted by \nonterm{lident-nq}{}
and \nonterm{uident-nq}{} correspond to identifiers where the quote
character cannot be followed by a letter. Identifiers where a quote is
followed by a letter are reserved and cannot be used as identifier for
declarations introduced by the user (see Section~\ref{sec:rangetypes}).
\subsection{Constants}
The syntax for constants is given in Figure~\ref{fig:bnf:constant}.
\nonterm{string}{}\spacefalse.
%\subsection{Constants}
The syntax for numerical constants is given by the following rules:
%\begin{figure}[ht]
\begin{center}\input{./constant_bnf.tex}\end{center}
%\caption{Syntax for numerical constants.}
%\label{fig:bnf:constant}
%\end{figure}
%in Figure~\ref{fig:bnf:constant}.
Integer and real constants have arbitrary precision.
Integer constants may be given in base 16, 10, 8 or 2.
Real constants may be given in base 16 or 10.
\begin{figure}
\begin{center}\framebox{\input{./constant_bnf.tex}}\end{center}
\caption{Syntax for constants.}
\label{fig:bnf:constant}
\end{figure}
\subsection{Operators}
Integer constants can be given in base 10, 16, 8 or 2.
Real constants can be given in base 10 or 16.
Notice that the exponent in hexadecimal real constants is written in base 10.
%\subsection{Identifiers}
Identifiers are composed of letters, digits, the underscore character,
and the quote character. %, as shown in Figure~\ref{fig:bnf:ident}.
The syntax distinguishes identifiers that start with a lowercase letter
or an underscore (\nonterm{lident}{}\spacefalse) and identifiers that
start with an uppercase letter (\nonterm{uident}{}\spacefalse):
%\begin{figure}[ht]
\begin{center}\input{./ident_bnf.tex}\end{center}
%\caption{Syntax for identifiers.}
%\label{fig:bnf:ident}
%\end{figure}
Identifiers that contain a quote followed by a letter,
such as \texttt{int32'max}, are reserved for symbols
introduced by \why transformations and cannot be used
for user-defined symbols.
%\subsection{Operators}
Prefix and infix operators are built from characters organized in four
categories (\textsl{op-char-1} to \textsl{op-char-4}).
\begin{center}\framebox{\input{./operator_bnf.tex}}\end{center}
Infix operators are classified into 4 categories, according to the
characters they are built from:
\begin{itemize}
\item level 4: operators containing only characters from
\textit{op-char-4};
\item level 3: those containing
characters from \textit{op-char-3} or \textit{op-char-4};
\item level 2: those containing
characters from \textit{op-char-2}, \textit{op-char-3} or
\textit{op-char-4};
\item level 1: all other operators (non-terminal \textit{infix-op-1}).
\end{itemize}
\subsection{Labels}
Identifiers, terms, formulas, program expressions
can all be labeled, either with a string, or with a location tag.
\begin{center}\framebox{\input{./label_bnf.tex}}\end{center}
A location tag consists of a file name, a line number, and starting
and ending characters.
precedence groups (\textsl{op-char-1} to \textsl{op-char-4}):
%as shown in Figure~\ref{fig:bnf:operator}.
%\begin{figure}[ht]
\begin{center}\input{./operator_bnf.tex}\end{center}
%\caption{Syntax for operators.}
%\label{fig:bnf:operator}
%\end{figure}
The infix operators from a high-numbered group bind stronger
than the infix operators from a low-numbered group,
and prefix operators bind stronger than infix operators.
For example, the infix operator \texttt{.*.} (from \textsl{infix-op-3})
would have a higher precedence than the infix operator \texttt{->-}
(from \textsl{infix-op-1}).
The so-called ``bang operators'' are prefix operators that have even
higher precedence than the juxtaposition (application) operator,
allowing us to write expressions like \texttt{inv !x}.
Infix operators from the groups 2-4 are left-associative.
Infix operators from the group 1 are non-associative and
may instead be chained, as explained below.
An operator inside parenthesis can act as a lowercase identifier.
Qualified identifiers are prefixed with a sequence of uppercase
identifiers, e.g., \texttt{App.S.get}:
%\begin{figure}[ht]
\begin{center}\input{./qualid_bnf.tex}\end{center}
%\caption{Syntax for qualified identifiers.}
%\label{fig:bnf:qualid}
%\end{figure}
Finally, any identifier, term, formula, or expression
in a \whyml program can be tagged, either with a generic tag
or a location tag:
\begin{center}\input{./tag_bnf.tex}\end{center}
A generic tag cannot contain newlines or closing square brackets;
leading and trailing spaces are ignored.
A location tag consists of a file name in double quotes,
a line number, and starting and ending character positions.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
\begin{syntax}
tag ::= "[@" ~... "]" ; generic tag
| "[#" string digit+ digit+ digit+ "]" ; location tag %
\end{syntax}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment