xml.mll 5.7 KB
Newer Older
1 2 3
(********************************************************************)
(*                                                                  *)
(*  The Why3 Verification Platform   /   The Why3 Development Team  *)
4
(*  Copyright 2010-2015   --   INRIA - CNRS - Paris-Sud University  *)
5 6 7 8
(*                                                                  *)
(*  This software is distributed under the terms of the GNU Lesser  *)
(*  General Public License version 2.1, with the special exception  *)
(*  on linking described in file LICENSE.                           *)
9
(*                                                                  *)
10
(********************************************************************)
MARCHE Claude's avatar
MARCHE Claude committed
11 12

{
13 14 15

  type attributes = (string * string) list

Andrei Paskevich's avatar
Andrei Paskevich committed
16
  type element =
MARCHE Claude's avatar
MARCHE Claude committed
17
    { name : string;
18
      attributes : (string * string) list;
MARCHE Claude's avatar
MARCHE Claude committed
19 20 21
      elements : element list;
    }

22
  let mk_element name attrs elems =
23
    { name = name;
24
      attributes = attrs;
25 26 27
      elements = List.rev elems;
    }

MARCHE Claude's avatar
MARCHE Claude committed
28 29
  type t =
      { version : string;
30 31 32 33
        encoding : string;
        doctype : string;
        dtd : string;
        content : element;
MARCHE Claude's avatar
MARCHE Claude committed
34 35
      }

MARCHE Claude's avatar
MARCHE Claude committed
36 37
  let buf = Buffer.create 17

38
  let rec pop_all group_stack element_stack =
MARCHE Claude's avatar
MARCHE Claude committed
39 40 41
    match group_stack with
      | [] -> element_stack
      | (elem,att,elems)::g ->
42 43
          let e = mk_element elem att element_stack in
          pop_all g (e::elems)
MARCHE Claude's avatar
MARCHE Claude committed
44 45 46 47

  exception Parse_error of string

  let parse_error s = raise (Parse_error s)
48

49
  let debug = Debug.register_info_flag "xml"
Andrei Paskevich's avatar
Andrei Paskevich committed
50
    ~desc:"Print@ the@ XML@ parser@ debugging@ messages."
MARCHE Claude's avatar
MARCHE Claude committed
51 52 53 54 55
}

let space = [' ' '\t' '\r' '\n']
let digit = ['0'-'9']
let letter = ['a'-'z' 'A'-'Z']
56 57
let ident = (letter | digit | '_') +
let sign = '-' | '+'
MARCHE Claude's avatar
MARCHE Claude committed
58 59 60
let integer = sign? digit+
let mantissa = ['e''E'] sign? digit+
let real = sign? digit* '.' digit* mantissa?
61
let escape = ['\\''"''n''t''r']
MARCHE Claude's avatar
MARCHE Claude committed
62

63
rule xml_prolog fixattrs = parse
64
| space+
65
    { xml_prolog fixattrs lexbuf }
MARCHE Claude's avatar
MARCHE Claude committed
66
| "<?xml" space+ "version=\"1.0\"" space+ "?>"
67
    { xml_doctype fixattrs "1.0" "" lexbuf }
MARCHE Claude's avatar
MARCHE Claude committed
68
| "<?xml" space+ "version=\"1.0\"" space+ "encoding=\"UTF-8\"" space+ "?>"
69
    { xml_doctype fixattrs "1.0" "" lexbuf }
70
| "<?xml" ([^'?']|'?'[^'>'])* "?>"
71
    { Debug.dprintf debug "[Xml warning] prolog ignored@.";
72
      xml_doctype fixattrs "1.0" "" lexbuf }
73
| _
MARCHE Claude's avatar
MARCHE Claude committed
74
    { parse_error "wrong prolog" }
75

76
and xml_doctype fixattrs version encoding = parse
77
| space+
78
    { xml_doctype fixattrs version encoding lexbuf }
79
| "<!DOCTYPE" space+ (ident as doctype) space+ [^'>']* ">"
80
    { match elements fixattrs [] [] lexbuf with
81
         | [x] ->
MARCHE Claude's avatar
MARCHE Claude committed
82 83 84 85 86 87 88 89
            { version = version;
              encoding = encoding;
              doctype = doctype;
              dtd = "";
              content = x;
            }
         | _ -> parse_error "there should be exactly one root element"
    }
90
| _
MARCHE Claude's avatar
MARCHE Claude committed
91
    { parse_error "wrong DOCTYPE" }
92

93
and elements fixattrs group_stack element_stack = parse
94
  | space+
95
      { elements fixattrs group_stack element_stack lexbuf }
96
  | '<' (ident as elem)
97
      { attributes fixattrs group_stack element_stack elem [] lexbuf }
MARCHE Claude's avatar
MARCHE Claude committed
98 99
  | "</" (ident as celem) space* '>'
      { match group_stack with
100
         | [] ->
101
             Debug.dprintf debug
102
               "[Xml warning] unexpected closing Xml element `%s'@."
MARCHE Claude's avatar
MARCHE Claude committed
103
               celem;
104
             elements fixattrs group_stack element_stack lexbuf
MARCHE Claude's avatar
MARCHE Claude committed
105 106
         | (elem,att,stack)::g ->
             if celem <> elem then
107
               Debug.dprintf debug
108
                 "[Xml warning] Xml element `%s' closed by `%s'@."
MARCHE Claude's avatar
MARCHE Claude committed
109
                 elem celem;
110
             let e = mk_element elem att element_stack in
111
             elements fixattrs g (e::stack) lexbuf
MARCHE Claude's avatar
MARCHE Claude committed
112 113
       }
  | '<'
114
      { Debug.dprintf debug "[Xml warning] unexpected '<'@.";
115
        elements fixattrs group_stack element_stack lexbuf }
116
  | eof
MARCHE Claude's avatar
MARCHE Claude committed
117 118 119
      { match group_stack with
         | [] -> element_stack
         | (elem,_,_)::_ ->
120
             Debug.dprintf debug "[Xml warning] unclosed Xml element `%s'@." elem;
121
             pop_all group_stack element_stack
MARCHE Claude's avatar
MARCHE Claude committed
122 123
      }
  | _ as c
MARCHE Claude's avatar
MARCHE Claude committed
124
      { parse_error ("invalid element starting with " ^ String.make 1 c) }
MARCHE Claude's avatar
MARCHE Claude committed
125

126
and attributes fixattrs groupe_stack element_stack elem acc = parse
MARCHE Claude's avatar
MARCHE Claude committed
127
  | space+
128
      { attributes fixattrs groupe_stack element_stack elem acc lexbuf }
129
  | (ident as key) space* '='
MARCHE Claude's avatar
MARCHE Claude committed
130
      { let v = value lexbuf in
131
        attributes fixattrs groupe_stack element_stack elem ((key,v)::acc) lexbuf }
132
  | '>'
133 134
      { let acc = fixattrs elem acc in
        elements fixattrs ((elem,acc,element_stack)::groupe_stack) [] lexbuf }
MARCHE Claude's avatar
MARCHE Claude committed
135
  | "/>"
136 137
      { let acc = fixattrs elem acc in
        let e = mk_element elem acc [] in
138
        elements fixattrs groupe_stack (e::element_stack) lexbuf }
MARCHE Claude's avatar
MARCHE Claude committed
139
  | _ as c
MARCHE Claude's avatar
MARCHE Claude committed
140
      { parse_error ("'>' expected, got " ^ String.make 1 c) }
MARCHE Claude's avatar
MARCHE Claude committed
141
  | eof
MARCHE Claude's avatar
MARCHE Claude committed
142
      { parse_error "unclosed element, `>' expected" }
MARCHE Claude's avatar
MARCHE Claude committed
143 144

and value = parse
145
  | space+
MARCHE Claude's avatar
MARCHE Claude committed
146
      { value lexbuf }
147
  | '"'
MARCHE Claude's avatar
MARCHE Claude committed
148
      { Buffer.clear buf;
149
        string_val lexbuf }
MARCHE Claude's avatar
MARCHE Claude committed
150
  | _ as c
MARCHE Claude's avatar
MARCHE Claude committed
151
      { parse_error ("invalid value starting with " ^ String.make 1 c) }
MARCHE Claude's avatar
MARCHE Claude committed
152
  | eof
MARCHE Claude's avatar
MARCHE Claude committed
153
      { parse_error "unterminated keyval pair" }
MARCHE Claude's avatar
MARCHE Claude committed
154

155 156
and string_val = parse
  | '"'
157
      { Buffer.contents buf }
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
  | "&lt;"
      { Buffer.add_char buf '<';
        string_val lexbuf }
  | "&gt;"
      { Buffer.add_char buf '>';
        string_val lexbuf }
  | "&quot;"
      { Buffer.add_char buf '"';
        string_val lexbuf }
  | "&apos;"
      { Buffer.add_char buf '\'';
        string_val lexbuf }
  | "&amp;"
      { Buffer.add_char buf '&';
        string_val lexbuf }
MARCHE Claude's avatar
MARCHE Claude committed
173
  | [^ '"'] as c
MARCHE Claude's avatar
MARCHE Claude committed
174 175 176
      { Buffer.add_char buf c;
        string_val lexbuf }
  | eof
MARCHE Claude's avatar
MARCHE Claude committed
177
      { parse_error "unterminated string" }
MARCHE Claude's avatar
MARCHE Claude committed
178 179 180

{

181
  let from_file ?(fixattrs=fun _ a -> a) f =
MARCHE Claude's avatar
MARCHE Claude committed
182
      let c = open_in f in
MARCHE Claude's avatar
MARCHE Claude committed
183
      let lb = Lexing.from_channel c in
184
      let t = xml_prolog fixattrs lb in
MARCHE Claude's avatar
MARCHE Claude committed
185
      close_in c;
MARCHE Claude's avatar
MARCHE Claude committed
186
      t
MARCHE Claude's avatar
MARCHE Claude committed
187 188

}