xml.mll 6.04 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
(**************************************************************************)
(*                                                                        *)
(*  Copyright (C) 2010-2011                                               *)
(*    François Bobot                                                     *)
(*    Jean-Christophe Filliâtre                                          *)
(*    Claude Marché                                                      *)
(*    Andrei Paskevich                                                    *)
(*                                                                        *)
(*  This software is free software; you can redistribute it and/or        *)
(*  modify it under the terms of the GNU Library General Public           *)
(*  License version 2.1, with the special exception on linking            *)
(*  described in file LICENSE.                                            *)
(*                                                                        *)
(*  This software is distributed in the hope that it will be useful,      *)
(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *)
(*                                                                        *)
(**************************************************************************)
MARCHE Claude's avatar
MARCHE Claude committed
19 20 21 22 23 24 25 26


{

  open Lexing

  type element =
    { name : string;
27
      attributes : (string * string) list;
MARCHE Claude's avatar
MARCHE Claude committed
28 29 30
      elements : element list;
    }

MARCHE Claude's avatar
MARCHE Claude committed
31 32
  type t =
      { version : string;
33 34 35 36
        encoding : string;
        doctype : string;
        dtd : string;
        content : element;
MARCHE Claude's avatar
MARCHE Claude committed
37 38
      }

MARCHE Claude's avatar
MARCHE Claude committed
39 40 41 42 43 44
  let buf = Buffer.create 17

  let rec pop_all group_stack element_stack =
    match group_stack with
      | [] -> element_stack
      | (elem,att,elems)::g ->
45 46 47 48 49 50
          let e = {
            name = elem;
            attributes = att;
            elements = List.rev element_stack;
          }
          in pop_all g (e::elems)
MARCHE Claude's avatar
MARCHE Claude committed
51 52 53 54 55

  exception Parse_error of string

  let parse_error s = raise (Parse_error s)

MARCHE Claude's avatar
MARCHE Claude committed
56 57 58 59 60 61 62 63 64 65 66 67
}

let space = [' ' '\t' '\r' '\n']
let digit = ['0'-'9']
let letter = ['a'-'z' 'A'-'Z']
let ident = (letter | digit | '_') + 
let sign = '-' | '+' 
let integer = sign? digit+
let mantissa = ['e''E'] sign? digit+
let real = sign? digit* '.' digit* mantissa?
let escape = ['\\''"''n''t''r'] 

MARCHE Claude's avatar
MARCHE Claude committed
68 69 70 71 72 73 74 75
rule xml_prolog = parse
| space+ 
    { xml_prolog lexbuf }
| "<?xml" space+ "version=\"1.0\"" space+ "?>"
    { xml_doctype "1.0" "" lexbuf }
| "<?xml" space+ "version=\"1.0\"" space+ "encoding=\"UTF-8\"" space+ "?>"
    { xml_doctype "1.0" "" lexbuf }
| "<?xml" ([^'?']|'?'[^'>'])* "?>" 
76
    { Format.eprintf "[Xml warning] prolog ignored@\n";
MARCHE Claude's avatar
MARCHE Claude committed
77
      xml_doctype "1.0" "" lexbuf }
MARCHE Claude's avatar
MARCHE Claude committed
78
| _ 
MARCHE Claude's avatar
MARCHE Claude committed
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
    { parse_error "wrong prolog" }
      
and xml_doctype version encoding = parse
| space+ 
    { xml_doctype version encoding lexbuf }
| "<!DOCTYPE" space+ (ident as doctype) space+ [^'>']* ">" 
    { match elements [] [] lexbuf with 
         | [x] -> 
            { version = version;
              encoding = encoding;
              doctype = doctype;
              dtd = "";
              content = x;
            }
         | _ -> parse_error "there should be exactly one root element"
    }
| _ 
    { parse_error "wrong DOCTYPE" }
MARCHE Claude's avatar
MARCHE Claude committed
97 98 99 100 101 102 103 104 105
      
and elements group_stack element_stack = parse
  | space+ 
      { elements group_stack element_stack lexbuf }
  | '<' (ident as elem)   
      { attributes group_stack element_stack elem [] lexbuf }
  | "</" (ident as celem) space* '>'
      { match group_stack with
         | [] -> 
MARCHE Claude's avatar
MARCHE Claude committed
106
             Format.eprintf 
107
               "[Xml warning] unexpected closing Xml element `%s'@\n" 
MARCHE Claude's avatar
MARCHE Claude committed
108
               celem;
MARCHE Claude's avatar
MARCHE Claude committed
109 110 111
             elements group_stack element_stack lexbuf
         | (elem,att,stack)::g ->
             if celem <> elem then
MARCHE Claude's avatar
MARCHE Claude committed
112
               Format.eprintf 
113
                 "[Xml warning] Xml element `%s' closed by `%s'@\n" 
MARCHE Claude's avatar
MARCHE Claude committed
114
                 elem celem;
115 116 117 118 119
             let e = {
                name = elem;
                attributes = att;
                elements = List.rev element_stack;
             }
MARCHE Claude's avatar
MARCHE Claude committed
120 121 122
             in elements g (e::stack) lexbuf            
       }
  | '<'
123
      { Format.eprintf "[Xml warning] unexpected '<'@\n";
MARCHE Claude's avatar
MARCHE Claude committed
124 125 126 127 128
        elements group_stack element_stack lexbuf }      
  | eof 
      { match group_stack with
         | [] -> element_stack
         | (elem,_,_)::_ ->
129
             Format.eprintf "[Xml warning] unclosed Xml element `%s'@\n" elem;
MARCHE Claude's avatar
MARCHE Claude committed
130 131 132
             pop_all group_stack element_stack
      }
  | _ as c
MARCHE Claude's avatar
MARCHE Claude committed
133
      { parse_error ("invalid element starting with " ^ String.make 1 c) }
MARCHE Claude's avatar
MARCHE Claude committed
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149

and attributes groupe_stack element_stack elem acc = parse
  | space+
      { attributes groupe_stack element_stack elem acc lexbuf }
  | (ident as key) space* '=' 
      { let v = value lexbuf in
        attributes groupe_stack element_stack elem ((key,v)::acc) lexbuf }
  | '>' 
      { elements ((elem,acc,element_stack)::groupe_stack) [] lexbuf }
  | "/>"
      { let e = { name = elem ; 
                  attributes = acc;
                  elements = [] }
        in
        elements groupe_stack (e::element_stack) lexbuf }
  | _ as c
MARCHE Claude's avatar
MARCHE Claude committed
150
      { parse_error ("'>' expected, got " ^ String.make 1 c) }
MARCHE Claude's avatar
MARCHE Claude committed
151
  | eof
MARCHE Claude's avatar
MARCHE Claude committed
152
      { parse_error "unclosed element, `>' expected" }
MARCHE Claude's avatar
MARCHE Claude committed
153 154 155 156 157 158

and value = parse
  | space+ 
      { value lexbuf }
  | '"' 
      { Buffer.clear buf;
159
        string_val lexbuf } 
MARCHE Claude's avatar
MARCHE Claude committed
160
  | _ as c
MARCHE Claude's avatar
MARCHE Claude committed
161
      { parse_error ("invalid value starting with " ^ String.make 1 c) }
MARCHE Claude's avatar
MARCHE Claude committed
162
  | eof
MARCHE Claude's avatar
MARCHE Claude committed
163
      { parse_error "unterminated keyval pair" }
MARCHE Claude's avatar
MARCHE Claude committed
164 165 166

and string_val = parse 
  | '"' 
167
      { Buffer.contents buf }
MARCHE Claude's avatar
MARCHE Claude committed
168 169 170 171 172 173 174 175 176 177 178 179 180 181
  | [^ '\\' '"'] as c
      { Buffer.add_char buf c;
        string_val lexbuf }
  | '\\' (['\\''\"'] as c)   
      { Buffer.add_char buf c;
        string_val lexbuf }
  | '\\' 'n'
      { Buffer.add_char buf '\n';
        string_val lexbuf }
  | '\\' (_ as c)
      { Buffer.add_char buf '\\';
        Buffer.add_char buf c;
        string_val lexbuf }
  | eof
MARCHE Claude's avatar
MARCHE Claude committed
182
      { parse_error "unterminated string" }
MARCHE Claude's avatar
MARCHE Claude committed
183 184 185 186

{

  let from_file f =
MARCHE Claude's avatar
MARCHE Claude committed
187
      let c = open_in f in
MARCHE Claude's avatar
MARCHE Claude committed
188
      let lb = Lexing.from_channel c in
MARCHE Claude's avatar
MARCHE Claude committed
189
      let t = xml_prolog lb in
MARCHE Claude's avatar
MARCHE Claude committed
190
      close_in c;
MARCHE Claude's avatar
MARCHE Claude committed
191
      t
MARCHE Claude's avatar
MARCHE Claude committed
192 193

}