tony-xmlparse.mly 6.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
/* -*-indented-text-*- ---------------------------------------------- 
  

    Copyright (c) 1999 Christian Lindig <lindig@ips.cs.tu-bs.de>. All
    rights reserved. See COPYING for details.
    $Id: xmlparse.mly,v 1.12 1999/01/12 20:25:54 lindig Exp $
   
    This file implements a parser for XML files. 
    See: http://www.w3.org/
   
    A XML file mixes markup data, comments, processing instructions
    (pi) and character data. Since the different kinds of content
    are indistinguishable by the scanner the parser controlls
    so-called scanner contexts. 
*/

%{

(* Helpers *)

open Xml		(* XML abstract syntax 	*)
open Error		(* error() 		*)
open Xmlstate		(* setContext()		*)

let n    = None		(* just to save space 	*)

%}

/* tokens with value */

%token <string>         WORD
%token <string>         CHUNK
%token <string>         NAME
%token <string>         STRING 
%token <string>         PIOPEN
%token <string>         OPEN
%token <string>         OPENSLASH

/* token */

%token CLOSE 
%token COMMENT 
%token DOCTYPE 
%token DTDCLOSE
%token DTDOPEN
%token ENCODING
%token EOF 
%token EQ
%token ERROR
%token PICLOSE 
%token PUBLIC
%token S 
%token SLASHCLOSE
%token STANDALONE
%token SYSTEM
%token VERSION
%token XMLCLOSE
%token XMLDECL 
%token XMLNAME
%token XMLOPEN

%start document
%type <Xml.document> document

%%

document        : prolog topelement misc EOF{ XML($1,$2,$3) } 

topelement      : element                   { setContext DataContext;
                                              $1
                                            }
                                            /* xm dt pi */ 
prolog          : xmldecl misc              { Prolog($1,None    ,$2   ) }
                | xmldecl misc doctype misc { Prolog($1,Some($3),$2@$4) }
                |         misc doctype misc { Prolog(n ,Some($2),$1@$3) }
                |         misc              { Prolog(n ,None    ,$1   ) }

misc            : /**/                      {     [] }
                | misc pi                   { $2::$1 }
                | misc CHUNK                {     $1 }
                | misc COMMENT              {     $1 }

dtdopen         : DTDOPEN                   { setContext DeclContext}
dtdclose        : DTDCLOSE                  { setContext DataContext } 

doctype         : dtdopen NAME ext markup 
                  dtdclose                  { DTD($2,$3) }

ext             : /**/                      { None }    
                | SYSTEM STRING             { Some (DTDsys($2))    }
                | PUBLIC STRING STRING      { Some (DTDpub($2,$3)) }

markup          : /**/                      { None }
                | error                     { error "DTDs are unsupported" }

element         : emptyElemTag              { let (n,a) = $1 in
                                                single n a  
                                            }

                | sTag content eTag         {   let (sn,a) = $1 in
                                                let  en    = $3 in
                                                let  c     = $2 in
                                                  if sn = en then
                                                  element sn a c 
                                                  else error ("tag mismatch")
                                            }   

opn             : OPEN                      { setContext ElementContext; $1 }
opnslash        : OPENSLASH                 { setContext ElementContext; $1 }   
cls             : CLOSE                     { setContext DataContext  }
slashcls        : SLASHCLOSE                { setContext DataContext  } 

sTag            : opn attributes cls        { ($1,$2) }
eTag            : opnslash cls              {  $1     }
emptyElemTag    : opn attributes slashcls   { ($1,$2) }

attributes      : /**/                      {     []  }
                | attributes attribute      { $2::$1  }

attribute       : NAME EQ STRING            { ($1,$3) }
        
content         : /**/                      { empty                    	}
                | content CHUNK             { $1 ^^ chunk $2     	} 
                | content element           { $1 ^^ $2            	}
                | content pi                { match $2 with
                                              name,strings -> 
                                                $1 ^^ pi name strings   }
                | content COMMENT           { $1                        }
                 
xmlopen         : XMLOPEN                   { setContext DeclContext}
xmlclose        : XMLCLOSE                  { setContext DataContext } 

xmlinfo         : version encoding sddecl   { ($1,$2,Some $3) }
                | version                   { ($1,n ,None   ) }         
                | version encoding          { ($1,$2,None   ) }
                | version          sddecl   { ($1,n ,Some $2) }

xmldecl         : xmlopen xmlinfo xmlclose  { match $2 with    
                                              (vers,enc,sa) ->
                                              Some (XMLDecl(
                                                    vers,       (* version *)
                                                    sa,         (* standalone *)
                                                    enc         (* encoding *)
                                                   ))
                                            }
                
version         : VERSION EQ STRING         { $3 }

encoding        : ENCODING EQ STRING        { Some $3 }

sddecl          : STANDALONE EQ STRING      { match $3 with
                                            | "yes" -> true
                                            | "no"  -> false
                                            | _     -> error "yes/no expected"
                                            }

piopen          : PIOPEN                    { setContext PiContext; $1}
pi              : piopen picontent PICLOSE  { setContext DataContext;
                                                  ($1,List.rev $2)
                                            }
picontent       : /**/                      { []        }
                | picontent WORD            { $2 :: $1  }