<?php
/**
* A class to assist with construction of XML documents
*
* @package   awl
* @subpackage   XMLElement
* @author    Andrew McMillan <andrew@mcmillan.net.nz>
* @copyright Catalyst .Net Ltd, Morphoss Ltd <http://www.morphoss.com/>
* @license   http://www.gnu.org/licenses/lgpl-3.0.txt  GNU LGPL version 3 or later
*/

require_once('AWLUtilities.php');

/**
* A class for XML elements which may have attributes, or contain
* other XML sub-elements.
*
* The content of an element is one of: false (nothing), a string of text,
* or an array of sub-elements.
*
* @package   awl
*/
class XMLElement {
  protected $tagname;
  protected $xmlns;
  protected $attributes;
  protected $content;
  protected $_parent;

  /**
  * Constructor.
  *
  * When no $xmlns is given the namespace is looked up in $attributes: if the
  * tag name contains a colon, the text before the last colon is treated as a
  * prefix and the 'xmlns:<prefix>' attribute is used (when present);
  * otherwise a plain 'xmlns' attribute is used (when present).  The tag name
  * itself is always stored unchanged.
  *
  * @param string $tagname The tag name of the new element
  * @param mixed $content Either a string of content, an array of sub-elements,
  *                       or a single object which becomes the only sub-element
  * @param array $attributes An array of attribute name/value pairs
  * @param string $xmlns An XML namespace specifier
  */
  function __construct( $tagname, $content=false, $attributes=false, $xmlns=null ) {
    $this->tagname = $tagname;

    // A single object is a subtree to be parented here, so wrap it in an array.
    $this->content = ( is_object($content) ? array($content) : $content );

    $this->attributes = ( $attributes === false ? array() : $attributes );

    if ( isset($xmlns) ) {
      $this->xmlns = $xmlns;
    }
    else if ( preg_match( '{^(.*):([^:]*)$}', $tagname, $matches) ) {
      $prefix = $matches[1];
      if ( isset($this->attributes['xmlns:'.$prefix]) ) {
        $this->xmlns = $this->attributes['xmlns:'.$prefix];
      }
    }
    else if ( isset($this->attributes['xmlns']) ) {
      $this->xmlns = $this->attributes['xmlns'];
    }
  }


  /**
  * Count the number of elements
  *
  * @return int 0 when there is no content (false or an empty value), the
  *             array size when the content is an array, otherwise 1.
  */
  function CountElements( ) {
    if ( $this->content === false ) return 0;
    if ( is_array($this->content) ) return count($this->content);
    if ( $this->content == '' ) return 0;
    return 1;
  }

  /**
  * Set an element attribute to a value.  Setting the 'xmlns' attribute
  * (compared case-insensitively) also sets the namespace of the element.
  *
  * @param string $k The attribute name
  * @param string $v The attribute value
  */
  function SetAttribute($k,$v) {
    if ( !is_array($this->attributes) ) $this->attributes = array();
    $this->attributes[$k] = $v;
    if ( strtolower($k) == 'xmlns' ) {
      $this->xmlns = $v;
    }
  }

  /**
  * Set the whole content to a value
  *
  * @param mixed $v The element content, which may be text, or an array of sub-elements
  */
  function SetContent($v) {
    $this->content = $v;
  }

  /**
  * Accessor for the tag name
  *
  * @return string The tag name of the element
  */
  function GetTag() {
    return $this->tagname;
  }

  /**
  * Accessor for the full-namespaced tag name
  *
  * @return string The tag name of the element, prefixed by the namespace and
  *                a colon when the element has a (non-empty) namespace.
  */
  function GetNSTag() {
    return (empty($this->xmlns) ? '' : $this->xmlns . ':') . $this->tagname;
  }

  /**
  * Accessor for a single attribute
  *
  * @param string $attr The name of the attribute.  'xmlns' returns the
  *                     namespace of the element.
  * @return string The value of that attribute of the element, or null if it is not set
  */
  function GetAttribute( $attr ) {
    if ( $attr == 'xmlns' ) return $this->xmlns;
    if ( isset($this->attributes[$attr]) ) return $this->attributes[$attr];
    return null;
  }

  /**
  * Accessor for the attributes
  *
  * @return array The attributes of this element
  */
  function GetAttributes() {
    return $this->attributes;
  }

  /**
  * Accessor for the content
  *
  * @return mixed The content of this element, exactly as it is stored
  */
  function GetContent() {
    return $this->content;
  }

  /**
  * Return an array of elements matching the specified tag, or all elements if no tag is supplied.
  * Unlike GetContent() this will always return an array.
  *
  * When the content is not an array and no tag is supplied, the returned
  * array holds that content as its only entry, whatever it is.
  *
  * @param string $tag The namespaced tag (as returned by GetNSTag()) to match, or empty for everything
  * @param boolean $recursive Whether to also collect matching descendants.  Each
  *                sub-element is followed by its own matching descendants, and
  *                only element objects are collected from the descendants.
  * @return array The XMLElements within the tree which match this tag
  */
  function GetElements( $tag=null, $recursive=false ) {
    $elements = array();
    if ( is_array($this->content) ) {
      foreach( $this->content AS $v ) {
        $is_element = is_object($v);
        if ( empty($tag) || ( $is_element && $v->GetNSTag() == $tag ) ) {
          $elements[] = $v;
        }
        if ( $recursive && $is_element ) {
          // Append rather than use the array union operator: '+' keeps the left-hand
          // entry for every integer key present on both sides, which lost descendants.
          foreach( $v->GetElements($tag,true) AS $descendant ) {
            if ( is_object($descendant) ) $elements[] = $descendant;
          }
        }
      }
    }
    else if ( empty($tag) || ( $this->content instanceof XMLElement && $this->content->GetNSTag() == $tag ) ) {
      $elements[] = $this->content;
    }
    return $elements;
  }


  /**
  * Return an array of elements matching the specified path
  *
  * Each path step is compared with GetNSTag(), and '*' matches any element.
  * A path which does not start with '/' is also searched for at every
  * deeper level of the tree.
  *
  * @param string $path The path to look for, e.g. '/root/child' or 'child/*'
  * @return array The XMLElements within the tree which match this path
  */
  function GetPath( $path ) {
    $elements = array();
    if ( !preg_match( '#(/)?([^/]+)(/?.*)$#', $path, $matches ) ) return $elements;

    $rooted    = ( $matches[1] == '/' );
    $step      = $matches[2];
    $remainder = $matches[3];
    $children  = ( is_array($this->content) ? $this->content : array() );

    if ( $step == '*' || $step == $this->GetNSTag() ) {
      if ( $remainder == '' ) {
        // That is the full path
        $elements[] = $this;
      }
      else {
        // There is more to the path, so we recurse into that sub-part
        foreach( $children AS $v ) {
          if ( is_object($v) ) $elements = array_merge( $elements, $v->GetPath($remainder) );
        }
      }
    }

    if ( !$rooted ) {
      // If our input $path was not rooted, we recurse further
      foreach( $children AS $v ) {
        if ( is_object($v) ) $elements = array_merge( $elements, $v->GetPath($path) );
      }
    }
    return $elements;
  }


  /**
  * Add a sub-element.  Any content which is not already an array is discarded.
  *
  * @param object $v An XMLElement to be appended to the array of sub-elements
  * @return int The number of sub-elements after the addition
  */
  function AddSubTag(&$v) {
    if ( !is_array($this->content) ) $this->content = array();
    $this->content[] =& $v;
    return count($this->content);
  }

  /**
  * Add a new sub-element.  Any content which is not already an array is discarded.
  *
  * @param string $tagname The tag name of the new element
  * @param mixed $content Either a string of content, or an array of sub-elements
  * @param array $attributes An array of attribute name/value pairs
  * @param string $xmlns An XML namespace specifier
  *
  * @return objectref A reference to the new XMLElement
  */
  function &NewElement( $tagname, $content=false, $attributes=false, $xmlns=null ) {
    if ( !is_array($this->content) ) $this->content = array();
    $element = new XMLElement($tagname,$content,$attributes,$xmlns);
    $this->content[] =& $element;
    return $element;
  }


  /**
  * Build the leading whitespace for an indenting level.
  *
  * @param int $indent The indenting level; anything below zero gives no indent
  * @return string One space per level
  */
  private static function IndentString( $indent ) {
    return str_repeat(' ', max(0, (int) $indent));
  }


  /**
  * Render just the internal content
  *
  * Sub-elements are each rendered on their own line, one level deeper.  Text
  * is escaped for XML, unless the whole text is wrapped in a CDATA section, in
  * which case it is kept as CDATA with any embedded ']]>' split across two
  * CDATA sections.
  *
  * @param int $indent The indenting level of this element
  * @param array $nslist The namespace => prefix pairs declared by wrapping elements
  * @param boolean $force_xmlns Passed on to Render() for each sub-element
  * @return string The content of this element, as a string without this element wrapping it.
  */
  function RenderContent($indent=0, $nslist=null, $force_xmlns=false ) {
    if ( is_array($this->content) ) {
      // Render the sub-elements with a deeper indent level
      $r = "\n";
      foreach( $this->content AS $v ) {
        if ( is_object($v) ) {
          $r .= $v->Render($indent+1, "", $nslist, $force_xmlns);
        }
      }
      return $r . self::IndentString($indent);
    }

    // Cast so that false / null content is handled as an empty string.
    $text = (string) $this->content;

    if ( strpos($text, '<![CDATA[') === 0 && strrpos($text, ']]>') === strlen($text) - 3 ) {
      return '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', substr($text, 9, -3)) . ']]>';
    }
    if ( defined('ENT_XML1') && defined('ENT_DISALLOWED') ) {
      // Newer PHP versions allow specifying ENT_XML1, but default to ENT_HTML401.
      return htmlspecialchars($text, ENT_NOQUOTES | ENT_XML1 | ENT_DISALLOWED );
    }
    // Older PHP versions do not have those constants.
    return htmlspecialchars($text, ENT_NOQUOTES );
  }


  /**
  * Render the document tree into (nicely formatted) XML
  *
  * Namespace declaring attributes ('xmlns' and 'xmlns:<prefix>') are left out
  * when $nslist shows the same declaration was made by a wrapping element.
  * If the element has no namespace yet, rendering assigns it the first
  * namespace declared in its own attributes.
  *
  * @param int $indent The indenting level for the pretty formatting of the element
  * @param string $xmldef An XML declaration to put on a line of its own before the element
  * @param array $nslist The namespace => prefix pairs declared by wrapping elements
  * @param boolean $force_xmlns Whether to write an xmlns attribute even where it looks redundant
  * @return string The rendered element, ending in a newline
  */
  function Render($indent=0, $xmldef="", $nslist=null, $force_xmlns=false) {
    $r = ( $xmldef == "" ? "" : $xmldef."\n");

    $attr = "";
    $tagname = $this->tagname;
    $xmlns_done = false;
    if ( is_array($this->attributes) ) {
      // Render the element attribute values
      foreach( $this->attributes AS $k => $v ) {
        // Only 'xmlns' and 'xmlns:<prefix>' declare a namespace.
        if ( preg_match('#^xmlns(?::(.+))?$#', $k, $matches ) ) {
          if ( !isset($nslist) ) $nslist = array();
          $prefix = (isset($matches[1]) ? $matches[1] : '');
          if ( isset($nslist[$v]) && $nslist[$v] == $prefix ) continue;  // No need to include in list as it's in a wrapping element
          $nslist[$v] = $prefix;
          if ( !isset($this->xmlns) ) $this->xmlns = $v;
          $xmlns_done = true;
        }
        $attr .= sprintf( ' %s="%s"', $k, htmlspecialchars((string) $v) );
      }
    }

    if ( isset($this->xmlns) ) {
      $xmlns_attr = sprintf( ' xmlns="%s"', htmlspecialchars((string) $this->xmlns) );
      if ( isset($nslist[$this->xmlns]) && $nslist[$this->xmlns] != '' ) {
        // The namespace has a prefix, so use it on the tag name
        $tagname = $nslist[$this->xmlns] . ':' . $tagname;
        if ( $force_xmlns ) $attr .= $xmlns_attr;
      }
      else if ( !isset($nslist[$this->xmlns]) && is_array($this->attributes) && !isset($this->attributes[$this->xmlns]) ) {
        // The namespace has not been declared anywhere yet, so declare it here
        $attr .= $xmlns_attr;
      }
      else if ( $force_xmlns && ! $xmlns_done ) {
        $attr .= $xmlns_attr;
      }
    }

    $r .= self::IndentString($indent) . '<' . $tagname . $attr;

    if ( is_array($this->content) ) {
      $has_content = ( count($this->content) > 0 );
    }
    else {
      $has_content = ( strlen((string) $this->content) > 0 );
    }

    if ( $has_content ) {
      $r .= ">";
      $r .= $this->RenderContent($indent,$nslist,$force_xmlns);
      $r .= '</' . $tagname.">\n";
    }
    else {
      $r .= "/>\n";
    }
    return $r;
  }


  /**
  * Render the element when it is used as a string
  *
  * @return string The same as Render() with no arguments
  */
  function __toString() {
    return $this->Render();
  }
}


/**
* Rebuild an XML tree in our own style from the parsed XML tags using
* a recursive approach.
*
* Reading stops at the first 'close' entry, at the first entry without a
* 'tag' or 'type', at the end of $xmltags, or after 50000 entries at one
* level.  'open' entries recurse to build their content, 'complete' entries
* become elements holding their 'value', and entries of any other type are
* skipped.  The text before the last colon of a tag is used as the namespace
* of the element.
*
* @param array $xmltags An array of XML tags we get from using the PHP XML parser
* @param intref &$start_from A pointer to our current integer offset into $xmltags
* @return mixed Either a single XMLElement, or an array of XMLElement objects.
*/
function BuildXMLTree( $xmltags, &$start_from ) {
  $content = array();

  if ( !isset($start_from) ) $start_from = 0;

  for( $i=0; $i < 50000 && isset($xmltags[$start_from]); $i++) {
    $tagdata = $xmltags[$start_from++];
    if ( !isset($tagdata['tag']) || !isset($tagdata['type']) ) break;
    if ( $tagdata['type'] == "close" ) break;

    $xmlns = null;
    $tag = $tagdata['tag'];
    if ( preg_match( '{^(.*):([^:]*)$}', $tag, $matches) ) {
      $xmlns = $matches[1];
      $tag = $matches[2];
    }
    $attributes = ( isset($tagdata['attributes']) ? $tagdata['attributes'] : false );

    if ( $tagdata['type'] == "open" ) {
      $subtree = BuildXMLTree( $xmltags, $start_from );
      $content[] = new XMLElement($tag, $subtree, $attributes, $xmlns );
    }
    else if ( $tagdata['type'] == "complete" ) {
      $value = ( isset($tagdata['value']) ? $tagdata['value'] : false );
      $content[] = new XMLElement($tag, $value, $attributes, $xmlns );
    }
  }

  /**
  * If there is only one element, return it directly, otherwise return the
  * array of them
  */
  if ( count($content) == 1 ) {
    return $content[0];
  }
  return $content;
}