1<?php
2/**
3* A class to assist with construction of XML documents
4*
5* @package   awl
6* @subpackage   XMLElement
7* @author    Andrew McMillan <andrew@mcmillan.net.nz>
8* @copyright Catalyst .Net Ltd, Morphoss Ltd <http://www.morphoss.com/>
9* @license   http://www.gnu.org/licenses/lgpl-3.0.txt  GNU LGPL version 3 or later
10*/
11
12require_once('AWLUtilities.php');
13
/**
* A class for XML elements which may have attributes, or contain
* other XML sub-elements.
*
* The content of an element is one of:
*  - false (or an empty string) when the element is empty,
*  - a string of text, or
*  - an array of sub-elements.
*
* @package   awl
*/
class XMLElement {
  /** @var string The tag name, as given to the constructor */
  protected $tagname;
  /** @var string|null The XML namespace of this element, when known */
  protected $xmlns;
  /** @var array Attribute name => value pairs */
  protected $attributes;
  /** @var mixed false, a text string, or an array of sub-elements */
  protected $content;
  /** @var mixed Not read or written by any method of this class */
  protected $_parent;

  /**
  * Constructor.
  *
  * When no namespace is supplied it is looked up from the attributes: for a
  * tag name of the form 'prefix:name' the 'xmlns:prefix' attribute is used,
  * otherwise the plain 'xmlns' attribute is used.
  *
  * @param string $tagname The tag name of the new element
  * @param mixed $content Either a string of content, a single sub-element, or an array of sub-elements
  * @param array $attributes An array of attribute name/value pairs
  * @param string $xmlns An XML namespace specifier
  */
  function __construct( $tagname, $content=false, $attributes=false, $xmlns=null ) {
    $this->tagname = $tagname;

    // A single sub-element is wrapped in an array; arrays and text are stored as supplied.
    $this->content = ( is_object($content) ? array($content) : $content );

    $this->attributes = ( $attributes === false ? array() : $attributes );

    if ( isset($xmlns) ) {
      $this->xmlns = $xmlns;
    }
    else if ( preg_match( '{^(.*):([^:]*)$}', $tagname, $matches) ) {
      $prefix = $matches[1];
      if ( isset($this->attributes['xmlns:'.$prefix]) ) {
        $this->xmlns = $this->attributes['xmlns:'.$prefix];
      }
    }
    else if ( isset($this->attributes['xmlns']) ) {
      $this->xmlns = $this->attributes['xmlns'];
    }
  }


  /**
  * Count the number of elements
  *
  * An array of content counts as its number of entries, non-empty scalar
  * content counts as one, and false / null / '' count as zero.  The checks
  * are strict so that the result does not depend on the loose comparison
  * rules of the running PHP version.
  *
  * @return int The number of elements
  */
  function CountElements( ) {
    if ( is_array($this->content) ) return count($this->content);
    if ( $this->content === false || $this->content === null || $this->content === '' ) return 0;
    return 1;
  }

  /**
  * Set an element attribute to a value
  *
  * Setting the 'xmlns' attribute (in any letter case) also sets the
  * namespace of the element.
  *
  * @param string The attribute name
  * @param string The attribute value
  */
  function SetAttribute($k,$v) {
    if ( !is_array($this->attributes) ) $this->attributes = array();
    $this->attributes[$k] = $v;
    if ( strtolower($k) == 'xmlns' ) {
      $this->xmlns = $v;
    }
  }

  /**
  * Set the whole content to a value, replacing whatever was there.
  *
  * @param mixed The element content, which may be text, or an array of sub-elements
  */
  function SetContent($v) {
    $this->content = $v;
  }

  /**
  * Accessor for the tag name
  *
  * @return string The tag name of the element
  */
  function GetTag() {
    return $this->tagname;
  }

  /**
  * Accessor for the full-namespaced tag name
  *
  * @return string The tag name of the element, prefixed by the namespace and
  *                a ':' when the element has a (non-empty) namespace
  */
  function GetNSTag() {
    return (empty($this->xmlns) ? '' : $this->xmlns . ':') . $this->tagname;
  }

  /**
  * Accessor for a single attribute
  *
  * The name 'xmlns' is special: it returns the namespace of the element
  * rather than looking in the attribute list.
  *
  * @param string $attr The name of the attribute.
  * @return string The value of that attribute of the element, or null if it is not set
  */
  function GetAttribute( $attr ) {
    if ( $attr === 'xmlns' ) return $this->xmlns;
    if ( isset($this->attributes[$attr]) ) return $this->attributes[$attr];
    return null;
  }

  /**
  * Accessor for the attributes
  *
  * @return array The attributes of this element
  */
  function GetAttributes() {
    return $this->attributes;
  }

  /**
  * Accessor for the content
  *
  * @return mixed The content of this element: false, text or an array of sub-elements
  */
  function GetContent() {
    return $this->content;
  }

  /**
  * Return an array of elements matching the specified tag, or all elements if no tag is supplied.
  * Unlike GetContent() this will always return an array.
  *
  * The tag is compared against GetNSTag() of each sub-element.  When the
  * content is not an array and no tag is supplied the content itself is
  * returned as the only entry of the result.
  *
  * @param string $tag The namespaced tag to match, or null / '' for everything
  * @param boolean $recursive Whether to also search within the sub-elements
  * @return array The XMLElements within the tree which match this tag
  */
  function GetElements( $tag=null, $recursive=false ) {
    $elements = array();
    $match_all = empty($tag);

    if ( is_array($this->content) ) {
      foreach( $this->content AS $v ) {
        $is_element = ( $v instanceof XMLElement );
        if ( $match_all || ($is_element && $v->GetNSTag() == $tag) ) {
          $elements[] = $v;
        }
        // Only descend into children which hold sub-elements themselves, so that
        // the scalar content of leaf elements is not mixed into the result.
        if ( $recursive && $is_element && (is_array($v->content) || is_object($v->content)) ) {
          // array_merge() appends; the '+' operator would discard every entry
          // of the right-hand list whose numeric key already exists on the left.
          $elements = array_merge( $elements, $v->GetElements($tag,true) );
        }
      }
    }
    else if ( $match_all || ($this->content instanceof XMLElement && $this->content->GetNSTag() == $tag) ) {
      $elements[] = $this->content;
    }
    return $elements;
  }


  /**
  * Return an array of elements matching the specified path
  *
  * The path is a '/' separated list of namespaced tags (as returned by
  * GetNSTag()), where '*' matches any tag.  A path which does not start
  * with '/' is also searched for at every deeper level of the tree.
  *
  * @param string $path The path to look for
  * @return array The XMLElements within the tree which match this path
  */
  function GetPath( $path ) {
    $elements = array();
    if ( !preg_match( '#(/)?([^/]+)(/?.*)$#', $path, $matches ) ) return $elements;

    $children = ( is_array($this->content) ? $this->content : array() );

    if ( $matches[2] == '*' || $matches[2] == $this->GetNSTag() ) {
      if ( $matches[3] == '' ) {
        // That is the full path
        $elements[] = $this;
      }
      else {
        // There is more to the path, so we recurse into that sub-part
        foreach( $children AS $v ) {
          if ( $v instanceof XMLElement ) {
            $elements = array_merge( $elements, $v->GetPath($matches[3]) );
          }
        }
      }
    }

    if ( $matches[1] != '/' ) {
      // If our input $path was not rooted, we recurse further
      foreach( $children AS $v ) {
        if ( $v instanceof XMLElement ) {
          $elements = array_merge( $elements, $v->GetPath($path) );
        }
      }
    }
    return $elements;
  }


  /**
  * Add a sub-element
  *
  * Any non-array content (e.g. text) is discarded first.  The parameter is
  * still declared by reference for compatibility with existing callers, but
  * the element is stored by value so that re-using the caller's variable
  * for another element does not overwrite the one already in the tree.
  *
  * @param object An XMLElement to be appended to the array of sub-elements
  * @return int The number of sub-elements after the addition
  */
  function AddSubTag(&$v) {
    if ( !is_array($this->content) ) $this->content = array();
    $this->content[] = $v;
    return count($this->content);
  }

  /**
  * Add a new sub-element
  *
  * Any non-array content (e.g. text) is discarded first.
  *
  * @param string The tag name of the new element
  * @param mixed Either a string of content, or an array of sub-elements
  * @param array An array of attribute name/value pairs
  * @param string An XML namespace specifier
  *
  * @return objectref A reference to the new XMLElement
  */
  function &NewElement( $tagname, $content=false, $attributes=false, $xmlns=null ) {
    if ( !is_array($this->content) ) $this->content = array();
    $element = new XMLElement($tagname,$content,$attributes,$xmlns);
    // Stored by value: the object handle is shared, but assigning something
    // else to the returned variable can not replace the entry in the tree.
    $this->content[] = $element;
    return $element;
  }


  /**
  * Render just the internal content
  *
  * Sub-elements are rendered one indent level deeper.  Text is escaped for
  * XML, except when it is entirely wrapped in a CDATA section, in which case
  * it is passed through with any embedded ']]>' split so it stays valid.
  *
  * @param int $indent The indenting level of the element holding this content
  * @param array $nslist Namespace => prefix pairs already declared by wrapping elements
  * @param boolean $force_xmlns Passed through to Render() of the sub-elements
  * @return string The content of this element, as a string without this element wrapping it.
  */
  function RenderContent($indent=0, $nslist=null, $force_xmlns=false ) {
    if ( is_array($this->content) ) {
      $r = "\n";
      foreach( $this->content AS $v ) {
        if ( is_object($v) ) {
          $r .= $v->Render($indent+1, "", $nslist, $force_xmlns);
        }
      }
      // Indent for the closing tag which the caller will append
      return $r . substr("                        ",0,$indent);
    }

    // Cast once so that false / null content is handled as an empty string
    $text = (string)$this->content;

    if ( strpos($text, '<![CDATA[') === 0 && strrpos($text, ']]>') === strlen($text)-3 ) {
      return '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', substr($text, 9, -3)) . ']]>';
    }
    if ( defined('ENT_XML1') && defined('ENT_DISALLOWED') ) {
      // Newer PHP versions allow specifying ENT_XML1, but default to ENT_HTML401.
      return htmlspecialchars($text, ENT_NOQUOTES |  ENT_XML1 | ENT_DISALLOWED );
    }
    // TODO: content containing characters which are not valid in XML is not handled specially.
    // Older PHP versions do not have the ENT_XML1 flag.
    return htmlspecialchars($text, ENT_NOQUOTES );
  }


  /**
  * Render the document tree into (nicely formatted) XML
  *
  * Namespace declarations which a wrapping element already made with the
  * same prefix are not repeated.  Note that rendering will set the namespace
  * of this element from its first rendered xmlns attribute if it had none.
  *
  * @param int $indent The indenting level for the pretty formatting of the element
  * @param string $xmldef An XML declaration to put on a line before the element
  * @param array $nslist Namespace => prefix pairs already declared by wrapping elements
  * @param boolean $force_xmlns Whether to add an xmlns attribute even where a prefix is in use
  * @return string The rendered XML, ending with a newline
  */
  function Render($indent=0, $xmldef="", $nslist=null, $force_xmlns=false) {
    $r = ( $xmldef == "" ? "" : $xmldef."\n");

    $attr = "";
    $tagname = $this->tagname;
    $xmlns_done = false;
    if ( is_array($this->attributes) ) {
      // Render the element attribute values
      foreach( $this->attributes AS $k => $v ) {
        if ( preg_match('#^xmlns(:?(.+))?$#', $k, $matches ) ) {
          if ( !isset($nslist) ) $nslist = array();
          $prefix = (isset($matches[2]) ? $matches[2] : '');
          if ( isset($nslist[$v]) && $nslist[$v] == $prefix ) continue; // No need to include in list as it's in a wrapping element
          $nslist[$v] = $prefix;
          if ( !isset($this->xmlns) ) $this->xmlns = $v;
          $xmlns_done = true;
        }
        $attr .= sprintf( ' %s="%s"', $k, htmlspecialchars($v) );
      }
    }

    // The namespace is escaped like every other attribute value.  This is
    // built after the loop above, since that may have set the namespace.
    $has_xmlns = isset($this->xmlns);
    $xmlns_attr = ( $has_xmlns ? sprintf( ' xmlns="%s"', htmlspecialchars($this->xmlns) ) : '' );

    if ( $has_xmlns && isset($nslist[$this->xmlns]) && $nslist[$this->xmlns] != '' ) {
      // The namespace has a prefix declared, so use it on the tag
      $tagname = $nslist[$this->xmlns] . ':' . $tagname;
      if ( $force_xmlns ) $attr .= $xmlns_attr;
    }
    // NOTE(review): the last test looks the namespace value up as an attribute
    // *name*; possibly 'xmlns' was intended - kept as is, confirm before changing.
    else if ( $has_xmlns && !isset($nslist[$this->xmlns]) && is_array($this->attributes) && !isset($this->attributes[$this->xmlns]) ) {
      // The namespace has not been declared anywhere yet
      $attr .= $xmlns_attr;
    }
    else if ( $force_xmlns && $has_xmlns && ! $xmlns_done ) {
      $attr .= $xmlns_attr;
    }

    $r .= substr("                        ",0,$indent) . '<' . $tagname . $attr;

    if ( is_array($this->content) ) {
      $has_content = ( count($this->content) > 0 );
    }
    else {
      // Cast so that false / null content counts as empty
      $has_content = ( strlen((string)$this->content) > 0 );
    }

    if ( $has_content ) {
      $r .= ">";
      $r .= $this->RenderContent($indent,$nslist,$force_xmlns);
      $r .= '</' . $tagname.">\n";
    }
    else {
      $r .= "/>\n";
    }
    return $r;
  }


  /**
  * Converting the element to a string renders it with the default options.
  *
  * @return string The rendered XML
  */
  function __toString() {
    return $this->Render();
  }
}
339
340
/**
* Rebuild an XML tree in our own style from the flat list of parsed XML tags,
* recursing for each 'open' tag until its matching 'close' tag is consumed.
*
* Entries are read from $xmltags beginning at $start_from, which is advanced
* past every entry consumed.  Reading stops at a 'close' entry, at an entry
* lacking a 'tag' or 'type', at the end of the list, or after 50000 entries
* have been read at this level.  Entries of any type other than 'open' and
* 'complete' are consumed without producing an element.
*
* A tag of the form 'namespace:name' is split at its last ':' into the
* namespace and the tag name of the new element.
*
* @param array $xmltags An array of XML tags we get from using the PHP XML parser
* @param intref &$start_from A pointer to our current integer offset into $xmltags
* @return mixed Either a single XMLElement, or an array of XMLElement objects.
*/
function BuildXMLTree( $xmltags, &$start_from ) {
  if ( !isset($start_from) ) $start_from = 0;

  $nodes = array();
  $budget = 50000;

  while ( $budget-- > 0 && isset($xmltags[$start_from]) ) {
    $entry = $xmltags[$start_from];
    $start_from++;

    if ( !isset($entry['tag'], $entry['type']) ) break;

    $type = $entry['type'];
    if ( $type == "close" ) break;

    // Split a namespaced tag into its namespace and local name
    $namespace = null;
    $name = $entry['tag'];
    if ( preg_match( '{^(.*):([^:]*)$}', $name, $parts) ) {
      $namespace = $parts[1];
      $name = $parts[2];
    }

    $attrs = false;
    if ( isset($entry['attributes']) ) $attrs = $entry['attributes'];

    if ( $type == "open" ) {
      // Everything up to the matching close tag becomes the content
      $children = BuildXMLTree( $xmltags, $start_from );
      $nodes[] = new XMLElement($name, $children, $attrs, $namespace );
      continue;
    }

    if ( $type == "complete" ) {
      $text = false;
      if ( isset($entry['value']) ) $text = $entry['value'];
      $nodes[] = new XMLElement($name, $text, $attrs, $namespace );
    }
  }

  // A lone element is returned directly, anything else as the array
  return ( count($nodes) == 1 ? $nodes[0] : $nodes );
}
384
385