1 //------------------------------------------------------------------------------
2 // <copyright file="XPathDocument.cs" company="Microsoft">
3 //     Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>
5 // <owner current="true" primary="true">Microsoft</owner>
6 //------------------------------------------------------------------------------
7 
8 using System;
9 using System.IO;
10 using System.Xml;
11 using System.Xml.Schema;
12 using System.Collections.Generic;
13 using MS.Internal.Xml.Cache;
14 using System.Diagnostics;
15 using System.Text;
16 using System.Runtime.Versioning;
17 
18 namespace System.Xml.XPath {
19 
20     /// <summary>
    /// XPathDocument follows the XPath/XQuery data model.  All nodes in the tree reference the document,
22     /// and the document references the root node of the tree.  All namespaces are stored out-of-line,
23     /// in an Element --> In-Scope-Namespaces map.
24     /// </summary>
25     public class XPathDocument : IXPathNavigable {
26         private XPathNode[] pageText, pageRoot, pageXmlNmsp;
27         private int idxText, idxRoot, idxXmlNmsp;
28         private XmlNameTable nameTable;
29         private bool hasLineInfo;
30         private Dictionary<XPathNodeRef, XPathNodeRef> mapNmsp;
31         private Dictionary<string, XPathNodeRef> idValueMap;
32 
33         /// <summary>
34         /// Flags that control Load behavior.
35         /// </summary>
36         internal enum LoadFlags {
37             None = 0,
38             AtomizeNames = 1,       // Do not assume that names passed to XPathDocumentBuilder have been pre-atomized, and atomize them
39             Fragment = 2,           // Create a document with no document node
40         }
41 
42 
43         //-----------------------------------------------
44         // Creation Methods
45         //-----------------------------------------------
46 
47         /// <summary>
48         /// Create a new empty document.
49         /// </summary>
XPathDocument()50         internal XPathDocument() {
51             this.nameTable = new NameTable();
52         }
53 
54         /// <summary>
55         /// Create a new empty document.  All names should be atomized using "nameTable".
56         /// </summary>
XPathDocument(XmlNameTable nameTable)57         internal XPathDocument(XmlNameTable nameTable) {
58             if (nameTable == null)
59                 throw new ArgumentNullException("nameTable");
60 
61             this.nameTable = nameTable;
62         }
63 
64         /// <summary>
65         /// Create a new document and load the content from the reader.
66         /// </summary>
XPathDocument(XmlReader reader)67         public XPathDocument(XmlReader reader) : this(reader, XmlSpace.Default) {
68         }
69 
70         /// <summary>
71         /// Create a new document from "reader", with whitespace handling controlled according to "space".
72         /// </summary>
XPathDocument(XmlReader reader, XmlSpace space)73         public XPathDocument(XmlReader reader, XmlSpace space) {
74             if (reader == null)
75                 throw new ArgumentNullException("reader");
76 
77             LoadFromReader(reader, space);
78         }
79 
80         /// <summary>
81         /// Create a new document and load the content from the text reader.
82         /// </summary>
XPathDocument(TextReader textReader)83         public XPathDocument(TextReader textReader) {
84             XmlTextReaderImpl reader = SetupReader(new XmlTextReaderImpl(string.Empty, textReader));
85 
86             try {
87                 LoadFromReader(reader, XmlSpace.Default);
88             }
89             finally {
90                 reader.Close();
91             }
92         }
93 
94         /// <summary>
95         /// Create a new document and load the content from the stream.
96         /// </summary>
XPathDocument(Stream stream)97         public XPathDocument(Stream stream) {
98             XmlTextReaderImpl reader = SetupReader(new XmlTextReaderImpl(string.Empty, stream));
99 
100             try {
101                 LoadFromReader(reader, XmlSpace.Default);
102             }
103             finally {
104                 reader.Close();
105             }
106         }
107 
108         /// <summary>
109         /// Create a new document and load the content from the Uri.
110         /// </summary>
111         [ResourceConsumption(ResourceScope.Machine)]
112         [ResourceExposure(ResourceScope.Machine)]
XPathDocument(string uri)113         public XPathDocument(string uri) : this(uri, XmlSpace.Default) {
114         }
115 
116         /// <summary>
117         /// Create a new document and load the content from the Uri, with whitespace handling controlled according to "space".
118         /// </summary>
119         [ResourceConsumption(ResourceScope.Machine)]
120         [ResourceExposure(ResourceScope.Machine)]
XPathDocument(string uri, XmlSpace space)121         public XPathDocument(string uri, XmlSpace space) {
122             XmlTextReaderImpl reader = SetupReader(new XmlTextReaderImpl(uri));
123 
124             try {
125                 LoadFromReader(reader, space);
126             }
127             finally {
128                 reader.Close();
129             }
130         }
131 
132         /// <summary>
133         /// Create a writer that can be used to create nodes in this document.  The root node will be assigned "baseUri", and flags
134         /// can be passed to indicate that names should be atomized by the builder and/or a fragment should be created.
135         /// </summary>
LoadFromWriter(LoadFlags flags, string baseUri)136         internal XmlRawWriter LoadFromWriter(LoadFlags flags, string baseUri) {
137             return new XPathDocumentBuilder(this, null, baseUri, flags);
138         }
139 
140         /// <summary>
141         /// Create a writer that can be used to create nodes in this document.  The root node will be assigned "baseUri", and flags
142         /// can be passed to indicate that names should be atomized by the builder and/or a fragment should be created.
143         /// </summary>
LoadFromReader(XmlReader reader, XmlSpace space)144         internal void LoadFromReader(XmlReader reader, XmlSpace space) {
145             XPathDocumentBuilder builder;
146             IXmlLineInfo lineInfo;
147             string xmlnsUri;
148             bool topLevelReader;
149             int initialDepth;
150 
151             if (reader == null)
152                 throw new ArgumentNullException("reader");
153 
154             // Determine line number provider
155             lineInfo = reader as IXmlLineInfo;
156             if (lineInfo == null || !lineInfo.HasLineInfo())
157                 lineInfo = null;
158             this.hasLineInfo = (lineInfo != null);
159 
160             this.nameTable = reader.NameTable;
161             builder = new XPathDocumentBuilder(this, lineInfo, reader.BaseURI, LoadFlags.None);
162 
163             try {
164                 // Determine whether reader is in initial state
165                 topLevelReader = (reader.ReadState == ReadState.Initial);
166                 initialDepth = reader.Depth;
167 
168                 // Get atomized xmlns uri
169                 Debug.Assert((object) this.nameTable.Get(string.Empty) == (object) string.Empty, "NameTable must contain atomized string.Empty");
170                 xmlnsUri = this.nameTable.Get(XmlReservedNs.NsXmlNs);
171 
172                 // Read past Initial state; if there are no more events then load is complete
173                 if (topLevelReader && !reader.Read())
174                     return;
175 
176                 // Read all events
177                 do {
178                     // If reader began in intermediate state, return when all siblings have been read
179                     if (!topLevelReader && reader.Depth < initialDepth)
180                         return;
181 
182                     switch (reader.NodeType) {
183                         case XmlNodeType.Element: {
184                             bool isEmptyElement = reader.IsEmptyElement;
185 
186                             builder.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI, reader.BaseURI);
187 
188                             // Add attribute and namespace nodes to element
189                             while (reader.MoveToNextAttribute()) {
190                                 string namespaceUri = reader.NamespaceURI;
191 
192                                 if ((object) namespaceUri == (object) xmlnsUri) {
193                                     if (reader.Prefix.Length == 0) {
194                                         // Default namespace declaration "xmlns"
195                                         Debug.Assert(reader.LocalName == "xmlns");
196                                         builder.WriteNamespaceDeclaration(string.Empty, reader.Value);
197                                     }
198                                     else {
199                                         Debug.Assert(reader.Prefix == "xmlns");
200                                         builder.WriteNamespaceDeclaration(reader.LocalName, reader.Value);
201                                     }
202                                 }
203                                 else {
204                                     builder.WriteStartAttribute(reader.Prefix, reader.LocalName, namespaceUri);
205                                     builder.WriteString(reader.Value, TextBlockType.Text);
206                                     builder.WriteEndAttribute();
207                                 }
208                             }
209 
210                             if (isEmptyElement)
211                                 builder.WriteEndElement(true);
212                             break;
213                         }
214 
215                         case XmlNodeType.EndElement:
216                             builder.WriteEndElement(false);
217                             break;
218 
219                         case XmlNodeType.Text:
220                         case XmlNodeType.CDATA:
221                             builder.WriteString(reader.Value, TextBlockType.Text);
222                             break;
223 
224                         case XmlNodeType.SignificantWhitespace:
225                             if (reader.XmlSpace == XmlSpace.Preserve)
226                                 builder.WriteString(reader.Value, TextBlockType.SignificantWhitespace);
227                             else
228                                 // Significant whitespace without xml:space="preserve" is not significant in XPath/XQuery data model
229                                 goto case XmlNodeType.Whitespace;
230                             break;
231 
232                         case XmlNodeType.Whitespace:
233                             // We intentionally ignore the reader.XmlSpace property here and blindly trust
234                             //   the reported node type. If the reported information is not in sync
235                             //   (in this case if the reader.XmlSpace == Preserve) then we make the choice
236                             //   to trust the reported node type. Since we have no control over the input reader
237                             //   we can't even assert here.
238 
239                             // Always filter top-level whitespace
240                             if (space == XmlSpace.Preserve && (!topLevelReader || reader.Depth != 0))
241                                 builder.WriteString(reader.Value, TextBlockType.Whitespace);
242                             break;
243 
244                         case XmlNodeType.Comment:
245                             builder.WriteComment(reader.Value);
246                             break;
247 
248                         case XmlNodeType.ProcessingInstruction:
249                             builder.WriteProcessingInstruction(reader.LocalName, reader.Value, reader.BaseURI);
250                             break;
251 
252                         case XmlNodeType.EntityReference:
253                             reader.ResolveEntity();
254                             break;
255 
256                         case XmlNodeType.DocumentType:
257                             // Create ID tables
258                             IDtdInfo info = reader.DtdInfo;
259                             if (info != null)
260                                 builder.CreateIdTables(info);
261                             break;
262 
263                         case XmlNodeType.EndEntity:
264                         case XmlNodeType.None:
265                         case XmlNodeType.XmlDeclaration:
266                             break;
267                     }
268                 }
269                 while (reader.Read());
270             }
271             finally {
272                 builder.Close();
273             }
274         }
275 
276         /// <summary>
277         /// Create a navigator positioned on the root node of the document.
278         /// </summary>
CreateNavigator()279         public XPathNavigator CreateNavigator() {
280             return new XPathDocumentNavigator(this.pageRoot, this.idxRoot, null, 0);
281         }
282 
283 
284         //-----------------------------------------------
285         // Document Properties
286         //-----------------------------------------------
287 
288         /// <summary>
289         /// Return the name table used to atomize all name parts (local name, namespace uri, prefix).
290         /// </summary>
291         internal XmlNameTable NameTable {
292             get { return this.nameTable; }
293         }
294 
295         /// <summary>
296         /// Return true if line number information is recorded in the cache.
297         /// </summary>
298         internal bool HasLineInfo {
299             get { return this.hasLineInfo; }
300         }
301 
302         /// <summary>
303         /// Return the singleton collapsed text node associated with the document.  One physical text node
304         /// represents each logical text node in the document that is the only content-typed child of its
305         /// element parent.
306         /// </summary>
GetCollapsedTextNode(out XPathNode[] pageText)307         internal int GetCollapsedTextNode(out XPathNode[] pageText) {
308             pageText = this.pageText;
309             return this.idxText;
310         }
311 
312         /// <summary>
313         /// Set the page and index where the singleton collapsed text node is stored.
314         /// </summary>
SetCollapsedTextNode(XPathNode[] pageText, int idxText)315         internal void SetCollapsedTextNode(XPathNode[] pageText, int idxText) {
316             this.pageText = pageText;
317             this.idxText = idxText;
318         }
319 
320         /// <summary>
321         /// Return the root node of the document.  This may not be a node of type XPathNodeType.Root if this
322         /// is a document fragment.
323         /// </summary>
GetRootNode(out XPathNode[] pageRoot)324         internal int GetRootNode(out XPathNode[] pageRoot) {
325             pageRoot = this.pageRoot;
326             return this.idxRoot;
327         }
328 
329         /// <summary>
330         /// Set the page and index where the root node is stored.
331         /// </summary>
SetRootNode(XPathNode[] pageRoot, int idxRoot)332         internal void SetRootNode(XPathNode[] pageRoot, int idxRoot) {
333             this.pageRoot = pageRoot;
334             this.idxRoot = idxRoot;
335         }
336 
337         /// <summary>
338         /// Every document has an implicit xmlns:xml namespace node.
339         /// </summary>
GetXmlNamespaceNode(out XPathNode[] pageXmlNmsp)340         internal int GetXmlNamespaceNode(out XPathNode[] pageXmlNmsp) {
341             pageXmlNmsp = this.pageXmlNmsp;
342             return this.idxXmlNmsp;
343         }
344 
345         /// <summary>
346         /// Set the page and index where the implicit xmlns:xml node is stored.
347         /// </summary>
SetXmlNamespaceNode(XPathNode[] pageXmlNmsp, int idxXmlNmsp)348         internal void SetXmlNamespaceNode(XPathNode[] pageXmlNmsp, int idxXmlNmsp) {
349             this.pageXmlNmsp = pageXmlNmsp;
350             this.idxXmlNmsp = idxXmlNmsp;
351         }
352 
353         /// <summary>
354         /// Associate a namespace node with an element.
355         /// </summary>
AddNamespace(XPathNode[] pageElem, int idxElem, XPathNode[] pageNmsp, int idxNmsp)356         internal void AddNamespace(XPathNode[] pageElem, int idxElem, XPathNode[] pageNmsp, int idxNmsp) {
357             Debug.Assert(pageElem[idxElem].NodeType == XPathNodeType.Element && pageNmsp[idxNmsp].NodeType == XPathNodeType.Namespace);
358 
359             if (this.mapNmsp == null)
360                 this.mapNmsp = new Dictionary<XPathNodeRef, XPathNodeRef>();
361 
362             this.mapNmsp.Add(new XPathNodeRef(pageElem, idxElem), new XPathNodeRef(pageNmsp, idxNmsp));
363         }
364 
365         /// <summary>
366         /// Lookup the namespace nodes associated with an element.
367         /// </summary>
LookupNamespaces(XPathNode[] pageElem, int idxElem, out XPathNode[] pageNmsp)368         internal int LookupNamespaces(XPathNode[] pageElem, int idxElem, out XPathNode[] pageNmsp) {
369             XPathNodeRef nodeRef = new XPathNodeRef(pageElem, idxElem);
370             Debug.Assert(pageElem[idxElem].NodeType == XPathNodeType.Element);
371 
372             // Check whether this element has any local namespaces
373             if (this.mapNmsp == null || !this.mapNmsp.ContainsKey(nodeRef)) {
374                 pageNmsp = null;
375                 return 0;
376             }
377 
378             // Yes, so return the page and index of the first local namespace node
379             nodeRef = this.mapNmsp[nodeRef];
380 
381             pageNmsp = nodeRef.Page;
382             return nodeRef.Index;
383         }
384 
385         /// <summary>
386         /// Add an element indexed by ID value.
387         /// </summary>
AddIdElement(string id, XPathNode[] pageElem, int idxElem)388         internal void AddIdElement(string id, XPathNode[] pageElem, int idxElem) {
389             if (this.idValueMap == null)
390                 this.idValueMap = new Dictionary<string, XPathNodeRef>();
391 
392             if (!this.idValueMap.ContainsKey(id))
393                 this.idValueMap.Add(id, new XPathNodeRef(pageElem, idxElem));
394         }
395 
396         /// <summary>
397         /// Lookup the element node associated with the specified ID value.
398         /// </summary>
LookupIdElement(string id, out XPathNode[] pageElem)399         internal int LookupIdElement(string id, out XPathNode[] pageElem) {
400             XPathNodeRef nodeRef;
401 
402             if (this.idValueMap == null || !this.idValueMap.ContainsKey(id)) {
403                 pageElem = null;
404                 return 0;
405             }
406 
407             // Extract page and index from XPathNodeRef
408             nodeRef = this.idValueMap[id];
409             pageElem = nodeRef.Page;
410             return nodeRef.Index;
411         }
412 
413 
414         //-----------------------------------------------
415         // Helper Methods
416         //-----------------------------------------------
417 
418         /// <summary>
419         /// Set properties on the reader so that it is backwards-compatible with V1.
420         /// </summary>
SetupReader(XmlTextReaderImpl reader)421         private XmlTextReaderImpl SetupReader(XmlTextReaderImpl reader) {
422             reader.EntityHandling = EntityHandling.ExpandEntities;
423             reader.XmlValidatingReaderCompatibilityMode = true;
424             return reader;
425         }
426     }
427 }
428