1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "mozilla/dom/DOMParser.h"
8 
9 #include "nsIDOMDocument.h"
10 #include "nsNetUtil.h"
11 #include "nsIStreamListener.h"
12 #include "nsStringStream.h"
13 #include "nsIScriptError.h"
14 #include "nsIScriptSecurityManager.h"
15 #include "nsCRT.h"
16 #include "nsStreamUtils.h"
17 #include "nsContentUtils.h"
18 #include "nsDOMJSUtils.h"
19 #include "nsError.h"
20 #include "nsPIDOMWindow.h"
21 #include "NullPrincipal.h"
22 #include "mozilla/LoadInfo.h"
23 #include "mozilla/dom/BindingUtils.h"
24 #include "mozilla/dom/ScriptSettings.h"
25 
26 using namespace mozilla;
27 using namespace mozilla::dom;
28 
DOMParser()29 DOMParser::DOMParser()
30     : mAttemptedInit(false), mOriginalPrincipalWasSystem(false) {}
31 
~DOMParser()32 DOMParser::~DOMParser() {}
33 
// QueryInterface implementation for DOMParser.
// Entries: the wrapper-cache entry, nsISupports (resolved through
// nsIDOMParser because it is otherwise ambiguous), nsIDOMParser itself, and
// nsISupportsWeakReference.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(DOMParser)
  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIDOMParser)
  NS_INTERFACE_MAP_ENTRY(nsIDOMParser)
  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
NS_INTERFACE_MAP_END
41 
// Cycle-collection support for the wrapper-cached DOMParser; mOwner is the
// only member handed to the macro.
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(DOMParser, mOwner)
43 
// Cycle-collecting AddRef/Release implementations for DOMParser.
NS_IMPL_CYCLE_COLLECTING_ADDREF(DOMParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(DOMParser)
46 
47 static const char* StringFromSupportedType(SupportedType aType) {
48   return SupportedTypeValues::strings[static_cast<int>(aType)].value;
49 }
50 
ParseFromString(const nsAString & aStr,SupportedType aType,ErrorResult & rv)51 already_AddRefed<nsIDocument> DOMParser::ParseFromString(const nsAString& aStr,
52                                                          SupportedType aType,
53                                                          ErrorResult& rv) {
54   nsCOMPtr<nsIDOMDocument> domDocument;
55   rv = ParseFromString(aStr, StringFromSupportedType(aType),
56                        getter_AddRefs(domDocument));
57   nsCOMPtr<nsIDocument> document(do_QueryInterface(domDocument));
58   return document.forget();
59 }
60 
61 NS_IMETHODIMP
ParseFromString(const char16_t * str,const char * contentType,nsIDOMDocument ** aResult)62 DOMParser::ParseFromString(const char16_t* str, const char* contentType,
63                            nsIDOMDocument** aResult) {
64   NS_ENSURE_ARG(str);
65   // Converting a string to an enum value manually is a bit of a pain,
66   // so let's just use a helper that takes a content-type string.
67   return ParseFromString(nsDependentString(str), contentType, aResult);
68 }
69 
ParseFromString(const nsAString & str,const char * contentType,nsIDOMDocument ** aResult)70 nsresult DOMParser::ParseFromString(const nsAString& str,
71                                     const char* contentType,
72                                     nsIDOMDocument** aResult) {
73   NS_ENSURE_ARG_POINTER(aResult);
74 
75   nsresult rv;
76 
77   if (!nsCRT::strcmp(contentType, "text/html")) {
78     nsCOMPtr<nsIDOMDocument> domDocument;
79     rv = SetUpDocument(DocumentFlavorHTML, getter_AddRefs(domDocument));
80     NS_ENSURE_SUCCESS(rv, rv);
81     nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument);
82 
83     // Keep the XULXBL state in sync with the XML case.
84 
85     if (mOriginalPrincipalWasSystem) {
86       document->ForceEnableXULXBL();
87     }
88 
89     rv = nsContentUtils::ParseDocumentHTML(str, document, false);
90     NS_ENSURE_SUCCESS(rv, rv);
91 
92     domDocument.forget(aResult);
93     return rv;
94   }
95 
96   nsAutoCString utf8str;
97   // Convert from UTF16 to UTF8 using fallible allocations
98   if (!AppendUTF16toUTF8(str, utf8str, mozilla::fallible)) {
99     return NS_ERROR_OUT_OF_MEMORY;
100   }
101 
102   // The new stream holds a reference to the buffer
103   nsCOMPtr<nsIInputStream> stream;
104   rv = NS_NewByteInputStream(getter_AddRefs(stream), utf8str.get(),
105                              utf8str.Length(), NS_ASSIGNMENT_DEPEND);
106   if (NS_FAILED(rv)) return rv;
107 
108   return ParseFromStream(stream, "UTF-8", utf8str.Length(), contentType,
109                          aResult);
110 }
111 
ParseFromBuffer(const Sequence<uint8_t> & aBuf,uint32_t aBufLen,SupportedType aType,ErrorResult & rv)112 already_AddRefed<nsIDocument> DOMParser::ParseFromBuffer(
113     const Sequence<uint8_t>& aBuf, uint32_t aBufLen, SupportedType aType,
114     ErrorResult& rv) {
115   if (aBufLen > aBuf.Length()) {
116     rv.Throw(NS_ERROR_XPC_NOT_ENOUGH_ELEMENTS_IN_ARRAY);
117     return nullptr;
118   }
119   nsCOMPtr<nsIDOMDocument> domDocument;
120   rv = DOMParser::ParseFromBuffer(aBuf.Elements(), aBufLen,
121                                   StringFromSupportedType(aType),
122                                   getter_AddRefs(domDocument));
123   nsCOMPtr<nsIDocument> document(do_QueryInterface(domDocument));
124   return document.forget();
125 }
126 
ParseFromBuffer(const Uint8Array & aBuf,uint32_t aBufLen,SupportedType aType,ErrorResult & rv)127 already_AddRefed<nsIDocument> DOMParser::ParseFromBuffer(const Uint8Array& aBuf,
128                                                          uint32_t aBufLen,
129                                                          SupportedType aType,
130                                                          ErrorResult& rv) {
131   aBuf.ComputeLengthAndData();
132 
133   if (aBufLen > aBuf.Length()) {
134     rv.Throw(NS_ERROR_XPC_NOT_ENOUGH_ELEMENTS_IN_ARRAY);
135     return nullptr;
136   }
137   nsCOMPtr<nsIDOMDocument> domDocument;
138   rv = DOMParser::ParseFromBuffer(aBuf.Data(), aBufLen,
139                                   StringFromSupportedType(aType),
140                                   getter_AddRefs(domDocument));
141   nsCOMPtr<nsIDocument> document(do_QueryInterface(domDocument));
142   return document.forget();
143 }
144 
145 NS_IMETHODIMP
ParseFromBuffer(const uint8_t * buf,uint32_t bufLen,const char * contentType,nsIDOMDocument ** aResult)146 DOMParser::ParseFromBuffer(const uint8_t* buf, uint32_t bufLen,
147                            const char* contentType, nsIDOMDocument** aResult) {
148   NS_ENSURE_ARG_POINTER(buf);
149   NS_ENSURE_ARG_POINTER(aResult);
150 
151   // The new stream holds a reference to the buffer
152   nsCOMPtr<nsIInputStream> stream;
153   nsresult rv = NS_NewByteInputStream(getter_AddRefs(stream),
154                                       reinterpret_cast<const char*>(buf),
155                                       bufLen, NS_ASSIGNMENT_DEPEND);
156   if (NS_FAILED(rv)) return rv;
157 
158   return ParseFromStream(stream, nullptr, bufLen, contentType, aResult);
159 }
160 
ParseFromStream(nsIInputStream * aStream,const nsAString & aCharset,int32_t aContentLength,SupportedType aType,ErrorResult & rv)161 already_AddRefed<nsIDocument> DOMParser::ParseFromStream(
162     nsIInputStream* aStream, const nsAString& aCharset, int32_t aContentLength,
163     SupportedType aType, ErrorResult& rv) {
164   nsCOMPtr<nsIDOMDocument> domDocument;
165   rv = DOMParser::ParseFromStream(
166       aStream, NS_ConvertUTF16toUTF8(aCharset).get(), aContentLength,
167       StringFromSupportedType(aType), getter_AddRefs(domDocument));
168   nsCOMPtr<nsIDocument> document(do_QueryInterface(domDocument));
169   return document.forget();
170 }
171 
172 NS_IMETHODIMP
ParseFromStream(nsIInputStream * aStream,const char * aCharset,int32_t aContentLength,const char * aContentType,nsIDOMDocument ** aResult)173 DOMParser::ParseFromStream(nsIInputStream* aStream, const char* aCharset,
174                            int32_t aContentLength, const char* aContentType,
175                            nsIDOMDocument** aResult) {
176   NS_ENSURE_ARG(aStream);
177   NS_ENSURE_ARG(aContentType);
178   NS_ENSURE_ARG_POINTER(aResult);
179   *aResult = nullptr;
180 
181   bool svg = nsCRT::strcmp(aContentType, "image/svg+xml") == 0;
182 
183   // For now, we can only create XML documents.
184   // XXXsmaug Should we create an HTMLDocument (in XHTML mode)
185   //         for "application/xhtml+xml"?
186   if ((nsCRT::strcmp(aContentType, "text/xml") != 0) &&
187       (nsCRT::strcmp(aContentType, "application/xml") != 0) &&
188       (nsCRT::strcmp(aContentType, "application/xhtml+xml") != 0) && !svg)
189     return NS_ERROR_NOT_IMPLEMENTED;
190 
191   nsresult rv;
192 
193   // Put the nsCOMPtr out here so we hold a ref to the stream as needed
194   nsCOMPtr<nsIInputStream> stream = aStream;
195   if (!NS_InputStreamIsBuffered(stream)) {
196     nsCOMPtr<nsIInputStream> bufferedStream;
197     rv = NS_NewBufferedInputStream(getter_AddRefs(bufferedStream),
198                                    stream.forget(), 4096);
199     NS_ENSURE_SUCCESS(rv, rv);
200 
201     stream = bufferedStream;
202   }
203 
204   nsCOMPtr<nsIDOMDocument> domDocument;
205   rv = SetUpDocument(svg ? DocumentFlavorSVG : DocumentFlavorLegacyGuess,
206                      getter_AddRefs(domDocument));
207   NS_ENSURE_SUCCESS(rv, rv);
208 
209   // Create a fake channel
210   nsCOMPtr<nsIChannel> parserChannel;
211   NS_NewInputStreamChannel(getter_AddRefs(parserChannel), mDocumentURI,
212                            nullptr,  // aStream
213                            mPrincipal, nsILoadInfo::SEC_FORCE_INHERIT_PRINCIPAL,
214                            nsIContentPolicy::TYPE_OTHER,
215                            nsDependentCString(aContentType));
216   NS_ENSURE_STATE(parserChannel);
217 
218   if (aCharset) {
219     parserChannel->SetContentCharset(nsDependentCString(aCharset));
220   }
221 
222   // Tell the document to start loading
223   nsCOMPtr<nsIStreamListener> listener;
224 
225   // Have to pass false for reset here, else the reset will remove
226   // our event listener.  Should that listener addition move to later
227   // than this call?
228   nsCOMPtr<nsIDocument> document(do_QueryInterface(domDocument));
229   if (!document) return NS_ERROR_FAILURE;
230 
231   // Keep the XULXBL state in sync with the HTML case
232 
233   if (mOriginalPrincipalWasSystem) {
234     document->ForceEnableXULXBL();
235   }
236 
237   rv = document->StartDocumentLoad(kLoadAsData, parserChannel, nullptr, nullptr,
238                                    getter_AddRefs(listener), false);
239 
240   if (NS_FAILED(rv) || !listener) {
241     return NS_ERROR_FAILURE;
242   }
243 
244   // Now start pumping data to the listener
245   nsresult status;
246 
247   rv = listener->OnStartRequest(parserChannel, nullptr);
248   if (NS_FAILED(rv)) parserChannel->Cancel(rv);
249   parserChannel->GetStatus(&status);
250 
251   if (NS_SUCCEEDED(rv) && NS_SUCCEEDED(status)) {
252     rv = listener->OnDataAvailable(parserChannel, nullptr, stream, 0,
253                                    aContentLength);
254     if (NS_FAILED(rv)) parserChannel->Cancel(rv);
255     parserChannel->GetStatus(&status);
256   }
257 
258   rv = listener->OnStopRequest(parserChannel, nullptr, status);
259   // Failure returned from OnStopRequest does not affect the final status of
260   // the channel, so we do not need to call Cancel(rv) as we do above.
261 
262   if (NS_FAILED(rv)) {
263     return NS_ERROR_FAILURE;
264   }
265 
266   domDocument.swap(*aResult);
267 
268   return NS_OK;
269 }
270 
271 NS_IMETHODIMP
Init(nsIPrincipal * principal,nsIURI * documentURI,nsIURI * baseURI,nsIGlobalObject * aScriptObject)272 DOMParser::Init(nsIPrincipal* principal, nsIURI* documentURI, nsIURI* baseURI,
273                 nsIGlobalObject* aScriptObject) {
274   NS_ENSURE_STATE(!mAttemptedInit);
275   mAttemptedInit = true;
276   NS_ENSURE_ARG(principal || documentURI);
277   mDocumentURI = documentURI;
278 
279   if (!mDocumentURI) {
280     principal->GetURI(getter_AddRefs(mDocumentURI));
281     // If we have the system principal, then we'll just use the null principals
282     // uri.
283     if (!mDocumentURI && !nsContentUtils::IsSystemPrincipal(principal)) {
284       return NS_ERROR_INVALID_ARG;
285     }
286   }
287 
288   mScriptHandlingObject = do_GetWeakReference(aScriptObject);
289   mPrincipal = principal;
290   nsresult rv;
291   if (!mPrincipal) {
292     // BUG 1237080 -- in this case we're getting a chrome privilege scripted
293     // DOMParser object creation without an explicit principal set.  This is
294     // now deprecated.
295     nsContentUtils::ReportToConsole(
296         nsIScriptError::warningFlag, NS_LITERAL_CSTRING("DOM"), nullptr,
297         nsContentUtils::eDOM_PROPERTIES,
298         "ChromeScriptedDOMParserWithoutPrincipal", nullptr, 0, documentURI);
299 
300     OriginAttributes attrs;
301     mPrincipal = BasePrincipal::CreateCodebasePrincipal(mDocumentURI, attrs);
302     NS_ENSURE_TRUE(mPrincipal, NS_ERROR_FAILURE);
303   } else {
304     if (nsContentUtils::IsSystemPrincipal(mPrincipal)) {
305       // Don't give DOMParsers the system principal.  Use a null
306       // principal instead.
307       mOriginalPrincipalWasSystem = true;
308       mPrincipal = NullPrincipal::Create();
309 
310       if (!mDocumentURI) {
311         rv = mPrincipal->GetURI(getter_AddRefs(mDocumentURI));
312         NS_ENSURE_SUCCESS(rv, rv);
313       }
314     }
315   }
316 
317   mBaseURI = baseURI;
318 
319   MOZ_ASSERT(mPrincipal, "Must have principal");
320   MOZ_ASSERT(mDocumentURI, "Must have document URI");
321   return NS_OK;
322 }
323 
Constructor(const GlobalObject & aOwner,nsIPrincipal * aPrincipal,nsIURI * aDocumentURI,nsIURI * aBaseURI,ErrorResult & rv)324 /*static */ already_AddRefed<DOMParser> DOMParser::Constructor(
325     const GlobalObject& aOwner, nsIPrincipal* aPrincipal, nsIURI* aDocumentURI,
326     nsIURI* aBaseURI, ErrorResult& rv) {
327   if (aOwner.CallerType() != CallerType::System) {
328     rv.Throw(NS_ERROR_DOM_SECURITY_ERR);
329     return nullptr;
330   }
331   RefPtr<DOMParser> domParser = new DOMParser(aOwner.GetAsSupports());
332   rv = domParser->InitInternal(aOwner.GetAsSupports(), aPrincipal, aDocumentURI,
333                                aBaseURI);
334   if (rv.Failed()) {
335     return nullptr;
336   }
337   return domParser.forget();
338 }
339 
Constructor(const GlobalObject & aOwner,ErrorResult & rv)340 /*static */ already_AddRefed<DOMParser> DOMParser::Constructor(
341     const GlobalObject& aOwner, ErrorResult& rv) {
342   RefPtr<DOMParser> domParser = new DOMParser(aOwner.GetAsSupports());
343   rv = domParser->InitInternal(aOwner.GetAsSupports(),
344                                nsContentUtils::SubjectPrincipal(), nullptr,
345                                nullptr);
346   if (rv.Failed()) {
347     return nullptr;
348   }
349   return domParser.forget();
350 }
351 
InitInternal(nsISupports * aOwner,nsIPrincipal * prin,nsIURI * documentURI,nsIURI * baseURI)352 nsresult DOMParser::InitInternal(nsISupports* aOwner, nsIPrincipal* prin,
353                                  nsIURI* documentURI, nsIURI* baseURI) {
354   AttemptedInitMarker marker(&mAttemptedInit);
355   if (!documentURI) {
356     // No explicit documentURI; grab document and base URIs off the window our
357     // constructor was called on. Error out if anything untoward happens.
358 
359     // Note that this is a behavior change as far as I can tell -- we're now
360     // using the base URI and document URI of the window off of which the
361     // DOMParser is created, not the window in which parse*() is called.
362     // Does that matter?
363 
364     // Also note that |cx| matches what GetDocumentFromContext() would return,
365     // while GetDocumentFromCaller() gives us the window that the DOMParser()
366     // call was made on.
367 
368     nsCOMPtr<nsPIDOMWindowInner> window = do_QueryInterface(aOwner);
369     if (!window) {
370       return NS_ERROR_UNEXPECTED;
371     }
372 
373     baseURI = window->GetDocBaseURI();
374     documentURI = window->GetDocumentURI();
375     if (!documentURI) {
376       return NS_ERROR_UNEXPECTED;
377     }
378   }
379 
380   nsCOMPtr<nsIGlobalObject> scriptglobal = do_QueryInterface(aOwner);
381   return Init(prin, documentURI, baseURI, scriptglobal);
382 }
383 
Init(nsIPrincipal * aPrincipal,nsIURI * aDocumentURI,nsIURI * aBaseURI,mozilla::ErrorResult & rv)384 void DOMParser::Init(nsIPrincipal* aPrincipal, nsIURI* aDocumentURI,
385                      nsIURI* aBaseURI, mozilla::ErrorResult& rv) {
386   AttemptedInitMarker marker(&mAttemptedInit);
387 
388   nsCOMPtr<nsIPrincipal> principal = aPrincipal;
389   if (!principal && !aDocumentURI) {
390     principal = nsContentUtils::SubjectPrincipal();
391   }
392 
393   rv = Init(principal, aDocumentURI, aBaseURI, GetEntryGlobal());
394 }
395 
SetUpDocument(DocumentFlavor aFlavor,nsIDOMDocument ** aResult)396 nsresult DOMParser::SetUpDocument(DocumentFlavor aFlavor,
397                                   nsIDOMDocument** aResult) {
398   // We should really QI to nsIGlobalObject here, but nsDocument gets confused
399   // if we pass it a scriptHandlingObject that doesn't QI to
400   // nsIScriptGlobalObject, and test_isequalnode.js (an xpcshell test without
401   // a window global) breaks. The correct solution is just to wean nsDocument
402   // off of nsIScriptGlobalObject, but that's a yak to shave another day.
403   nsCOMPtr<nsIScriptGlobalObject> scriptHandlingObject =
404       do_QueryReferent(mScriptHandlingObject);
405   nsresult rv;
406   if (!mPrincipal) {
407     NS_ENSURE_TRUE(!mAttemptedInit, NS_ERROR_NOT_INITIALIZED);
408     AttemptedInitMarker marker(&mAttemptedInit);
409 
410     nsCOMPtr<nsIPrincipal> prin = NullPrincipal::Create();
411     rv = Init(prin, nullptr, nullptr, scriptHandlingObject);
412     NS_ENSURE_SUCCESS(rv, rv);
413   }
414 
415   // Try to inherit a style backend.
416   NS_ASSERTION(mPrincipal, "Must have principal by now");
417   NS_ASSERTION(mDocumentURI, "Must have document URI by now");
418 
419   return NS_NewDOMDocument(aResult, EmptyString(), EmptyString(), nullptr,
420                            mDocumentURI, mBaseURI, mPrincipal, true,
421                            scriptHandlingObject, aFlavor);
422 }
423