1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "nsAtom.h"
8 #include "nsParser.h"
9 #include "nsString.h"
10 #include "nsCRT.h"
11 #include "nsScanner.h"
12 #include "plstr.h"
13 #include "nsIChannel.h"
14 #include "nsIInputStream.h"
15 #include "CNavDTD.h"
16 #include "prenv.h"
17 #include "prlock.h"
18 #include "prcvar.h"
19 #include "nsParserCIID.h"
20 #include "nsReadableUtils.h"
21 #include "nsCOMPtr.h"
22 #include "nsExpatDriver.h"
23 #include "nsIFragmentContentSink.h"
24 #include "nsStreamUtils.h"
25 #include "nsHTMLTokenizer.h"
26 #include "nsXPCOMCIDInternal.h"
27 #include "nsMimeTypes.h"
28 #include "mozilla/CondVar.h"
29 #include "mozilla/Mutex.h"
30 #include "nsCharsetSource.h"
31 #include "nsThreadUtils.h"
32 #include "nsIHTMLContentSink.h"
33
34 #include "mozilla/BinarySearch.h"
35 #include "mozilla/dom/ScriptLoader.h"
36 #include "mozilla/Encoding.h"
37
38 using namespace mozilla;
39
40 #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
41 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
42 #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
43 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040
44
45 //-------------- Begin ParseContinue Event Definition ------------------------
46 /*
47 The parser can be explicitly interrupted by passing a return value of
48 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
49 the parser to stop processing and allow the application to return to the event
50 loop. The data which was left at the time of interruption will be processed
51 the next time OnDataAvailable is called. If the parser has received its final
52 chunk of data then OnDataAvailable will no longer be called by the networking
53 module, so the parser will schedule a nsParserContinueEvent which will call
54 the parser to process the remaining data after returning to the event loop.
55 If the parser is interrupted while processing the remaining data it will
56 schedule another ParseContinueEvent. The processing of data followed by
57 scheduling of the continue events will proceed until either:
58
59 1) All of the remaining data can be processed without interrupting
60 2) The parser has been cancelled.
61
62
63 This capability is currently used in CNavDTD and nsHTMLContentSink. The
64 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
65 processed and when each token is processed. The nsHTML content sink records
66 the time when the chunk has started processing and will return
67 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
68 threshold called max tokenizing processing time. This allows the content sink
69 to limit how much data is processed in a single chunk which in turn gates how
70 much time is spent away from the event loop. Processing smaller chunks of data
71 also reduces the time spent in subsequent reflows.
72
73 This capability is most apparent when loading large documents. If the maximum
74 token processing time is set small enough the application will remain
75 responsive during document load.
76
77 A side-effect of this capability is that document load is not complete when
78 the last chunk of data is passed to OnDataAvailable since the parser may have
79 been interrupted when the last chunk of data arrived. The document is complete
80 when all of the document has been tokenized and there aren't any pending
81 nsParserContinueEvents. This can cause problems if the application assumes
82 that it can monitor the load requests to determine when the document load has
83 been completed. This is what happens in Mozilla. The document is considered
84 completely loaded when all of the load requests have been satisfied. To delay
85 the document load until all of the parsing has been completed the
86 nsHTMLContentSink adds a dummy parser load request which is not removed until
87 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
88 DidBuildModel until the final chunk of data has been passed to the parser
89 through the OnDataAvailable and there aren't any pending
nsParserContinueEvents.
91
Currently the parser ignores requests to be interrupted during the
93 processing of script. This is because a document.write followed by JavaScript
94 calls to manipulate the DOM may fail if the parser was interrupted during the
95 document.write.
96
97 For more details @see bugzilla bug 76722
98 */
99
100 class nsParserContinueEvent : public Runnable {
101 public:
102 RefPtr<nsParser> mParser;
103
nsParserContinueEvent(nsParser * aParser)104 explicit nsParserContinueEvent(nsParser* aParser)
105 : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
106
Run()107 NS_IMETHOD Run() override {
108 mParser->HandleParserContinueEvent(this);
109 return NS_OK;
110 }
111 };
112
113 //-------------- End ParseContinue Event Definition ------------------------
114
115 /**
116 * default constructor
117 */
nsParser()118 nsParser::nsParser()
119 : mParserContext(nullptr), mCharset(WINDOWS_1252_ENCODING) {
120 Initialize(true);
121 }
122
~nsParser()123 nsParser::~nsParser() { Cleanup(); }
124
Initialize(bool aConstructor)125 void nsParser::Initialize(bool aConstructor) {
126 if (aConstructor) {
127 // Raw pointer
128 mParserContext = 0;
129 } else {
130 // nsCOMPtrs
131 mObserver = nullptr;
132 mUnusedInput.Truncate();
133 }
134
135 mContinueEvent = nullptr;
136 mCharsetSource = kCharsetUninitialized;
137 mCharset = WINDOWS_1252_ENCODING;
138 mInternalState = NS_OK;
139 mStreamStatus = NS_OK;
140 mCommand = eViewNormal;
141 mBlocked = 0;
142 mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | NS_PARSER_FLAG_CAN_TOKENIZE;
143
144 mProcessingNetworkData = false;
145 mIsAboutBlank = false;
146 }
147
Cleanup()148 void nsParser::Cleanup() {
149 #ifdef DEBUG
150 if (mParserContext && mParserContext->mPrevContext) {
151 NS_WARNING("Extra parser contexts still on the parser stack");
152 }
153 #endif
154
155 while (mParserContext) {
156 CParserContext* pc = mParserContext->mPrevContext;
157 delete mParserContext;
158 mParserContext = pc;
159 }
160
161 // It should not be possible for this flag to be set when we are getting
162 // destroyed since this flag implies a pending nsParserContinueEvent, which
163 // has an owning reference to |this|.
164 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
165 }
166
// Cycle collection support for nsParser.
NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)

// Unlink: the DTD, the sink, the observer and the weak reference.
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
  NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
NS_IMPL_CYCLE_COLLECTION_UNLINK_END

// Traverse: the same three members, plus the tokenizer of every context on
// the parser context stack.
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
  // The contexts form a singly linked list through mPrevContext; report each
  // context's tokenizer to the traversal callback.
  CParserContext* pc = tmp->mParserContext;
  while (pc) {
    cb.NoteXPCOMChild(pc->mTokenizer);
    pc = pc->mPrevContext;
  }
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END

// Reference counting and the interface map. nsISupports is resolved through
// nsIParser.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
  NS_INTERFACE_MAP_ENTRY(nsIParser)
  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
NS_INTERFACE_MAP_END
196
197 // The parser continue event is posted only if
198 // all of the data to parse has been passed to ::OnDataAvailable
199 // and the parser has been interrupted by the content sink
200 // because the processing of tokens took too long.
201
202 nsresult nsParser::PostContinueEvent() {
203 if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
204 // If this flag isn't set, then there shouldn't be a live continue event!
205 NS_ASSERTION(!mContinueEvent, "bad");
206
207 // This creates a reference cycle between this and the event that is
208 // broken when the event fires.
209 nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
210 if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
211 NS_WARNING("failed to dispatch parser continuation event");
212 } else {
213 mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
214 mContinueEvent = event;
215 }
216 }
217 return NS_OK;
218 }
219
NS_IMETHODIMP_(void)220 NS_IMETHODIMP_(void)
221 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
222
223 /**
224 * Call this method once you've created a parser, and want to instruct it
225 * about the command which caused the parser to be constructed. For example,
226 * this allows us to select a DTD which can do, say, view-source.
227 *
228 * @param aCommand the command string to set
229 */
NS_IMETHODIMP_(void)230 NS_IMETHODIMP_(void)
231 nsParser::SetCommand(const char* aCommand) {
232 mCommandStr.Assign(aCommand);
233 if (mCommandStr.EqualsLiteral("view-source")) {
234 mCommand = eViewSource;
235 } else if (mCommandStr.EqualsLiteral("view-fragment")) {
236 mCommand = eViewFragment;
237 } else {
238 mCommand = eViewNormal;
239 }
240 }
241
242 /**
243 * Call this method once you've created a parser, and want to instruct it
244 * about the command which caused the parser to be constructed. For example,
245 * this allows us to select a DTD which can do, say, view-source.
246 *
247 * @param aParserCommand the command to set
248 */
NS_IMETHODIMP_(void)249 NS_IMETHODIMP_(void)
250 nsParser::SetCommand(eParserCommands aParserCommand) {
251 mCommand = aParserCommand;
252 }
253
254 /**
255 * Call this method once you've created a parser, and want to instruct it
256 * about what charset to load
257 *
258 * @param aCharset- the charset of a document
259 * @param aCharsetSource- the source of the charset
260 */
SetDocumentCharset(NotNull<const Encoding * > aCharset,int32_t aCharsetSource,bool aChannelHadCharset)261 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
262 int32_t aCharsetSource,
263 bool aChannelHadCharset) {
264 mCharset = aCharset;
265 mCharsetSource = aCharsetSource;
266 if (mParserContext && mParserContext->mScanner) {
267 mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
268 }
269 }
270
SetSinkCharset(NotNull<const Encoding * > aCharset)271 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
272 if (mSink) {
273 mSink->SetDocumentCharset(aCharset);
274 }
275 }
276
277 /**
278 * This method gets called in order to set the content
279 * sink for this parser to dump nodes to.
280 *
281 * @param nsIContentSink interface for node receiver
282 */
NS_IMETHODIMP_(void)283 NS_IMETHODIMP_(void)
284 nsParser::SetContentSink(nsIContentSink* aSink) {
285 MOZ_ASSERT(aSink, "sink cannot be null!");
286 mSink = aSink;
287
288 if (mSink) {
289 mSink->SetParser(this);
290 nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
291 if (htmlSink) {
292 mIsAboutBlank = true;
293 }
294 }
295 }
296
297 /**
298 * retrieve the sink set into the parser
299 * @return current sink
300 */
NS_IMETHODIMP_(nsIContentSink *)301 NS_IMETHODIMP_(nsIContentSink*)
302 nsParser::GetContentSink() { return mSink; }
303
FindSuitableDTD(CParserContext & aParserContext)304 static nsIDTD* FindSuitableDTD(CParserContext& aParserContext) {
305 // We always find a DTD.
306 aParserContext.mAutoDetectStatus = ePrimaryDetect;
307
308 // Quick check for view source.
309 MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
310 "The old parser is not supposed to be used for View Source "
311 "anymore.");
312
313 // Now see if we're parsing HTML (which, as far as we're concerned, simply
314 // means "not XML").
315 if (aParserContext.mDocType != eXML) {
316 return new CNavDTD();
317 }
318
319 // If we're here, then we'd better be parsing XML.
320 NS_ASSERTION(aParserContext.mDocType == eXML,
321 "What are you trying to send me, here?");
322 return new nsExpatDriver();
323 }
324
325 NS_IMETHODIMP
CancelParsingEvents()326 nsParser::CancelParsingEvents() {
327 if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
328 NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
329 // Revoke the pending continue parsing event
330 mContinueEvent = nullptr;
331 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
332 }
333 return NS_OK;
334 }
335
336 ////////////////////////////////////////////////////////////////////////
337
/**
 * Evaluates EXPR1 and EXPR2 exactly once each, in that order.  Stores the
 * value of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of
 * EXPR1 (which could be success or failure).
 *
 * RV must be a plain identifier naming an nsresult variable: it is pasted
 * into the name of the temporary that holds the result of EXPR1.
 *
 * To understand the motivation for this construct, consider these example
 * methods:
 *
 *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 *     nsresult rv = NS_OK;
 *     ...
 *     rv = obj->DoThatThing();
 *     NS_ENSURE_SUCCESS(rv, rv);
 *     ...
 *     return rv;
 *   }
 *
 *   void nsCaller::MakeThingsHappen() {
 *     return mSomething->DoThatThing(mWhatever);
 *   }
 *
 * Suppose, for whatever reason*, we want to shift responsibility for calling
 * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
 *
 *   nsresult nsSomething::DoThatThing() {
 *     nsresult rv = NS_OK;
 *     ...
 *     ...
 *     return rv;
 *   }
 *
 *   void nsCaller::MakeThingsHappen() {
 *     nsresult rv;
 *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 *                              mWhatever->DoThatThing(),
 *                              rv);
 *     return rv;
 *   }
 *
 * *Possible reasons include: nsCaller doesn't want to give mSomething access
 * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 * be called regardless of how nsSomething::DoThatThing behaves, &c.
 *
 * Implementation note: RV first receives the result of EXPR2 and is only
 * replaced by the saved result of EXPR1 when EXPR2 succeeded, so a failure
 * of EXPR2 always wins and a failure of EXPR1 is never lost.
 */
#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) \
  {                                                \
    nsresult RV##__temp = EXPR1;                   \
    RV = EXPR2;                                    \
    if (NS_SUCCEEDED(RV)) {                        \
      RV = RV##__temp;                             \
    }                                              \
  }
390
391 /**
392 * This gets called just prior to the model actually
393 * being constructed. It's important to make this the
394 * last thing that happens right before parsing, so we
395 * can delay until the last moment the resolution of
396 * which DTD to use (unless of course we're assigned one).
397 */
WillBuildModel(nsString & aFilename)398 nsresult nsParser::WillBuildModel(nsString& aFilename) {
399 if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
400
401 if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
402
403 if (eDTDMode_unknown == mParserContext->mDTDMode ||
404 eDTDMode_autodetect == mParserContext->mDTDMode) {
405 if (mIsAboutBlank) {
406 mParserContext->mDTDMode = eDTDMode_quirks;
407 mParserContext->mDocType = eHTML_Quirks;
408 } else {
409 mParserContext->mDTDMode = eDTDMode_full_standards;
410 mParserContext->mDocType = eXML;
411 }
412 } // else XML fragment with nested parser context
413
414 NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
415 "Clobbering DTD for non-root parser context!");
416 mDTD = FindSuitableDTD(*mParserContext);
417 NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
418
419 nsITokenizer* tokenizer;
420 nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
421 NS_ENSURE_SUCCESS(rv, rv);
422
423 rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
424 nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
425 // nsIDTD::WillBuildModel used to be responsible for calling
426 // nsIContentSink::WillBuildModel, but that obligation isn't expressible
427 // in the nsIDTD interface itself, so it's sounder and simpler to give that
428 // responsibility back to the parser. The former behavior of the DTD was to
429 // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
430 // failure we should use sinkResult instead of rv, to preserve the old error
431 // handling behavior of the DTD:
432 return NS_FAILED(sinkResult) ? sinkResult : rv;
433 }
434
435 /**
436 * This gets called when the parser is done with its input.
437 * Note that the parser may have been called recursively, so we
438 * have to check for a prev. context before closing out the DTD/sink.
439 */
DidBuildModel(nsresult anErrorCode)440 nsresult nsParser::DidBuildModel(nsresult anErrorCode) {
441 nsresult result = anErrorCode;
442
443 if (IsComplete()) {
444 if (mParserContext && !mParserContext->mPrevContext) {
445 // Let sink know if we're about to end load because we've been terminated.
446 // In that case we don't want it to run deferred scripts.
447 bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
448 if (mDTD && mSink) {
449 nsresult dtdResult = mDTD->DidBuildModel(anErrorCode),
450 sinkResult = mSink->DidBuildModel(terminated);
451 // nsIDTD::DidBuildModel used to be responsible for calling
452 // nsIContentSink::DidBuildModel, but that obligation isn't expressible
453 // in the nsIDTD interface itself, so it's sounder and simpler to give
454 // that responsibility back to the parser. The former behavior of the
455 // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
456 // sink returns failure we should use sinkResult instead of dtdResult,
457 // to preserve the old error handling behavior of the DTD:
458 result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
459 }
460
461 // Ref. to bug 61462.
462 mParserContext->mRequest = nullptr;
463 }
464 }
465
466 return result;
467 }
468
469 /**
470 * This method adds a new parser context to the list,
471 * pushing the current one to the next position.
472 *
473 * @param ptr to new context
474 */
PushContext(CParserContext & aContext)475 void nsParser::PushContext(CParserContext& aContext) {
476 NS_ASSERTION(aContext.mPrevContext == mParserContext,
477 "Trying to push a context whose previous context differs from "
478 "the current parser context.");
479 mParserContext = &aContext;
480 }
481
482 /**
483 * This method pops the topmost context off the stack,
484 * returning it to the user. The next context (if any)
485 * becomes the current context.
486 * @update gess7/22/98
487 * @return prev. context
488 */
PopContext()489 CParserContext* nsParser::PopContext() {
490 CParserContext* oldContext = mParserContext;
491 if (oldContext) {
492 mParserContext = oldContext->mPrevContext;
493 if (mParserContext) {
494 // If the old context was blocked, propagate the blocked state
495 // back to the new one. Also, propagate the stream listener state
496 // but don't override onStop state to guarantee the call to
497 // DidBuildModel().
498 if (mParserContext->mStreamListenerState != eOnStop) {
499 mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
500 }
501 }
502 }
503 return oldContext;
504 }
505
506 /**
507 * Call this when you want control whether or not the parser will parse
508 * and tokenize input (TRUE), or whether it just caches input to be
509 * parsed later (FALSE).
510 *
511 * @param aState determines whether we parse/tokenize or just cache.
512 * @return current state
513 */
SetUnusedInput(nsString & aBuffer)514 void nsParser::SetUnusedInput(nsString& aBuffer) { mUnusedInput = aBuffer; }
515
516 /**
517 * Call this when you want to *force* the parser to terminate the
518 * parsing process altogether. This is binary -- so once you terminate
519 * you can't resume without restarting altogether.
520 */
521 NS_IMETHODIMP
Terminate(void)522 nsParser::Terminate(void) {
523 // We should only call DidBuildModel once, so don't do anything if this is
524 // the second time that Terminate has been called.
525 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
526 return NS_OK;
527 }
528
529 nsresult result = NS_OK;
530 // XXX - [ until we figure out a way to break parser-sink circularity ]
531 // Hack - Hold a reference until we are completely done...
532 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
533 mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
534
535 // CancelParsingEvents must be called to avoid leaking the nsParser object
536 // @see bug 108049
537 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
538 // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
539 // The IsComplete() call inside of DidBuildModel looks at the
540 // pendingContinueEvents flag.
541 CancelParsingEvents();
542
543 // If we got interrupted in the middle of a document.write, then we might
544 // have more than one parser context on our parsercontext stack. This has
545 // the effect of making DidBuildModel a no-op, meaning that we never call
546 // our sink's DidBuildModel and break the reference cycle, causing a leak.
547 // Since we're getting terminated, we manually clean up our context stack.
548 while (mParserContext && mParserContext->mPrevContext) {
549 CParserContext* prev = mParserContext->mPrevContext;
550 delete mParserContext;
551 mParserContext = prev;
552 }
553
554 if (mDTD) {
555 mDTD->Terminate();
556 DidBuildModel(result);
557 } else if (mSink) {
558 // We have no parser context or no DTD yet (so we got terminated before we
559 // got any data). Manually break the reference cycle with the sink.
560 result = mSink->DidBuildModel(true);
561 NS_ENSURE_SUCCESS(result, result);
562 }
563
564 return NS_OK;
565 }
566
567 NS_IMETHODIMP
ContinueInterruptedParsing()568 nsParser::ContinueInterruptedParsing() {
569 // If there are scripts executing, then the content sink is jumping the gun
570 // (probably due to a synchronous XMLHttpRequest) and will re-enable us
571 // later, see bug 460706.
572 if (!IsOkToProcessNetworkData()) {
573 return NS_OK;
574 }
575
576 // If the stream has already finished, there's a good chance
577 // that we might start closing things down when the parser
578 // is reenabled. To make sure that we're not deleted across
579 // the reenabling process, hold a reference to ourselves.
580 nsresult result = NS_OK;
581 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
582 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
583
584 #ifdef DEBUG
585 if (mBlocked) {
586 NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
587 }
588 #endif
589
590 bool isFinalChunk =
591 mParserContext && mParserContext->mStreamListenerState == eOnStop;
592
593 mProcessingNetworkData = true;
594 if (sinkDeathGrip) {
595 sinkDeathGrip->WillParse();
596 }
597 result = ResumeParse(true, isFinalChunk); // Ref. bug 57999
598 mProcessingNetworkData = false;
599
600 if (result != NS_OK) {
601 result = mInternalState;
602 }
603
604 return result;
605 }
606
607 /**
608 * Stops parsing temporarily. That is, it will prevent the
609 * parser from building up content model while scripts
610 * are being loaded (either an external script from a web
611 * page, or any number of extension content scripts).
612 */
NS_IMETHODIMP_(void)613 NS_IMETHODIMP_(void)
614 nsParser::BlockParser() { mBlocked++; }
615
616 /**
617 * Open up the parser for tokenization, building up content
618 * model..etc. However, this method does not resume parsing
619 * automatically. It's the callers' responsibility to restart
620 * the parsing engine.
621 */
NS_IMETHODIMP_(void)622 NS_IMETHODIMP_(void)
623 nsParser::UnblockParser() {
624 MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
625 if (MOZ_LIKELY(mBlocked > 0)) {
626 mBlocked--;
627 }
628 }
629
NS_IMETHODIMP_(void)630 NS_IMETHODIMP_(void)
631 nsParser::ContinueInterruptedParsingAsync() {
632 MOZ_ASSERT(mSink);
633 if (MOZ_LIKELY(mSink)) {
634 mSink->ContinueInterruptedParsingAsync();
635 }
636 }
637
638 /**
639 * Call this to query whether the parser is enabled or not.
640 */
NS_IMETHODIMP_(bool)641 NS_IMETHODIMP_(bool)
642 nsParser::IsParserEnabled() { return !mBlocked; }
643
644 /**
645 * Call this to query whether the parser thinks it's done with parsing.
646 */
NS_IMETHODIMP_(bool)647 NS_IMETHODIMP_(bool)
648 nsParser::IsComplete() {
649 return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
650 }
651
HandleParserContinueEvent(nsParserContinueEvent * ev)652 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
653 // Ignore any revoked continue events...
654 if (mContinueEvent != ev) return;
655
656 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
657 mContinueEvent = nullptr;
658
659 NS_ASSERTION(IsOkToProcessNetworkData(),
660 "Interrupted in the middle of a script?");
661 ContinueInterruptedParsing();
662 }
663
IsInsertionPointDefined()664 bool nsParser::IsInsertionPointDefined() { return false; }
665
IncrementScriptNestingLevel()666 void nsParser::IncrementScriptNestingLevel() {}
667
DecrementScriptNestingLevel()668 void nsParser::DecrementScriptNestingLevel() {}
669
HasNonzeroScriptNestingLevel() const670 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }
671
MarkAsNotScriptCreated(const char * aCommand)672 void nsParser::MarkAsNotScriptCreated(const char* aCommand) {}
673
IsScriptCreated()674 bool nsParser::IsScriptCreated() { return false; }
675
676 /**
677 * This is the main controlling routine in the parsing process.
678 * Note that it may get called multiple times for the same scanner,
679 * since this is a pushed based system, and all the tokens may
680 * not have been consumed by the scanner during a given invocation
681 * of this method.
682 */
683 NS_IMETHODIMP
Parse(nsIURI * aURL,nsIRequestObserver * aListener,void * aKey,nsDTDMode aMode)684 nsParser::Parse(nsIURI* aURL, nsIRequestObserver* aListener, void* aKey,
685 nsDTDMode aMode) {
686 MOZ_ASSERT(aURL, "Error: Null URL given");
687
688 nsresult result = NS_ERROR_HTMLPARSER_BADURL;
689 mObserver = aListener;
690
691 if (aURL) {
692 nsAutoCString spec;
693 nsresult rv = aURL->GetSpec(spec);
694 if (rv != NS_OK) {
695 return rv;
696 }
697 nsString theName; // Not nsAutoString due to length and usage
698 if (!CopyUTF8toUTF16(spec, theName, mozilla::fallible)) {
699 return NS_ERROR_OUT_OF_MEMORY;
700 }
701
702 nsScanner* theScanner = new nsScanner(theName, false);
703 CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
704 mCommand, aListener);
705 if (pc && theScanner) {
706 pc->mMultipart = true;
707 pc->mContextType = CParserContext::eCTURL;
708 pc->mDTDMode = aMode;
709 PushContext(*pc);
710
711 result = NS_OK;
712 } else {
713 result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
714 }
715 }
716 return result;
717 }
718
719 /**
720 * Used by XML fragment parsing below.
721 *
722 * @param aSourceBuffer contains a string-full of real content
723 */
Parse(const nsAString & aSourceBuffer,void * aKey,bool aLastCall)724 nsresult nsParser::Parse(const nsAString& aSourceBuffer, void* aKey,
725 bool aLastCall) {
726 nsresult result = NS_OK;
727
728 // Don't bother if we're never going to parse this.
729 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
730 return result;
731 }
732
733 if (!aLastCall && aSourceBuffer.IsEmpty()) {
734 // Nothing is being passed to the parser so return
735 // immediately. mUnusedInput will get processed when
736 // some data is actually passed in.
737 // But if this is the last call, make sure to finish up
738 // stuff correctly.
739 return result;
740 }
741
742 // Maintain a reference to ourselves so we don't go away
743 // till we're completely done.
744 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
745
746 if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
747 // Note: The following code will always find the parser context associated
748 // with the given key, even if that context has been suspended (e.g., for
749 // another document.write call). This doesn't appear to be exactly what IE
750 // does in the case where this happens, but this makes more sense.
751 CParserContext* pc = mParserContext;
752 while (pc && pc->mKey != aKey) {
753 pc = pc->mPrevContext;
754 }
755
756 if (!pc) {
757 // Only make a new context if we don't have one, OR if we do, but has a
758 // different context key.
759 nsScanner* theScanner = new nsScanner(mUnusedInput);
760 NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
761
762 eAutoDetectResult theStatus = eUnknownDetect;
763
764 if (mParserContext &&
765 mParserContext->mMimeType.EqualsLiteral("application/xml")) {
766 // Ref. Bug 90379
767 NS_ASSERTION(mDTD, "How come the DTD is null?");
768
769 if (mParserContext) {
770 theStatus = mParserContext->mAutoDetectStatus;
771 // Added this to fix bug 32022.
772 }
773 }
774
775 pc = new CParserContext(mParserContext, theScanner, aKey, mCommand, 0,
776 theStatus, aLastCall);
777 NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
778
779 PushContext(*pc);
780
781 pc->mMultipart = !aLastCall; // By default
782 if (pc->mPrevContext) {
783 pc->mMultipart |= pc->mPrevContext->mMultipart;
784 }
785
786 // Start fix bug 40143
787 if (pc->mMultipart) {
788 pc->mStreamListenerState = eOnDataAvail;
789 if (pc->mScanner) {
790 pc->mScanner->SetIncremental(true);
791 }
792 } else {
793 pc->mStreamListenerState = eOnStop;
794 if (pc->mScanner) {
795 pc->mScanner->SetIncremental(false);
796 }
797 }
798 // end fix for 40143
799
800 pc->mContextType = CParserContext::eCTString;
801 pc->SetMimeType("application/xml"_ns);
802 pc->mDTDMode = eDTDMode_full_standards;
803
804 mUnusedInput.Truncate();
805
806 pc->mScanner->Append(aSourceBuffer);
807 // Do not interrupt document.write() - bug 95487
808 result = ResumeParse(false, false, false);
809 } else {
810 pc->mScanner->Append(aSourceBuffer);
811 if (!pc->mPrevContext) {
812 // Set stream listener state to eOnStop, on the final context - Fix
813 // 68160, to guarantee DidBuildModel() call - Fix 36148
814 if (aLastCall) {
815 pc->mStreamListenerState = eOnStop;
816 pc->mScanner->SetIncremental(false);
817 }
818
819 if (pc == mParserContext) {
820 // If pc is not mParserContext, then this call to ResumeParse would
821 // do the wrong thing and try to continue parsing using
822 // mParserContext. We need to wait to actually resume parsing on pc.
823 ResumeParse(false, false, false);
824 }
825 }
826 }
827 }
828
829 return result;
830 }
831
832 NS_IMETHODIMP
ParseFragment(const nsAString & aSourceBuffer,nsTArray<nsString> & aTagStack)833 nsParser::ParseFragment(const nsAString& aSourceBuffer,
834 nsTArray<nsString>& aTagStack) {
835 nsresult result = NS_OK;
836 nsAutoString theContext;
837 uint32_t theCount = aTagStack.Length();
838 uint32_t theIndex = 0;
839
840 // Disable observers for fragments
841 mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
842
843 for (theIndex = 0; theIndex < theCount; theIndex++) {
844 theContext.Append('<');
845 theContext.Append(aTagStack[theCount - theIndex - 1]);
846 theContext.Append('>');
847 }
848
849 if (theCount == 0) {
850 // Ensure that the buffer is not empty. Because none of the DTDs care
851 // about leading whitespace, this doesn't change the result.
852 theContext.Assign(' ');
853 }
854
855 // First, parse the context to build up the DTD's tag stack. Note that we
856 // pass false for the aLastCall parameter.
857 result = Parse(theContext, (void*)&theContext, false);
858 if (NS_FAILED(result)) {
859 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
860 return result;
861 }
862
863 if (!mSink) {
864 // Parse must have failed in the XML case and so the sink was killed.
865 return NS_ERROR_HTMLPARSER_STOPPARSING;
866 }
867
868 nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
869 NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
870
871 fragSink->WillBuildContent();
872 // Now, parse the actual content. Note that this is the last call
873 // for HTML content, but for XML, we will want to build and parse
874 // the end tags. However, if tagStack is empty, it's the last call
875 // for XML as well.
876 if (theCount == 0) {
877 result = Parse(aSourceBuffer, &theContext, true);
878 fragSink->DidBuildContent();
879 } else {
880 // Add an end tag chunk, so expat will read the whole source buffer,
881 // and not worry about ']]' etc.
882 result = Parse(aSourceBuffer + u"</"_ns, &theContext, false);
883 fragSink->DidBuildContent();
884
885 if (NS_SUCCEEDED(result)) {
886 nsAutoString endContext;
887 for (theIndex = 0; theIndex < theCount; theIndex++) {
888 // we already added an end tag chunk above
889 if (theIndex > 0) {
890 endContext.AppendLiteral("</");
891 }
892
893 nsString& thisTag = aTagStack[theIndex];
894 // was there an xmlns=?
895 int32_t endOfTag = thisTag.FindChar(char16_t(' '));
896 if (endOfTag == -1) {
897 endContext.Append(thisTag);
898 } else {
899 endContext.Append(Substring(thisTag, 0, endOfTag));
900 }
901
902 endContext.Append('>');
903 }
904
905 result = Parse(endContext, &theContext, true);
906 }
907 }
908
909 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
910
911 return result;
912 }
913
914 /**
915 * This routine is called to cause the parser to continue parsing its
916 * underlying stream. This call allows the parse process to happen in
917 * chunks, such as when the content is push based, and we need to parse in
918 * pieces.
919 *
920 * An interesting change in how the parser gets used has led us to add extra
921 * processing to this method. The case occurs when the parser is blocked in
922 * one context, and gets a parse(string) call in another context. In this
923 * case, the parserContexts are linked. No problem.
924 *
925 * The problem is that Parse(string) assumes that it can proceed unabated,
926 * but if the parser is already blocked that assumption is false. So we
927 * needed to add a mechanism here to allow the parser to continue to process
928 * (the pop and free) contexts until 1) it get's blocked again; 2) it runs
929 * out of contexts.
930 *
931 *
932 * @param allowItertion : set to true if non-script resumption is requested
933 * @param aIsFinalChunk : tells us when the last chunk of data is provided.
934 * @return error code -- 0 if ok, non-zero if error.
935 */
ResumeParse(bool allowIteration,bool aIsFinalChunk,bool aCanInterrupt)936 nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
937 bool aCanInterrupt) {
938 nsresult result = NS_OK;
939
940 if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
941 result = WillBuildModel(mParserContext->mScanner->GetFilename());
942 if (NS_FAILED(result)) {
943 mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
944 return result;
945 }
946
947 if (mDTD) {
948 mSink->WillResume();
949 bool theIterationIsOk = true;
950
951 while (result == NS_OK && theIterationIsOk) {
952 if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
953 // -- Ref: Bug# 22485 --
954 // Insert the unused input into the source buffer
955 // as if it was read from the input stream.
956 // Adding UngetReadable() per vidur!!
957 mParserContext->mScanner->UngetReadable(mUnusedInput);
958 mUnusedInput.Truncate(0);
959 }
960
961 // Only allow parsing to be interrupted in the subsequent call to
962 // build model.
963 nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
964 ? Tokenize(aIsFinalChunk)
965 : NS_OK;
966 result = BuildModel();
967
968 if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
969 PostContinueEvent();
970 }
971
972 theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
973 result != NS_ERROR_HTMLPARSER_INTERRUPTED;
974
975 // Make sure not to stop parsing too early. Therefore, before shutting
976 // down the parser, it's important to check whether the input buffer
977 // has been scanned to completion (theTokenizerResult should be kEOF).
978 // kEOF -> End of buffer.
979
980 // If we're told to block the parser, we disable all further parsing
981 // (and cache any data coming in) until the parser is re-enabled.
982 if (NS_ERROR_HTMLPARSER_BLOCK == result) {
983 mSink->WillInterrupt();
984 if (!mBlocked) {
985 // If we were blocked by a recursive invocation, don't re-block.
986 BlockParser();
987 }
988 return NS_OK;
989 }
990 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
991 // Note: Parser Terminate() calls DidBuildModel.
992 if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
993 DidBuildModel(mStreamStatus);
994 mInternalState = result;
995 }
996
997 return NS_OK;
998 }
999 if ((NS_OK == result &&
1000 theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
1001 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
1002 bool theContextIsStringBased =
1003 CParserContext::eCTString == mParserContext->mContextType;
1004
1005 if (mParserContext->mStreamListenerState == eOnStop ||
1006 !mParserContext->mMultipart || theContextIsStringBased) {
1007 if (!mParserContext->mPrevContext) {
1008 if (mParserContext->mStreamListenerState == eOnStop) {
1009 DidBuildModel(mStreamStatus);
1010 return NS_OK;
1011 }
1012 } else {
1013 CParserContext* theContext = PopContext();
1014 if (theContext) {
1015 theIterationIsOk = allowIteration && theContextIsStringBased;
1016 if (theContext->mCopyUnused) {
1017 if (!theContext->mScanner->CopyUnusedData(mUnusedInput)) {
1018 mInternalState = NS_ERROR_OUT_OF_MEMORY;
1019 }
1020 }
1021
1022 delete theContext;
1023 }
1024
1025 result = mInternalState;
1026 aIsFinalChunk = mParserContext &&
1027 mParserContext->mStreamListenerState == eOnStop;
1028 // ...then intentionally fall through to mSink->WillInterrupt()...
1029 }
1030 }
1031 }
1032
1033 if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
1034 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
1035 result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
1036 mSink->WillInterrupt();
1037 }
1038 }
1039 } else {
1040 mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
1041 }
1042 }
1043
1044 return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
1045 }
1046
1047 /**
1048 * This is where we loop over the tokens created in the
1049 * tokenization phase, and try to make sense out of them.
1050 */
BuildModel()1051 nsresult nsParser::BuildModel() {
1052 nsITokenizer* theTokenizer = nullptr;
1053
1054 nsresult result = NS_OK;
1055 if (mParserContext) {
1056 result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1057 }
1058
1059 if (NS_SUCCEEDED(result)) {
1060 if (mDTD) {
1061 result = mDTD->BuildModel(theTokenizer, mSink);
1062 }
1063 } else {
1064 mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1065 }
1066 return result;
1067 }
1068
1069 /*******************************************************************
1070 These methods are used to talk to the netlib system...
1071 *******************************************************************/
1072
OnStartRequest(nsIRequest * request)1073 nsresult nsParser::OnStartRequest(nsIRequest* request) {
1074 MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
1075 "Parser's nsIStreamListener API was not setup "
1076 "correctly in constructor.");
1077
1078 if (mObserver) {
1079 mObserver->OnStartRequest(request);
1080 }
1081 mParserContext->mStreamListenerState = eOnStart;
1082 mParserContext->mAutoDetectStatus = eUnknownDetect;
1083 mParserContext->mRequest = request;
1084
1085 NS_ASSERTION(!mParserContext->mPrevContext,
1086 "Clobbering DTD for non-root parser context!");
1087 mDTD = nullptr;
1088
1089 nsresult rv;
1090 nsAutoCString contentType;
1091 nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
1092 if (channel) {
1093 rv = channel->GetContentType(contentType);
1094 if (NS_SUCCEEDED(rv)) {
1095 mParserContext->SetMimeType(contentType);
1096 }
1097 }
1098
1099 rv = NS_OK;
1100
1101 return rv;
1102 }
1103
ExtractCharsetFromXmlDeclaration(const unsigned char * aBytes,int32_t aLen,nsCString & oCharset)1104 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
1105 int32_t aLen,
1106 nsCString& oCharset) {
1107 // This code is rather pointless to have. Might as well reuse expat as
1108 // seen in nsHtml5StreamParser. -- hsivonen
1109 oCharset.Truncate();
1110 if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
1111 ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
1112 int32_t i;
1113 bool versionFound = false, encodingFound = false;
1114 for (i = 6; i < aLen && !encodingFound; ++i) {
1115 // end of XML declaration?
1116 if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
1117 (((char*)aBytes)[i + 1] == '>')) {
1118 break;
1119 }
1120 // Version is required.
1121 if (!versionFound) {
1122 // Want to avoid string comparisons, hence looking for 'n'
1123 // and only if found check the string leading to it. Not
1124 // foolproof, but fast.
1125 // The shortest string allowed before this is (strlen==13):
1126 // <?xml version
1127 if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
1128 (0 == PL_strncmp("versio", (char*)(aBytes + i - 6), 6))) {
1129 // Fast forward through version
1130 char q = 0;
1131 for (++i; i < aLen; ++i) {
1132 char qi = ((char*)aBytes)[i];
1133 if (qi == '\'' || qi == '"') {
1134 if (q && q == qi) {
1135 // ending quote
1136 versionFound = true;
1137 break;
1138 } else {
1139 // Starting quote
1140 q = qi;
1141 }
1142 }
1143 }
1144 }
1145 } else {
1146 // encoding must follow version
1147 // Want to avoid string comparisons, hence looking for 'g'
1148 // and only if found check the string leading to it. Not
1149 // foolproof, but fast.
1150 // The shortest allowed string before this (strlen==26):
1151 // <?xml version="1" encoding
1152 if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
1153 (0 == PL_strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
1154 int32_t encStart = 0;
1155 char q = 0;
1156 for (++i; i < aLen; ++i) {
1157 char qi = ((char*)aBytes)[i];
1158 if (qi == '\'' || qi == '"') {
1159 if (q && q == qi) {
1160 int32_t count = i - encStart;
1161 // encoding value is invalid if it is UTF-16
1162 if (count > 0 &&
1163 PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
1164 count)) {
1165 oCharset.Assign((char*)(aBytes + encStart), count);
1166 }
1167 encodingFound = true;
1168 break;
1169 } else {
1170 encStart = i + 1;
1171 q = qi;
1172 }
1173 }
1174 }
1175 }
1176 } // if (!versionFound)
1177 } // for
1178 }
1179 return !oCharset.IsEmpty();
1180 }
1181
GetNextChar(nsACString::const_iterator & aStart,nsACString::const_iterator & aEnd)1182 inline char GetNextChar(nsACString::const_iterator& aStart,
1183 nsACString::const_iterator& aEnd) {
1184 NS_ASSERTION(aStart != aEnd, "end of buffer");
1185 return (++aStart != aEnd) ? *aStart : '\0';
1186 }
1187
/**
 * Segment writer for ReadSegments() that throws the data away: it reports
 * the whole segment as consumed without looking at it. OnDataAvailable()
 * uses it to silently drain data delivered for about:blank.
 *
 * @param count number of bytes in the segment.
 * @param writeCount out: set to count.
 * @return always NS_OK.
 */
static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure,
                                    const char* fromRawSegment,
                                    uint32_t toOffset, uint32_t count,
                                    uint32_t* writeCount) {
  // Claim every byte so the stream keeps advancing.
  *writeCount = count;
  return NS_OK;
}
1195
1196 typedef struct {
1197 bool mNeedCharsetCheck;
1198 nsParser* mParser;
1199 nsScanner* mScanner;
1200 nsIRequest* mRequest;
1201 } ParserWriteStruct;
1202
1203 /*
1204 * This function is invoked as a result of a call to a stream's
1205 * ReadSegments() method. It is called for each contiguous buffer
1206 * of data in the underlying stream or pipe. Using ReadSegments
1207 * allows us to avoid copying data to read out of the stream.
1208 */
ParserWriteFunc(nsIInputStream * in,void * closure,const char * fromRawSegment,uint32_t toOffset,uint32_t count,uint32_t * writeCount)1209 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
1210 const char* fromRawSegment, uint32_t toOffset,
1211 uint32_t count, uint32_t* writeCount) {
1212 nsresult result;
1213 ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
1214 const unsigned char* buf =
1215 reinterpret_cast<const unsigned char*>(fromRawSegment);
1216 uint32_t theNumRead = count;
1217
1218 if (!pws) {
1219 return NS_ERROR_FAILURE;
1220 }
1221
1222 if (pws->mNeedCharsetCheck) {
1223 pws->mNeedCharsetCheck = false;
1224 int32_t source;
1225 auto preferred = pws->mParser->GetDocumentCharset(source);
1226
1227 // This code was bogus when I found it. It expects the BOM or the XML
1228 // declaration to be entirely in the first network buffer. -- hsivonen
1229 const Encoding* encoding;
1230 size_t bomLength;
1231 Tie(encoding, bomLength) = Encoding::ForBOM(Span(buf, count));
1232 Unused << bomLength;
1233 if (encoding) {
1234 // The decoder will swallow the BOM. The UTF-16 will re-sniff for
1235 // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
1236 // or "UTF-16BE".
1237 preferred = WrapNotNull(encoding);
1238 source = kCharsetFromByteOrderMark;
1239 } else if (source < kCharsetFromChannel) {
1240 nsAutoCString declCharset;
1241
1242 if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
1243 encoding = Encoding::ForLabel(declCharset);
1244 if (encoding) {
1245 preferred = WrapNotNull(encoding);
1246 source = kCharsetFromMetaTag;
1247 }
1248 }
1249 }
1250
1251 pws->mParser->SetDocumentCharset(preferred, source, false);
1252 pws->mParser->SetSinkCharset(preferred);
1253 }
1254
1255 result = pws->mScanner->Append(fromRawSegment, theNumRead);
1256 if (NS_SUCCEEDED(result)) {
1257 *writeCount = count;
1258 }
1259
1260 return result;
1261 }
1262
OnDataAvailable(nsIRequest * request,nsIInputStream * pIStream,uint64_t sourceOffset,uint32_t aLength)1263 nsresult nsParser::OnDataAvailable(nsIRequest* request,
1264 nsIInputStream* pIStream,
1265 uint64_t sourceOffset, uint32_t aLength) {
1266 MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
1267 eOnDataAvail == mParserContext->mStreamListenerState),
1268 "Error: OnStartRequest() must be called before OnDataAvailable()");
1269 MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
1270 "Must have a buffered input stream");
1271
1272 nsresult rv = NS_OK;
1273
1274 if (mIsAboutBlank) {
1275 MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
1276 // ... but if an extension tries to feed us data for about:blank in a
1277 // release build, silently ignore the data.
1278 uint32_t totalRead;
1279 rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength,
1280 &totalRead);
1281 return rv;
1282 }
1283
1284 CParserContext* theContext = mParserContext;
1285
1286 while (theContext && theContext->mRequest != request) {
1287 theContext = theContext->mPrevContext;
1288 }
1289
1290 if (theContext) {
1291 theContext->mStreamListenerState = eOnDataAvail;
1292
1293 if (eInvalidDetect == theContext->mAutoDetectStatus) {
1294 if (theContext->mScanner) {
1295 nsScannerIterator iter;
1296 theContext->mScanner->EndReading(iter);
1297 theContext->mScanner->SetPosition(iter, true);
1298 }
1299 }
1300
1301 uint32_t totalRead;
1302 ParserWriteStruct pws;
1303 pws.mNeedCharsetCheck = true;
1304 pws.mParser = this;
1305 pws.mScanner = theContext->mScanner.get();
1306 pws.mRequest = request;
1307
1308 rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1309 if (NS_FAILED(rv)) {
1310 return rv;
1311 }
1312
1313 if (IsOkToProcessNetworkData()) {
1314 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1315 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1316 mProcessingNetworkData = true;
1317 if (sinkDeathGrip) {
1318 sinkDeathGrip->WillParse();
1319 }
1320 rv = ResumeParse();
1321 mProcessingNetworkData = false;
1322 }
1323 } else {
1324 rv = NS_ERROR_UNEXPECTED;
1325 }
1326
1327 return rv;
1328 }
1329
1330 /**
1331 * This is called by the networking library once the last block of data
1332 * has been collected from the net.
1333 */
OnStopRequest(nsIRequest * request,nsresult status)1334 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
1335 nsresult rv = NS_OK;
1336
1337 CParserContext* pc = mParserContext;
1338 while (pc) {
1339 if (pc->mRequest == request) {
1340 pc->mStreamListenerState = eOnStop;
1341 pc->mScanner->SetIncremental(false);
1342 break;
1343 }
1344
1345 pc = pc->mPrevContext;
1346 }
1347
1348 mStreamStatus = status;
1349
1350 if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
1351 mProcessingNetworkData = true;
1352 if (mSink) {
1353 mSink->WillParse();
1354 }
1355 rv = ResumeParse(true, true);
1356 mProcessingNetworkData = false;
1357 }
1358
1359 // If the parser isn't enabled, we don't finish parsing till
1360 // it is reenabled.
1361
1362 // XXX Should we wait to notify our observers as well if the
1363 // parser isn't yet enabled?
1364 if (mObserver) {
1365 mObserver->OnStopRequest(request, status);
1366 }
1367
1368 return rv;
1369 }
1370
1371 /*******************************************************************
1372 Here come the tokenization methods...
1373 *******************************************************************/
1374
1375 /**
1376 * Part of the code sandwich, this gets called right before
1377 * the tokenization process begins. The main reason for
1378 * this call is to allow the delegate to do initialization.
1379 */
WillTokenize(bool aIsFinalChunk)1380 bool nsParser::WillTokenize(bool aIsFinalChunk) {
1381 if (!mParserContext) {
1382 return true;
1383 }
1384
1385 nsITokenizer* theTokenizer;
1386 nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1387 NS_ENSURE_SUCCESS(result, false);
1388 return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
1389 }
1390
1391 /**
1392 * This is the primary control routine to consume tokens.
1393 * It iteratively consumes tokens until an error occurs or
1394 * you run out of data.
1395 */
Tokenize(bool aIsFinalChunk)1396 nsresult nsParser::Tokenize(bool aIsFinalChunk) {
1397 nsITokenizer* theTokenizer;
1398
1399 nsresult result = NS_ERROR_NOT_AVAILABLE;
1400 if (mParserContext) {
1401 result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1402 }
1403
1404 if (NS_SUCCEEDED(result)) {
1405 bool flushTokens = false;
1406
1407 bool killSink = false;
1408
1409 WillTokenize(aIsFinalChunk);
1410 while (NS_SUCCEEDED(result)) {
1411 mParserContext->mScanner->Mark();
1412 result =
1413 theTokenizer->ConsumeToken(*mParserContext->mScanner, flushTokens);
1414 if (NS_FAILED(result)) {
1415 mParserContext->mScanner->RewindToMark();
1416 if (NS_ERROR_HTMLPARSER_EOF == result) {
1417 break;
1418 }
1419 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
1420 killSink = true;
1421 result = Terminate();
1422 break;
1423 }
1424 } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
1425 // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix
1426 // Bug# 23931. Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
1427 // Also remember to update the marked position.
1428 mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
1429 mParserContext->mScanner->Mark();
1430 break;
1431 }
1432 }
1433
1434 if (killSink) {
1435 mSink = nullptr;
1436 }
1437 } else {
1438 result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1439 }
1440
1441 return result;
1442 }
1443
1444 /**
1445 * Get the channel associated with this parser
1446 *
1447 * @param aChannel out param that will contain the result
1448 * @return NS_OK if successful
1449 */
1450 NS_IMETHODIMP
GetChannel(nsIChannel ** aChannel)1451 nsParser::GetChannel(nsIChannel** aChannel) {
1452 nsresult result = NS_ERROR_NOT_AVAILABLE;
1453 if (mParserContext && mParserContext->mRequest) {
1454 result = CallQueryInterface(mParserContext->mRequest, aChannel);
1455 }
1456 return result;
1457 }
1458
1459 /**
1460 * Get the DTD associated with this parser
1461 */
1462 NS_IMETHODIMP
GetDTD(nsIDTD ** aDTD)1463 nsParser::GetDTD(nsIDTD** aDTD) {
1464 if (mParserContext) {
1465 NS_IF_ADDREF(*aDTD = mDTD);
1466 }
1467
1468 return NS_OK;
1469 }
1470
/**
 * Get this as nsIStreamListener
 *
 * @return this parser itself; no reference is added.
 */
nsIStreamListener* nsParser::GetStreamListener() { return this; }
1475