1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsAtom.h"
8 #include "nsParser.h"
9 #include "nsString.h"
10 #include "nsCRT.h"
11 #include "nsScanner.h"
12 #include "plstr.h"
13 #include "nsIChannel.h"
14 #include "nsIInputStream.h"
15 #include "CNavDTD.h"
16 #include "prenv.h"
17 #include "prlock.h"
18 #include "prcvar.h"
19 #include "nsParserCIID.h"
20 #include "nsReadableUtils.h"
21 #include "nsCOMPtr.h"
22 #include "nsExpatDriver.h"
23 #include "nsIFragmentContentSink.h"
24 #include "nsStreamUtils.h"
25 #include "nsHTMLTokenizer.h"
26 #include "nsXPCOMCIDInternal.h"
27 #include "nsMimeTypes.h"
28 #include "mozilla/CondVar.h"
29 #include "mozilla/Mutex.h"
30 #include "nsCharsetSource.h"
31 #include "nsThreadUtils.h"
32 #include "nsIHTMLContentSink.h"
33 
34 #include "mozilla/BinarySearch.h"
35 #include "mozilla/dom/ScriptLoader.h"
36 #include "mozilla/Encoding.h"
37 
38 using namespace mozilla;
39 
// Bit flags stored in nsParser::mFlags. Initialize() starts every parser with
// OBSERVERS_ENABLED and CAN_TOKENIZE set.

// Cleared by ParseFragment() while a fragment is being parsed.
#define NS_PARSER_FLAG_OBSERVERS_ENABLED (1 << 2)
// Set after a continue event has been dispatched and cleared again when that
// event is handled or cancelled; IsComplete() returns false while it is set.
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT (1 << 3)
// NOTE(review): no use of this flag is visible in the reviewed part of the
// file -- confirm its meaning against the code that reads it.
#define NS_PARSER_FLAG_FLUSH_TOKENS (1 << 5)
// NOTE(review): only set by Initialize() in the reviewed part of the file;
// presumably gates tokenization -- confirm against the code that reads it.
#define NS_PARSER_FLAG_CAN_TOKENIZE (1 << 6)
44 
45 //-------------- Begin ParseContinue Event Definition ------------------------
46 /*
47 The parser can be explicitly interrupted by passing a return value of
48 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
49 the parser to stop processing and allow the application to return to the event
50 loop. The data which was left at the time of interruption will be processed
51 the next time OnDataAvailable is called. If the parser has received its final
52 chunk of data then OnDataAvailable will no longer be called by the networking
53 module, so the parser will schedule a nsParserContinueEvent which will call
54 the parser to process the remaining data after returning to the event loop.
55 If the parser is interrupted while processing the remaining data it will
56 schedule another ParseContinueEvent. The processing of data followed by
57 scheduling of the continue events will proceed until either:
58 
59   1) All of the remaining data can be processed without interrupting
60   2) The parser has been cancelled.
61 
62 
63 This capability is currently used in CNavDTD and nsHTMLContentSink. The
64 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
65 processed and when each token is processed. The nsHTML content sink records
66 the time when the chunk has started processing and will return
67 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
68 threshold called max tokenizing processing time. This allows the content sink
69 to limit how much data is processed in a single chunk which in turn gates how
70 much time is spent away from the event loop. Processing smaller chunks of data
71 also reduces the time spent in subsequent reflows.
72 
73 This capability is most apparent when loading large documents. If the maximum
74 token processing time is set small enough the application will remain
75 responsive during document load.
76 
77 A side-effect of this capability is that document load is not complete when
the last chunk of data is passed to OnDataAvailable since the parser may have
79 been interrupted when the last chunk of data arrived. The document is complete
80 when all of the document has been tokenized and there aren't any pending
81 nsParserContinueEvents. This can cause problems if the application assumes
82 that it can monitor the load requests to determine when the document load has
83 been completed. This is what happens in Mozilla. The document is considered
84 completely loaded when all of the load requests have been satisfied. To delay
85 the document load until all of the parsing has been completed the
86 nsHTMLContentSink adds a dummy parser load request which is not removed until
87 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
DidBuildModel until the final chunk of data has been passed to the parser
through OnDataAvailable and there aren't any pending
nsParserContinueEvents.
91 
Currently the parser ignores requests to be interrupted during the
processing of script. This is because a document.write followed by JavaScript
calls to manipulate the DOM may fail if the parser was interrupted during the
document.write.
96 
97 For more details @see bugzilla bug 76722
98 */
99 
100 class nsParserContinueEvent : public Runnable {
101  public:
102   RefPtr<nsParser> mParser;
103 
nsParserContinueEvent(nsParser * aParser)104   explicit nsParserContinueEvent(nsParser* aParser)
105       : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
106 
Run()107   NS_IMETHOD Run() override {
108     mParser->HandleParserContinueEvent(this);
109     return NS_OK;
110   }
111 };
112 
113 //-------------- End ParseContinue Event Definition ------------------------
114 
115 /**
116  *  default constructor
117  */
nsParser()118 nsParser::nsParser()
119     : mParserContext(nullptr), mCharset(WINDOWS_1252_ENCODING) {
120   Initialize(true);
121 }
122 
~nsParser()123 nsParser::~nsParser() { Cleanup(); }
124 
Initialize(bool aConstructor)125 void nsParser::Initialize(bool aConstructor) {
126   if (aConstructor) {
127     // Raw pointer
128     mParserContext = 0;
129   } else {
130     // nsCOMPtrs
131     mObserver = nullptr;
132     mUnusedInput.Truncate();
133   }
134 
135   mContinueEvent = nullptr;
136   mCharsetSource = kCharsetUninitialized;
137   mCharset = WINDOWS_1252_ENCODING;
138   mInternalState = NS_OK;
139   mStreamStatus = NS_OK;
140   mCommand = eViewNormal;
141   mBlocked = 0;
142   mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | NS_PARSER_FLAG_CAN_TOKENIZE;
143 
144   mProcessingNetworkData = false;
145   mIsAboutBlank = false;
146 }
147 
Cleanup()148 void nsParser::Cleanup() {
149 #ifdef DEBUG
150   if (mParserContext && mParserContext->mPrevContext) {
151     NS_WARNING("Extra parser contexts still on the parser stack");
152   }
153 #endif
154 
155   while (mParserContext) {
156     CParserContext* pc = mParserContext->mPrevContext;
157     delete mParserContext;
158     mParserContext = pc;
159   }
160 
161   // It should not be possible for this flag to be set when we are getting
162   // destroyed since this flag implies a pending nsParserContinueEvent, which
163   // has an owning reference to |this|.
164   NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
165 }
166 
167 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
168 
169 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
170   NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
171   NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
172   NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
173   NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
174 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
175 
176 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
177   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
178   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
179   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
180   CParserContext* pc = tmp->mParserContext;
181   while (pc) {
182     cb.NoteXPCOMChild(pc->mTokenizer);
183     pc = pc->mPrevContext;
184   }
185 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
186 
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)187 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
188 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
189 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
190   NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
191   NS_INTERFACE_MAP_ENTRY(nsIParser)
192   NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
193   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
194   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
195 NS_INTERFACE_MAP_END
196 
197 // The parser continue event is posted only if
198 // all of the data to parse has been passed to ::OnDataAvailable
199 // and the parser has been interrupted by the content sink
200 // because the processing of tokens took too long.
201 
202 nsresult nsParser::PostContinueEvent() {
203   if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
204     // If this flag isn't set, then there shouldn't be a live continue event!
205     NS_ASSERTION(!mContinueEvent, "bad");
206 
207     // This creates a reference cycle between this and the event that is
208     // broken when the event fires.
209     nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
210     if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
211       NS_WARNING("failed to dispatch parser continuation event");
212     } else {
213       mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
214       mContinueEvent = event;
215     }
216   }
217   return NS_OK;
218 }
219 
NS_IMETHODIMP_(void)220 NS_IMETHODIMP_(void)
221 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
222 
223 /**
224  *  Call this method once you've created a parser, and want to instruct it
225  *  about the command which caused the parser to be constructed. For example,
226  *  this allows us to select a DTD which can do, say, view-source.
227  *
228  *  @param   aCommand the command string to set
229  */
NS_IMETHODIMP_(void)230 NS_IMETHODIMP_(void)
231 nsParser::SetCommand(const char* aCommand) {
232   mCommandStr.Assign(aCommand);
233   if (mCommandStr.EqualsLiteral("view-source")) {
234     mCommand = eViewSource;
235   } else if (mCommandStr.EqualsLiteral("view-fragment")) {
236     mCommand = eViewFragment;
237   } else {
238     mCommand = eViewNormal;
239   }
240 }
241 
242 /**
243  *  Call this method once you've created a parser, and want to instruct it
244  *  about the command which caused the parser to be constructed. For example,
245  *  this allows us to select a DTD which can do, say, view-source.
246  *
247  *  @param   aParserCommand the command to set
248  */
NS_IMETHODIMP_(void)249 NS_IMETHODIMP_(void)
250 nsParser::SetCommand(eParserCommands aParserCommand) {
251   mCommand = aParserCommand;
252 }
253 
254 /**
255  *  Call this method once you've created a parser, and want to instruct it
256  *  about what charset to load
257  *
258  *  @param   aCharset- the charset of a document
259  *  @param   aCharsetSource- the source of the charset
260  */
SetDocumentCharset(NotNull<const Encoding * > aCharset,int32_t aCharsetSource,bool aChannelHadCharset)261 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
262                                   int32_t aCharsetSource,
263                                   bool aChannelHadCharset) {
264   mCharset = aCharset;
265   mCharsetSource = aCharsetSource;
266   if (mParserContext && mParserContext->mScanner) {
267     mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
268   }
269 }
270 
SetSinkCharset(NotNull<const Encoding * > aCharset)271 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
272   if (mSink) {
273     mSink->SetDocumentCharset(aCharset);
274   }
275 }
276 
277 /**
278  *  This method gets called in order to set the content
279  *  sink for this parser to dump nodes to.
280  *
281  *  @param   nsIContentSink interface for node receiver
282  */
NS_IMETHODIMP_(void)283 NS_IMETHODIMP_(void)
284 nsParser::SetContentSink(nsIContentSink* aSink) {
285   MOZ_ASSERT(aSink, "sink cannot be null!");
286   mSink = aSink;
287 
288   if (mSink) {
289     mSink->SetParser(this);
290     nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
291     if (htmlSink) {
292       mIsAboutBlank = true;
293     }
294   }
295 }
296 
297 /**
298  * retrieve the sink set into the parser
299  * @return  current sink
300  */
NS_IMETHODIMP_(nsIContentSink *)301 NS_IMETHODIMP_(nsIContentSink*)
302 nsParser::GetContentSink() { return mSink; }
303 
FindSuitableDTD(CParserContext & aParserContext)304 static nsIDTD* FindSuitableDTD(CParserContext& aParserContext) {
305   // We always find a DTD.
306   aParserContext.mAutoDetectStatus = ePrimaryDetect;
307 
308   // Quick check for view source.
309   MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
310              "The old parser is not supposed to be used for View Source "
311              "anymore.");
312 
313   // Now see if we're parsing HTML (which, as far as we're concerned, simply
314   // means "not XML").
315   if (aParserContext.mDocType != eXML) {
316     return new CNavDTD();
317   }
318 
319   // If we're here, then we'd better be parsing XML.
320   NS_ASSERTION(aParserContext.mDocType == eXML,
321                "What are you trying to send me, here?");
322   return new nsExpatDriver();
323 }
324 
325 NS_IMETHODIMP
CancelParsingEvents()326 nsParser::CancelParsingEvents() {
327   if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
328     NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
329     // Revoke the pending continue parsing event
330     mContinueEvent = nullptr;
331     mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
332   }
333   return NS_OK;
334 }
335 
336 ////////////////////////////////////////////////////////////////////////
337 
/**
 * Evaluates EXPR1 and EXPR2 exactly once each, in that order.  Stores the
 * value of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of
 * EXPR1 (which could be success or failure).
 *
 * To understand the motivation for this construct, consider these example
 * methods:
 *
 *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 *     nsresult rv = NS_OK;
 *     ...
 *     rv = obj->DoThatThing();
 *     NS_ENSURE_SUCCESS(rv, rv);
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     return mSomething->DoThatThing(mWhatever);
 *   }
 *
 * Suppose, for whatever reason*, we want to shift responsibility for calling
 * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
 *
 *   nsresult nsSomething::DoThatThing() {
 *     nsresult rv = NS_OK;
 *     ...
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     nsresult rv;
 *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 *                              mWhatever->DoThatThing(),
 *                              rv);
 *     return rv;
 *   }
 *
 * *Possible reasons include: nsCaller doesn't want to give mSomething access
 * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 * be called regardless of how nsSomething::DoThatThing behaves, &c.
 *
 * Implementation note: RV first receives EXPR2's result and is only replaced
 * by EXPR1's result when EXPR2 succeeded, so a failure from EXPR2 always
 * wins. RV must be an assignable nsresult lvalue.
 */
#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) \
  {                                                \
    nsresult RV##__temp = (EXPR1);                 \
    RV = (EXPR2);                                  \
    if (NS_SUCCEEDED(RV)) {                        \
      RV = RV##__temp;                             \
    }                                              \
  }
390 
391 /**
392  * This gets called just prior to the model actually
393  * being constructed. It's important to make this the
394  * last thing that happens right before parsing, so we
395  * can delay until the last moment the resolution of
396  * which DTD to use (unless of course we're assigned one).
397  */
WillBuildModel(nsString & aFilename)398 nsresult nsParser::WillBuildModel(nsString& aFilename) {
399   if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
400 
401   if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
402 
403   if (eDTDMode_unknown == mParserContext->mDTDMode ||
404       eDTDMode_autodetect == mParserContext->mDTDMode) {
405     if (mIsAboutBlank) {
406       mParserContext->mDTDMode = eDTDMode_quirks;
407       mParserContext->mDocType = eHTML_Quirks;
408     } else {
409       mParserContext->mDTDMode = eDTDMode_full_standards;
410       mParserContext->mDocType = eXML;
411     }
412   }  // else XML fragment with nested parser context
413 
414   NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
415                "Clobbering DTD for non-root parser context!");
416   mDTD = FindSuitableDTD(*mParserContext);
417   NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
418 
419   nsITokenizer* tokenizer;
420   nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
421   NS_ENSURE_SUCCESS(rv, rv);
422 
423   rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
424   nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
425   // nsIDTD::WillBuildModel used to be responsible for calling
426   // nsIContentSink::WillBuildModel, but that obligation isn't expressible
427   // in the nsIDTD interface itself, so it's sounder and simpler to give that
428   // responsibility back to the parser. The former behavior of the DTD was to
429   // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
430   // failure we should use sinkResult instead of rv, to preserve the old error
431   // handling behavior of the DTD:
432   return NS_FAILED(sinkResult) ? sinkResult : rv;
433 }
434 
435 /**
436  * This gets called when the parser is done with its input.
437  * Note that the parser may have been called recursively, so we
438  * have to check for a prev. context before closing out the DTD/sink.
439  */
DidBuildModel(nsresult anErrorCode)440 nsresult nsParser::DidBuildModel(nsresult anErrorCode) {
441   nsresult result = anErrorCode;
442 
443   if (IsComplete()) {
444     if (mParserContext && !mParserContext->mPrevContext) {
445       // Let sink know if we're about to end load because we've been terminated.
446       // In that case we don't want it to run deferred scripts.
447       bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
448       if (mDTD && mSink) {
449         nsresult dtdResult = mDTD->DidBuildModel(anErrorCode),
450                  sinkResult = mSink->DidBuildModel(terminated);
451         // nsIDTD::DidBuildModel used to be responsible for calling
452         // nsIContentSink::DidBuildModel, but that obligation isn't expressible
453         // in the nsIDTD interface itself, so it's sounder and simpler to give
454         // that responsibility back to the parser. The former behavior of the
455         // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
456         // sink returns failure we should use sinkResult instead of dtdResult,
457         // to preserve the old error handling behavior of the DTD:
458         result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
459       }
460 
461       // Ref. to bug 61462.
462       mParserContext->mRequest = nullptr;
463     }
464   }
465 
466   return result;
467 }
468 
469 /**
470  * This method adds a new parser context to the list,
471  * pushing the current one to the next position.
472  *
473  * @param   ptr to new context
474  */
PushContext(CParserContext & aContext)475 void nsParser::PushContext(CParserContext& aContext) {
476   NS_ASSERTION(aContext.mPrevContext == mParserContext,
477                "Trying to push a context whose previous context differs from "
478                "the current parser context.");
479   mParserContext = &aContext;
480 }
481 
482 /**
483  * This method pops the topmost context off the stack,
484  * returning it to the user. The next context  (if any)
485  * becomes the current context.
486  * @update	gess7/22/98
487  * @return  prev. context
488  */
PopContext()489 CParserContext* nsParser::PopContext() {
490   CParserContext* oldContext = mParserContext;
491   if (oldContext) {
492     mParserContext = oldContext->mPrevContext;
493     if (mParserContext) {
494       // If the old context was blocked, propagate the blocked state
495       // back to the new one. Also, propagate the stream listener state
496       // but don't override onStop state to guarantee the call to
497       // DidBuildModel().
498       if (mParserContext->mStreamListenerState != eOnStop) {
499         mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
500       }
501     }
502   }
503   return oldContext;
504 }
505 
506 /**
507  *  Call this when you want control whether or not the parser will parse
508  *  and tokenize input (TRUE), or whether it just caches input to be
509  *  parsed later (FALSE).
510  *
511  *  @param   aState determines whether we parse/tokenize or just cache.
512  *  @return  current state
513  */
SetUnusedInput(nsString & aBuffer)514 void nsParser::SetUnusedInput(nsString& aBuffer) { mUnusedInput = aBuffer; }
515 
516 /**
517  *  Call this when you want to *force* the parser to terminate the
518  *  parsing process altogether. This is binary -- so once you terminate
519  *  you can't resume without restarting altogether.
520  */
521 NS_IMETHODIMP
Terminate(void)522 nsParser::Terminate(void) {
523   // We should only call DidBuildModel once, so don't do anything if this is
524   // the second time that Terminate has been called.
525   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
526     return NS_OK;
527   }
528 
529   nsresult result = NS_OK;
530   // XXX - [ until we figure out a way to break parser-sink circularity ]
531   // Hack - Hold a reference until we are completely done...
532   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
533   mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
534 
535   // CancelParsingEvents must be called to avoid leaking the nsParser object
536   // @see bug 108049
537   // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
538   // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
539   // The IsComplete() call inside of DidBuildModel looks at the
540   // pendingContinueEvents flag.
541   CancelParsingEvents();
542 
543   // If we got interrupted in the middle of a document.write, then we might
544   // have more than one parser context on our parsercontext stack. This has
545   // the effect of making DidBuildModel a no-op, meaning that we never call
546   // our sink's DidBuildModel and break the reference cycle, causing a leak.
547   // Since we're getting terminated, we manually clean up our context stack.
548   while (mParserContext && mParserContext->mPrevContext) {
549     CParserContext* prev = mParserContext->mPrevContext;
550     delete mParserContext;
551     mParserContext = prev;
552   }
553 
554   if (mDTD) {
555     mDTD->Terminate();
556     DidBuildModel(result);
557   } else if (mSink) {
558     // We have no parser context or no DTD yet (so we got terminated before we
559     // got any data).  Manually break the reference cycle with the sink.
560     result = mSink->DidBuildModel(true);
561     NS_ENSURE_SUCCESS(result, result);
562   }
563 
564   return NS_OK;
565 }
566 
567 NS_IMETHODIMP
ContinueInterruptedParsing()568 nsParser::ContinueInterruptedParsing() {
569   // If there are scripts executing, then the content sink is jumping the gun
570   // (probably due to a synchronous XMLHttpRequest) and will re-enable us
571   // later, see bug 460706.
572   if (!IsOkToProcessNetworkData()) {
573     return NS_OK;
574   }
575 
576   // If the stream has already finished, there's a good chance
577   // that we might start closing things down when the parser
578   // is reenabled. To make sure that we're not deleted across
579   // the reenabling process, hold a reference to ourselves.
580   nsresult result = NS_OK;
581   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
582   nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
583 
584 #ifdef DEBUG
585   if (mBlocked) {
586     NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
587   }
588 #endif
589 
590   bool isFinalChunk =
591       mParserContext && mParserContext->mStreamListenerState == eOnStop;
592 
593   mProcessingNetworkData = true;
594   if (sinkDeathGrip) {
595     sinkDeathGrip->WillParse();
596   }
597   result = ResumeParse(true, isFinalChunk);  // Ref. bug 57999
598   mProcessingNetworkData = false;
599 
600   if (result != NS_OK) {
601     result = mInternalState;
602   }
603 
604   return result;
605 }
606 
607 /**
608  *  Stops parsing temporarily. That is, it will prevent the
609  *  parser from building up content model while scripts
610  *  are being loaded (either an external script from a web
611  *  page, or any number of extension content scripts).
612  */
NS_IMETHODIMP_(void)613 NS_IMETHODIMP_(void)
614 nsParser::BlockParser() { mBlocked++; }
615 
616 /**
617  *  Open up the parser for tokenization, building up content
618  *  model..etc. However, this method does not resume parsing
619  *  automatically. It's the callers' responsibility to restart
620  *  the parsing engine.
621  */
NS_IMETHODIMP_(void)622 NS_IMETHODIMP_(void)
623 nsParser::UnblockParser() {
624   MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
625   if (MOZ_LIKELY(mBlocked > 0)) {
626     mBlocked--;
627   }
628 }
629 
NS_IMETHODIMP_(void)630 NS_IMETHODIMP_(void)
631 nsParser::ContinueInterruptedParsingAsync() {
632   MOZ_ASSERT(mSink);
633   if (MOZ_LIKELY(mSink)) {
634     mSink->ContinueInterruptedParsingAsync();
635   }
636 }
637 
638 /**
639  * Call this to query whether the parser is enabled or not.
640  */
NS_IMETHODIMP_(bool)641 NS_IMETHODIMP_(bool)
642 nsParser::IsParserEnabled() { return !mBlocked; }
643 
644 /**
645  * Call this to query whether the parser thinks it's done with parsing.
646  */
NS_IMETHODIMP_(bool)647 NS_IMETHODIMP_(bool)
648 nsParser::IsComplete() {
649   return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
650 }
651 
HandleParserContinueEvent(nsParserContinueEvent * ev)652 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
653   // Ignore any revoked continue events...
654   if (mContinueEvent != ev) return;
655 
656   mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
657   mContinueEvent = nullptr;
658 
659   NS_ASSERTION(IsOkToProcessNetworkData(),
660                "Interrupted in the middle of a script?");
661   ContinueInterruptedParsing();
662 }
663 
IsInsertionPointDefined()664 bool nsParser::IsInsertionPointDefined() { return false; }
665 
IncrementScriptNestingLevel()666 void nsParser::IncrementScriptNestingLevel() {}
667 
DecrementScriptNestingLevel()668 void nsParser::DecrementScriptNestingLevel() {}
669 
HasNonzeroScriptNestingLevel() const670 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }
671 
MarkAsNotScriptCreated(const char * aCommand)672 void nsParser::MarkAsNotScriptCreated(const char* aCommand) {}
673 
IsScriptCreated()674 bool nsParser::IsScriptCreated() { return false; }
675 
676 /**
677  *  This is the main controlling routine in the parsing process.
678  *  Note that it may get called multiple times for the same scanner,
679  *  since this is a pushed based system, and all the tokens may
680  *  not have been consumed by the scanner during a given invocation
681  *  of this method.
682  */
683 NS_IMETHODIMP
Parse(nsIURI * aURL,nsIRequestObserver * aListener,void * aKey,nsDTDMode aMode)684 nsParser::Parse(nsIURI* aURL, nsIRequestObserver* aListener, void* aKey,
685                 nsDTDMode aMode) {
686   MOZ_ASSERT(aURL, "Error: Null URL given");
687 
688   nsresult result = NS_ERROR_HTMLPARSER_BADURL;
689   mObserver = aListener;
690 
691   if (aURL) {
692     nsAutoCString spec;
693     nsresult rv = aURL->GetSpec(spec);
694     if (rv != NS_OK) {
695       return rv;
696     }
697     nsString theName;  // Not nsAutoString due to length and usage
698     if (!CopyUTF8toUTF16(spec, theName, mozilla::fallible)) {
699       return NS_ERROR_OUT_OF_MEMORY;
700     }
701 
702     nsScanner* theScanner = new nsScanner(theName, false);
703     CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
704                                             mCommand, aListener);
705     if (pc && theScanner) {
706       pc->mMultipart = true;
707       pc->mContextType = CParserContext::eCTURL;
708       pc->mDTDMode = aMode;
709       PushContext(*pc);
710 
711       result = NS_OK;
712     } else {
713       result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
714     }
715   }
716   return result;
717 }
718 
719 /**
720  * Used by XML fragment parsing below.
721  *
722  * @param   aSourceBuffer contains a string-full of real content
723  */
Parse(const nsAString & aSourceBuffer,void * aKey,bool aLastCall)724 nsresult nsParser::Parse(const nsAString& aSourceBuffer, void* aKey,
725                          bool aLastCall) {
726   nsresult result = NS_OK;
727 
728   // Don't bother if we're never going to parse this.
729   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
730     return result;
731   }
732 
733   if (!aLastCall && aSourceBuffer.IsEmpty()) {
734     // Nothing is being passed to the parser so return
735     // immediately. mUnusedInput will get processed when
736     // some data is actually passed in.
737     // But if this is the last call, make sure to finish up
738     // stuff correctly.
739     return result;
740   }
741 
742   // Maintain a reference to ourselves so we don't go away
743   // till we're completely done.
744   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
745 
746   if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
747     // Note: The following code will always find the parser context associated
748     // with the given key, even if that context has been suspended (e.g., for
749     // another document.write call). This doesn't appear to be exactly what IE
750     // does in the case where this happens, but this makes more sense.
751     CParserContext* pc = mParserContext;
752     while (pc && pc->mKey != aKey) {
753       pc = pc->mPrevContext;
754     }
755 
756     if (!pc) {
757       // Only make a new context if we don't have one, OR if we do, but has a
758       // different context key.
759       nsScanner* theScanner = new nsScanner(mUnusedInput);
760       NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
761 
762       eAutoDetectResult theStatus = eUnknownDetect;
763 
764       if (mParserContext &&
765           mParserContext->mMimeType.EqualsLiteral("application/xml")) {
766         // Ref. Bug 90379
767         NS_ASSERTION(mDTD, "How come the DTD is null?");
768 
769         if (mParserContext) {
770           theStatus = mParserContext->mAutoDetectStatus;
771           // Added this to fix bug 32022.
772         }
773       }
774 
775       pc = new CParserContext(mParserContext, theScanner, aKey, mCommand, 0,
776                               theStatus, aLastCall);
777       NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
778 
779       PushContext(*pc);
780 
781       pc->mMultipart = !aLastCall;  // By default
782       if (pc->mPrevContext) {
783         pc->mMultipart |= pc->mPrevContext->mMultipart;
784       }
785 
786       // Start fix bug 40143
787       if (pc->mMultipart) {
788         pc->mStreamListenerState = eOnDataAvail;
789         if (pc->mScanner) {
790           pc->mScanner->SetIncremental(true);
791         }
792       } else {
793         pc->mStreamListenerState = eOnStop;
794         if (pc->mScanner) {
795           pc->mScanner->SetIncremental(false);
796         }
797       }
798       // end fix for 40143
799 
800       pc->mContextType = CParserContext::eCTString;
801       pc->SetMimeType("application/xml"_ns);
802       pc->mDTDMode = eDTDMode_full_standards;
803 
804       mUnusedInput.Truncate();
805 
806       pc->mScanner->Append(aSourceBuffer);
807       // Do not interrupt document.write() - bug 95487
808       result = ResumeParse(false, false, false);
809     } else {
810       pc->mScanner->Append(aSourceBuffer);
811       if (!pc->mPrevContext) {
812         // Set stream listener state to eOnStop, on the final context - Fix
813         // 68160, to guarantee DidBuildModel() call - Fix 36148
814         if (aLastCall) {
815           pc->mStreamListenerState = eOnStop;
816           pc->mScanner->SetIncremental(false);
817         }
818 
819         if (pc == mParserContext) {
820           // If pc is not mParserContext, then this call to ResumeParse would
821           // do the wrong thing and try to continue parsing using
822           // mParserContext. We need to wait to actually resume parsing on pc.
823           ResumeParse(false, false, false);
824         }
825       }
826     }
827   }
828 
829   return result;
830 }
831 
832 NS_IMETHODIMP
ParseFragment(const nsAString & aSourceBuffer,nsTArray<nsString> & aTagStack)833 nsParser::ParseFragment(const nsAString& aSourceBuffer,
834                         nsTArray<nsString>& aTagStack) {
835   nsresult result = NS_OK;
836   nsAutoString theContext;
837   uint32_t theCount = aTagStack.Length();
838   uint32_t theIndex = 0;
839 
840   // Disable observers for fragments
841   mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
842 
843   for (theIndex = 0; theIndex < theCount; theIndex++) {
844     theContext.Append('<');
845     theContext.Append(aTagStack[theCount - theIndex - 1]);
846     theContext.Append('>');
847   }
848 
849   if (theCount == 0) {
850     // Ensure that the buffer is not empty. Because none of the DTDs care
851     // about leading whitespace, this doesn't change the result.
852     theContext.Assign(' ');
853   }
854 
855   // First, parse the context to build up the DTD's tag stack. Note that we
856   // pass false for the aLastCall parameter.
857   result = Parse(theContext, (void*)&theContext, false);
858   if (NS_FAILED(result)) {
859     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
860     return result;
861   }
862 
863   if (!mSink) {
864     // Parse must have failed in the XML case and so the sink was killed.
865     return NS_ERROR_HTMLPARSER_STOPPARSING;
866   }
867 
868   nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
869   NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
870 
871   fragSink->WillBuildContent();
872   // Now, parse the actual content. Note that this is the last call
873   // for HTML content, but for XML, we will want to build and parse
874   // the end tags.  However, if tagStack is empty, it's the last call
875   // for XML as well.
876   if (theCount == 0) {
877     result = Parse(aSourceBuffer, &theContext, true);
878     fragSink->DidBuildContent();
879   } else {
880     // Add an end tag chunk, so expat will read the whole source buffer,
881     // and not worry about ']]' etc.
882     result = Parse(aSourceBuffer + u"</"_ns, &theContext, false);
883     fragSink->DidBuildContent();
884 
885     if (NS_SUCCEEDED(result)) {
886       nsAutoString endContext;
887       for (theIndex = 0; theIndex < theCount; theIndex++) {
888         // we already added an end tag chunk above
889         if (theIndex > 0) {
890           endContext.AppendLiteral("</");
891         }
892 
893         nsString& thisTag = aTagStack[theIndex];
894         // was there an xmlns=?
895         int32_t endOfTag = thisTag.FindChar(char16_t(' '));
896         if (endOfTag == -1) {
897           endContext.Append(thisTag);
898         } else {
899           endContext.Append(Substring(thisTag, 0, endOfTag));
900         }
901 
902         endContext.Append('>');
903       }
904 
905       result = Parse(endContext, &theContext, true);
906     }
907   }
908 
909   mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
910 
911   return result;
912 }
913 
/**
 *  This routine is called to cause the parser to continue parsing its
 *  underlying stream.  This call allows the parse process to happen in
 *  chunks, such as when the content is push based, and we need to parse in
 *  pieces.
 *
 *  An interesting change in how the parser gets used has led us to add extra
 *  processing to this method.  The case occurs when the parser is blocked in
 *  one context, and gets a parse(string) call in another context.  In this
 *  case, the parserContexts are linked. No problem.
 *
 *  The problem is that Parse(string) assumes that it can proceed unabated,
 *  but if the parser is already blocked that assumption is false. So we
 *  needed to add a mechanism here to allow the parser to continue to process
 *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
 *  out of contexts.
 *
 *  Nothing is done (and NS_OK is returned) when the parser is blocked or
 *  mInternalState is already NS_ERROR_HTMLPARSER_STOPPARSING.
 *
 *  @param   allowIteration : set to true if non-script resumption is
 *                            requested
 *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
 *  @param   aCanInterrupt : not read anywhere in this function's body.
 *  @return  error code -- 0 if ok, non-zero if error. A result of
 *           NS_ERROR_HTMLPARSER_INTERRUPTED, NS_ERROR_HTMLPARSER_BLOCK or
 *           NS_ERROR_HTMLPARSER_STOPPARSING from BuildModel() is reported to
 *           the caller as NS_OK.
 */
nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
                               bool aCanInterrupt) {
  nsresult result = NS_OK;

  if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
    result = WillBuildModel(mParserContext->mScanner->GetFilename());
    if (NS_FAILED(result)) {
      mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
      return result;
    }

    if (mDTD) {
      mSink->WillResume();
      bool theIterationIsOk = true;

      // Each pass tokenizes (when allowed) and then builds the model; the
      // loop keeps going only while both steps leave more work to do.
      while (result == NS_OK && theIterationIsOk) {
        if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
          // -- Ref: Bug# 22485 --
          // Insert the unused input into the source buffer
          // as if it was read from the input stream.
          // Adding UngetReadable() per vidur!!
          mParserContext->mScanner->UngetReadable(mUnusedInput);
          mUnusedInput.Truncate(0);
        }

        // Only allow parsing to be interrupted in the subsequent call to
        // build model.
        nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
                                          ? Tokenize(aIsFinalChunk)
                                          : NS_OK;
        result = BuildModel();

        // An interruption on the final chunk posts a continue event before
        // this call returns.
        if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
          PostContinueEvent();
        }

        theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
                           result != NS_ERROR_HTMLPARSER_INTERRUPTED;

        // Make sure not to stop parsing too early. Therefore, before shutting
        // down the parser, it's important to check whether the input buffer
        // has been scanned to completion (theTokenizerResult should be kEOF).
        // kEOF -> End of buffer.

        // If we're told to block the parser, we disable all further parsing
        // (and cache any data coming in) until the parser is re-enabled.
        if (NS_ERROR_HTMLPARSER_BLOCK == result) {
          mSink->WillInterrupt();
          if (!mBlocked) {
            // If we were blocked by a recursive invocation, don't re-block.
            BlockParser();
          }
          return NS_OK;
        }
        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
          // Note: Parser Terminate() calls DidBuildModel.
          if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
            DidBuildModel(mStreamStatus);
            mInternalState = result;
          }

          return NS_OK;
        }
        // Either the current buffer was consumed cleanly or BuildModel() was
        // interrupted: decide whether the current context is finished.
        if ((NS_OK == result &&
             theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          bool theContextIsStringBased =
              CParserContext::eCTString == mParserContext->mContextType;

          if (mParserContext->mStreamListenerState == eOnStop ||
              !mParserContext->mMultipart || theContextIsStringBased) {
            if (!mParserContext->mPrevContext) {
              // Root context: only finish the model once the stream has
              // reached eOnStop.
              if (mParserContext->mStreamListenerState == eOnStop) {
                DidBuildModel(mStreamStatus);
                return NS_OK;
              }
            } else {
              // Nested context: pop it, keep its unused data if requested,
              // and carry on with the context underneath.
              CParserContext* theContext = PopContext();
              if (theContext) {
                theIterationIsOk = allowIteration && theContextIsStringBased;
                if (theContext->mCopyUnused) {
                  if (!theContext->mScanner->CopyUnusedData(mUnusedInput)) {
                    mInternalState = NS_ERROR_OUT_OF_MEMORY;
                  }
                }

                delete theContext;
              }

              result = mInternalState;
              aIsFinalChunk = mParserContext &&
                              mParserContext->mStreamListenerState == eOnStop;
              // ...then intentionally fall through to mSink->WillInterrupt()...
            }
          }
        }

        if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          // An interruption is not reported as an error to the loop or the
          // caller.
          result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
          mSink->WillInterrupt();
        }
      }
    } else {
      mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
    }
  }

  return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
}
1046 
1047 /**
1048  *  This is where we loop over the tokens created in the
1049  *  tokenization phase, and try to make sense out of them.
1050  */
BuildModel()1051 nsresult nsParser::BuildModel() {
1052   nsITokenizer* theTokenizer = nullptr;
1053 
1054   nsresult result = NS_OK;
1055   if (mParserContext) {
1056     result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1057   }
1058 
1059   if (NS_SUCCEEDED(result)) {
1060     if (mDTD) {
1061       result = mDTD->BuildModel(theTokenizer, mSink);
1062     }
1063   } else {
1064     mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1065   }
1066   return result;
1067 }
1068 
1069 /*******************************************************************
1070   These methods are used to talk to the netlib system...
1071  *******************************************************************/
1072 
OnStartRequest(nsIRequest * request)1073 nsresult nsParser::OnStartRequest(nsIRequest* request) {
1074   MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
1075              "Parser's nsIStreamListener API was not setup "
1076              "correctly in constructor.");
1077 
1078   if (mObserver) {
1079     mObserver->OnStartRequest(request);
1080   }
1081   mParserContext->mStreamListenerState = eOnStart;
1082   mParserContext->mAutoDetectStatus = eUnknownDetect;
1083   mParserContext->mRequest = request;
1084 
1085   NS_ASSERTION(!mParserContext->mPrevContext,
1086                "Clobbering DTD for non-root parser context!");
1087   mDTD = nullptr;
1088 
1089   nsresult rv;
1090   nsAutoCString contentType;
1091   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
1092   if (channel) {
1093     rv = channel->GetContentType(contentType);
1094     if (NS_SUCCEEDED(rv)) {
1095       mParserContext->SetMimeType(contentType);
1096     }
1097   }
1098 
1099   rv = NS_OK;
1100 
1101   return rv;
1102 }
1103 
ExtractCharsetFromXmlDeclaration(const unsigned char * aBytes,int32_t aLen,nsCString & oCharset)1104 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
1105                                              int32_t aLen,
1106                                              nsCString& oCharset) {
1107   // This code is rather pointless to have. Might as well reuse expat as
1108   // seen in nsHtml5StreamParser. -- hsivonen
1109   oCharset.Truncate();
1110   if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
1111       ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
1112     int32_t i;
1113     bool versionFound = false, encodingFound = false;
1114     for (i = 6; i < aLen && !encodingFound; ++i) {
1115       // end of XML declaration?
1116       if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
1117           (((char*)aBytes)[i + 1] == '>')) {
1118         break;
1119       }
1120       // Version is required.
1121       if (!versionFound) {
1122         // Want to avoid string comparisons, hence looking for 'n'
1123         // and only if found check the string leading to it. Not
1124         // foolproof, but fast.
1125         // The shortest string allowed before this is  (strlen==13):
1126         // <?xml version
1127         if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
1128             (0 == PL_strncmp("versio", (char*)(aBytes + i - 6), 6))) {
1129           // Fast forward through version
1130           char q = 0;
1131           for (++i; i < aLen; ++i) {
1132             char qi = ((char*)aBytes)[i];
1133             if (qi == '\'' || qi == '"') {
1134               if (q && q == qi) {
1135                 //  ending quote
1136                 versionFound = true;
1137                 break;
1138               } else {
1139                 // Starting quote
1140                 q = qi;
1141               }
1142             }
1143           }
1144         }
1145       } else {
1146         // encoding must follow version
1147         // Want to avoid string comparisons, hence looking for 'g'
1148         // and only if found check the string leading to it. Not
1149         // foolproof, but fast.
1150         // The shortest allowed string before this (strlen==26):
1151         // <?xml version="1" encoding
1152         if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
1153             (0 == PL_strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
1154           int32_t encStart = 0;
1155           char q = 0;
1156           for (++i; i < aLen; ++i) {
1157             char qi = ((char*)aBytes)[i];
1158             if (qi == '\'' || qi == '"') {
1159               if (q && q == qi) {
1160                 int32_t count = i - encStart;
1161                 // encoding value is invalid if it is UTF-16
1162                 if (count > 0 &&
1163                     PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
1164                                    count)) {
1165                   oCharset.Assign((char*)(aBytes + encStart), count);
1166                 }
1167                 encodingFound = true;
1168                 break;
1169               } else {
1170                 encStart = i + 1;
1171                 q = qi;
1172               }
1173             }
1174           }
1175         }
1176       }  // if (!versionFound)
1177     }    // for
1178   }
1179   return !oCharset.IsEmpty();
1180 }
1181 
GetNextChar(nsACString::const_iterator & aStart,nsACString::const_iterator & aEnd)1182 inline char GetNextChar(nsACString::const_iterator& aStart,
1183                         nsACString::const_iterator& aEnd) {
1184   NS_ASSERTION(aStart != aEnd, "end of buffer");
1185   return (++aStart != aEnd) ? *aStart : '\0';
1186 }
1187 
NoOpParserWriteFunc(nsIInputStream * in,void * closure,const char * fromRawSegment,uint32_t toOffset,uint32_t count,uint32_t * writeCount)1188 static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure,
1189                                     const char* fromRawSegment,
1190                                     uint32_t toOffset, uint32_t count,
1191                                     uint32_t* writeCount) {
1192   *writeCount = count;
1193   return NS_OK;
1194 }
1195 
1196 typedef struct {
1197   bool mNeedCharsetCheck;
1198   nsParser* mParser;
1199   nsScanner* mScanner;
1200   nsIRequest* mRequest;
1201 } ParserWriteStruct;
1202 
1203 /*
1204  * This function is invoked as a result of a call to a stream's
1205  * ReadSegments() method. It is called for each contiguous buffer
1206  * of data in the underlying stream or pipe. Using ReadSegments
1207  * allows us to avoid copying data to read out of the stream.
1208  */
ParserWriteFunc(nsIInputStream * in,void * closure,const char * fromRawSegment,uint32_t toOffset,uint32_t count,uint32_t * writeCount)1209 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
1210                                 const char* fromRawSegment, uint32_t toOffset,
1211                                 uint32_t count, uint32_t* writeCount) {
1212   nsresult result;
1213   ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
1214   const unsigned char* buf =
1215       reinterpret_cast<const unsigned char*>(fromRawSegment);
1216   uint32_t theNumRead = count;
1217 
1218   if (!pws) {
1219     return NS_ERROR_FAILURE;
1220   }
1221 
1222   if (pws->mNeedCharsetCheck) {
1223     pws->mNeedCharsetCheck = false;
1224     int32_t source;
1225     auto preferred = pws->mParser->GetDocumentCharset(source);
1226 
1227     // This code was bogus when I found it. It expects the BOM or the XML
1228     // declaration to be entirely in the first network buffer. -- hsivonen
1229     const Encoding* encoding;
1230     size_t bomLength;
1231     Tie(encoding, bomLength) = Encoding::ForBOM(Span(buf, count));
1232     Unused << bomLength;
1233     if (encoding) {
1234       // The decoder will swallow the BOM. The UTF-16 will re-sniff for
1235       // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
1236       // or "UTF-16BE".
1237       preferred = WrapNotNull(encoding);
1238       source = kCharsetFromByteOrderMark;
1239     } else if (source < kCharsetFromChannel) {
1240       nsAutoCString declCharset;
1241 
1242       if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
1243         encoding = Encoding::ForLabel(declCharset);
1244         if (encoding) {
1245           preferred = WrapNotNull(encoding);
1246           source = kCharsetFromMetaTag;
1247         }
1248       }
1249     }
1250 
1251     pws->mParser->SetDocumentCharset(preferred, source, false);
1252     pws->mParser->SetSinkCharset(preferred);
1253   }
1254 
1255   result = pws->mScanner->Append(fromRawSegment, theNumRead);
1256   if (NS_SUCCEEDED(result)) {
1257     *writeCount = count;
1258   }
1259 
1260   return result;
1261 }
1262 
OnDataAvailable(nsIRequest * request,nsIInputStream * pIStream,uint64_t sourceOffset,uint32_t aLength)1263 nsresult nsParser::OnDataAvailable(nsIRequest* request,
1264                                    nsIInputStream* pIStream,
1265                                    uint64_t sourceOffset, uint32_t aLength) {
1266   MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
1267               eOnDataAvail == mParserContext->mStreamListenerState),
1268              "Error: OnStartRequest() must be called before OnDataAvailable()");
1269   MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
1270              "Must have a buffered input stream");
1271 
1272   nsresult rv = NS_OK;
1273 
1274   if (mIsAboutBlank) {
1275     MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
1276     // ... but if an extension tries to feed us data for about:blank in a
1277     // release build, silently ignore the data.
1278     uint32_t totalRead;
1279     rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength,
1280                                 &totalRead);
1281     return rv;
1282   }
1283 
1284   CParserContext* theContext = mParserContext;
1285 
1286   while (theContext && theContext->mRequest != request) {
1287     theContext = theContext->mPrevContext;
1288   }
1289 
1290   if (theContext) {
1291     theContext->mStreamListenerState = eOnDataAvail;
1292 
1293     if (eInvalidDetect == theContext->mAutoDetectStatus) {
1294       if (theContext->mScanner) {
1295         nsScannerIterator iter;
1296         theContext->mScanner->EndReading(iter);
1297         theContext->mScanner->SetPosition(iter, true);
1298       }
1299     }
1300 
1301     uint32_t totalRead;
1302     ParserWriteStruct pws;
1303     pws.mNeedCharsetCheck = true;
1304     pws.mParser = this;
1305     pws.mScanner = theContext->mScanner.get();
1306     pws.mRequest = request;
1307 
1308     rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1309     if (NS_FAILED(rv)) {
1310       return rv;
1311     }
1312 
1313     if (IsOkToProcessNetworkData()) {
1314       nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1315       nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1316       mProcessingNetworkData = true;
1317       if (sinkDeathGrip) {
1318         sinkDeathGrip->WillParse();
1319       }
1320       rv = ResumeParse();
1321       mProcessingNetworkData = false;
1322     }
1323   } else {
1324     rv = NS_ERROR_UNEXPECTED;
1325   }
1326 
1327   return rv;
1328 }
1329 
1330 /**
1331  *  This is called by the networking library once the last block of data
1332  *  has been collected from the net.
1333  */
OnStopRequest(nsIRequest * request,nsresult status)1334 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
1335   nsresult rv = NS_OK;
1336 
1337   CParserContext* pc = mParserContext;
1338   while (pc) {
1339     if (pc->mRequest == request) {
1340       pc->mStreamListenerState = eOnStop;
1341       pc->mScanner->SetIncremental(false);
1342       break;
1343     }
1344 
1345     pc = pc->mPrevContext;
1346   }
1347 
1348   mStreamStatus = status;
1349 
1350   if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
1351     mProcessingNetworkData = true;
1352     if (mSink) {
1353       mSink->WillParse();
1354     }
1355     rv = ResumeParse(true, true);
1356     mProcessingNetworkData = false;
1357   }
1358 
1359   // If the parser isn't enabled, we don't finish parsing till
1360   // it is reenabled.
1361 
1362   // XXX Should we wait to notify our observers as well if the
1363   // parser isn't yet enabled?
1364   if (mObserver) {
1365     mObserver->OnStopRequest(request, status);
1366   }
1367 
1368   return rv;
1369 }
1370 
1371 /*******************************************************************
1372   Here come the tokenization methods...
1373  *******************************************************************/
1374 
1375 /**
1376  *  Part of the code sandwich, this gets called right before
1377  *  the tokenization process begins. The main reason for
1378  *  this call is to allow the delegate to do initialization.
1379  */
WillTokenize(bool aIsFinalChunk)1380 bool nsParser::WillTokenize(bool aIsFinalChunk) {
1381   if (!mParserContext) {
1382     return true;
1383   }
1384 
1385   nsITokenizer* theTokenizer;
1386   nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1387   NS_ENSURE_SUCCESS(result, false);
1388   return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
1389 }
1390 
1391 /**
1392  * This is the primary control routine to consume tokens.
1393  * It iteratively consumes tokens until an error occurs or
1394  * you run out of data.
1395  */
Tokenize(bool aIsFinalChunk)1396 nsresult nsParser::Tokenize(bool aIsFinalChunk) {
1397   nsITokenizer* theTokenizer;
1398 
1399   nsresult result = NS_ERROR_NOT_AVAILABLE;
1400   if (mParserContext) {
1401     result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1402   }
1403 
1404   if (NS_SUCCEEDED(result)) {
1405     bool flushTokens = false;
1406 
1407     bool killSink = false;
1408 
1409     WillTokenize(aIsFinalChunk);
1410     while (NS_SUCCEEDED(result)) {
1411       mParserContext->mScanner->Mark();
1412       result =
1413           theTokenizer->ConsumeToken(*mParserContext->mScanner, flushTokens);
1414       if (NS_FAILED(result)) {
1415         mParserContext->mScanner->RewindToMark();
1416         if (NS_ERROR_HTMLPARSER_EOF == result) {
1417           break;
1418         }
1419         if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
1420           killSink = true;
1421           result = Terminate();
1422           break;
1423         }
1424       } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
1425         // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix
1426         // Bug# 23931. Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
1427         // Also remember to update the marked position.
1428         mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
1429         mParserContext->mScanner->Mark();
1430         break;
1431       }
1432     }
1433 
1434     if (killSink) {
1435       mSink = nullptr;
1436     }
1437   } else {
1438     result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1439   }
1440 
1441   return result;
1442 }
1443 
1444 /**
1445  * Get the channel associated with this parser
1446  *
1447  * @param aChannel out param that will contain the result
1448  * @return NS_OK if successful
1449  */
1450 NS_IMETHODIMP
GetChannel(nsIChannel ** aChannel)1451 nsParser::GetChannel(nsIChannel** aChannel) {
1452   nsresult result = NS_ERROR_NOT_AVAILABLE;
1453   if (mParserContext && mParserContext->mRequest) {
1454     result = CallQueryInterface(mParserContext->mRequest, aChannel);
1455   }
1456   return result;
1457 }
1458 
1459 /**
1460  * Get the DTD associated with this parser
1461  */
1462 NS_IMETHODIMP
GetDTD(nsIDTD ** aDTD)1463 nsParser::GetDTD(nsIDTD** aDTD) {
1464   if (mParserContext) {
1465     NS_IF_ADDREF(*aDTD = mDTD);
1466   }
1467 
1468   return NS_OK;
1469 }
1470 
1471 /**
1472  * Get this as nsIStreamListener
1473  */
GetStreamListener()1474 nsIStreamListener* nsParser::GetStreamListener() { return this; }
1475