1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsHtml5Parser.h"
8 
9 #include "mozilla/AutoRestore.h"
10 #include "nsCRT.h"
11 #include "nsContentUtils.h"  // for kLoadAsData
12 #include "nsHtml5AtomTable.h"
13 #include "nsHtml5DependentUTF16Buffer.h"
14 #include "nsHtml5Tokenizer.h"
15 #include "nsHtml5TreeBuilder.h"
16 #include "nsNetUtil.h"
17 
// Interface table for nsHtml5Parser: lists nsIParser and
// nsISupportsWeakReference, then segues into the cycle-collection interface
// map entries for this class.
18 NS_INTERFACE_TABLE_HEAD(nsHtml5Parser)
19   NS_INTERFACE_TABLE(nsHtml5Parser, nsIParser, nsISupportsWeakReference)
20   NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5Parser)
21 NS_INTERFACE_MAP_END
22 
// Cycle-collecting AddRef/Release implementations for nsHtml5Parser.
23 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5Parser)
24 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5Parser)
25 
// Defines the cycle-collection participant class for nsHtml5Parser; the
// traverse and unlink bodies follow below.
26 NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5Parser)
27 
// Cycle-collection traversal: reports the executor member and the stream
// parser (the latter through the raw pointer returned by GetStreamParser()).
28 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5Parser)
29   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mExecutor)
30   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_RAWPTR(GetStreamParser())
31 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
32 
// Cycle-collection unlink: drops the executor, clears the weak reference
// support, and drops the stream parser via DropStreamParser().
33 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5Parser)
34   NS_IMPL_CYCLE_COLLECTION_UNLINK(mExecutor)
35   NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
36   tmp->DropStreamParser();
37 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
38 
nsHtml5Parser()39 nsHtml5Parser::nsHtml5Parser()
40     : mLastWasCR(false),
41       mDocWriteSpeculativeLastWasCR(false),
42       mBlocked(0),
43       mDocWriteSpeculatorActive(false),
44       mScriptNestingLevel(0),
45       mDocumentClosed(false),
46       mInDocumentWrite(false),
47       mInsertionPointPermanentlyUndefined(false),
48       mFirstBuffer(new nsHtml5OwningUTF16Buffer((void*)nullptr)),
49       mLastBuffer(mFirstBuffer),
50       mExecutor(new nsHtml5TreeOpExecutor()),
51       mTreeBuilder(new nsHtml5TreeBuilder(mExecutor, nullptr, false)),
52       mTokenizer(new nsHtml5Tokenizer(mTreeBuilder.get(), false)),
53       mRootContextLineNumber(1),
54       mReturnToStreamParserPermitted(false) {
55   mTokenizer->setInterner(&mAtomTable);
56 }
57 
~nsHtml5Parser()58 nsHtml5Parser::~nsHtml5Parser() {
59   mTokenizer->end();
60   if (mDocWriteSpeculativeTokenizer) {
61     mDocWriteSpeculativeTokenizer->end();
62   }
63 }
64 
NS_IMETHODIMP_(void)65 NS_IMETHODIMP_(void)
66 nsHtml5Parser::SetContentSink(nsIContentSink* aSink) {
67   NS_ASSERTION(aSink == static_cast<nsIContentSink*>(mExecutor),
68                "Attempt to set a foreign sink.");
69 }
70 
// Returns the executor viewed as an nsIContentSink.
NS_IMETHODIMP_(nsIContentSink*)
nsHtml5Parser::GetContentSink() {
  nsIContentSink* sink = static_cast<nsIContentSink*>(mExecutor);
  return sink;
}
75 
NS_IMETHODIMP_(void)76 NS_IMETHODIMP_(void)
77 nsHtml5Parser::GetCommand(nsCString& aCommand) {
78   aCommand.AssignLiteral("view");
79 }
80 
NS_IMETHODIMP_(void)81 NS_IMETHODIMP_(void)
82 nsHtml5Parser::SetCommand(const char* aCommand) {
83   NS_ASSERTION(!strcmp(aCommand, "view") || !strcmp(aCommand, "view-source") ||
84                    !strcmp(aCommand, "external-resource") ||
85                    !strcmp(aCommand, "import") ||
86                    !strcmp(aCommand, kLoadAsData),
87                "Unsupported parser command");
88 }
89 
NS_IMETHODIMP_(void)90 NS_IMETHODIMP_(void)
91 nsHtml5Parser::SetCommand(eParserCommands aParserCommand) {
92   NS_ASSERTION(aParserCommand == eViewNormal,
93                "Parser command was not eViewNormal.");
94 }
95 
SetDocumentCharset(NotNull<const Encoding * > aEncoding,int32_t aCharsetSource,bool aForceAutoDetection)96 void nsHtml5Parser::SetDocumentCharset(NotNull<const Encoding*> aEncoding,
97                                        int32_t aCharsetSource,
98                                        bool aForceAutoDetection) {
99   MOZ_ASSERT(!mExecutor->HasStarted(), "Document charset set too late.");
100   MOZ_ASSERT(GetStreamParser(), "Setting charset on a script-only parser.");
101   GetStreamParser()->SetDocumentCharset(
102       aEncoding, (nsCharsetSource)aCharsetSource, aForceAutoDetection);
103   mExecutor->SetDocumentCharsetAndSource(aEncoding,
104                                          (nsCharsetSource)aCharsetSource);
105 }
106 
GetChannel(nsIChannel ** aChannel)107 nsresult nsHtml5Parser::GetChannel(nsIChannel** aChannel) {
108   if (GetStreamParser()) {
109     return GetStreamParser()->GetChannel(aChannel);
110   } else {
111     return NS_ERROR_NOT_AVAILABLE;
112   }
113 }
114 
GetStreamListener()115 nsIStreamListener* nsHtml5Parser::GetStreamListener() {
116   return mStreamListener;
117 }
118 
119 NS_IMETHODIMP
ContinueInterruptedParsing()120 nsHtml5Parser::ContinueInterruptedParsing() {
121   MOZ_ASSERT_UNREACHABLE("Don't call. For interface compat only.");
122   return NS_ERROR_NOT_IMPLEMENTED;
123 }
124 
NS_IMETHODIMP_(void)125 NS_IMETHODIMP_(void)
126 nsHtml5Parser::BlockParser() { mBlocked++; }
127 
NS_IMETHODIMP_(void)128 NS_IMETHODIMP_(void)
129 nsHtml5Parser::UnblockParser() {
130   MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
131   if (MOZ_LIKELY(mBlocked > 0)) {
132     mBlocked--;
133   }
134   if (MOZ_LIKELY(mBlocked == 0) && mExecutor) {
135     mExecutor->ContinueInterruptedParsingAsync();
136   }
137 }
138 
NS_IMETHODIMP_(void)139 NS_IMETHODIMP_(void)
140 nsHtml5Parser::ContinueInterruptedParsingAsync() {
141   if (mExecutor) {
142     mExecutor->ContinueInterruptedParsingAsync();
143   }
144 }
145 
NS_IMETHODIMP_(bool)146 NS_IMETHODIMP_(bool)
147 nsHtml5Parser::IsParserEnabled() { return !mBlocked; }
148 
NS_IMETHODIMP_(bool)149 NS_IMETHODIMP_(bool)
150 nsHtml5Parser::IsComplete() { return mExecutor->IsComplete(); }
151 
152 NS_IMETHODIMP
Parse(nsIURI * aURL)153 nsHtml5Parser::Parse(nsIURI* aURL) {
154   /*
155    * Do NOT cause WillBuildModel to be called synchronously from here!
156    * The document won't be ready for it until OnStartRequest!
157    */
158   MOZ_ASSERT(!mExecutor->HasStarted(),
159              "Tried to start parse without initializing the parser.");
160   MOZ_ASSERT(GetStreamParser(),
161              "Can't call this Parse() variant on script-created parser");
162 
163   GetStreamParser()->SetViewSourceTitle(aURL);  // In case we're viewing source
164   mExecutor->SetStreamParser(GetStreamParser());
165   mExecutor->SetParser(this);
166   return NS_OK;
167 }
168 
Parse(const nsAString & aSourceBuffer,void * aKey,bool aLastCall)169 nsresult nsHtml5Parser::Parse(const nsAString& aSourceBuffer, void* aKey,
170                               bool aLastCall) {
171   nsresult rv;
172   if (NS_FAILED(rv = mExecutor->IsBroken())) {
173     return rv;
174   }
175   if (aSourceBuffer.Length() > INT32_MAX) {
176     return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
177   }
178 
179   // Maintain a reference to ourselves so we don't go away
180   // till we're completely done. The old parser grips itself in this method.
181   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
182 
183   // Gripping the other objects just in case, since the other old grip
184   // required grips to these, too.
185   RefPtr<nsHtml5StreamParser> streamKungFuDeathGrip(GetStreamParser());
186   mozilla::Unused << streamKungFuDeathGrip;  // Not used within function
187   RefPtr<nsHtml5TreeOpExecutor> executor(mExecutor);
188 
189   MOZ_RELEASE_ASSERT(executor->HasStarted());
190 
191   // Return early if the parser has processed EOF
192   if (executor->IsComplete()) {
193     return NS_OK;
194   }
195 
196   if (aLastCall && aSourceBuffer.IsEmpty() && !aKey) {
197     // document.close()
198     NS_ASSERTION(!GetStreamParser(),
199                  "Had stream parser but got document.close().");
200     if (mDocumentClosed) {
201       // already closed
202       return NS_OK;
203     }
204     mDocumentClosed = true;
205     if (!mBlocked && !mInDocumentWrite) {
206       return ParseUntilBlocked();
207     }
208     return NS_OK;
209   }
210 
211   // If we got this far, we are dealing with a document.write or
212   // document.writeln call--not document.close().
213 
214   MOZ_RELEASE_ASSERT(
215       IsInsertionPointDefined(),
216       "Doc.write reached parser with undefined insertion point.");
217 
218   MOZ_RELEASE_ASSERT(!(GetStreamParser() && !aKey),
219                      "Got a null key in a non-script-created parser");
220 
221   // XXX is this optimization bogus?
222   if (aSourceBuffer.IsEmpty()) {
223     return NS_OK;
224   }
225 
226   // This guard is here to prevent document.close from tokenizing synchronously
227   // while a document.write (that wrote the script that called document.close!)
228   // is still on the call stack.
229   mozilla::AutoRestore<bool> guard(mInDocumentWrite);
230   mInDocumentWrite = true;
231 
232   // The script is identified by aKey. If there's nothing in the buffer
233   // chain for that key, we'll insert at the head of the queue.
234   // When the script leaves something in the queue, a zero-length
235   // key-holder "buffer" is inserted in the queue. If the same script
236   // leaves something in the chain again, it will be inserted immediately
237   // before the old key holder belonging to the same script.
238   //
239   // We don't do the actual data insertion yet in the hope that the data gets
240   // tokenized and there no data or less data to copy to the heap after
241   // tokenization. Also, this way, we avoid inserting one empty data buffer
242   // per document.write, which matters for performance when the parser isn't
243   // blocked and a badly-authored script calls document.write() once per
244   // input character. (As seen in a benchmark!)
245   //
246   // The insertion into the input stream happens conceptually before anything
247   // gets tokenized. To make sure multi-level document.write works right,
248   // it's necessary to establish the location of our parser key up front
249   // in case this is the first write with this key.
250   //
251   // In a document.open() case, the first write level has a null key, so that
252   // case is handled separately, because normal buffers containing data
253   // have null keys.
254 
255   // These don't need to be owning references, because they always point to
256   // the buffer queue and buffers can't be removed from the buffer queue
257   // before document.write() returns. The buffer queue clean-up happens the
258   // next time ParseUntilBlocked() is called.
259   // However, they are made owning just in case the reasoning above is flawed
260   // and a flaw would lead to worse problems with plain pointers. If this
261   // turns out to be a perf problem, it's worthwhile to consider making
262   // prevSearchbuf a plain pointer again.
263   RefPtr<nsHtml5OwningUTF16Buffer> prevSearchBuf;
264   RefPtr<nsHtml5OwningUTF16Buffer> firstLevelMarker;
265 
266   if (aKey) {
267     if (mFirstBuffer == mLastBuffer) {
268       nsHtml5OwningUTF16Buffer* keyHolder = new nsHtml5OwningUTF16Buffer(aKey);
269       keyHolder->next = mLastBuffer;
270       mFirstBuffer = keyHolder;
271     } else if (mFirstBuffer->key != aKey) {
272       prevSearchBuf = mFirstBuffer;
273       for (;;) {
274         if (prevSearchBuf->next == mLastBuffer) {
275           // key was not found
276           nsHtml5OwningUTF16Buffer* keyHolder =
277               new nsHtml5OwningUTF16Buffer(aKey);
278           keyHolder->next = mFirstBuffer;
279           mFirstBuffer = keyHolder;
280           prevSearchBuf = nullptr;
281           break;
282         }
283         if (prevSearchBuf->next->key == aKey) {
284           // found a key holder
285           break;
286         }
287         prevSearchBuf = prevSearchBuf->next;
288       }
289     }  // else mFirstBuffer is the keyholder
290 
291     // prevSearchBuf is the previous buffer before the keyholder or null if
292     // there isn't one.
293   } else {
294     // We have a first-level write in the document.open() case. We insert before
295     // mLastBuffer, effectively, by making mLastBuffer be a new sentinel object
296     // and redesignating the previous mLastBuffer as our firstLevelMarker.  We
297     // need to put a marker there, because otherwise additional document.writes
298     // from nested event loops would insert in the wrong place. Sigh.
299     mLastBuffer->next = new nsHtml5OwningUTF16Buffer((void*)nullptr);
300     firstLevelMarker = mLastBuffer;
301     mLastBuffer = mLastBuffer->next;
302   }
303 
304   nsHtml5DependentUTF16Buffer stackBuffer(aSourceBuffer);
305 
306   while (!mBlocked && stackBuffer.hasMore()) {
307     stackBuffer.adjust(mLastWasCR);
308     mLastWasCR = false;
309     if (stackBuffer.hasMore()) {
310       int32_t lineNumberSave;
311       bool inRootContext = (!GetStreamParser() && !aKey);
312       if (inRootContext) {
313         mTokenizer->setLineNumber(mRootContextLineNumber);
314       } else {
315         // we aren't the root context, so save the line number on the
316         // *stack* so that we can restore it.
317         lineNumberSave = mTokenizer->getLineNumber();
318       }
319 
320       if (!mTokenizer->EnsureBufferSpace(stackBuffer.getLength())) {
321         return executor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
322       }
323       mLastWasCR = mTokenizer->tokenizeBuffer(&stackBuffer);
324       if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
325         return executor->MarkAsBroken(rv);
326       }
327 
328       if (inRootContext) {
329         mRootContextLineNumber = mTokenizer->getLineNumber();
330       } else {
331         mTokenizer->setLineNumber(lineNumberSave);
332       }
333 
334       if (mTreeBuilder->HasScript()) {
335         mTreeBuilder->Flush();                // Move ops to the executor
336         rv = executor->FlushDocumentWrite();  // run the ops
337         NS_ENSURE_SUCCESS(rv, rv);
338         // Flushing tree ops can cause all sorts of things.
339         // Return early if the parser got terminated.
340         if (executor->IsComplete()) {
341           return NS_OK;
342         }
343       }
344       // Ignore suspension requests
345     }
346   }
347 
348   RefPtr<nsHtml5OwningUTF16Buffer> heapBuffer;
349   if (stackBuffer.hasMore()) {
350     // The buffer wasn't tokenized to completion. Create a copy of the tail
351     // on the heap.
352     heapBuffer = stackBuffer.FalliblyCopyAsOwningBuffer();
353     if (!heapBuffer) {
354       // Allocation failed. The parser is now broken.
355       return executor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
356     }
357   }
358 
359   if (heapBuffer) {
360     // We have something to insert before the keyholder holding in the non-null
361     // aKey case and we have something to swap into firstLevelMarker in the
362     // null aKey case.
363     if (aKey) {
364       NS_ASSERTION(mFirstBuffer != mLastBuffer, "Where's the keyholder?");
365       // the key holder is still somewhere further down the list from
366       // prevSearchBuf (which may be null)
367       if (mFirstBuffer->key == aKey) {
368         NS_ASSERTION(
369             !prevSearchBuf,
370             "Non-null prevSearchBuf when mFirstBuffer is the key holder?");
371         heapBuffer->next = mFirstBuffer;
372         mFirstBuffer = heapBuffer;
373       } else {
374         if (!prevSearchBuf) {
375           prevSearchBuf = mFirstBuffer;
376         }
377         // We created a key holder earlier, so we will find it without walking
378         // past the end of the list.
379         while (prevSearchBuf->next->key != aKey) {
380           prevSearchBuf = prevSearchBuf->next;
381         }
382         heapBuffer->next = prevSearchBuf->next;
383         prevSearchBuf->next = heapBuffer;
384       }
385     } else {
386       NS_ASSERTION(firstLevelMarker, "How come we don't have a marker.");
387       firstLevelMarker->Swap(heapBuffer);
388     }
389   }
390 
391   if (!mBlocked) {  // buffer was tokenized to completion
392     NS_ASSERTION(!stackBuffer.hasMore(),
393                  "Buffer wasn't tokenized to completion?");
394     // Scripting semantics require a forced tree builder flush here
395     mTreeBuilder->Flush();                // Move ops to the executor
396     rv = executor->FlushDocumentWrite();  // run the ops
397     NS_ENSURE_SUCCESS(rv, rv);
398   } else if (stackBuffer.hasMore()) {
399     // The buffer wasn't tokenized to completion. Tokenize the untokenized
400     // content in order to preload stuff. This content will be retokenized
401     // later for normal parsing.
402     if (!mDocWriteSpeculatorActive) {
403       mDocWriteSpeculatorActive = true;
404       if (!mDocWriteSpeculativeTreeBuilder) {
405         // Lazily initialize if uninitialized
406         mDocWriteSpeculativeTreeBuilder =
407             MakeUnique<nsHtml5TreeBuilder>(nullptr, executor->GetStage(), true);
408         mDocWriteSpeculativeTreeBuilder->setScriptingEnabled(
409             mTreeBuilder->isScriptingEnabled());
410         mDocWriteSpeculativeTokenizer = MakeUnique<nsHtml5Tokenizer>(
411             mDocWriteSpeculativeTreeBuilder.get(), false);
412         mDocWriteSpeculativeTokenizer->setInterner(&mAtomTable);
413         mDocWriteSpeculativeTokenizer->start();
414       }
415       mDocWriteSpeculativeTokenizer->resetToDataState();
416       mDocWriteSpeculativeTreeBuilder->loadState(mTreeBuilder.get());
417       mDocWriteSpeculativeLastWasCR = false;
418     }
419 
420     // Note that with multilevel document.write if we didn't just activate the
421     // speculator, it's possible that the speculator is now in the wrong state.
422     // That's OK for the sake of simplicity. The worst that can happen is
423     // that the speculative loads aren't exactly right. The content will be
424     // reparsed anyway for non-preload purposes.
425 
426     // The buffer position for subsequent non-speculative parsing now lives
427     // in heapBuffer, so it's ok to let the buffer position of stackBuffer
428     // to be overwritten and not restored below.
429     while (stackBuffer.hasMore()) {
430       stackBuffer.adjust(mDocWriteSpeculativeLastWasCR);
431       if (stackBuffer.hasMore()) {
432         if (!mDocWriteSpeculativeTokenizer->EnsureBufferSpace(
433                 stackBuffer.getLength())) {
434           return executor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
435         }
436         mDocWriteSpeculativeLastWasCR =
437             mDocWriteSpeculativeTokenizer->tokenizeBuffer(&stackBuffer);
438         nsresult rv;
439         if (NS_FAILED((rv = mDocWriteSpeculativeTreeBuilder->IsBroken()))) {
440           return executor->MarkAsBroken(rv);
441         }
442       }
443     }
444 
445     mDocWriteSpeculativeTreeBuilder->Flush();
446     mDocWriteSpeculativeTreeBuilder->DropHandles();
447     executor->FlushSpeculativeLoads();
448   }
449 
450   return NS_OK;
451 }
452 
453 NS_IMETHODIMP
Terminate()454 nsHtml5Parser::Terminate() {
455   // Prevent a second call to DidBuildModel via document.close()
456   mDocumentClosed = true;
457   // We should only call DidBuildModel once, so don't do anything if this is
458   // the second time that Terminate has been called.
459   if (mExecutor->IsComplete()) {
460     return NS_OK;
461   }
462   // XXX - [ until we figure out a way to break parser-sink circularity ]
463   // Hack - Hold a reference until we are completely done...
464   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
465   RefPtr<nsHtml5StreamParser> streamParser(GetStreamParser());
466   RefPtr<nsHtml5TreeOpExecutor> executor(mExecutor);
467   if (streamParser) {
468     streamParser->Terminate();
469   }
470   return executor->DidBuildModel(true);
471 }
472 
IsInsertionPointDefined()473 bool nsHtml5Parser::IsInsertionPointDefined() {
474   return !mExecutor->IsFlushing() && !mInsertionPointPermanentlyUndefined &&
475          (!GetStreamParser() || mScriptNestingLevel != 0);
476 }
477 
IncrementScriptNestingLevel()478 void nsHtml5Parser::IncrementScriptNestingLevel() { ++mScriptNestingLevel; }
479 
DecrementScriptNestingLevel()480 void nsHtml5Parser::DecrementScriptNestingLevel() { --mScriptNestingLevel; }
481 
HasNonzeroScriptNestingLevel() const482 bool nsHtml5Parser::HasNonzeroScriptNestingLevel() const {
483   return mScriptNestingLevel != 0;
484 }
485 
MarkAsNotScriptCreated(const char * aCommand)486 void nsHtml5Parser::MarkAsNotScriptCreated(const char* aCommand) {
487   MOZ_ASSERT(!mStreamListener, "Must not call this twice.");
488   eParserMode mode = NORMAL;
489   if (!nsCRT::strcmp(aCommand, "view-source")) {
490     mode = VIEW_SOURCE_HTML;
491   } else if (!nsCRT::strcmp(aCommand, "view-source-xml")) {
492     mode = VIEW_SOURCE_XML;
493   } else if (!nsCRT::strcmp(aCommand, "view-source-plain")) {
494     mode = VIEW_SOURCE_PLAIN;
495   } else if (!nsCRT::strcmp(aCommand, "plain-text")) {
496     mode = PLAIN_TEXT;
497   } else if (!nsCRT::strcmp(aCommand, kLoadAsData)) {
498     mode = LOAD_AS_DATA;
499   }
500 #ifdef DEBUG
501   else {
502     NS_ASSERTION(!nsCRT::strcmp(aCommand, "view") ||
503                      !nsCRT::strcmp(aCommand, "external-resource") ||
504                      !nsCRT::strcmp(aCommand, "import"),
505                  "Unsupported parser command!");
506   }
507 #endif
508   mStreamListener =
509       new nsHtml5StreamListener(new nsHtml5StreamParser(mExecutor, this, mode));
510 }
511 
IsScriptCreated()512 bool nsHtml5Parser::IsScriptCreated() { return !GetStreamParser(); }
513 
514 /* End nsIParser  */
515 
516 // not from interface
ParseUntilBlocked()517 nsresult nsHtml5Parser::ParseUntilBlocked() {
518   nsresult rv = mExecutor->IsBroken();
519   NS_ENSURE_SUCCESS(rv, rv);
520   if (mBlocked || mInsertionPointPermanentlyUndefined ||
521       mExecutor->IsComplete()) {
522     return NS_OK;
523   }
524   NS_ASSERTION(mExecutor->HasStarted(), "Bad life cycle.");
525   NS_ASSERTION(!mInDocumentWrite,
526                "ParseUntilBlocked entered while in doc.write!");
527 
528   mDocWriteSpeculatorActive = false;
529 
530   for (;;) {
531     if (!mFirstBuffer->hasMore()) {
532       if (mFirstBuffer == mLastBuffer) {
533         if (mExecutor->IsComplete()) {
534           // something like cache manisfests stopped the parse in mid-flight
535           return NS_OK;
536         }
537         if (mDocumentClosed) {
538           PermanentlyUndefineInsertionPoint();
539           nsresult rv;
540           MOZ_RELEASE_ASSERT(
541               !GetStreamParser(),
542               "This should only happen with script-created parser.");
543           if (NS_SUCCEEDED((rv = mExecutor->IsBroken()))) {
544             mTokenizer->eof();
545             if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
546               mExecutor->MarkAsBroken(rv);
547             } else {
548               mTreeBuilder->StreamEnded();
549             }
550           }
551           mTreeBuilder->Flush();
552           mExecutor->FlushDocumentWrite();
553           // The below call does memory cleanup, so call it even if the
554           // parser has been marked as broken.
555           mTokenizer->end();
556           return rv;
557         }
558         // never release the last buffer.
559         NS_ASSERTION(!mLastBuffer->getStart() && !mLastBuffer->getEnd(),
560                      "Sentinel buffer had its indeces changed.");
561         if (GetStreamParser()) {
562           if (mReturnToStreamParserPermitted &&
563               !mExecutor->IsScriptExecuting()) {
564             mTreeBuilder->Flush();
565             mReturnToStreamParserPermitted = false;
566             GetStreamParser()->ContinueAfterScriptsOrEncodingCommitment(
567                 mTokenizer.get(), mTreeBuilder.get(), mLastWasCR);
568           }
569         } else {
570           // Script-created parser
571           mTreeBuilder->Flush();
572           // No need to flush the executor, because the executor is already
573           // in a flush
574           NS_ASSERTION(mExecutor->IsInFlushLoop(),
575                        "How did we come here without being in the flush loop?");
576         }
577         return NS_OK;  // no more data for now but expecting more
578       }
579       mFirstBuffer = mFirstBuffer->next;
580       continue;
581     }
582 
583     if (mBlocked || mExecutor->IsComplete()) {
584       return NS_OK;
585     }
586 
587     // now we have a non-empty buffer
588     mFirstBuffer->adjust(mLastWasCR);
589     mLastWasCR = false;
590     if (mFirstBuffer->hasMore()) {
591       bool inRootContext = (!GetStreamParser() && !mFirstBuffer->key);
592       if (inRootContext) {
593         mTokenizer->setLineNumber(mRootContextLineNumber);
594       }
595       if (!mTokenizer->EnsureBufferSpace(mFirstBuffer->getLength())) {
596         return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
597       }
598       mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
599       nsresult rv;
600       if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
601         return mExecutor->MarkAsBroken(rv);
602       }
603       if (inRootContext) {
604         mRootContextLineNumber = mTokenizer->getLineNumber();
605       }
606       if (mTreeBuilder->HasScript()) {
607         mTreeBuilder->Flush();
608         rv = mExecutor->FlushDocumentWrite();
609         NS_ENSURE_SUCCESS(rv, rv);
610       }
611       if (mBlocked) {
612         return NS_OK;
613       }
614     }
615   }
616 }
617 
StartExecutor()618 nsresult nsHtml5Parser::StartExecutor() {
619   MOZ_ASSERT(!GetStreamParser(),
620              "Had stream parser but document.write started life cycle.");
621   // This is part of the setup document.open() does.
622   RefPtr<nsHtml5TreeOpExecutor> executor(mExecutor);
623   executor->SetParser(this);
624   mTreeBuilder->setScriptingEnabled(executor->IsScriptEnabled());
625 
626   mTreeBuilder->setIsSrcdocDocument(false);
627 
628   mTokenizer->start();
629   executor->Start();
630 
631   /*
632    * We know we're in document.open(), so our document must already
633    * have a script global andthe WillBuildModel call is safe.
634    */
635   return executor->WillBuildModel();
636 }
637 
Initialize(mozilla::dom::Document * aDoc,nsIURI * aURI,nsISupports * aContainer,nsIChannel * aChannel)638 nsresult nsHtml5Parser::Initialize(mozilla::dom::Document* aDoc, nsIURI* aURI,
639                                    nsISupports* aContainer,
640                                    nsIChannel* aChannel) {
641   return mExecutor->Init(aDoc, aURI, aContainer, aChannel);
642 }
643 
StartTokenizer(bool aScriptingEnabled)644 void nsHtml5Parser::StartTokenizer(bool aScriptingEnabled) {
645   bool isSrcdoc = false;
646   nsCOMPtr<nsIChannel> channel;
647   nsresult rv = GetChannel(getter_AddRefs(channel));
648   if (NS_SUCCEEDED(rv)) {
649     isSrcdoc = NS_IsSrcdocChannel(channel);
650   }
651   mTreeBuilder->setIsSrcdocDocument(isSrcdoc);
652 
653   mTreeBuilder->SetPreventScriptExecution(!aScriptingEnabled);
654   mTreeBuilder->setScriptingEnabled(aScriptingEnabled);
655   mTokenizer->start();
656 }
657 
InitializeDocWriteParserState(nsAHtml5TreeBuilderState * aState,int32_t aLine)658 void nsHtml5Parser::InitializeDocWriteParserState(
659     nsAHtml5TreeBuilderState* aState, int32_t aLine) {
660   mTokenizer->resetToDataState();
661   mTokenizer->setLineNumber(aLine);
662   mTreeBuilder->loadState(aState);
663   mLastWasCR = false;
664   mReturnToStreamParserPermitted = true;
665 }
666 
ContinueAfterFailedCharsetSwitch()667 void nsHtml5Parser::ContinueAfterFailedCharsetSwitch() {
668   MOZ_ASSERT(
669       GetStreamParser(),
670       "Tried to continue after failed charset switch without a stream parser");
671   GetStreamParser()->ContinueAfterFailedCharsetSwitch();
672 }
673