1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #include "nsHtml5Highlighter.h"
6 #include "nsDebug.h"
7 #include "nsHtml5AttributeName.h"
8 #include "nsHtml5Tokenizer.h"
9 #include "nsHtml5ViewSourceUtils.h"
10 #include "nsString.h"
11 #include "nsThreadUtils.h"
12 
13 #include "mozilla/Attributes.h"
14 #include "mozilla/Preferences.h"
15 
16 using namespace mozilla;
17 
// The old code had a limit of 16 tokens. 1300 is a number picked by measuring
// the size of 16 tokens on cnn.com.
20 #define NS_HTML5_HIGHLIGHTER_PRE_BREAK_THRESHOLD 1300
21 
22 char16_t nsHtml5Highlighter::sComment[] = {'c', 'o', 'm', 'm',
23                                            'e', 'n', 't', 0};
24 
25 char16_t nsHtml5Highlighter::sCdata[] = {'c', 'd', 'a', 't', 'a', 0};
26 
27 char16_t nsHtml5Highlighter::sEntity[] = {'e', 'n', 't', 'i', 't', 'y', 0};
28 
29 char16_t nsHtml5Highlighter::sEndTag[] = {'e', 'n', 'd', '-', 't', 'a', 'g', 0};
30 
31 char16_t nsHtml5Highlighter::sStartTag[] = {'s', 't', 'a', 'r', 't',
32                                             '-', 't', 'a', 'g', 0};
33 
34 char16_t nsHtml5Highlighter::sAttributeName[] = {
35     'a', 't', 't', 'r', 'i', 'b', 'u', 't', 'e', '-', 'n', 'a', 'm', 'e', 0};
36 
37 char16_t nsHtml5Highlighter::sAttributeValue[] = {'a', 't', 't', 'r', 'i', 'b',
38                                                   'u', 't', 'e', '-', 'v', 'a',
39                                                   'l', 'u', 'e', 0};
40 
41 char16_t nsHtml5Highlighter::sDoctype[] = {'d', 'o', 'c', 't',
42                                            'y', 'p', 'e', 0};
43 
44 char16_t nsHtml5Highlighter::sPi[] = {'p', 'i', 0};
45 
// Creates a highlighter that starts out in the tokenizer's DATA state, with
// no input buffer attached, no inline elements open and a fresh, unused chunk
// of content handles. Queued tree ops are later moved to aOpSink by
// FlushOps(). Asserts that construction happens on the main thread.
nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink)
    : mState(nsHtml5Tokenizer::DATA),
      // INT32_MAX is never less than mPos, so FlushChars() has nothing to
      // flush until SetBuffer() resets this to the buffer's start.
      mCStart(INT32_MAX),
      mPos(0),
      mLineNumber(1),
      mInlinesOpen(0),
      mInCharacters(false),
      mBuffer(nullptr),
      mOpSink(aOpSink),
      mCurrentRun(nullptr),
      mAmpersand(nullptr),
      mSlash(nullptr),
      mHandles(
          MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH)),
      mHandlesUsed(0),
      mSeenBase(false) {
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
}
64 
// Asserts that the highlighter is destroyed on the main thread. No explicit
// cleanup is performed in the body.
nsHtml5Highlighter::~nsHtml5Highlighter() {
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
}
68 
Start(const nsAutoString & aTitle)69 void nsHtml5Highlighter::Start(const nsAutoString& aTitle) {
70   // Doctype
71   opAppendDoctypeToDocument operation(nsGkAtoms::html, EmptyString(),
72                                       EmptyString());
73   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
74 
75   mOpQueue.AppendElement()->Init(mozilla::AsVariant(STANDARDS_MODE));
76 
77   // <html> uses NS_NewHTMLSharedElement creator
78   nsIContent** root =
79       CreateElement(nsGkAtoms::html, nullptr, nullptr, NS_NewHTMLSharedElement);
80   opAppendToDocument appendOp(root);
81   mOpQueue.AppendElement()->Init(mozilla::AsVariant(appendOp));
82   mStack.AppendElement(root);
83 
84   // <head> uses NS_NewHTMLSharedElement creator
85   Push(nsGkAtoms::head, nullptr, NS_NewHTMLSharedElement);
86 
87   Push(nsGkAtoms::meta, nsHtml5ViewSourceUtils::NewMetaViewportAttributes(),
88        NS_NewHTMLMetaElement);
89   Pop();  // meta
90 
91   Push(nsGkAtoms::title, nullptr, NS_NewHTMLTitleElement);
92   // XUL will add the "Source of: " prefix.
93   uint32_t length = aTitle.Length();
94   if (length > INT32_MAX) {
95     length = INT32_MAX;
96   }
97   AppendCharacters(aTitle.BeginReading(), 0, (int32_t)length);
98   Pop();  // title
99 
100   Push(nsGkAtoms::link, nsHtml5ViewSourceUtils::NewLinkAttributes(),
101        NS_NewHTMLLinkElement);
102 
103   opUpdateStyleSheet updateOp(CurrentNode());
104   mOpQueue.AppendElement()->Init(mozilla::AsVariant(updateOp));
105 
106   Pop();  // link
107 
108   Pop();  // head
109 
110   Push(nsGkAtoms::body, nsHtml5ViewSourceUtils::NewBodyAttributes(),
111        NS_NewHTMLBodyElement);
112 
113   nsHtml5HtmlAttributes* preAttrs = new nsHtml5HtmlAttributes(0);
114   nsHtml5String preId = nsHtml5Portability::newStringFromLiteral("line1");
115   preAttrs->addAttribute(nsHtml5AttributeName::ATTR_ID, preId, -1);
116   Push(nsGkAtoms::pre, preAttrs, NS_NewHTMLPreElement);
117 
118   StartCharacters();
119 
120   mOpQueue.AppendElement()->Init(mozilla::AsVariant(opStartLayout()));
121 }
122 
// Records a tokenizer state change and queues the highlighting markup that the
// change implies.
//
// The outer switch is keyed on the state being left (mState); the inner
// switches and ifs are keyed on the state being entered (aState). Depending on
// the pair, spans or links are opened (StartSpan / StartA), closed
// (EndSpanOrA / FinishTag), or a class is added to the current node
// (AddClass). State pairs that are not listed produce no markup.
//
// aState:     the tokenizer state being entered; stored in mState.
// aReconsume: only consulted when leaving CHARACTER_REFERENCE_TAIL, where it
//             decides whether FlushCurrent() runs before the span is closed.
//             NOTE(review): presumably true when the tokenizer will reconsume
//             the current character -- confirm against the tokenizer.
// aPos:       stored in mPos before any characters are flushed.
//
// Returns aState unchanged.
int32_t nsHtml5Highlighter::Transition(int32_t aState, bool aReconsume,
                                       int32_t aPos) {
  mPos = aPos;
  switch (mState) {
    case nsHtml5Tokenizer::SCRIPT_DATA:
    case nsHtml5Tokenizer::RAWTEXT:
    case nsHtml5Tokenizer::RCDATA:
    case nsHtml5Tokenizer::DATA:
      // We can transition on < and on &. Either way, we don't yet know the
      // role of the token, so open a span without class.
      if (aState == nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE) {
        StartSpan();
        // Start another span for highlighting the ampersand
        StartSpan();
        // Remembered so AddErrorToCurrentAmpersand() can target it.
        mAmpersand = CurrentNode();
      } else {
        EndCharactersAndStartMarkupRun();
      }
      break;
    case nsHtml5Tokenizer::TAG_OPEN:
      switch (aState) {
        case nsHtml5Tokenizer::TAG_NAME:
          StartSpan(sStartTag);
          break;
        case nsHtml5Tokenizer::DATA:
          FinishTag();  // DATA
          break;
        case nsHtml5Tokenizer::PROCESSING_INSTRUCTION:
          AddClass(sPi);
          break;
      }
      break;
    case nsHtml5Tokenizer::TAG_NAME:
      switch (aState) {
        case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
          EndSpanOrA();  // nsHtml5Tokenizer::TAG_NAME
          break;
        case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
          EndSpanOrA();  // nsHtml5Tokenizer::TAG_NAME
          StartSpan();   // for highlighting the slash
          // Remembered so AddErrorToCurrentSlash() can target it.
          mSlash = CurrentNode();
          break;
        default:
          FinishTag();
          break;
      }
      break;
    case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
      switch (aState) {
        case nsHtml5Tokenizer::ATTRIBUTE_NAME:
          StartSpan(sAttributeName);
          break;
        case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
          StartSpan();  // for highlighting the slash
          mSlash = CurrentNode();
          break;
        default:
          FinishTag();
          break;
      }
      break;
    case nsHtml5Tokenizer::ATTRIBUTE_NAME:
      switch (aState) {
        case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
        case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
          EndSpanOrA();  // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME
          break;
        case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
          EndSpanOrA();  // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME
          StartSpan();   // for highlighting the slash
          mSlash = CurrentNode();
          break;
        default:
          FinishTag();
          break;
      }
      break;
    case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
      switch (aState) {
        case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
        case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
          // Flush through the current character before the <a> is opened;
          // the unquoted case below opens the <a> without this extra flush.
          FlushCurrent();
          StartA();
          break;
        case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
          StartA();
          break;
        default:
          FinishTag();
          break;
      }
      break;
    case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
    case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
      switch (aState) {
        case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
          EndSpanOrA();
          break;
        case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
          StartSpan();
          StartSpan();  // for ampersand itself
          mAmpersand = CurrentNode();
          break;
        default:
          MOZ_ASSERT_UNREACHABLE("Impossible transition.");
          break;
      }
      break;
    case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
      switch (aState) {
        case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
          break;
        case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
          StartSpan();  // for highlighting the slash
          mSlash = CurrentNode();
          break;
        default:
          FinishTag();
          break;
      }
      break;
    case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
      EndSpanOrA();  // end the slash highlight
      switch (aState) {
        case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
          break;
        default:
          FinishTag();
          break;
      }
      break;
    case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
      switch (aState) {
        case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
          EndSpanOrA();
          break;
        case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
          StartSpan();
          StartSpan();  // for ampersand itself
          mAmpersand = CurrentNode();
          break;
        default:
          FinishTag();
          break;
      }
      break;
    case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
      switch (aState) {
        case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
          StartSpan();  // for highlighting the slash
          mSlash = CurrentNode();
          break;
        case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
          break;
        case nsHtml5Tokenizer::ATTRIBUTE_NAME:
          StartSpan(sAttributeName);
          break;
        default:
          FinishTag();
          break;
      }
      break;
    // most comment states are omitted, because they don't matter to
    // highlighting
    case nsHtml5Tokenizer::COMMENT_START:
    case nsHtml5Tokenizer::COMMENT_END:
    case nsHtml5Tokenizer::COMMENT_END_BANG:
    case nsHtml5Tokenizer::COMMENT_START_DASH:
    case nsHtml5Tokenizer::BOGUS_COMMENT:
    case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
      if (aState == nsHtml5Tokenizer::DATA) {
        AddClass(sComment);
        FinishTag();
      }
      break;
    // most cdata states are omitted, because they don't matter to
    // highlighting
    case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
      if (aState == nsHtml5Tokenizer::DATA) {
        AddClass(sCdata);
        FinishTag();
      }
      break;
    case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
      EndSpanOrA();  // the span for the ampersand
      switch (aState) {
        case nsHtml5Tokenizer::CONSUME_NCR:
        case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
          break;
        default:
          // not actually a character reference
          EndSpanOrA();
          break;
      }
      break;
    case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
      if (aState == nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL) {
        break;
      }
      // not actually a character reference
      EndSpanOrA();
      break;
    case nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL:
      if (!aReconsume) {
        FlushCurrent();
      }
      EndSpanOrA();
      break;
    case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
    case nsHtml5Tokenizer::HEX_NCR_LOOP:
      switch (aState) {
        case nsHtml5Tokenizer::HANDLE_NCR_VALUE:
          AddClass(sEntity);
          FlushCurrent();
          break;
        case nsHtml5Tokenizer::HANDLE_NCR_VALUE_RECONSUME:
          AddClass(sEntity);
          break;
      }
      // The span is closed for every target state, including those that did
      // not get the entity class above.
      EndSpanOrA();
      break;
    case nsHtml5Tokenizer::CLOSE_TAG_OPEN:
      switch (aState) {
        case nsHtml5Tokenizer::DATA:
          FinishTag();
          break;
        case nsHtml5Tokenizer::TAG_NAME:
          StartSpan(sEndTag);
          break;
      }
      break;
    case nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN:
      if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
        FlushCurrent();
        StartSpan();  // don't know if it is "end-tag" yet :-(
        break;
      }
      // Any other target: close the current span and resume a character run.
      EndSpanOrA();
      StartCharacters();
      break;
    case nsHtml5Tokenizer::NON_DATA_END_TAG_NAME:
      switch (aState) {
        case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
          AddClass(sEndTag);
          EndSpanOrA();
          break;
        case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
          AddClass(sEndTag);
          EndSpanOrA();
          StartSpan();  // for highlighting the slash
          mSlash = CurrentNode();
          break;
        case nsHtml5Tokenizer::DATA:  // yes, as a result of emitting the token
          AddClass(sEndTag);
          FinishTag();
          break;
        default:
          FinishTag();
          break;
      }
      break;
    case nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN:
    case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
      if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
        FlushCurrent();
        StartSpan();  // don't know if it is "end-tag" yet :-(
        break;
      }
      FinishTag();
      break;
    case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH:
    case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED:
    case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH:
      if (aState == nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN) {
        EndCharactersAndStartMarkupRun();
      }
      break;
    // Lots of double escape states omitted, because they don't highlight.
    // Likewise, only doctype states that can emit the doctype are of
    // interest. Otherwise, the transition out of bogus comment deals.
    case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
    case nsHtml5Tokenizer::DOCTYPE_NAME:
    case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
    case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
    case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
    case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
    case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
    case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
    case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
    case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
    case nsHtml5Tokenizer::BOGUS_DOCTYPE:
    case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
    case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
    case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
    case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
      if (aState == nsHtml5Tokenizer::DATA) {
        AddClass(sDoctype);
        FinishTag();
      }
      break;
    case nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK:
      if (aState == nsHtml5Tokenizer::DATA) {
        FinishTag();
      }
      break;
    default:
      break;
  }
  mState = aState;
  return aState;
}
434 
End()435 void nsHtml5Highlighter::End() {
436   switch (mState) {
437     case nsHtml5Tokenizer::COMMENT_END:
438     case nsHtml5Tokenizer::COMMENT_END_BANG:
439     case nsHtml5Tokenizer::COMMENT_START_DASH:
440     case nsHtml5Tokenizer::BOGUS_COMMENT:
441     case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
442       AddClass(sComment);
443       break;
444     case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
445       AddClass(sCdata);
446       break;
447     case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
448     case nsHtml5Tokenizer::HEX_NCR_LOOP:
449       // XXX need tokenizer help here
450       break;
451     case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
452     case nsHtml5Tokenizer::DOCTYPE_NAME:
453     case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
454     case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
455     case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
456     case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
457     case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
458     case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
459     case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
460     case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
461     case nsHtml5Tokenizer::BOGUS_DOCTYPE:
462     case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
463     case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
464     case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
465     case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
466       AddClass(sDoctype);
467       break;
468     default:
469       break;
470   }
471   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
472   NS_ASSERTION(treeOp, "Tree op allocation failed.");
473   treeOp->Init(mozilla::AsVariant(opStreamEnded()));
474   FlushOps();
475 }
476 
SetBuffer(nsHtml5UTF16Buffer * aBuffer)477 void nsHtml5Highlighter::SetBuffer(nsHtml5UTF16Buffer* aBuffer) {
478   MOZ_ASSERT(!mBuffer, "Old buffer still here!");
479   mBuffer = aBuffer;
480   mCStart = aBuffer->getStart();
481 }
482 
// Detaches the current input buffer. Characters from the start of the
// unflushed range up to (but not including) aPos are flushed first, because
// FlushChars() needs mBuffer to still be set.
void nsHtml5Highlighter::DropBuffer(int32_t aPos) {
  MOZ_ASSERT(mBuffer, "No buffer to drop!");
  mPos = aPos;
  FlushChars();
  mBuffer = nullptr;
}
489 
// Opens a <span> without a class. Pending characters are flushed into the
// current node first so they land before the new span, and the count of open
// inline elements is incremented.
void nsHtml5Highlighter::StartSpan() {
  FlushChars();
  Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
  ++mInlinesOpen;
}
495 
// Opens a <span> exactly like StartSpan() and then adds aClass to it.
// aClass is a NUL-terminated UTF-16 class name such as sStartTag.
void nsHtml5Highlighter::StartSpan(const char16_t* aClass) {
  StartSpan();
  AddClass(aClass);
}
500 
// Closes the innermost open element (a <span> or an <a>): pending characters
// are flushed into it, it is popped off the element stack, and the count of
// open inline elements is decremented.
void nsHtml5Highlighter::EndSpanOrA() {
  FlushChars();
  Pop();
  --mInlinesOpen;
}
506 
// Begins a run of plain characters: flushes pending characters, opens a
// class-less <span>, remembers it as the current run (the target of
// AddErrorToCurrentRun()) and sets mInCharacters. Unlike StartSpan(), this
// does not touch mInlinesOpen.
void nsHtml5Highlighter::StartCharacters() {
  MOZ_ASSERT(!mInCharacters, "Already in characters!");
  FlushChars();
  Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
  mCurrentRun = CurrentNode();
  mInCharacters = true;
}
514 
// Ends the current character run and immediately opens a markup run: the
// character-run span is flushed and popped, then a new class-less span is
// opened through StartSpan() and becomes the current run.
void nsHtml5Highlighter::EndCharactersAndStartMarkupRun() {
  MOZ_ASSERT(mInCharacters, "Not in characters!");
  FlushChars();
  Pop();
  mInCharacters = false;
  // Now start markup run
  StartSpan();
  mCurrentRun = CurrentNode();
}
524 
// Opens an <a> element carrying the attribute-value class. Pending characters
// are flushed first, and the <a> is counted as an open inline element so that
// EndSpanOrA() / FinishTag() close it like a span.
void nsHtml5Highlighter::StartA() {
  FlushChars();
  Push(nsGkAtoms::a, nullptr, NS_NewHTMLAnchorElement);
  AddClass(sAttributeValue);
  ++mInlinesOpen;
}
531 
// Closes the markup run that is currently open and resumes a character run.
// Inline elements nested inside the run are closed first; the character at
// the current position is then flushed into the run's own span before that
// span is closed as well.
void nsHtml5Highlighter::FinishTag() {
  // Leave exactly one inline element (the markup run itself) open.
  while (mInlinesOpen > 1) {
    EndSpanOrA();
  }
  FlushCurrent();  // >
  EndSpanOrA();    // DATA
  NS_ASSERTION(!mInlinesOpen, "mInlinesOpen got out of sync!");
  StartCharacters();
}
541 
FlushChars()542 void nsHtml5Highlighter::FlushChars() {
543   if (mCStart < mPos) {
544     char16_t* buf = mBuffer->getBuffer();
545     int32_t i = mCStart;
546     while (i < mPos) {
547       char16_t c = buf[i];
548       switch (c) {
549         case '\r':
550           // The input this code sees has been normalized so that there are
551           // CR breaks and LF breaks but no CRLF breaks. Overwrite CR with LF
552           // to show consistent LF line breaks to layout. It is OK to mutate
553           // the input data, because there are no reparses in the View Source
554           // case, so we won't need the original data in the buffer anymore.
555           buf[i] = '\n';
556           [[fallthrough]];
557         case '\n': {
558           ++i;
559           if (mCStart < i) {
560             int32_t len = i - mCStart;
561             AppendCharacters(buf, mCStart, len);
562             mCStart = i;
563           }
564           ++mLineNumber;
565           Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
566           nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
567           NS_ASSERTION(treeOp, "Tree op allocation failed.");
568           opAddLineNumberId operation(CurrentNode(), mLineNumber);
569           treeOp->Init(mozilla::AsVariant(operation));
570           Pop();
571           break;
572         }
573         default:
574           ++i;
575           break;
576       }
577     }
578     if (mCStart < mPos) {
579       int32_t len = mPos - mCStart;
580       AppendCharacters(buf, mCStart, len);
581       mCStart = mPos;
582     }
583   }
584 }
585 
// Advances the current position by one so that the character at the old
// position is included, then flushes all pending characters.
void nsHtml5Highlighter::FlushCurrent() {
  mPos++;
  FlushChars();
}
590 
FlushOps()591 bool nsHtml5Highlighter::FlushOps() {
592   bool hasOps = !mOpQueue.IsEmpty();
593   if (hasOps) {
594     mOpSink->MoveOpsFrom(mOpQueue);
595   }
596   return hasOps;
597 }
598 
MaybeLinkifyAttributeValue(nsHtml5AttributeName * aName,nsHtml5String aValue)599 void nsHtml5Highlighter::MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
600                                                     nsHtml5String aValue) {
601   if (!(nsHtml5AttributeName::ATTR_HREF == aName ||
602         nsHtml5AttributeName::ATTR_SRC == aName ||
603         nsHtml5AttributeName::ATTR_ACTION == aName ||
604         nsHtml5AttributeName::ATTR_CITE == aName ||
605         nsHtml5AttributeName::ATTR_BACKGROUND == aName ||
606         nsHtml5AttributeName::ATTR_LONGDESC == aName ||
607         nsHtml5AttributeName::ATTR_XLINK_HREF == aName ||
608         nsHtml5AttributeName::ATTR_DEFINITIONURL == aName)) {
609     return;
610   }
611   AddViewSourceHref(aValue);
612 }
613 
// Adds the entity class to the current node.
// NOTE(review): presumably invoked by the tokenizer once a named character
// reference has been fully matched -- the caller is not visible in this file.
void nsHtml5Highlighter::CompletedNamedCharacterReference() {
  AddClass(sEntity);
}
617 
AllocateContentHandle()618 nsIContent** nsHtml5Highlighter::AllocateContentHandle() {
619   if (mHandlesUsed == NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH) {
620     mOldHandles.AppendElement(std::move(mHandles));
621     mHandles =
622         MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH);
623     mHandlesUsed = 0;
624   }
625 #ifdef DEBUG
626   mHandles[mHandlesUsed] = reinterpret_cast<nsIContent*>(uintptr_t(0xC0DEDBAD));
627 #endif
628   return &mHandles[mHandlesUsed++];
629 }
630 
CreateElement(nsAtom * aName,nsHtml5HtmlAttributes * aAttributes,nsIContent ** aIntendedParent,mozilla::dom::HTMLContentCreatorFunction aCreator)631 nsIContent** nsHtml5Highlighter::CreateElement(
632     nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
633     nsIContent** aIntendedParent,
634     mozilla::dom::HTMLContentCreatorFunction aCreator) {
635   MOZ_ASSERT(aName, "Got null name.");
636   nsIContent** content = AllocateContentHandle();
637   opCreateHTMLElement opeation(content, aName, aAttributes, aCreator,
638                                aIntendedParent,
639                                mozilla::dom::FROM_PARSER_NETWORK);
640   mOpQueue.AppendElement()->Init(mozilla::AsVariant(opeation));
641   return content;
642 }
643 
// Returns the handle on top of the element stack, i.e. the innermost element
// that is currently open. The stack must not be empty.
nsIContent** nsHtml5Highlighter::CurrentNode() {
  MOZ_ASSERT(mStack.Length() >= 1, "Must have something on stack.");
  return mStack[mStack.Length() - 1];
}
648 
Push(nsAtom * aName,nsHtml5HtmlAttributes * aAttributes,mozilla::dom::HTMLContentCreatorFunction aCreator)649 void nsHtml5Highlighter::Push(
650     nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
651     mozilla::dom::HTMLContentCreatorFunction aCreator) {
652   MOZ_ASSERT(mStack.Length() >= 1, "Pushing without root.");
653   nsIContent** elt = CreateElement(aName, aAttributes, CurrentNode(),
654                                    aCreator);  // Don't inline below!
655   opAppend operation(elt, CurrentNode());
656   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
657   mStack.AppendElement(elt);
658 }
659 
// Removes the innermost open element from the element stack. No tree op is
// queued. The assertion requires at least two entries, so the bottom entry
// (the root pushed in Start()) is never popped.
void nsHtml5Highlighter::Pop() {
  MOZ_ASSERT(mStack.Length() >= 2, "Popping when stack too short.");
  mStack.RemoveLastElement();
}
664 
AppendCharacters(const char16_t * aBuffer,int32_t aStart,int32_t aLength)665 void nsHtml5Highlighter::AppendCharacters(const char16_t* aBuffer,
666                                           int32_t aStart, int32_t aLength) {
667   MOZ_ASSERT(aBuffer, "Null buffer");
668 
669   char16_t* bufferCopy = new char16_t[aLength];
670   memcpy(bufferCopy, aBuffer + aStart, aLength * sizeof(char16_t));
671 
672   opAppendText operation(CurrentNode(), bufferCopy, aLength);
673   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
674 }
675 
AddClass(const char16_t * aClass)676 void nsHtml5Highlighter::AddClass(const char16_t* aClass) {
677   opAddClass operation(CurrentNode(), (char16_t*)aClass);
678   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
679 }
680 
AddViewSourceHref(nsHtml5String aValue)681 void nsHtml5Highlighter::AddViewSourceHref(nsHtml5String aValue) {
682   char16_t* bufferCopy = new char16_t[aValue.Length() + 1];
683   aValue.CopyToBuffer(bufferCopy);
684   bufferCopy[aValue.Length()] = 0;
685 
686   opAddViewSourceHref operation(CurrentNode(), bufferCopy, aValue.Length());
687   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
688 }
689 
AddBase(nsHtml5String aValue)690 void nsHtml5Highlighter::AddBase(nsHtml5String aValue) {
691   if (mSeenBase) {
692     return;
693   }
694   mSeenBase = true;
695   char16_t* bufferCopy = new char16_t[aValue.Length() + 1];
696   aValue.CopyToBuffer(bufferCopy);
697   bufferCopy[aValue.Length()] = 0;
698 
699   opAddViewSourceBase operation(bufferCopy, aValue.Length());
700   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
701 }
702 
AddErrorToCurrentNode(const char * aMsgId)703 void nsHtml5Highlighter::AddErrorToCurrentNode(const char* aMsgId) {
704   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
705   NS_ASSERTION(treeOp, "Tree op allocation failed.");
706   opAddErrorType operation(CurrentNode(), (char*)aMsgId);
707   treeOp->Init(mozilla::AsVariant(operation));
708 }
709 
AddErrorToCurrentRun(const char * aMsgId)710 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId) {
711   MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
712   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
713   NS_ASSERTION(treeOp, "Tree op allocation failed.");
714   opAddErrorType operation(mCurrentRun, (char*)aMsgId);
715   treeOp->Init(mozilla::AsVariant(operation));
716 }
717 
AddErrorToCurrentRun(const char * aMsgId,nsAtom * aName)718 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId,
719                                               nsAtom* aName) {
720   MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
721   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
722   NS_ASSERTION(treeOp, "Tree op allocation failed.");
723   opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName);
724   treeOp->Init(mozilla::AsVariant(operation));
725 }
726 
AddErrorToCurrentRun(const char * aMsgId,nsAtom * aName,nsAtom * aOther)727 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName,
728                                               nsAtom* aOther) {
729   MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
730   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
731   NS_ASSERTION(treeOp, "Tree op allocation failed.");
732   opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName, aOther);
733   treeOp->Init(mozilla::AsVariant(operation));
734 }
735 
AddErrorToCurrentAmpersand(const char * aMsgId)736 void nsHtml5Highlighter::AddErrorToCurrentAmpersand(const char* aMsgId) {
737   MOZ_ASSERT(mAmpersand, "Adding error to ampersand without one!");
738   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
739   NS_ASSERTION(treeOp, "Tree op allocation failed.");
740   opAddErrorType operation(mAmpersand, (char*)aMsgId);
741   treeOp->Init(mozilla::AsVariant(operation));
742 }
743 
AddErrorToCurrentSlash(const char * aMsgId)744 void nsHtml5Highlighter::AddErrorToCurrentSlash(const char* aMsgId) {
745   MOZ_ASSERT(mSlash, "Adding error to slash without one!");
746   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
747   NS_ASSERTION(treeOp, "Tree op allocation failed.");
748   opAddErrorType operation(mSlash, (char*)aMsgId);
749   treeOp->Init(mozilla::AsVariant(operation));
750 }
751