1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #include "nsHtml5Highlighter.h"
6 #include "nsDebug.h"
7 #include "nsHtml5AttributeName.h"
8 #include "nsHtml5Tokenizer.h"
9 #include "nsHtml5ViewSourceUtils.h"
10 #include "nsString.h"
11 #include "nsThreadUtils.h"
12 
13 #include "mozilla/Attributes.h"
14 #include "mozilla/Preferences.h"
15 
16 using namespace mozilla;
17 
// The old code had a limit of 16 tokens. 1300 is a number picked by measuring
// the size of 16 tokens on cnn.com.
20 #define NS_HTML5_HIGHLIGHTER_PRE_BREAK_THRESHOLD 1300
21 
22 char16_t nsHtml5Highlighter::sComment[] = {'c', 'o', 'm', 'm',
23                                            'e', 'n', 't', 0};
24 
25 char16_t nsHtml5Highlighter::sCdata[] = {'c', 'd', 'a', 't', 'a', 0};
26 
27 char16_t nsHtml5Highlighter::sEntity[] = {'e', 'n', 't', 'i', 't', 'y', 0};
28 
29 char16_t nsHtml5Highlighter::sEndTag[] = {'e', 'n', 'd', '-', 't', 'a', 'g', 0};
30 
31 char16_t nsHtml5Highlighter::sStartTag[] = {'s', 't', 'a', 'r', 't',
32                                             '-', 't', 'a', 'g', 0};
33 
34 char16_t nsHtml5Highlighter::sAttributeName[] = {
35     'a', 't', 't', 'r', 'i', 'b', 'u', 't', 'e', '-', 'n', 'a', 'm', 'e', 0};
36 
37 char16_t nsHtml5Highlighter::sAttributeValue[] = {'a', 't', 't', 'r', 'i', 'b',
38                                                   'u', 't', 'e', '-', 'v', 'a',
39                                                   'l', 'u', 'e', 0};
40 
41 char16_t nsHtml5Highlighter::sDoctype[] = {'d', 'o', 'c', 't',
42                                            'y', 'p', 'e', 0};
43 
44 char16_t nsHtml5Highlighter::sPi[] = {'p', 'i', 0};
45 
nsHtml5Highlighter(nsAHtml5TreeOpSink * aOpSink)46 nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink)
47     : mState(nsHtml5Tokenizer::DATA),
48       mCStart(INT32_MAX),
49       mPos(0),
50       mLineNumber(1),
51       mInlinesOpen(0),
52       mInCharacters(false),
53       mBuffer(nullptr),
54       mOpSink(aOpSink),
55       mCurrentRun(nullptr),
56       mAmpersand(nullptr),
57       mSlash(nullptr),
58       mHandles(
59           MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH)),
60       mHandlesUsed(0),
61       mSeenBase(false) {
62   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
63 }
64 
~nsHtml5Highlighter()65 nsHtml5Highlighter::~nsHtml5Highlighter() {
66   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
67 }
68 
Start(const nsAutoString & aTitle)69 void nsHtml5Highlighter::Start(const nsAutoString& aTitle) {
70   // Doctype
71   opAppendDoctypeToDocument operation(nsGkAtoms::html, u""_ns, u""_ns);
72   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
73 
74   mOpQueue.AppendElement()->Init(mozilla::AsVariant(STANDARDS_MODE));
75 
76   // <html> uses NS_NewHTMLSharedElement creator
77   nsIContent** root =
78       CreateElement(nsGkAtoms::html, nullptr, nullptr, NS_NewHTMLSharedElement);
79   opAppendToDocument appendOp(root);
80   mOpQueue.AppendElement()->Init(mozilla::AsVariant(appendOp));
81   mStack.AppendElement(root);
82 
83   // <head> uses NS_NewHTMLSharedElement creator
84   Push(nsGkAtoms::head, nullptr, NS_NewHTMLSharedElement);
85 
86   Push(nsGkAtoms::meta, nsHtml5ViewSourceUtils::NewMetaViewportAttributes(),
87        NS_NewHTMLMetaElement);
88   Pop();  // meta
89 
90   Push(nsGkAtoms::title, nullptr, NS_NewHTMLTitleElement);
91   // XUL will add the "Source of: " prefix.
92   uint32_t length = aTitle.Length();
93   if (length > INT32_MAX) {
94     length = INT32_MAX;
95   }
96   AppendCharacters(aTitle.BeginReading(), 0, (int32_t)length);
97   Pop();  // title
98 
99   Push(nsGkAtoms::link, nsHtml5ViewSourceUtils::NewLinkAttributes(),
100        NS_NewHTMLLinkElement);
101 
102   opUpdateStyleSheet updateOp(CurrentNode());
103   mOpQueue.AppendElement()->Init(mozilla::AsVariant(updateOp));
104 
105   Pop();  // link
106 
107   Pop();  // head
108 
109   Push(nsGkAtoms::body, nsHtml5ViewSourceUtils::NewBodyAttributes(),
110        NS_NewHTMLBodyElement);
111 
112   nsHtml5HtmlAttributes* preAttrs = new nsHtml5HtmlAttributes(0);
113   nsHtml5String preId = nsHtml5Portability::newStringFromLiteral("line1");
114   preAttrs->addAttribute(nsHtml5AttributeName::ATTR_ID, preId, -1);
115   Push(nsGkAtoms::pre, preAttrs, NS_NewHTMLPreElement);
116 
117   StartCharacters();
118 
119   mOpQueue.AppendElement()->Init(mozilla::AsVariant(opStartLayout()));
120 }
121 
Transition(int32_t aState,bool aReconsume,int32_t aPos)122 int32_t nsHtml5Highlighter::Transition(int32_t aState, bool aReconsume,
123                                        int32_t aPos) {
124   mPos = aPos;
125   switch (mState) {
126     case nsHtml5Tokenizer::SCRIPT_DATA:
127     case nsHtml5Tokenizer::RAWTEXT:
128     case nsHtml5Tokenizer::RCDATA:
129     case nsHtml5Tokenizer::DATA:
130       // We can transition on < and on &. Either way, we don't yet know the
131       // role of the token, so open a span without class.
132       if (aState == nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE) {
133         StartSpan();
134         // Start another span for highlighting the ampersand
135         StartSpan();
136         mAmpersand = CurrentNode();
137       } else {
138         EndCharactersAndStartMarkupRun();
139       }
140       break;
141     case nsHtml5Tokenizer::TAG_OPEN:
142       switch (aState) {
143         case nsHtml5Tokenizer::TAG_NAME:
144           StartSpan(sStartTag);
145           break;
146         case nsHtml5Tokenizer::DATA:
147           FinishTag();  // DATA
148           break;
149         case nsHtml5Tokenizer::PROCESSING_INSTRUCTION:
150           AddClass(sPi);
151           break;
152       }
153       break;
154     case nsHtml5Tokenizer::TAG_NAME:
155       switch (aState) {
156         case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
157           EndSpanOrA();  // nsHtml5Tokenizer::TAG_NAME
158           break;
159         case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
160           EndSpanOrA();  // nsHtml5Tokenizer::TAG_NAME
161           StartSpan();   // for highlighting the slash
162           mSlash = CurrentNode();
163           break;
164         default:
165           FinishTag();
166           break;
167       }
168       break;
169     case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
170       switch (aState) {
171         case nsHtml5Tokenizer::ATTRIBUTE_NAME:
172           StartSpan(sAttributeName);
173           break;
174         case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
175           StartSpan();  // for highlighting the slash
176           mSlash = CurrentNode();
177           break;
178         default:
179           FinishTag();
180           break;
181       }
182       break;
183     case nsHtml5Tokenizer::ATTRIBUTE_NAME:
184       switch (aState) {
185         case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
186         case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
187           EndSpanOrA();  // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME
188           break;
189         case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
190           EndSpanOrA();  // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME
191           StartSpan();   // for highlighting the slash
192           mSlash = CurrentNode();
193           break;
194         default:
195           FinishTag();
196           break;
197       }
198       break;
199     case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
200       switch (aState) {
201         case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
202         case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
203           FlushCurrent();
204           StartA();
205           break;
206         case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
207           StartA();
208           break;
209         default:
210           FinishTag();
211           break;
212       }
213       break;
214     case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
215     case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
216       switch (aState) {
217         case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
218           EndSpanOrA();
219           break;
220         case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
221           StartSpan();
222           StartSpan();  // for ampersand itself
223           mAmpersand = CurrentNode();
224           break;
225         default:
226           MOZ_ASSERT_UNREACHABLE("Impossible transition.");
227           break;
228       }
229       break;
230     case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
231       switch (aState) {
232         case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
233           break;
234         case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
235           StartSpan();  // for highlighting the slash
236           mSlash = CurrentNode();
237           break;
238         default:
239           FinishTag();
240           break;
241       }
242       break;
243     case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
244       EndSpanOrA();  // end the slash highlight
245       switch (aState) {
246         case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
247           break;
248         default:
249           FinishTag();
250           break;
251       }
252       break;
253     case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
254       switch (aState) {
255         case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
256           EndSpanOrA();
257           break;
258         case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
259           StartSpan();
260           StartSpan();  // for ampersand itself
261           mAmpersand = CurrentNode();
262           break;
263         default:
264           FinishTag();
265           break;
266       }
267       break;
268     case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
269       switch (aState) {
270         case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
271           StartSpan();  // for highlighting the slash
272           mSlash = CurrentNode();
273           break;
274         case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
275           break;
276         case nsHtml5Tokenizer::ATTRIBUTE_NAME:
277           StartSpan(sAttributeName);
278           break;
279         default:
280           FinishTag();
281           break;
282       }
283       break;
284     // most comment states are omitted, because they don't matter to
285     // highlighting
286     case nsHtml5Tokenizer::COMMENT_START:
287     case nsHtml5Tokenizer::COMMENT_END:
288     case nsHtml5Tokenizer::COMMENT_END_BANG:
289     case nsHtml5Tokenizer::COMMENT_START_DASH:
290     case nsHtml5Tokenizer::BOGUS_COMMENT:
291     case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
292       if (aState == nsHtml5Tokenizer::DATA) {
293         AddClass(sComment);
294         FinishTag();
295       }
296       break;
297     // most cdata states are omitted, because they don't matter to
298     // highlighting
299     case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
300       if (aState == nsHtml5Tokenizer::DATA) {
301         AddClass(sCdata);
302         FinishTag();
303       }
304       break;
305     case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
306       EndSpanOrA();  // the span for the ampersand
307       switch (aState) {
308         case nsHtml5Tokenizer::CONSUME_NCR:
309         case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
310           break;
311         default:
312           // not actually a character reference
313           EndSpanOrA();
314           break;
315       }
316       break;
317     case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
318       if (aState == nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL) {
319         break;
320       }
321       // not actually a character reference
322       EndSpanOrA();
323       break;
324     case nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL:
325       if (!aReconsume) {
326         FlushCurrent();
327       }
328       EndSpanOrA();
329       break;
330     case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
331     case nsHtml5Tokenizer::HEX_NCR_LOOP:
332       switch (aState) {
333         case nsHtml5Tokenizer::HANDLE_NCR_VALUE:
334           AddClass(sEntity);
335           FlushCurrent();
336           break;
337         case nsHtml5Tokenizer::HANDLE_NCR_VALUE_RECONSUME:
338           AddClass(sEntity);
339           break;
340       }
341       EndSpanOrA();
342       break;
343     case nsHtml5Tokenizer::CLOSE_TAG_OPEN:
344       switch (aState) {
345         case nsHtml5Tokenizer::DATA:
346           FinishTag();
347           break;
348         case nsHtml5Tokenizer::TAG_NAME:
349           StartSpan(sEndTag);
350           break;
351       }
352       break;
353     case nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN:
354       if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
355         FlushCurrent();
356         StartSpan();  // don't know if it is "end-tag" yet :-(
357         break;
358       }
359       EndSpanOrA();
360       StartCharacters();
361       break;
362     case nsHtml5Tokenizer::NON_DATA_END_TAG_NAME:
363       switch (aState) {
364         case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
365           AddClass(sEndTag);
366           EndSpanOrA();
367           break;
368         case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
369           AddClass(sEndTag);
370           EndSpanOrA();
371           StartSpan();  // for highlighting the slash
372           mSlash = CurrentNode();
373           break;
374         case nsHtml5Tokenizer::DATA:  // yes, as a result of emitting the token
375           AddClass(sEndTag);
376           FinishTag();
377           break;
378         default:
379           FinishTag();
380           break;
381       }
382       break;
383     case nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN:
384     case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
385       if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
386         FlushCurrent();
387         StartSpan();  // don't know if it is "end-tag" yet :-(
388         break;
389       }
390       FinishTag();
391       break;
392     case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH:
393     case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED:
394     case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH:
395       if (aState == nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN) {
396         EndCharactersAndStartMarkupRun();
397       }
398       break;
399     // Lots of double escape states omitted, because they don't highlight.
400     // Likewise, only doctype states that can emit the doctype are of
401     // interest. Otherwise, the transition out of bogus comment deals.
402     case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
403     case nsHtml5Tokenizer::DOCTYPE_NAME:
404     case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
405     case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
406     case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
407     case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
408     case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
409     case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
410     case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
411     case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
412     case nsHtml5Tokenizer::BOGUS_DOCTYPE:
413     case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
414     case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
415     case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
416     case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
417       if (aState == nsHtml5Tokenizer::DATA) {
418         AddClass(sDoctype);
419         FinishTag();
420       }
421       break;
422     case nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK:
423       if (aState == nsHtml5Tokenizer::DATA) {
424         FinishTag();
425       }
426       break;
427     default:
428       break;
429   }
430   mState = aState;
431   return aState;
432 }
433 
End()434 void nsHtml5Highlighter::End() {
435   switch (mState) {
436     case nsHtml5Tokenizer::COMMENT_END:
437     case nsHtml5Tokenizer::COMMENT_END_BANG:
438     case nsHtml5Tokenizer::COMMENT_START_DASH:
439     case nsHtml5Tokenizer::BOGUS_COMMENT:
440     case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
441       AddClass(sComment);
442       break;
443     case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
444       AddClass(sCdata);
445       break;
446     case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
447     case nsHtml5Tokenizer::HEX_NCR_LOOP:
448       // XXX need tokenizer help here
449       break;
450     case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
451     case nsHtml5Tokenizer::DOCTYPE_NAME:
452     case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
453     case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
454     case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
455     case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
456     case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
457     case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
458     case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
459     case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
460     case nsHtml5Tokenizer::BOGUS_DOCTYPE:
461     case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
462     case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
463     case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
464     case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
465       AddClass(sDoctype);
466       break;
467     default:
468       break;
469   }
470   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
471   NS_ASSERTION(treeOp, "Tree op allocation failed.");
472   treeOp->Init(mozilla::AsVariant(opStreamEnded()));
473   FlushOps();
474 }
475 
SetBuffer(nsHtml5UTF16Buffer * aBuffer)476 void nsHtml5Highlighter::SetBuffer(nsHtml5UTF16Buffer* aBuffer) {
477   MOZ_ASSERT(!mBuffer, "Old buffer still here!");
478   mBuffer = aBuffer;
479   mCStart = aBuffer->getStart();
480 }
481 
DropBuffer(int32_t aPos)482 void nsHtml5Highlighter::DropBuffer(int32_t aPos) {
483   MOZ_ASSERT(mBuffer, "No buffer to drop!");
484   mPos = aPos;
485   FlushChars();
486   mBuffer = nullptr;
487 }
488 
StartSpan()489 void nsHtml5Highlighter::StartSpan() {
490   FlushChars();
491   Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
492   ++mInlinesOpen;
493 }
494 
StartSpan(const char16_t * aClass)495 void nsHtml5Highlighter::StartSpan(const char16_t* aClass) {
496   StartSpan();
497   AddClass(aClass);
498 }
499 
EndSpanOrA()500 void nsHtml5Highlighter::EndSpanOrA() {
501   FlushChars();
502   Pop();
503   --mInlinesOpen;
504 }
505 
StartCharacters()506 void nsHtml5Highlighter::StartCharacters() {
507   MOZ_ASSERT(!mInCharacters, "Already in characters!");
508   FlushChars();
509   Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
510   mCurrentRun = CurrentNode();
511   mInCharacters = true;
512 }
513 
EndCharactersAndStartMarkupRun()514 void nsHtml5Highlighter::EndCharactersAndStartMarkupRun() {
515   MOZ_ASSERT(mInCharacters, "Not in characters!");
516   FlushChars();
517   Pop();
518   mInCharacters = false;
519   // Now start markup run
520   StartSpan();
521   mCurrentRun = CurrentNode();
522 }
523 
StartA()524 void nsHtml5Highlighter::StartA() {
525   FlushChars();
526   Push(nsGkAtoms::a, nullptr, NS_NewHTMLAnchorElement);
527   AddClass(sAttributeValue);
528   ++mInlinesOpen;
529 }
530 
FinishTag()531 void nsHtml5Highlighter::FinishTag() {
532   while (mInlinesOpen > 1) {
533     EndSpanOrA();
534   }
535   FlushCurrent();  // >
536   EndSpanOrA();    // DATA
537   NS_ASSERTION(!mInlinesOpen, "mInlinesOpen got out of sync!");
538   StartCharacters();
539 }
540 
FlushChars()541 void nsHtml5Highlighter::FlushChars() {
542   if (mCStart < mPos) {
543     char16_t* buf = mBuffer->getBuffer();
544     int32_t i = mCStart;
545     while (i < mPos) {
546       char16_t c = buf[i];
547       switch (c) {
548         case '\r':
549           // The input this code sees has been normalized so that there are
550           // CR breaks and LF breaks but no CRLF breaks. Overwrite CR with LF
551           // to show consistent LF line breaks to layout. It is OK to mutate
552           // the input data, because there are no reparses in the View Source
553           // case, so we won't need the original data in the buffer anymore.
554           buf[i] = '\n';
555           [[fallthrough]];
556         case '\n': {
557           ++i;
558           if (mCStart < i) {
559             int32_t len = i - mCStart;
560             AppendCharacters(buf, mCStart, len);
561             mCStart = i;
562           }
563           ++mLineNumber;
564           Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
565           nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
566           NS_ASSERTION(treeOp, "Tree op allocation failed.");
567           opAddLineNumberId operation(CurrentNode(), mLineNumber);
568           treeOp->Init(mozilla::AsVariant(operation));
569           Pop();
570           break;
571         }
572         default:
573           ++i;
574           break;
575       }
576     }
577     if (mCStart < mPos) {
578       int32_t len = mPos - mCStart;
579       AppendCharacters(buf, mCStart, len);
580       mCStart = mPos;
581     }
582   }
583 }
584 
FlushCurrent()585 void nsHtml5Highlighter::FlushCurrent() {
586   mPos++;
587   FlushChars();
588 }
589 
FlushOps()590 bool nsHtml5Highlighter::FlushOps() {
591   bool hasOps = !mOpQueue.IsEmpty();
592   if (hasOps) {
593     mOpSink->MoveOpsFrom(mOpQueue);
594   }
595   return hasOps;
596 }
597 
MaybeLinkifyAttributeValue(nsHtml5AttributeName * aName,nsHtml5String aValue)598 void nsHtml5Highlighter::MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
599                                                     nsHtml5String aValue) {
600   if (!(nsHtml5AttributeName::ATTR_HREF == aName ||
601         nsHtml5AttributeName::ATTR_SRC == aName ||
602         nsHtml5AttributeName::ATTR_ACTION == aName ||
603         nsHtml5AttributeName::ATTR_CITE == aName ||
604         nsHtml5AttributeName::ATTR_BACKGROUND == aName ||
605         nsHtml5AttributeName::ATTR_LONGDESC == aName ||
606         nsHtml5AttributeName::ATTR_XLINK_HREF == aName ||
607         nsHtml5AttributeName::ATTR_DEFINITIONURL == aName)) {
608     return;
609   }
610   AddViewSourceHref(aValue);
611 }
612 
CompletedNamedCharacterReference()613 void nsHtml5Highlighter::CompletedNamedCharacterReference() {
614   AddClass(sEntity);
615 }
616 
AllocateContentHandle()617 nsIContent** nsHtml5Highlighter::AllocateContentHandle() {
618   if (mHandlesUsed == NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH) {
619     mOldHandles.AppendElement(std::move(mHandles));
620     mHandles =
621         MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH);
622     mHandlesUsed = 0;
623   }
624 #ifdef DEBUG
625   mHandles[mHandlesUsed] = reinterpret_cast<nsIContent*>(uintptr_t(0xC0DEDBAD));
626 #endif
627   return &mHandles[mHandlesUsed++];
628 }
629 
CreateElement(nsAtom * aName,nsHtml5HtmlAttributes * aAttributes,nsIContent ** aIntendedParent,mozilla::dom::HTMLContentCreatorFunction aCreator)630 nsIContent** nsHtml5Highlighter::CreateElement(
631     nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
632     nsIContent** aIntendedParent,
633     mozilla::dom::HTMLContentCreatorFunction aCreator) {
634   MOZ_ASSERT(aName, "Got null name.");
635   nsIContent** content = AllocateContentHandle();
636   opCreateHTMLElement opeation(content, aName, aAttributes, aCreator,
637                                aIntendedParent,
638                                mozilla::dom::FROM_PARSER_NETWORK);
639   mOpQueue.AppendElement()->Init(mozilla::AsVariant(opeation));
640   return content;
641 }
642 
CurrentNode()643 nsIContent** nsHtml5Highlighter::CurrentNode() {
644   MOZ_ASSERT(mStack.Length() >= 1, "Must have something on stack.");
645   return mStack[mStack.Length() - 1];
646 }
647 
Push(nsAtom * aName,nsHtml5HtmlAttributes * aAttributes,mozilla::dom::HTMLContentCreatorFunction aCreator)648 void nsHtml5Highlighter::Push(
649     nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
650     mozilla::dom::HTMLContentCreatorFunction aCreator) {
651   MOZ_ASSERT(mStack.Length() >= 1, "Pushing without root.");
652   nsIContent** elt = CreateElement(aName, aAttributes, CurrentNode(),
653                                    aCreator);  // Don't inline below!
654   opAppend operation(elt, CurrentNode(), mozilla::dom::FROM_PARSER_NETWORK);
655   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
656   mStack.AppendElement(elt);
657 }
658 
Pop()659 void nsHtml5Highlighter::Pop() {
660   MOZ_ASSERT(mStack.Length() >= 2, "Popping when stack too short.");
661   mStack.RemoveLastElement();
662 }
663 
AppendCharacters(const char16_t * aBuffer,int32_t aStart,int32_t aLength)664 void nsHtml5Highlighter::AppendCharacters(const char16_t* aBuffer,
665                                           int32_t aStart, int32_t aLength) {
666   MOZ_ASSERT(aBuffer, "Null buffer");
667 
668   char16_t* bufferCopy = new char16_t[aLength];
669   memcpy(bufferCopy, aBuffer + aStart, aLength * sizeof(char16_t));
670 
671   opAppendText operation(CurrentNode(), bufferCopy, aLength);
672   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
673 }
674 
AddClass(const char16_t * aClass)675 void nsHtml5Highlighter::AddClass(const char16_t* aClass) {
676   opAddClass operation(CurrentNode(), (char16_t*)aClass);
677   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
678 }
679 
AddViewSourceHref(nsHtml5String aValue)680 void nsHtml5Highlighter::AddViewSourceHref(nsHtml5String aValue) {
681   char16_t* bufferCopy = new char16_t[aValue.Length() + 1];
682   aValue.CopyToBuffer(bufferCopy);
683   bufferCopy[aValue.Length()] = 0;
684 
685   opAddViewSourceHref operation(CurrentNode(), bufferCopy, aValue.Length());
686   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
687 }
688 
AddBase(nsHtml5String aValue)689 void nsHtml5Highlighter::AddBase(nsHtml5String aValue) {
690   if (mSeenBase) {
691     return;
692   }
693   mSeenBase = true;
694   char16_t* bufferCopy = new char16_t[aValue.Length() + 1];
695   aValue.CopyToBuffer(bufferCopy);
696   bufferCopy[aValue.Length()] = 0;
697 
698   opAddViewSourceBase operation(bufferCopy, aValue.Length());
699   mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
700 }
701 
AddErrorToCurrentNode(const char * aMsgId)702 void nsHtml5Highlighter::AddErrorToCurrentNode(const char* aMsgId) {
703   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
704   NS_ASSERTION(treeOp, "Tree op allocation failed.");
705   opAddErrorType operation(CurrentNode(), (char*)aMsgId);
706   treeOp->Init(mozilla::AsVariant(operation));
707 }
708 
AddErrorToCurrentRun(const char * aMsgId)709 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId) {
710   MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
711   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
712   NS_ASSERTION(treeOp, "Tree op allocation failed.");
713   opAddErrorType operation(mCurrentRun, (char*)aMsgId);
714   treeOp->Init(mozilla::AsVariant(operation));
715 }
716 
AddErrorToCurrentRun(const char * aMsgId,nsAtom * aName)717 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId,
718                                               nsAtom* aName) {
719   MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
720   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
721   NS_ASSERTION(treeOp, "Tree op allocation failed.");
722   opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName);
723   treeOp->Init(mozilla::AsVariant(operation));
724 }
725 
AddErrorToCurrentRun(const char * aMsgId,nsAtom * aName,nsAtom * aOther)726 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName,
727                                               nsAtom* aOther) {
728   MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
729   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
730   NS_ASSERTION(treeOp, "Tree op allocation failed.");
731   opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName, aOther);
732   treeOp->Init(mozilla::AsVariant(operation));
733 }
734 
AddErrorToCurrentAmpersand(const char * aMsgId)735 void nsHtml5Highlighter::AddErrorToCurrentAmpersand(const char* aMsgId) {
736   MOZ_ASSERT(mAmpersand, "Adding error to ampersand without one!");
737   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
738   NS_ASSERTION(treeOp, "Tree op allocation failed.");
739   opAddErrorType operation(mAmpersand, (char*)aMsgId);
740   treeOp->Init(mozilla::AsVariant(operation));
741 }
742 
AddErrorToCurrentSlash(const char * aMsgId)743 void nsHtml5Highlighter::AddErrorToCurrentSlash(const char* aMsgId) {
744   MOZ_ASSERT(mSlash, "Adding error to slash without one!");
745   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
746   NS_ASSERTION(treeOp, "Tree op allocation failed.");
747   opAddErrorType operation(mSlash, (char*)aMsgId);
748   treeOp->Init(mozilla::AsVariant(operation));
749 }
750