1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "nsHtml5Highlighter.h"
6 #include "nsDebug.h"
7 #include "nsHtml5AttributeName.h"
8 #include "nsHtml5Tokenizer.h"
9 #include "nsHtml5ViewSourceUtils.h"
10 #include "nsString.h"
11 #include "nsThreadUtils.h"
12
13 #include "mozilla/Attributes.h"
14 #include "mozilla/Preferences.h"
15
16 using namespace mozilla;
17
// The old code had a limit of 16 tokens. 1300 is a number picked by measuring
// the size of 16 tokens on cnn.com.
20 #define NS_HTML5_HIGHLIGHTER_PRE_BREAK_THRESHOLD 1300
21
22 char16_t nsHtml5Highlighter::sComment[] = {'c', 'o', 'm', 'm',
23 'e', 'n', 't', 0};
24
25 char16_t nsHtml5Highlighter::sCdata[] = {'c', 'd', 'a', 't', 'a', 0};
26
27 char16_t nsHtml5Highlighter::sEntity[] = {'e', 'n', 't', 'i', 't', 'y', 0};
28
29 char16_t nsHtml5Highlighter::sEndTag[] = {'e', 'n', 'd', '-', 't', 'a', 'g', 0};
30
31 char16_t nsHtml5Highlighter::sStartTag[] = {'s', 't', 'a', 'r', 't',
32 '-', 't', 'a', 'g', 0};
33
34 char16_t nsHtml5Highlighter::sAttributeName[] = {
35 'a', 't', 't', 'r', 'i', 'b', 'u', 't', 'e', '-', 'n', 'a', 'm', 'e', 0};
36
37 char16_t nsHtml5Highlighter::sAttributeValue[] = {'a', 't', 't', 'r', 'i', 'b',
38 'u', 't', 'e', '-', 'v', 'a',
39 'l', 'u', 'e', 0};
40
41 char16_t nsHtml5Highlighter::sDoctype[] = {'d', 'o', 'c', 't',
42 'y', 'p', 'e', 0};
43
44 char16_t nsHtml5Highlighter::sPi[] = {'p', 'i', 0};
45
nsHtml5Highlighter(nsAHtml5TreeOpSink * aOpSink)46 nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink)
47 : mState(nsHtml5Tokenizer::DATA),
48 mCStart(INT32_MAX),
49 mPos(0),
50 mLineNumber(1),
51 mInlinesOpen(0),
52 mInCharacters(false),
53 mBuffer(nullptr),
54 mOpSink(aOpSink),
55 mCurrentRun(nullptr),
56 mAmpersand(nullptr),
57 mSlash(nullptr),
58 mHandles(
59 MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH)),
60 mHandlesUsed(0),
61 mSeenBase(false) {
62 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
63 }
64
~nsHtml5Highlighter()65 nsHtml5Highlighter::~nsHtml5Highlighter() {
66 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
67 }
68
Start(const nsAutoString & aTitle)69 void nsHtml5Highlighter::Start(const nsAutoString& aTitle) {
70 // Doctype
71 opAppendDoctypeToDocument operation(nsGkAtoms::html, u""_ns, u""_ns);
72 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
73
74 mOpQueue.AppendElement()->Init(mozilla::AsVariant(STANDARDS_MODE));
75
76 // <html> uses NS_NewHTMLSharedElement creator
77 nsIContent** root =
78 CreateElement(nsGkAtoms::html, nullptr, nullptr, NS_NewHTMLSharedElement);
79 opAppendToDocument appendOp(root);
80 mOpQueue.AppendElement()->Init(mozilla::AsVariant(appendOp));
81 mStack.AppendElement(root);
82
83 // <head> uses NS_NewHTMLSharedElement creator
84 Push(nsGkAtoms::head, nullptr, NS_NewHTMLSharedElement);
85
86 Push(nsGkAtoms::meta, nsHtml5ViewSourceUtils::NewMetaViewportAttributes(),
87 NS_NewHTMLMetaElement);
88 Pop(); // meta
89
90 Push(nsGkAtoms::title, nullptr, NS_NewHTMLTitleElement);
91 // XUL will add the "Source of: " prefix.
92 uint32_t length = aTitle.Length();
93 if (length > INT32_MAX) {
94 length = INT32_MAX;
95 }
96 AppendCharacters(aTitle.BeginReading(), 0, (int32_t)length);
97 Pop(); // title
98
99 Push(nsGkAtoms::link, nsHtml5ViewSourceUtils::NewLinkAttributes(),
100 NS_NewHTMLLinkElement);
101
102 opUpdateStyleSheet updateOp(CurrentNode());
103 mOpQueue.AppendElement()->Init(mozilla::AsVariant(updateOp));
104
105 Pop(); // link
106
107 Pop(); // head
108
109 Push(nsGkAtoms::body, nsHtml5ViewSourceUtils::NewBodyAttributes(),
110 NS_NewHTMLBodyElement);
111
112 nsHtml5HtmlAttributes* preAttrs = new nsHtml5HtmlAttributes(0);
113 nsHtml5String preId = nsHtml5Portability::newStringFromLiteral("line1");
114 preAttrs->addAttribute(nsHtml5AttributeName::ATTR_ID, preId, -1);
115 Push(nsGkAtoms::pre, preAttrs, NS_NewHTMLPreElement);
116
117 StartCharacters();
118
119 mOpQueue.AppendElement()->Init(mozilla::AsVariant(opStartLayout()));
120 }
121
Transition(int32_t aState,bool aReconsume,int32_t aPos)122 int32_t nsHtml5Highlighter::Transition(int32_t aState, bool aReconsume,
123 int32_t aPos) {
124 mPos = aPos;
125 switch (mState) {
126 case nsHtml5Tokenizer::SCRIPT_DATA:
127 case nsHtml5Tokenizer::RAWTEXT:
128 case nsHtml5Tokenizer::RCDATA:
129 case nsHtml5Tokenizer::DATA:
130 // We can transition on < and on &. Either way, we don't yet know the
131 // role of the token, so open a span without class.
132 if (aState == nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE) {
133 StartSpan();
134 // Start another span for highlighting the ampersand
135 StartSpan();
136 mAmpersand = CurrentNode();
137 } else {
138 EndCharactersAndStartMarkupRun();
139 }
140 break;
141 case nsHtml5Tokenizer::TAG_OPEN:
142 switch (aState) {
143 case nsHtml5Tokenizer::TAG_NAME:
144 StartSpan(sStartTag);
145 break;
146 case nsHtml5Tokenizer::DATA:
147 FinishTag(); // DATA
148 break;
149 case nsHtml5Tokenizer::PROCESSING_INSTRUCTION:
150 AddClass(sPi);
151 break;
152 }
153 break;
154 case nsHtml5Tokenizer::TAG_NAME:
155 switch (aState) {
156 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
157 EndSpanOrA(); // nsHtml5Tokenizer::TAG_NAME
158 break;
159 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
160 EndSpanOrA(); // nsHtml5Tokenizer::TAG_NAME
161 StartSpan(); // for highlighting the slash
162 mSlash = CurrentNode();
163 break;
164 default:
165 FinishTag();
166 break;
167 }
168 break;
169 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
170 switch (aState) {
171 case nsHtml5Tokenizer::ATTRIBUTE_NAME:
172 StartSpan(sAttributeName);
173 break;
174 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
175 StartSpan(); // for highlighting the slash
176 mSlash = CurrentNode();
177 break;
178 default:
179 FinishTag();
180 break;
181 }
182 break;
183 case nsHtml5Tokenizer::ATTRIBUTE_NAME:
184 switch (aState) {
185 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
186 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
187 EndSpanOrA(); // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME
188 break;
189 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
190 EndSpanOrA(); // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME
191 StartSpan(); // for highlighting the slash
192 mSlash = CurrentNode();
193 break;
194 default:
195 FinishTag();
196 break;
197 }
198 break;
199 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
200 switch (aState) {
201 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
202 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
203 FlushCurrent();
204 StartA();
205 break;
206 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
207 StartA();
208 break;
209 default:
210 FinishTag();
211 break;
212 }
213 break;
214 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
215 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
216 switch (aState) {
217 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
218 EndSpanOrA();
219 break;
220 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
221 StartSpan();
222 StartSpan(); // for ampersand itself
223 mAmpersand = CurrentNode();
224 break;
225 default:
226 MOZ_ASSERT_UNREACHABLE("Impossible transition.");
227 break;
228 }
229 break;
230 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
231 switch (aState) {
232 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
233 break;
234 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
235 StartSpan(); // for highlighting the slash
236 mSlash = CurrentNode();
237 break;
238 default:
239 FinishTag();
240 break;
241 }
242 break;
243 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
244 EndSpanOrA(); // end the slash highlight
245 switch (aState) {
246 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
247 break;
248 default:
249 FinishTag();
250 break;
251 }
252 break;
253 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
254 switch (aState) {
255 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
256 EndSpanOrA();
257 break;
258 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
259 StartSpan();
260 StartSpan(); // for ampersand itself
261 mAmpersand = CurrentNode();
262 break;
263 default:
264 FinishTag();
265 break;
266 }
267 break;
268 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
269 switch (aState) {
270 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
271 StartSpan(); // for highlighting the slash
272 mSlash = CurrentNode();
273 break;
274 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
275 break;
276 case nsHtml5Tokenizer::ATTRIBUTE_NAME:
277 StartSpan(sAttributeName);
278 break;
279 default:
280 FinishTag();
281 break;
282 }
283 break;
284 // most comment states are omitted, because they don't matter to
285 // highlighting
286 case nsHtml5Tokenizer::COMMENT_START:
287 case nsHtml5Tokenizer::COMMENT_END:
288 case nsHtml5Tokenizer::COMMENT_END_BANG:
289 case nsHtml5Tokenizer::COMMENT_START_DASH:
290 case nsHtml5Tokenizer::BOGUS_COMMENT:
291 case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
292 if (aState == nsHtml5Tokenizer::DATA) {
293 AddClass(sComment);
294 FinishTag();
295 }
296 break;
297 // most cdata states are omitted, because they don't matter to
298 // highlighting
299 case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
300 if (aState == nsHtml5Tokenizer::DATA) {
301 AddClass(sCdata);
302 FinishTag();
303 }
304 break;
305 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
306 EndSpanOrA(); // the span for the ampersand
307 switch (aState) {
308 case nsHtml5Tokenizer::CONSUME_NCR:
309 case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
310 break;
311 default:
312 // not actually a character reference
313 EndSpanOrA();
314 break;
315 }
316 break;
317 case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
318 if (aState == nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL) {
319 break;
320 }
321 // not actually a character reference
322 EndSpanOrA();
323 break;
324 case nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL:
325 if (!aReconsume) {
326 FlushCurrent();
327 }
328 EndSpanOrA();
329 break;
330 case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
331 case nsHtml5Tokenizer::HEX_NCR_LOOP:
332 switch (aState) {
333 case nsHtml5Tokenizer::HANDLE_NCR_VALUE:
334 AddClass(sEntity);
335 FlushCurrent();
336 break;
337 case nsHtml5Tokenizer::HANDLE_NCR_VALUE_RECONSUME:
338 AddClass(sEntity);
339 break;
340 }
341 EndSpanOrA();
342 break;
343 case nsHtml5Tokenizer::CLOSE_TAG_OPEN:
344 switch (aState) {
345 case nsHtml5Tokenizer::DATA:
346 FinishTag();
347 break;
348 case nsHtml5Tokenizer::TAG_NAME:
349 StartSpan(sEndTag);
350 break;
351 }
352 break;
353 case nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN:
354 if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
355 FlushCurrent();
356 StartSpan(); // don't know if it is "end-tag" yet :-(
357 break;
358 }
359 EndSpanOrA();
360 StartCharacters();
361 break;
362 case nsHtml5Tokenizer::NON_DATA_END_TAG_NAME:
363 switch (aState) {
364 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
365 AddClass(sEndTag);
366 EndSpanOrA();
367 break;
368 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
369 AddClass(sEndTag);
370 EndSpanOrA();
371 StartSpan(); // for highlighting the slash
372 mSlash = CurrentNode();
373 break;
374 case nsHtml5Tokenizer::DATA: // yes, as a result of emitting the token
375 AddClass(sEndTag);
376 FinishTag();
377 break;
378 default:
379 FinishTag();
380 break;
381 }
382 break;
383 case nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN:
384 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
385 if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
386 FlushCurrent();
387 StartSpan(); // don't know if it is "end-tag" yet :-(
388 break;
389 }
390 FinishTag();
391 break;
392 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH:
393 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED:
394 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH:
395 if (aState == nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN) {
396 EndCharactersAndStartMarkupRun();
397 }
398 break;
399 // Lots of double escape states omitted, because they don't highlight.
400 // Likewise, only doctype states that can emit the doctype are of
401 // interest. Otherwise, the transition out of bogus comment deals.
402 case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
403 case nsHtml5Tokenizer::DOCTYPE_NAME:
404 case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
405 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
406 case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
407 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
408 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
409 case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
410 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
411 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
412 case nsHtml5Tokenizer::BOGUS_DOCTYPE:
413 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
414 case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
415 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
416 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
417 if (aState == nsHtml5Tokenizer::DATA) {
418 AddClass(sDoctype);
419 FinishTag();
420 }
421 break;
422 case nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK:
423 if (aState == nsHtml5Tokenizer::DATA) {
424 FinishTag();
425 }
426 break;
427 default:
428 break;
429 }
430 mState = aState;
431 return aState;
432 }
433
End()434 void nsHtml5Highlighter::End() {
435 switch (mState) {
436 case nsHtml5Tokenizer::COMMENT_END:
437 case nsHtml5Tokenizer::COMMENT_END_BANG:
438 case nsHtml5Tokenizer::COMMENT_START_DASH:
439 case nsHtml5Tokenizer::BOGUS_COMMENT:
440 case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
441 AddClass(sComment);
442 break;
443 case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
444 AddClass(sCdata);
445 break;
446 case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
447 case nsHtml5Tokenizer::HEX_NCR_LOOP:
448 // XXX need tokenizer help here
449 break;
450 case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
451 case nsHtml5Tokenizer::DOCTYPE_NAME:
452 case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
453 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
454 case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
455 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
456 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
457 case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
458 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
459 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
460 case nsHtml5Tokenizer::BOGUS_DOCTYPE:
461 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
462 case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
463 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
464 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
465 AddClass(sDoctype);
466 break;
467 default:
468 break;
469 }
470 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
471 NS_ASSERTION(treeOp, "Tree op allocation failed.");
472 treeOp->Init(mozilla::AsVariant(opStreamEnded()));
473 FlushOps();
474 }
475
SetBuffer(nsHtml5UTF16Buffer * aBuffer)476 void nsHtml5Highlighter::SetBuffer(nsHtml5UTF16Buffer* aBuffer) {
477 MOZ_ASSERT(!mBuffer, "Old buffer still here!");
478 mBuffer = aBuffer;
479 mCStart = aBuffer->getStart();
480 }
481
DropBuffer(int32_t aPos)482 void nsHtml5Highlighter::DropBuffer(int32_t aPos) {
483 MOZ_ASSERT(mBuffer, "No buffer to drop!");
484 mPos = aPos;
485 FlushChars();
486 mBuffer = nullptr;
487 }
488
StartSpan()489 void nsHtml5Highlighter::StartSpan() {
490 FlushChars();
491 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
492 ++mInlinesOpen;
493 }
494
StartSpan(const char16_t * aClass)495 void nsHtml5Highlighter::StartSpan(const char16_t* aClass) {
496 StartSpan();
497 AddClass(aClass);
498 }
499
EndSpanOrA()500 void nsHtml5Highlighter::EndSpanOrA() {
501 FlushChars();
502 Pop();
503 --mInlinesOpen;
504 }
505
StartCharacters()506 void nsHtml5Highlighter::StartCharacters() {
507 MOZ_ASSERT(!mInCharacters, "Already in characters!");
508 FlushChars();
509 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
510 mCurrentRun = CurrentNode();
511 mInCharacters = true;
512 }
513
EndCharactersAndStartMarkupRun()514 void nsHtml5Highlighter::EndCharactersAndStartMarkupRun() {
515 MOZ_ASSERT(mInCharacters, "Not in characters!");
516 FlushChars();
517 Pop();
518 mInCharacters = false;
519 // Now start markup run
520 StartSpan();
521 mCurrentRun = CurrentNode();
522 }
523
StartA()524 void nsHtml5Highlighter::StartA() {
525 FlushChars();
526 Push(nsGkAtoms::a, nullptr, NS_NewHTMLAnchorElement);
527 AddClass(sAttributeValue);
528 ++mInlinesOpen;
529 }
530
FinishTag()531 void nsHtml5Highlighter::FinishTag() {
532 while (mInlinesOpen > 1) {
533 EndSpanOrA();
534 }
535 FlushCurrent(); // >
536 EndSpanOrA(); // DATA
537 NS_ASSERTION(!mInlinesOpen, "mInlinesOpen got out of sync!");
538 StartCharacters();
539 }
540
FlushChars()541 void nsHtml5Highlighter::FlushChars() {
542 if (mCStart < mPos) {
543 char16_t* buf = mBuffer->getBuffer();
544 int32_t i = mCStart;
545 while (i < mPos) {
546 char16_t c = buf[i];
547 switch (c) {
548 case '\r':
549 // The input this code sees has been normalized so that there are
550 // CR breaks and LF breaks but no CRLF breaks. Overwrite CR with LF
551 // to show consistent LF line breaks to layout. It is OK to mutate
552 // the input data, because there are no reparses in the View Source
553 // case, so we won't need the original data in the buffer anymore.
554 buf[i] = '\n';
555 [[fallthrough]];
556 case '\n': {
557 ++i;
558 if (mCStart < i) {
559 int32_t len = i - mCStart;
560 AppendCharacters(buf, mCStart, len);
561 mCStart = i;
562 }
563 ++mLineNumber;
564 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
565 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
566 NS_ASSERTION(treeOp, "Tree op allocation failed.");
567 opAddLineNumberId operation(CurrentNode(), mLineNumber);
568 treeOp->Init(mozilla::AsVariant(operation));
569 Pop();
570 break;
571 }
572 default:
573 ++i;
574 break;
575 }
576 }
577 if (mCStart < mPos) {
578 int32_t len = mPos - mCStart;
579 AppendCharacters(buf, mCStart, len);
580 mCStart = mPos;
581 }
582 }
583 }
584
FlushCurrent()585 void nsHtml5Highlighter::FlushCurrent() {
586 mPos++;
587 FlushChars();
588 }
589
FlushOps()590 bool nsHtml5Highlighter::FlushOps() {
591 bool hasOps = !mOpQueue.IsEmpty();
592 if (hasOps) {
593 mOpSink->MoveOpsFrom(mOpQueue);
594 }
595 return hasOps;
596 }
597
MaybeLinkifyAttributeValue(nsHtml5AttributeName * aName,nsHtml5String aValue)598 void nsHtml5Highlighter::MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
599 nsHtml5String aValue) {
600 if (!(nsHtml5AttributeName::ATTR_HREF == aName ||
601 nsHtml5AttributeName::ATTR_SRC == aName ||
602 nsHtml5AttributeName::ATTR_ACTION == aName ||
603 nsHtml5AttributeName::ATTR_CITE == aName ||
604 nsHtml5AttributeName::ATTR_BACKGROUND == aName ||
605 nsHtml5AttributeName::ATTR_LONGDESC == aName ||
606 nsHtml5AttributeName::ATTR_XLINK_HREF == aName ||
607 nsHtml5AttributeName::ATTR_DEFINITIONURL == aName)) {
608 return;
609 }
610 AddViewSourceHref(aValue);
611 }
612
CompletedNamedCharacterReference()613 void nsHtml5Highlighter::CompletedNamedCharacterReference() {
614 AddClass(sEntity);
615 }
616
AllocateContentHandle()617 nsIContent** nsHtml5Highlighter::AllocateContentHandle() {
618 if (mHandlesUsed == NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH) {
619 mOldHandles.AppendElement(std::move(mHandles));
620 mHandles =
621 MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH);
622 mHandlesUsed = 0;
623 }
624 #ifdef DEBUG
625 mHandles[mHandlesUsed] = reinterpret_cast<nsIContent*>(uintptr_t(0xC0DEDBAD));
626 #endif
627 return &mHandles[mHandlesUsed++];
628 }
629
CreateElement(nsAtom * aName,nsHtml5HtmlAttributes * aAttributes,nsIContent ** aIntendedParent,mozilla::dom::HTMLContentCreatorFunction aCreator)630 nsIContent** nsHtml5Highlighter::CreateElement(
631 nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
632 nsIContent** aIntendedParent,
633 mozilla::dom::HTMLContentCreatorFunction aCreator) {
634 MOZ_ASSERT(aName, "Got null name.");
635 nsIContent** content = AllocateContentHandle();
636 opCreateHTMLElement opeation(content, aName, aAttributes, aCreator,
637 aIntendedParent,
638 mozilla::dom::FROM_PARSER_NETWORK);
639 mOpQueue.AppendElement()->Init(mozilla::AsVariant(opeation));
640 return content;
641 }
642
CurrentNode()643 nsIContent** nsHtml5Highlighter::CurrentNode() {
644 MOZ_ASSERT(mStack.Length() >= 1, "Must have something on stack.");
645 return mStack[mStack.Length() - 1];
646 }
647
Push(nsAtom * aName,nsHtml5HtmlAttributes * aAttributes,mozilla::dom::HTMLContentCreatorFunction aCreator)648 void nsHtml5Highlighter::Push(
649 nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
650 mozilla::dom::HTMLContentCreatorFunction aCreator) {
651 MOZ_ASSERT(mStack.Length() >= 1, "Pushing without root.");
652 nsIContent** elt = CreateElement(aName, aAttributes, CurrentNode(),
653 aCreator); // Don't inline below!
654 opAppend operation(elt, CurrentNode(), mozilla::dom::FROM_PARSER_NETWORK);
655 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
656 mStack.AppendElement(elt);
657 }
658
Pop()659 void nsHtml5Highlighter::Pop() {
660 MOZ_ASSERT(mStack.Length() >= 2, "Popping when stack too short.");
661 mStack.RemoveLastElement();
662 }
663
AppendCharacters(const char16_t * aBuffer,int32_t aStart,int32_t aLength)664 void nsHtml5Highlighter::AppendCharacters(const char16_t* aBuffer,
665 int32_t aStart, int32_t aLength) {
666 MOZ_ASSERT(aBuffer, "Null buffer");
667
668 char16_t* bufferCopy = new char16_t[aLength];
669 memcpy(bufferCopy, aBuffer + aStart, aLength * sizeof(char16_t));
670
671 opAppendText operation(CurrentNode(), bufferCopy, aLength);
672 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
673 }
674
AddClass(const char16_t * aClass)675 void nsHtml5Highlighter::AddClass(const char16_t* aClass) {
676 opAddClass operation(CurrentNode(), (char16_t*)aClass);
677 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
678 }
679
AddViewSourceHref(nsHtml5String aValue)680 void nsHtml5Highlighter::AddViewSourceHref(nsHtml5String aValue) {
681 char16_t* bufferCopy = new char16_t[aValue.Length() + 1];
682 aValue.CopyToBuffer(bufferCopy);
683 bufferCopy[aValue.Length()] = 0;
684
685 opAddViewSourceHref operation(CurrentNode(), bufferCopy, aValue.Length());
686 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
687 }
688
AddBase(nsHtml5String aValue)689 void nsHtml5Highlighter::AddBase(nsHtml5String aValue) {
690 if (mSeenBase) {
691 return;
692 }
693 mSeenBase = true;
694 char16_t* bufferCopy = new char16_t[aValue.Length() + 1];
695 aValue.CopyToBuffer(bufferCopy);
696 bufferCopy[aValue.Length()] = 0;
697
698 opAddViewSourceBase operation(bufferCopy, aValue.Length());
699 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
700 }
701
AddErrorToCurrentNode(const char * aMsgId)702 void nsHtml5Highlighter::AddErrorToCurrentNode(const char* aMsgId) {
703 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
704 NS_ASSERTION(treeOp, "Tree op allocation failed.");
705 opAddErrorType operation(CurrentNode(), (char*)aMsgId);
706 treeOp->Init(mozilla::AsVariant(operation));
707 }
708
AddErrorToCurrentRun(const char * aMsgId)709 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId) {
710 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
711 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
712 NS_ASSERTION(treeOp, "Tree op allocation failed.");
713 opAddErrorType operation(mCurrentRun, (char*)aMsgId);
714 treeOp->Init(mozilla::AsVariant(operation));
715 }
716
AddErrorToCurrentRun(const char * aMsgId,nsAtom * aName)717 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId,
718 nsAtom* aName) {
719 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
720 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
721 NS_ASSERTION(treeOp, "Tree op allocation failed.");
722 opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName);
723 treeOp->Init(mozilla::AsVariant(operation));
724 }
725
AddErrorToCurrentRun(const char * aMsgId,nsAtom * aName,nsAtom * aOther)726 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName,
727 nsAtom* aOther) {
728 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
729 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
730 NS_ASSERTION(treeOp, "Tree op allocation failed.");
731 opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName, aOther);
732 treeOp->Init(mozilla::AsVariant(operation));
733 }
734
AddErrorToCurrentAmpersand(const char * aMsgId)735 void nsHtml5Highlighter::AddErrorToCurrentAmpersand(const char* aMsgId) {
736 MOZ_ASSERT(mAmpersand, "Adding error to ampersand without one!");
737 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
738 NS_ASSERTION(treeOp, "Tree op allocation failed.");
739 opAddErrorType operation(mAmpersand, (char*)aMsgId);
740 treeOp->Init(mozilla::AsVariant(operation));
741 }
742
AddErrorToCurrentSlash(const char * aMsgId)743 void nsHtml5Highlighter::AddErrorToCurrentSlash(const char* aMsgId) {
744 MOZ_ASSERT(mSlash, "Adding error to slash without one!");
745 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
746 NS_ASSERTION(treeOp, "Tree op allocation failed.");
747 opAddErrorType operation(mSlash, (char*)aMsgId);
748 treeOp->Init(mozilla::AsVariant(operation));
749 }
750