1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "nsHtml5Highlighter.h"
6 #include "nsDebug.h"
7 #include "nsHtml5AttributeName.h"
8 #include "nsHtml5Tokenizer.h"
9 #include "nsHtml5ViewSourceUtils.h"
10 #include "nsString.h"
11 #include "nsThreadUtils.h"
12
13 #include "mozilla/Attributes.h"
14 #include "mozilla/Preferences.h"
15
16 using namespace mozilla;
17
18 // The old code had a limit of 16 tokens. 1300 is a number picked my measuring
19 // the size of 16 tokens on cnn.com.
20 #define NS_HTML5_HIGHLIGHTER_PRE_BREAK_THRESHOLD 1300
21
22 char16_t nsHtml5Highlighter::sComment[] = {'c', 'o', 'm', 'm',
23 'e', 'n', 't', 0};
24
25 char16_t nsHtml5Highlighter::sCdata[] = {'c', 'd', 'a', 't', 'a', 0};
26
27 char16_t nsHtml5Highlighter::sEntity[] = {'e', 'n', 't', 'i', 't', 'y', 0};
28
29 char16_t nsHtml5Highlighter::sEndTag[] = {'e', 'n', 'd', '-', 't', 'a', 'g', 0};
30
31 char16_t nsHtml5Highlighter::sStartTag[] = {'s', 't', 'a', 'r', 't',
32 '-', 't', 'a', 'g', 0};
33
34 char16_t nsHtml5Highlighter::sAttributeName[] = {
35 'a', 't', 't', 'r', 'i', 'b', 'u', 't', 'e', '-', 'n', 'a', 'm', 'e', 0};
36
37 char16_t nsHtml5Highlighter::sAttributeValue[] = {'a', 't', 't', 'r', 'i', 'b',
38 'u', 't', 'e', '-', 'v', 'a',
39 'l', 'u', 'e', 0};
40
41 char16_t nsHtml5Highlighter::sDoctype[] = {'d', 'o', 'c', 't',
42 'y', 'p', 'e', 0};
43
44 char16_t nsHtml5Highlighter::sPi[] = {'p', 'i', 0};
45
nsHtml5Highlighter(nsAHtml5TreeOpSink * aOpSink)46 nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink)
47 : mState(nsHtml5Tokenizer::DATA),
48 mCStart(INT32_MAX),
49 mPos(0),
50 mLineNumber(1),
51 mInlinesOpen(0),
52 mInCharacters(false),
53 mBuffer(nullptr),
54 mOpSink(aOpSink),
55 mCurrentRun(nullptr),
56 mAmpersand(nullptr),
57 mSlash(nullptr),
58 mHandles(
59 MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH)),
60 mHandlesUsed(0),
61 mSeenBase(false) {
62 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
63 }
64
~nsHtml5Highlighter()65 nsHtml5Highlighter::~nsHtml5Highlighter() {
66 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
67 }
68
Start(const nsAutoString & aTitle)69 void nsHtml5Highlighter::Start(const nsAutoString& aTitle) {
70 // Doctype
71 opAppendDoctypeToDocument operation(nsGkAtoms::html, EmptyString(),
72 EmptyString());
73 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
74
75 mOpQueue.AppendElement()->Init(mozilla::AsVariant(STANDARDS_MODE));
76
77 // <html> uses NS_NewHTMLSharedElement creator
78 nsIContent** root =
79 CreateElement(nsGkAtoms::html, nullptr, nullptr, NS_NewHTMLSharedElement);
80 opAppendToDocument appendOp(root);
81 mOpQueue.AppendElement()->Init(mozilla::AsVariant(appendOp));
82 mStack.AppendElement(root);
83
84 // <head> uses NS_NewHTMLSharedElement creator
85 Push(nsGkAtoms::head, nullptr, NS_NewHTMLSharedElement);
86
87 Push(nsGkAtoms::meta, nsHtml5ViewSourceUtils::NewMetaViewportAttributes(),
88 NS_NewHTMLMetaElement);
89 Pop(); // meta
90
91 Push(nsGkAtoms::title, nullptr, NS_NewHTMLTitleElement);
92 // XUL will add the "Source of: " prefix.
93 uint32_t length = aTitle.Length();
94 if (length > INT32_MAX) {
95 length = INT32_MAX;
96 }
97 AppendCharacters(aTitle.BeginReading(), 0, (int32_t)length);
98 Pop(); // title
99
100 Push(nsGkAtoms::link, nsHtml5ViewSourceUtils::NewLinkAttributes(),
101 NS_NewHTMLLinkElement);
102
103 opUpdateStyleSheet updateOp(CurrentNode());
104 mOpQueue.AppendElement()->Init(mozilla::AsVariant(updateOp));
105
106 Pop(); // link
107
108 Pop(); // head
109
110 Push(nsGkAtoms::body, nsHtml5ViewSourceUtils::NewBodyAttributes(),
111 NS_NewHTMLBodyElement);
112
113 nsHtml5HtmlAttributes* preAttrs = new nsHtml5HtmlAttributes(0);
114 nsHtml5String preId = nsHtml5Portability::newStringFromLiteral("line1");
115 preAttrs->addAttribute(nsHtml5AttributeName::ATTR_ID, preId, -1);
116 Push(nsGkAtoms::pre, preAttrs, NS_NewHTMLPreElement);
117
118 StartCharacters();
119
120 mOpQueue.AppendElement()->Init(mozilla::AsVariant(opStartLayout()));
121 }
122
Transition(int32_t aState,bool aReconsume,int32_t aPos)123 int32_t nsHtml5Highlighter::Transition(int32_t aState, bool aReconsume,
124 int32_t aPos) {
125 mPos = aPos;
126 switch (mState) {
127 case nsHtml5Tokenizer::SCRIPT_DATA:
128 case nsHtml5Tokenizer::RAWTEXT:
129 case nsHtml5Tokenizer::RCDATA:
130 case nsHtml5Tokenizer::DATA:
131 // We can transition on < and on &. Either way, we don't yet know the
132 // role of the token, so open a span without class.
133 if (aState == nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE) {
134 StartSpan();
135 // Start another span for highlighting the ampersand
136 StartSpan();
137 mAmpersand = CurrentNode();
138 } else {
139 EndCharactersAndStartMarkupRun();
140 }
141 break;
142 case nsHtml5Tokenizer::TAG_OPEN:
143 switch (aState) {
144 case nsHtml5Tokenizer::TAG_NAME:
145 StartSpan(sStartTag);
146 break;
147 case nsHtml5Tokenizer::DATA:
148 FinishTag(); // DATA
149 break;
150 case nsHtml5Tokenizer::PROCESSING_INSTRUCTION:
151 AddClass(sPi);
152 break;
153 }
154 break;
155 case nsHtml5Tokenizer::TAG_NAME:
156 switch (aState) {
157 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
158 EndSpanOrA(); // nsHtml5Tokenizer::TAG_NAME
159 break;
160 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
161 EndSpanOrA(); // nsHtml5Tokenizer::TAG_NAME
162 StartSpan(); // for highlighting the slash
163 mSlash = CurrentNode();
164 break;
165 default:
166 FinishTag();
167 break;
168 }
169 break;
170 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
171 switch (aState) {
172 case nsHtml5Tokenizer::ATTRIBUTE_NAME:
173 StartSpan(sAttributeName);
174 break;
175 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
176 StartSpan(); // for highlighting the slash
177 mSlash = CurrentNode();
178 break;
179 default:
180 FinishTag();
181 break;
182 }
183 break;
184 case nsHtml5Tokenizer::ATTRIBUTE_NAME:
185 switch (aState) {
186 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
187 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
188 EndSpanOrA(); // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME
189 break;
190 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
191 EndSpanOrA(); // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME
192 StartSpan(); // for highlighting the slash
193 mSlash = CurrentNode();
194 break;
195 default:
196 FinishTag();
197 break;
198 }
199 break;
200 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
201 switch (aState) {
202 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
203 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
204 FlushCurrent();
205 StartA();
206 break;
207 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
208 StartA();
209 break;
210 default:
211 FinishTag();
212 break;
213 }
214 break;
215 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
216 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
217 switch (aState) {
218 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
219 EndSpanOrA();
220 break;
221 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
222 StartSpan();
223 StartSpan(); // for ampersand itself
224 mAmpersand = CurrentNode();
225 break;
226 default:
227 MOZ_ASSERT_UNREACHABLE("Impossible transition.");
228 break;
229 }
230 break;
231 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
232 switch (aState) {
233 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
234 break;
235 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
236 StartSpan(); // for highlighting the slash
237 mSlash = CurrentNode();
238 break;
239 default:
240 FinishTag();
241 break;
242 }
243 break;
244 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
245 EndSpanOrA(); // end the slash highlight
246 switch (aState) {
247 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
248 break;
249 default:
250 FinishTag();
251 break;
252 }
253 break;
254 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
255 switch (aState) {
256 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
257 EndSpanOrA();
258 break;
259 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
260 StartSpan();
261 StartSpan(); // for ampersand itself
262 mAmpersand = CurrentNode();
263 break;
264 default:
265 FinishTag();
266 break;
267 }
268 break;
269 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
270 switch (aState) {
271 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
272 StartSpan(); // for highlighting the slash
273 mSlash = CurrentNode();
274 break;
275 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
276 break;
277 case nsHtml5Tokenizer::ATTRIBUTE_NAME:
278 StartSpan(sAttributeName);
279 break;
280 default:
281 FinishTag();
282 break;
283 }
284 break;
285 // most comment states are omitted, because they don't matter to
286 // highlighting
287 case nsHtml5Tokenizer::COMMENT_START:
288 case nsHtml5Tokenizer::COMMENT_END:
289 case nsHtml5Tokenizer::COMMENT_END_BANG:
290 case nsHtml5Tokenizer::COMMENT_START_DASH:
291 case nsHtml5Tokenizer::BOGUS_COMMENT:
292 case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
293 if (aState == nsHtml5Tokenizer::DATA) {
294 AddClass(sComment);
295 FinishTag();
296 }
297 break;
298 // most cdata states are omitted, because they don't matter to
299 // highlighting
300 case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
301 if (aState == nsHtml5Tokenizer::DATA) {
302 AddClass(sCdata);
303 FinishTag();
304 }
305 break;
306 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
307 EndSpanOrA(); // the span for the ampersand
308 switch (aState) {
309 case nsHtml5Tokenizer::CONSUME_NCR:
310 case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
311 break;
312 default:
313 // not actually a character reference
314 EndSpanOrA();
315 break;
316 }
317 break;
318 case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
319 if (aState == nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL) {
320 break;
321 }
322 // not actually a character reference
323 EndSpanOrA();
324 break;
325 case nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL:
326 if (!aReconsume) {
327 FlushCurrent();
328 }
329 EndSpanOrA();
330 break;
331 case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
332 case nsHtml5Tokenizer::HEX_NCR_LOOP:
333 switch (aState) {
334 case nsHtml5Tokenizer::HANDLE_NCR_VALUE:
335 AddClass(sEntity);
336 FlushCurrent();
337 break;
338 case nsHtml5Tokenizer::HANDLE_NCR_VALUE_RECONSUME:
339 AddClass(sEntity);
340 break;
341 }
342 EndSpanOrA();
343 break;
344 case nsHtml5Tokenizer::CLOSE_TAG_OPEN:
345 switch (aState) {
346 case nsHtml5Tokenizer::DATA:
347 FinishTag();
348 break;
349 case nsHtml5Tokenizer::TAG_NAME:
350 StartSpan(sEndTag);
351 break;
352 }
353 break;
354 case nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN:
355 if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
356 FlushCurrent();
357 StartSpan(); // don't know if it is "end-tag" yet :-(
358 break;
359 }
360 EndSpanOrA();
361 StartCharacters();
362 break;
363 case nsHtml5Tokenizer::NON_DATA_END_TAG_NAME:
364 switch (aState) {
365 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
366 AddClass(sEndTag);
367 EndSpanOrA();
368 break;
369 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
370 AddClass(sEndTag);
371 EndSpanOrA();
372 StartSpan(); // for highlighting the slash
373 mSlash = CurrentNode();
374 break;
375 case nsHtml5Tokenizer::DATA: // yes, as a result of emitting the token
376 AddClass(sEndTag);
377 FinishTag();
378 break;
379 default:
380 FinishTag();
381 break;
382 }
383 break;
384 case nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN:
385 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
386 if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
387 FlushCurrent();
388 StartSpan(); // don't know if it is "end-tag" yet :-(
389 break;
390 }
391 FinishTag();
392 break;
393 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH:
394 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED:
395 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH:
396 if (aState == nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN) {
397 EndCharactersAndStartMarkupRun();
398 }
399 break;
400 // Lots of double escape states omitted, because they don't highlight.
401 // Likewise, only doctype states that can emit the doctype are of
402 // interest. Otherwise, the transition out of bogus comment deals.
403 case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
404 case nsHtml5Tokenizer::DOCTYPE_NAME:
405 case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
406 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
407 case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
408 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
409 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
410 case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
411 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
412 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
413 case nsHtml5Tokenizer::BOGUS_DOCTYPE:
414 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
415 case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
416 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
417 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
418 if (aState == nsHtml5Tokenizer::DATA) {
419 AddClass(sDoctype);
420 FinishTag();
421 }
422 break;
423 case nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK:
424 if (aState == nsHtml5Tokenizer::DATA) {
425 FinishTag();
426 }
427 break;
428 default:
429 break;
430 }
431 mState = aState;
432 return aState;
433 }
434
End()435 void nsHtml5Highlighter::End() {
436 switch (mState) {
437 case nsHtml5Tokenizer::COMMENT_END:
438 case nsHtml5Tokenizer::COMMENT_END_BANG:
439 case nsHtml5Tokenizer::COMMENT_START_DASH:
440 case nsHtml5Tokenizer::BOGUS_COMMENT:
441 case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
442 AddClass(sComment);
443 break;
444 case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
445 AddClass(sCdata);
446 break;
447 case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
448 case nsHtml5Tokenizer::HEX_NCR_LOOP:
449 // XXX need tokenizer help here
450 break;
451 case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
452 case nsHtml5Tokenizer::DOCTYPE_NAME:
453 case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
454 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
455 case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
456 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
457 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
458 case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
459 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
460 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
461 case nsHtml5Tokenizer::BOGUS_DOCTYPE:
462 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
463 case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
464 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
465 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
466 AddClass(sDoctype);
467 break;
468 default:
469 break;
470 }
471 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
472 NS_ASSERTION(treeOp, "Tree op allocation failed.");
473 treeOp->Init(mozilla::AsVariant(opStreamEnded()));
474 FlushOps();
475 }
476
SetBuffer(nsHtml5UTF16Buffer * aBuffer)477 void nsHtml5Highlighter::SetBuffer(nsHtml5UTF16Buffer* aBuffer) {
478 MOZ_ASSERT(!mBuffer, "Old buffer still here!");
479 mBuffer = aBuffer;
480 mCStart = aBuffer->getStart();
481 }
482
DropBuffer(int32_t aPos)483 void nsHtml5Highlighter::DropBuffer(int32_t aPos) {
484 MOZ_ASSERT(mBuffer, "No buffer to drop!");
485 mPos = aPos;
486 FlushChars();
487 mBuffer = nullptr;
488 }
489
StartSpan()490 void nsHtml5Highlighter::StartSpan() {
491 FlushChars();
492 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
493 ++mInlinesOpen;
494 }
495
StartSpan(const char16_t * aClass)496 void nsHtml5Highlighter::StartSpan(const char16_t* aClass) {
497 StartSpan();
498 AddClass(aClass);
499 }
500
EndSpanOrA()501 void nsHtml5Highlighter::EndSpanOrA() {
502 FlushChars();
503 Pop();
504 --mInlinesOpen;
505 }
506
StartCharacters()507 void nsHtml5Highlighter::StartCharacters() {
508 MOZ_ASSERT(!mInCharacters, "Already in characters!");
509 FlushChars();
510 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
511 mCurrentRun = CurrentNode();
512 mInCharacters = true;
513 }
514
EndCharactersAndStartMarkupRun()515 void nsHtml5Highlighter::EndCharactersAndStartMarkupRun() {
516 MOZ_ASSERT(mInCharacters, "Not in characters!");
517 FlushChars();
518 Pop();
519 mInCharacters = false;
520 // Now start markup run
521 StartSpan();
522 mCurrentRun = CurrentNode();
523 }
524
StartA()525 void nsHtml5Highlighter::StartA() {
526 FlushChars();
527 Push(nsGkAtoms::a, nullptr, NS_NewHTMLAnchorElement);
528 AddClass(sAttributeValue);
529 ++mInlinesOpen;
530 }
531
FinishTag()532 void nsHtml5Highlighter::FinishTag() {
533 while (mInlinesOpen > 1) {
534 EndSpanOrA();
535 }
536 FlushCurrent(); // >
537 EndSpanOrA(); // DATA
538 NS_ASSERTION(!mInlinesOpen, "mInlinesOpen got out of sync!");
539 StartCharacters();
540 }
541
FlushChars()542 void nsHtml5Highlighter::FlushChars() {
543 if (mCStart < mPos) {
544 char16_t* buf = mBuffer->getBuffer();
545 int32_t i = mCStart;
546 while (i < mPos) {
547 char16_t c = buf[i];
548 switch (c) {
549 case '\r':
550 // The input this code sees has been normalized so that there are
551 // CR breaks and LF breaks but no CRLF breaks. Overwrite CR with LF
552 // to show consistent LF line breaks to layout. It is OK to mutate
553 // the input data, because there are no reparses in the View Source
554 // case, so we won't need the original data in the buffer anymore.
555 buf[i] = '\n';
556 [[fallthrough]];
557 case '\n': {
558 ++i;
559 if (mCStart < i) {
560 int32_t len = i - mCStart;
561 AppendCharacters(buf, mCStart, len);
562 mCStart = i;
563 }
564 ++mLineNumber;
565 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
566 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
567 NS_ASSERTION(treeOp, "Tree op allocation failed.");
568 opAddLineNumberId operation(CurrentNode(), mLineNumber);
569 treeOp->Init(mozilla::AsVariant(operation));
570 Pop();
571 break;
572 }
573 default:
574 ++i;
575 break;
576 }
577 }
578 if (mCStart < mPos) {
579 int32_t len = mPos - mCStart;
580 AppendCharacters(buf, mCStart, len);
581 mCStart = mPos;
582 }
583 }
584 }
585
FlushCurrent()586 void nsHtml5Highlighter::FlushCurrent() {
587 mPos++;
588 FlushChars();
589 }
590
FlushOps()591 bool nsHtml5Highlighter::FlushOps() {
592 bool hasOps = !mOpQueue.IsEmpty();
593 if (hasOps) {
594 mOpSink->MoveOpsFrom(mOpQueue);
595 }
596 return hasOps;
597 }
598
MaybeLinkifyAttributeValue(nsHtml5AttributeName * aName,nsHtml5String aValue)599 void nsHtml5Highlighter::MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
600 nsHtml5String aValue) {
601 if (!(nsHtml5AttributeName::ATTR_HREF == aName ||
602 nsHtml5AttributeName::ATTR_SRC == aName ||
603 nsHtml5AttributeName::ATTR_ACTION == aName ||
604 nsHtml5AttributeName::ATTR_CITE == aName ||
605 nsHtml5AttributeName::ATTR_BACKGROUND == aName ||
606 nsHtml5AttributeName::ATTR_LONGDESC == aName ||
607 nsHtml5AttributeName::ATTR_XLINK_HREF == aName ||
608 nsHtml5AttributeName::ATTR_DEFINITIONURL == aName)) {
609 return;
610 }
611 AddViewSourceHref(aValue);
612 }
613
CompletedNamedCharacterReference()614 void nsHtml5Highlighter::CompletedNamedCharacterReference() {
615 AddClass(sEntity);
616 }
617
AllocateContentHandle()618 nsIContent** nsHtml5Highlighter::AllocateContentHandle() {
619 if (mHandlesUsed == NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH) {
620 mOldHandles.AppendElement(std::move(mHandles));
621 mHandles =
622 MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH);
623 mHandlesUsed = 0;
624 }
625 #ifdef DEBUG
626 mHandles[mHandlesUsed] = reinterpret_cast<nsIContent*>(uintptr_t(0xC0DEDBAD));
627 #endif
628 return &mHandles[mHandlesUsed++];
629 }
630
CreateElement(nsAtom * aName,nsHtml5HtmlAttributes * aAttributes,nsIContent ** aIntendedParent,mozilla::dom::HTMLContentCreatorFunction aCreator)631 nsIContent** nsHtml5Highlighter::CreateElement(
632 nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
633 nsIContent** aIntendedParent,
634 mozilla::dom::HTMLContentCreatorFunction aCreator) {
635 MOZ_ASSERT(aName, "Got null name.");
636 nsIContent** content = AllocateContentHandle();
637 opCreateHTMLElement opeation(content, aName, aAttributes, aCreator,
638 aIntendedParent,
639 mozilla::dom::FROM_PARSER_NETWORK);
640 mOpQueue.AppendElement()->Init(mozilla::AsVariant(opeation));
641 return content;
642 }
643
CurrentNode()644 nsIContent** nsHtml5Highlighter::CurrentNode() {
645 MOZ_ASSERT(mStack.Length() >= 1, "Must have something on stack.");
646 return mStack[mStack.Length() - 1];
647 }
648
Push(nsAtom * aName,nsHtml5HtmlAttributes * aAttributes,mozilla::dom::HTMLContentCreatorFunction aCreator)649 void nsHtml5Highlighter::Push(
650 nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
651 mozilla::dom::HTMLContentCreatorFunction aCreator) {
652 MOZ_ASSERT(mStack.Length() >= 1, "Pushing without root.");
653 nsIContent** elt = CreateElement(aName, aAttributes, CurrentNode(),
654 aCreator); // Don't inline below!
655 opAppend operation(elt, CurrentNode());
656 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
657 mStack.AppendElement(elt);
658 }
659
Pop()660 void nsHtml5Highlighter::Pop() {
661 MOZ_ASSERT(mStack.Length() >= 2, "Popping when stack too short.");
662 mStack.RemoveLastElement();
663 }
664
AppendCharacters(const char16_t * aBuffer,int32_t aStart,int32_t aLength)665 void nsHtml5Highlighter::AppendCharacters(const char16_t* aBuffer,
666 int32_t aStart, int32_t aLength) {
667 MOZ_ASSERT(aBuffer, "Null buffer");
668
669 char16_t* bufferCopy = new char16_t[aLength];
670 memcpy(bufferCopy, aBuffer + aStart, aLength * sizeof(char16_t));
671
672 opAppendText operation(CurrentNode(), bufferCopy, aLength);
673 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
674 }
675
AddClass(const char16_t * aClass)676 void nsHtml5Highlighter::AddClass(const char16_t* aClass) {
677 opAddClass operation(CurrentNode(), (char16_t*)aClass);
678 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
679 }
680
AddViewSourceHref(nsHtml5String aValue)681 void nsHtml5Highlighter::AddViewSourceHref(nsHtml5String aValue) {
682 char16_t* bufferCopy = new char16_t[aValue.Length() + 1];
683 aValue.CopyToBuffer(bufferCopy);
684 bufferCopy[aValue.Length()] = 0;
685
686 opAddViewSourceHref operation(CurrentNode(), bufferCopy, aValue.Length());
687 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
688 }
689
AddBase(nsHtml5String aValue)690 void nsHtml5Highlighter::AddBase(nsHtml5String aValue) {
691 if (mSeenBase) {
692 return;
693 }
694 mSeenBase = true;
695 char16_t* bufferCopy = new char16_t[aValue.Length() + 1];
696 aValue.CopyToBuffer(bufferCopy);
697 bufferCopy[aValue.Length()] = 0;
698
699 opAddViewSourceBase operation(bufferCopy, aValue.Length());
700 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation));
701 }
702
AddErrorToCurrentNode(const char * aMsgId)703 void nsHtml5Highlighter::AddErrorToCurrentNode(const char* aMsgId) {
704 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
705 NS_ASSERTION(treeOp, "Tree op allocation failed.");
706 opAddErrorType operation(CurrentNode(), (char*)aMsgId);
707 treeOp->Init(mozilla::AsVariant(operation));
708 }
709
AddErrorToCurrentRun(const char * aMsgId)710 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId) {
711 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
712 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
713 NS_ASSERTION(treeOp, "Tree op allocation failed.");
714 opAddErrorType operation(mCurrentRun, (char*)aMsgId);
715 treeOp->Init(mozilla::AsVariant(operation));
716 }
717
AddErrorToCurrentRun(const char * aMsgId,nsAtom * aName)718 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId,
719 nsAtom* aName) {
720 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
721 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
722 NS_ASSERTION(treeOp, "Tree op allocation failed.");
723 opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName);
724 treeOp->Init(mozilla::AsVariant(operation));
725 }
726
AddErrorToCurrentRun(const char * aMsgId,nsAtom * aName,nsAtom * aOther)727 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName,
728 nsAtom* aOther) {
729 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
730 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
731 NS_ASSERTION(treeOp, "Tree op allocation failed.");
732 opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName, aOther);
733 treeOp->Init(mozilla::AsVariant(operation));
734 }
735
AddErrorToCurrentAmpersand(const char * aMsgId)736 void nsHtml5Highlighter::AddErrorToCurrentAmpersand(const char* aMsgId) {
737 MOZ_ASSERT(mAmpersand, "Adding error to ampersand without one!");
738 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
739 NS_ASSERTION(treeOp, "Tree op allocation failed.");
740 opAddErrorType operation(mAmpersand, (char*)aMsgId);
741 treeOp->Init(mozilla::AsVariant(operation));
742 }
743
AddErrorToCurrentSlash(const char * aMsgId)744 void nsHtml5Highlighter::AddErrorToCurrentSlash(const char* aMsgId) {
745 MOZ_ASSERT(mSlash, "Adding error to slash without one!");
746 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
747 NS_ASSERTION(treeOp, "Tree op allocation failed.");
748 opAddErrorType operation(mSlash, (char*)aMsgId);
749 treeOp->Init(mozilla::AsVariant(operation));
750 }
751