1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 /*
8 * nsIContentSerializer implementation that can be used with an
9 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
10 * (eg for copy/paste as plaintext).
11 */
12
13 #include "nsPlainTextSerializer.h"
14
15 #include <limits>
16
17 #include "nsPrintfCString.h"
18 #include "nsDebug.h"
19 #include "nsGkAtoms.h"
20 #include "nsNameSpaceManager.h"
21 #include "nsTextFragment.h"
22 #include "nsContentUtils.h"
23 #include "nsReadableUtils.h"
24 #include "nsUnicharUtils.h"
25 #include "nsCRT.h"
26 #include "mozilla/EditorUtils.h"
27 #include "mozilla/dom/CharacterData.h"
28 #include "mozilla/dom/Element.h"
29 #include "mozilla/dom/HTMLBRElement.h"
30 #include "mozilla/dom/Text.h"
31 #include "mozilla/Preferences.h"
32 #include "mozilla/StaticPrefs_converter.h"
33 #include "mozilla/BinarySearch.h"
34 #include "nsComputedDOMStyle.h"
35
36 namespace mozilla {
37 class Encoding;
38 }
39
40 using namespace mozilla;
41 using namespace mozilla::dom;
42
43 #define PREF_STRUCTS "converter.html2txt.structs"
44 #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
45
// Basic indentation unit, in columns. List, <dd> and non-cite blockquote
// indentation are expressed in terms of it.
static constexpr int32_t kTabSize = 4;

// Indentation of h1 if mHeaderStrategy is kIndentIncreasedWithHeaderLevel or
// kNumberHeadingsAndIndentSlightly. Indentation of other headers is derived
// from that.
static constexpr int32_t kIndentSizeHeaders = 2;

// If mHeaderStrategy is kIndentIncreasedWithHeaderLevel, indent h(x+1) this
// many columns more than h(x).
static constexpr int32_t kIndentIncrementHeaders = 2;

// Indentation of non-first lines of ul and ol.
static constexpr int32_t kIndentSizeList = kTabSize;

// Indentation of <dd>.
static constexpr int32_t kIndentSizeDD = kTabSize;

// Code unit 160 (0x00A0), the non-breaking space, and the plain space it may
// be replaced with.
static constexpr char16_t kNBSP = 0x00A0;
static constexpr char16_t kSPACE = u' ';

constexpr int32_t kNoFlags = 0;
63
// Forward declarations of file-local helpers that are used before their
// definitions.

// Level of a header tag; used below to index mHeaderCounter and to scale the
// indentation of h1..h6.
static int32_t HeaderLevel(const nsAtom* aTag);
// Width of a single UTF-16 code unit, as subtracted from the line width when
// looking for a wrap position.
static int32_t GetUnicharWidth(char16_t ucs);
// Width of a whole string; FindWrapIndexForContent asserts that its
// aContentWidth argument equals this value for the current line content.
static int32_t GetUnicharStringWidth(const nsString& aString);

// Number of entries allocated for mTagStack; tags opened beyond this depth
// are not recorded on the stack.
// Someday may want to make this non-const:
static const uint32_t TagStackSize = 500;
70
// XPCOM boilerplate for nsPlainTextSerializer: cycle-collecting
// AddRef/Release, the interface map (nsIContentSerializer, nsISupports) and
// the cycle-collection participant, which lists mElement.
// NOTE(review): the exact expansion comes from the macro definitions, which
// are not visible in this file.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
  NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
  NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer, mElement)
80
81 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
82 RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
83 it.forget(aSerializer);
84 return NS_OK;
85 }
86
87 // @param aFlags As defined in nsIDocumentEncoder.idl.
DetermineLineBreak(const int32_t aFlags,nsAString & aLineBreak)88 static void DetermineLineBreak(const int32_t aFlags, nsAString& aLineBreak) {
89 // Set the line break character:
90 if ((aFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
91 (aFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
92 // Windows
93 aLineBreak.AssignLiteral(u"\r\n");
94 } else if (aFlags & nsIDocumentEncoder::OutputCRLineBreak) {
95 // Mac
96 aLineBreak.AssignLiteral(u"\r");
97 } else if (aFlags & nsIDocumentEncoder::OutputLFLineBreak) {
98 // Unix/DOM
99 aLineBreak.AssignLiteral(u"\n");
100 } else {
101 // Platform/default
102 aLineBreak.AssignLiteral(NS_ULINEBREAK);
103 }
104 }
105
MaybeReplaceNbspsInContent(const int32_t aFlags)106 void nsPlainTextSerializer::CurrentLine::MaybeReplaceNbspsInContent(
107 const int32_t aFlags) {
108 if (!(aFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
109 // First, replace all nbsp characters with spaces,
110 // which the unicode encoder won't do for us.
111 mContent.ReplaceChar(kNBSP, kSPACE);
112 }
113 }
114
ResetContentAndIndentationHeader()115 void nsPlainTextSerializer::CurrentLine::ResetContentAndIndentationHeader() {
116 mContent.Truncate();
117 mIndentation.mHeader.Truncate();
118 }
119
// Finds the index in mContent at which the current line should be broken so
// that prefix plus content fits into aWrapColumn where possible.
//
// First a break opportunity at or before the wrap column is searched. If
// there is none, an over-long line is accepted and the first break
// opportunity after the wrap column is used instead.
//
// @param aWrapColumn   Column at which the line should wrap.
// @param aContentWidth Width of mContent; asserted to equal
//                      GetUnicharStringWidth(mContent).
// @param aLineBreaker  May be null. If null, only ASCII whitespace is
//                      treated as a break opportunity.
// @return The chosen index into mContent. In the fallback path this can be
//         mContent.Length() when no break opportunity is found.
int32_t nsPlainTextSerializer::CurrentLine::FindWrapIndexForContent(
    const uint32_t aWrapColumn, const uint32_t aContentWidth,
    mozilla::intl::LineBreaker* aLineBreaker) const {
  MOZ_ASSERT(aContentWidth < std::numeric_limits<int32_t>::max());
  MOZ_ASSERT(static_cast<int32_t>(aContentWidth) ==
             GetUnicharStringWidth(mContent));

  const uint32_t prefixwidth = DeterminePrefixWidth();
  int32_t goodSpace = mContent.Length();

  if (aLineBreaker) {
    // We go from the end removing one letter at a time until
    // we have a reasonable width
    uint32_t width = aContentWidth;
    while (goodSpace > 0 && (width + prefixwidth > aWrapColumn)) {
      goodSpace--;
      width -= GetUnicharWidth(mContent[goodSpace]);
    }

    // Step one position forward again before asking the line breaker for the
    // previous break opportunity.
    goodSpace++;

    goodSpace =
        aLineBreaker->Prev(mContent.get(), mContent.Length(), goodSpace);
    if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
        nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace - 1))) {
      --goodSpace;  // adjust the position since line breaker returns a
                    // position next to space
    }
  } else {
    // In this case we don't want strings, especially CJK-ones, to be split.
    // See
    // https://bugzilla.mozilla.org/show_bug.cgi?id=333064 for more
    // information.

    if (mContent.IsEmpty() || aWrapColumn < prefixwidth) {
      goodSpace = NS_LINEBREAKER_NEED_MORE_TEXT;
    } else {
      // Scan backwards from the wrap column (or the last character) for an
      // ASCII space.
      // NOTE(review): when no space is found the loop ends with goodSpace ==
      // -1, which presumably equals NS_LINEBREAKER_NEED_MORE_TEXT and thereby
      // triggers the fallback below -- confirm against the macro definition.
      goodSpace =
          std::min<int32_t>(aWrapColumn - prefixwidth, mContent.Length() - 1);
      while (goodSpace >= 0 &&
             !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
        goodSpace--;
      }
    }
  }

  if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
    // If we didn't find a good place to break, accept long line and
    // try to find another place to break
    goodSpace =
        (prefixwidth > aWrapColumn + 1) ? 1 : aWrapColumn - prefixwidth + 1;
    if (aLineBreaker) {
      // Only ask for the next break opportunity if the start position is
      // still inside the content.
      if ((uint32_t)goodSpace < mContent.Length())
        goodSpace =
            aLineBreaker->Next(mContent.get(), mContent.Length(), goodSpace);
      if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
        goodSpace = mContent.Length();
    } else {
      // In this case we don't want strings, especially CJK-ones, to be
      // split. See
      // https://bugzilla.mozilla.org/show_bug.cgi?id=333064 for more
      // information.
      goodSpace = (prefixwidth > aWrapColumn) ? 1 : aWrapColumn - prefixwidth;
      const int32_t contentLength = mContent.Length();
      // Scan forwards for the first ASCII space; stops at the content length
      // if there is none.
      while (goodSpace < contentLength &&
             !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
        goodSpace++;
      }
    }
  }
  return goodSpace;
}
192
OutputManager(const int32_t aFlags,nsAString & aOutput)193 nsPlainTextSerializer::OutputManager::OutputManager(const int32_t aFlags,
194 nsAString& aOutput)
195 : mFlags{aFlags}, mOutput{aOutput}, mAtFirstColumn{true} {
196 MOZ_ASSERT(aOutput.IsEmpty());
197
198 DetermineLineBreak(mFlags, mLineBreak);
199 }
200
Append(const CurrentLine & aCurrentLine,const StripTrailingWhitespaces aStripTrailingWhitespaces)201 void nsPlainTextSerializer::OutputManager::Append(
202 const CurrentLine& aCurrentLine,
203 const StripTrailingWhitespaces aStripTrailingWhitespaces) {
204 if (IsAtFirstColumn()) {
205 nsAutoString quotesAndIndent;
206 aCurrentLine.CreateQuotesAndIndent(quotesAndIndent);
207
208 if ((aStripTrailingWhitespaces == StripTrailingWhitespaces::kMaybe)) {
209 const bool stripTrailingSpaces = aCurrentLine.mContent.IsEmpty();
210 if (stripTrailingSpaces) {
211 quotesAndIndent.Trim(" ", false, true, false);
212 }
213 }
214
215 Append(quotesAndIndent);
216 }
217
218 Append(aCurrentLine.mContent);
219 }
220
Append(const nsAString & aString)221 void nsPlainTextSerializer::OutputManager::Append(const nsAString& aString) {
222 if (!aString.IsEmpty()) {
223 mOutput.Append(aString);
224 mAtFirstColumn = false;
225 }
226 }
227
AppendLineBreak()228 void nsPlainTextSerializer::OutputManager::AppendLineBreak() {
229 mOutput.Append(mLineBreak);
230 mAtFirstColumn = true;
231 }
232
GetOutputLength() const233 uint32_t nsPlainTextSerializer::OutputManager::GetOutputLength() const {
234 return mOutput.Length();
235 }
236
nsPlainTextSerializer()237 nsPlainTextSerializer::nsPlainTextSerializer()
238 : mFloatingLines(-1),
239 mLineBreakDue(false),
240 kSpace(u" "_ns) // Init of "constant"
241 {
242 mHeadLevel = 0;
243 mHasWrittenCiteBlockquote = false;
244 mSpanLevel = 0;
245 for (int32_t i = 0; i <= 6; i++) {
246 mHeaderCounter[i] = 0;
247 }
248
249 // Flow
250 mEmptyLines = 1; // The start of the document is an "empty line" in itself,
251 mInWhitespace = false;
252 mPreFormattedMail = false;
253
254 mPreformattedBlockBoundary = false;
255
256 // initialize the tag stack to zero:
257 // The stack only ever contains pointers to static atoms, so they don't
258 // need refcounting.
259 mTagStack = new const nsAtom*[TagStackSize];
260 mTagStackIndex = 0;
261 mIgnoreAboveIndex = (uint32_t)kNotFound;
262
263 mULCount = 0;
264
265 mIgnoredChildNodeLevel = 0;
266 }
267
~nsPlainTextSerializer()268 nsPlainTextSerializer::~nsPlainTextSerializer() {
269 delete[] mTagStack;
270 NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
271 }
272
273 nsPlainTextSerializer::Settings::HeaderStrategy
Convert(const int32_t aPrefHeaderStrategy)274 nsPlainTextSerializer::Settings::Convert(const int32_t aPrefHeaderStrategy) {
275 HeaderStrategy result{HeaderStrategy::kIndentIncreasedWithHeaderLevel};
276
277 switch (aPrefHeaderStrategy) {
278 case 0: {
279 result = HeaderStrategy::kNoIndentation;
280 break;
281 }
282 case 1: {
283 result = HeaderStrategy::kIndentIncreasedWithHeaderLevel;
284 break;
285 }
286 case 2: {
287 result = HeaderStrategy::kNumberHeadingsAndIndentSlightly;
288 break;
289 }
290 default: {
291 NS_WARNING(
292 nsPrintfCString("Header strategy pref contains undefined value: %i",
293 aPrefHeaderStrategy)
294 .get());
295 }
296 }
297
298 return result;
299 }
300
301 const int32_t kDefaultHeaderStrategy = 1;
302
Init(const int32_t aFlags,const uint32_t aWrapColumn)303 void nsPlainTextSerializer::Settings::Init(const int32_t aFlags,
304 const uint32_t aWrapColumn) {
305 mFlags = aFlags;
306
307 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
308 // Get some prefs that controls how we do formatted output
309 mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
310
311 int32_t headerStrategy =
312 Preferences::GetInt(PREF_HEADER_STRATEGY, kDefaultHeaderStrategy);
313 mHeaderStrategy = Convert(headerStrategy);
314 }
315
316 mWithRubyAnnotation = StaticPrefs::converter_html2txt_always_include_ruby() ||
317 (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);
318
319 // XXX We should let the caller decide whether to do this or not
320 mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
321
322 mWrapColumn = aWrapColumn;
323 }
324
325 NS_IMETHODIMP
Init(const uint32_t aFlags,uint32_t aWrapColumn,const Encoding * aEncoding,bool aIsCopying,bool aIsWholeDocument,bool * aNeedsPreformatScanning,nsAString & aOutput)326 nsPlainTextSerializer::Init(const uint32_t aFlags, uint32_t aWrapColumn,
327 const Encoding* aEncoding, bool aIsCopying,
328 bool aIsWholeDocument,
329 bool* aNeedsPreformatScanning, nsAString& aOutput) {
330 #ifdef DEBUG
331 // Check if the major control flags are set correctly.
332 if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
333 NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
334 "If you want format=flowed, you must combine it with "
335 "nsIDocumentEncoder::OutputFormatted");
336 }
337
338 if (aFlags & nsIDocumentEncoder::OutputFormatted) {
339 NS_ASSERTION(
340 !(aFlags & nsIDocumentEncoder::OutputPreformatted),
341 "Can't do formatted and preformatted output at the same time!");
342 }
343 #endif
344 MOZ_ASSERT(!(aFlags & nsIDocumentEncoder::OutputFormatDelSp) ||
345 (aFlags & nsIDocumentEncoder::OutputFormatFlowed));
346
347 *aNeedsPreformatScanning = true;
348 mSettings.Init(aFlags, aWrapColumn);
349 mOutputManager.emplace(mSettings.GetFlags(), aOutput);
350
351 if (mSettings.MayWrap() && mSettings.MayBreakLines()) {
352 mLineBreaker = nsContentUtils::LineBreaker();
353 }
354
355 mLineBreakDue = false;
356 mFloatingLines = -1;
357
358 mPreformattedBlockBoundary = false;
359
360 MOZ_ASSERT(mOLStack.IsEmpty());
361
362 return NS_OK;
363 }
364
GetLastBool(const nsTArray<bool> & aStack)365 bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
366 uint32_t size = aStack.Length();
367 if (size == 0) {
368 return false;
369 }
370 return aStack.ElementAt(size - 1);
371 }
372
SetLastBool(nsTArray<bool> & aStack,bool aValue)373 void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
374 uint32_t size = aStack.Length();
375 if (size > 0) {
376 aStack.ElementAt(size - 1) = aValue;
377 } else {
378 NS_ERROR("There is no \"Last\" value");
379 }
380 }
381
PushBool(nsTArray<bool> & aStack,bool aValue)382 void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
383 aStack.AppendElement(bool(aValue));
384 }
385
PopBool(nsTArray<bool> & aStack)386 bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
387 return aStack.Length() ? aStack.PopLastElement() : false;
388 }
389
IsIgnorableRubyAnnotation(const nsAtom * aTag) const390 bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(
391 const nsAtom* aTag) const {
392 if (mSettings.GetWithRubyAnnotation()) {
393 return false;
394 }
395
396 return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt ||
397 aTag == nsGkAtoms::rtc;
398 }
399
400 // Return true if aElement has 'display:none' or if we just don't know.
IsDisplayNone(Element * aElement)401 static bool IsDisplayNone(Element* aElement) {
402 RefPtr<ComputedStyle> computedStyle =
403 nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
404 return !computedStyle ||
405 computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
406 }
407
IsIgnorableScriptOrStyle(Element * aElement)408 static bool IsIgnorableScriptOrStyle(Element* aElement) {
409 return aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style) &&
410 IsDisplayNone(aElement);
411 }
412
413 NS_IMETHODIMP
AppendText(nsIContent * aText,int32_t aStartOffset,int32_t aEndOffset)414 nsPlainTextSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
415 int32_t aEndOffset) {
416 if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
417 return NS_OK;
418 }
419
420 NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
421 if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;
422
423 NS_ENSURE_ARG(aText);
424
425 nsresult rv = NS_OK;
426
427 nsIContent* content = aText;
428 const nsTextFragment* frag;
429 if (!content || !(frag = content->GetText())) {
430 return NS_ERROR_FAILURE;
431 }
432
433 int32_t fragLength = frag->GetLength();
434 int32_t endoffset =
435 (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
436 NS_ASSERTION(aStartOffset <= endoffset,
437 "A start offset is beyond the end of the text fragment!");
438
439 int32_t length = endoffset - aStartOffset;
440 if (length <= 0) {
441 return NS_OK;
442 }
443
444 nsAutoString textstr;
445 if (frag->Is2b()) {
446 textstr.Assign(frag->Get2b() + aStartOffset, length);
447 } else {
448 // AssignASCII is for 7-bit character only, so don't use it
449 const char* data = frag->Get1b();
450 CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
451 }
452
453 // Mask the text if the text node is in a password field.
454 if (content->HasFlag(NS_MAYBE_MASKED)) {
455 EditorUtils::MaskString(textstr, content->AsText(), 0, aStartOffset);
456 }
457
458 // We have to split the string across newlines
459 // to match parser behavior
460 int32_t start = 0;
461 int32_t offset = textstr.FindCharInSet("\n\r");
462 while (offset != kNotFound) {
463 if (offset > start) {
464 // Pass in the line
465 DoAddText(false, Substring(textstr, start, offset - start));
466 }
467
468 // Pass in a newline
469 DoAddText();
470
471 start = offset + 1;
472 offset = textstr.FindCharInSet("\n\r", start);
473 }
474
475 // Consume the last bit of the string if there's any left
476 if (start < length) {
477 if (start) {
478 DoAddText(false, Substring(textstr, start, length - start));
479 } else {
480 DoAddText(false, textstr);
481 }
482 }
483
484 return rv;
485 }
486
487 NS_IMETHODIMP
AppendCDATASection(nsIContent * aCDATASection,int32_t aStartOffset,int32_t aEndOffset)488 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
489 int32_t aStartOffset,
490 int32_t aEndOffset) {
491 return AppendText(aCDATASection, aStartOffset, aEndOffset);
492 }
493
494 NS_IMETHODIMP
ScanElementForPreformat(Element * aElement)495 nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
496 mPreformatStack.push(IsElementPreformatted(aElement));
497 return NS_OK;
498 }
499
500 NS_IMETHODIMP
ForgetElementForPreformat(Element * aElement)501 nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
502 MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
503 "Tried to pop without previous push.");
504 mPreformatStack.pop();
505 return NS_OK;
506 }
507
508 NS_IMETHODIMP
AppendElementStart(Element * aElement,Element * aOriginalElement)509 nsPlainTextSerializer::AppendElementStart(Element* aElement,
510 Element* aOriginalElement) {
511 NS_ENSURE_ARG(aElement);
512
513 mElement = aElement;
514
515 nsresult rv;
516 nsAtom* id = GetIdForContent(mElement);
517
518 bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
519
520 if (isContainer) {
521 rv = DoOpenContainer(id);
522 } else {
523 rv = DoAddLeaf(id);
524 }
525
526 mElement = nullptr;
527
528 if (id == nsGkAtoms::head) {
529 ++mHeadLevel;
530 }
531
532 return rv;
533 }
534
535 NS_IMETHODIMP
AppendElementEnd(Element * aElement,Element * aOriginalElement)536 nsPlainTextSerializer::AppendElementEnd(Element* aElement,
537 Element* aOriginalElement) {
538 NS_ENSURE_ARG(aElement);
539
540 mElement = aElement;
541
542 nsresult rv;
543 nsAtom* id = GetIdForContent(mElement);
544
545 bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
546
547 rv = NS_OK;
548 if (isContainer) {
549 rv = DoCloseContainer(id);
550 }
551
552 mElement = nullptr;
553
554 if (id == nsGkAtoms::head) {
555 NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
556 --mHeadLevel;
557 }
558
559 return rv;
560 }
561
562 NS_IMETHODIMP
FlushAndFinish()563 nsPlainTextSerializer::FlushAndFinish() {
564 MOZ_ASSERT(mOutputManager);
565
566 mOutputManager->Flush(mCurrentLine);
567 return Finish();
568 }
569
570 NS_IMETHODIMP
Finish()571 nsPlainTextSerializer::Finish() {
572 mOutputManager.reset();
573
574 return NS_OK;
575 }
576
577 NS_IMETHODIMP
GetOutputLength(uint32_t & aLength) const578 nsPlainTextSerializer::GetOutputLength(uint32_t& aLength) const {
579 MOZ_ASSERT(mOutputManager);
580
581 aLength = mOutputManager->GetOutputLength();
582
583 return NS_OK;
584 }
585
586 NS_IMETHODIMP
AppendDocumentStart(Document * aDocument)587 nsPlainTextSerializer::AppendDocumentStart(Document* aDocument) {
588 return NS_OK;
589 }
590
// Value pushed onto mOLStack for an <ol> when the output is not formatted,
// i.e. when the start attribute is not evaluated.
constexpr int32_t kOlStackDummyValue = 0;

// Handles the start tag of a container element (mElement, identified by
// aTag): updates the tag stack and the ignore state, and emits or schedules
// the vertical space, indentation and markers appropriate for the tag.
//
// @param aTag The tag of the element being opened.
// @return Always NS_OK.
nsresult nsPlainTextSerializer::DoOpenContainer(const nsAtom* aTag) {
  if (IsIgnorableRubyAnnotation(aTag)) {
    // Ignorable ruby annotation shouldn't be replaced by a placeholder
    // character, neither any of its descendants.
    mIgnoredChildNodeLevel++;
    return NS_OK;
  }
  if (IsIgnorableScriptOrStyle(mElement)) {
    mIgnoredChildNodeLevel++;
    return NS_OK;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    if (mPreformattedBlockBoundary && DoOutput()) {
      // Should always end a line, but get no more whitespace
      if (mFloatingLines < 0) mFloatingLines = 0;
      mLineBreakDue = true;
    }
    mPreformattedBlockBoundary = false;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
    // Raw means raw.  Don't even think about doing anything fancy
    // here like indenting, adding line breaks or any other
    // characters such as list item bullets, quote characters
    // around <q>, etc.

    return NS_OK;
  }

  // Record the tag on the tag stack. Tags nested deeper than TagStackSize
  // are not recorded.
  if (mTagStackIndex < TagStackSize) {
    mTagStack[mTagStackIndex++] = aTag;
  }

  // Inside an ignored subtree only the tag stack is maintained.
  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
    return NS_OK;
  }

  // Reset this so that <blockquote type=cite> doesn't affect the whitespace
  // above random <pre>s below it.
  mHasWrittenCiteBlockquote =
      mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;

  bool isInCiteBlockquote = false;

  // XXX special-case <blockquote type=cite> so that we don't add additional
  // newlines before the text.
  if (aTag == nsGkAtoms::blockquote) {
    nsAutoString value;
    nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
  }

  // Emit a line break that is still pending from an earlier tag.
  if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);

  // Check if this tag's content that should not be output
  if ((aTag == nsGkAtoms::noscript &&
       !mSettings.HasFlag(nsIDocumentEncoder::OutputNoScriptContent)) ||
      ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
       !mSettings.HasFlag(nsIDocumentEncoder::OutputNoFramesContent))) {
    // Ignore everything that follows the current tag in
    // question until a matching end tag is encountered.
    mIgnoreAboveIndex = mTagStackIndex - 1;
    return NS_OK;
  }

  if (aTag == nsGkAtoms::body) {
    // Try to figure out here whether we have a
    // preformatted style attribute set by Thunderbird.
    //
    // Trigger on the presence of a "pre-wrap" in the
    // style attribute. That's a very simplistic way to do
    // it, but better than nothing.
    nsAutoString style;
    int32_t whitespace;
    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
        (kNotFound != (whitespace = style.Find("white-space:")))) {
      if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormattedMail based on style pre-wrap\n");
#endif
        mPreFormattedMail = true;
      } else if (kNotFound != style.Find("pre", true, whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormattedMail based on style pre\n");
#endif
        mPreFormattedMail = true;
      }
    } else {
      /* See comment at end of function. */
      mInWhitespace = true;
      mPreFormattedMail = false;
    }

    return NS_OK;
  }

  // Keep this in sync with DoCloseContainer!
  if (!DoOutput()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::p)
    EnsureVerticalSpace(1);
  else if (aTag == nsGkAtoms::pre) {
    if (GetLastBool(mIsInCiteBlockquote))
      EnsureVerticalSpace(0);
    else if (mHasWrittenCiteBlockquote) {
      EnsureVerticalSpace(0);
      mHasWrittenCiteBlockquote = false;
    } else
      EnsureVerticalSpace(1);
  } else if (aTag == nsGkAtoms::tr) {
    // No cell has been written for this row yet.
    PushBool(mHasWrittenCellsForRow, false);
  } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
    // We must make sure that the content of two table cells get a
    // space between them.

    // To make the separation between cells most obvious and
    // importable, we use a TAB.
    if (mHasWrittenCellsForRow.IsEmpty()) {
      // We don't always see a <tr> (nor a <table>) before the <td> if we're
      // copying part of a table
      PushBool(mHasWrittenCellsForRow, true);  // will never be popped
    } else if (GetLastBool(mHasWrittenCellsForRow)) {
      // Bypass |Write| so that the TAB isn't compressed away.
      AddToLine(u"\t", 1);
      mInWhitespace = true;
    } else {
      // First cell of the row: no separator needed yet.
      SetLastBool(mHasWrittenCellsForRow, true);
    }
  } else if (aTag == nsGkAtoms::ul) {
    // Indent here to support nested lists, which aren't included in li :-(
    EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
    // Must end the current line before we change indentation
    mCurrentLine.mIndentation.mLength += kIndentSizeList;
    mULCount++;
  } else if (aTag == nsGkAtoms::ol) {
    EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      // Must end the current line before we change indentation
      // The numbering starts at the value of the start attribute, or at 1 if
      // the attribute is missing or not an integer.
      nsAutoString startAttr;
      int32_t startVal = 1;
      if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
        nsresult rv = NS_OK;
        startVal = startAttr.ToInteger(&rv);
        if (NS_FAILED(rv)) {
          startVal = 1;
        }
      }
      mOLStack.AppendElement(startVal);
    } else {
      mOLStack.AppendElement(kOlStackDummyValue);
    }
    mCurrentLine.mIndentation.mLength += kIndentSizeList;  // see ul
  } else if (aTag == nsGkAtoms::li &&
             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    if (mTagStackIndex > 1 && IsInOL()) {
      if (!mOLStack.IsEmpty()) {
        // A valid value attribute overrides the running number of the list.
        nsAutoString valueAttr;
        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
          nsresult rv = NS_OK;
          int32_t valueAttrVal = valueAttr.ToInteger(&rv);
          if (NS_SUCCEEDED(rv)) {
            mOLStack.LastElement() = valueAttrVal;
          }
        }
        // This is what nsBulletFrame does for OLs:
        mCurrentLine.mIndentation.mHeader.AppendInt(mOLStack.LastElement(), 10);
        mOLStack.LastElement()++;
      } else {
        mCurrentLine.mIndentation.mHeader.Append(char16_t('#'));
      }

      mCurrentLine.mIndentation.mHeader.Append(char16_t('.'));

    } else {
      // Unordered item: the bullet character cycles with the <ul> nesting
      // depth; '#' is used when not inside any <ul>.
      static const char bulletCharArray[] = "*o+#";
      uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
      char bulletChar = bulletCharArray[index % 4];
      mCurrentLine.mIndentation.mHeader.Append(char16_t(bulletChar));
    }

    mCurrentLine.mIndentation.mHeader.Append(char16_t(' '));
  } else if (aTag == nsGkAtoms::dl) {
    EnsureVerticalSpace(1);
  } else if (aTag == nsGkAtoms::dt) {
    EnsureVerticalSpace(0);
  } else if (aTag == nsGkAtoms::dd) {
    EnsureVerticalSpace(0);
    mCurrentLine.mIndentation.mLength += kIndentSizeDD;
  } else if (aTag == nsGkAtoms::span) {
    ++mSpanLevel;
  } else if (aTag == nsGkAtoms::blockquote) {
    // Push
    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
    if (isInCiteBlockquote) {
      // Cite blockquotes raise the quote level instead of being indented.
      EnsureVerticalSpace(0);
      mCurrentLine.mCiteQuoteLevel++;
    } else {
      EnsureVerticalSpace(1);
      mCurrentLine.mIndentation.mLength +=
          kTabSize;  // Check for some maximum value?
    }
  } else if (aTag == nsGkAtoms::q) {
    Write(u"\""_ns);
  }

  // Else make sure we'll separate block level tags,
  // even if we're about to leave, before doing any other formatting.
  else if (IsCssBlockLevelElement(mElement)) {
    EnsureVerticalSpace(0);
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    OpenContainerForOutputFormatted(aTag);
  }
  return NS_OK;
}
812
OpenContainerForOutputFormatted(const nsAtom * aTag)813 void nsPlainTextSerializer::OpenContainerForOutputFormatted(
814 const nsAtom* aTag) {
815 const bool currentNodeIsConverted = IsCurrentNodeConverted();
816
817 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
818 aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
819 EnsureVerticalSpace(2);
820 if (mSettings.GetHeaderStrategy() ==
821 Settings::HeaderStrategy::kNumberHeadingsAndIndentSlightly) {
822 mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
823 // Caching
824 int32_t level = HeaderLevel(aTag);
825 // Increase counter for current level
826 mHeaderCounter[level]++;
827 // Reset all lower levels
828 int32_t i;
829
830 for (i = level + 1; i <= 6; i++) {
831 mHeaderCounter[i] = 0;
832 }
833
834 // Construct numbers
835 nsAutoString leadup;
836 for (i = 1; i <= level; i++) {
837 leadup.AppendInt(mHeaderCounter[i]);
838 leadup.Append(char16_t('.'));
839 }
840 leadup.Append(char16_t(' '));
841 Write(leadup);
842 } else if (mSettings.GetHeaderStrategy() ==
843 Settings::HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
844 mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
845 for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
846 // for h(x), run x-1 times
847 mCurrentLine.mIndentation.mLength += kIndentIncrementHeaders;
848 }
849 }
850 } else if (aTag == nsGkAtoms::sup && mSettings.GetStructs() &&
851 !currentNodeIsConverted) {
852 Write(u"^"_ns);
853 } else if (aTag == nsGkAtoms::sub && mSettings.GetStructs() &&
854 !currentNodeIsConverted) {
855 Write(u"_"_ns);
856 } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
857 !currentNodeIsConverted) {
858 Write(u"|"_ns);
859 } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
860 mSettings.GetStructs() && !currentNodeIsConverted) {
861 Write(u"*"_ns);
862 } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
863 mSettings.GetStructs() && !currentNodeIsConverted) {
864 Write(u"/"_ns);
865 } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
866 !currentNodeIsConverted) {
867 Write(u"_"_ns);
868 }
869
870 /* Container elements are always block elements, so we shouldn't
871 output any whitespace immediately after the container tag even if
872 there's extra whitespace there because the HTML is pretty-printed
873 or something. To ensure that happens, tell the serializer we're
874 already in whitespace so it won't output more. */
875 mInWhitespace = true;
876 }
877
DoCloseContainer(const nsAtom * aTag)878 nsresult nsPlainTextSerializer::DoCloseContainer(const nsAtom* aTag) {
879 if (IsIgnorableRubyAnnotation(aTag)) {
880 mIgnoredChildNodeLevel--;
881 return NS_OK;
882 }
883 if (IsIgnorableScriptOrStyle(mElement)) {
884 mIgnoredChildNodeLevel--;
885 return NS_OK;
886 }
887
888 if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
889 if (DoOutput() && IsElementPreformatted() &&
890 IsCssBlockLevelElement(mElement)) {
891 // If we're closing a preformatted block element, output a line break
892 // when we find a new container.
893 mPreformattedBlockBoundary = true;
894 }
895 }
896
897 if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
898 // Raw means raw. Don't even think about doing anything fancy
899 // here like indenting, adding line breaks or any other
900 // characters such as list item bullets, quote characters
901 // around <q>, etc.
902
903 return NS_OK;
904 }
905
906 if (mTagStackIndex > 0) {
907 --mTagStackIndex;
908 }
909
910 if (mTagStackIndex >= mIgnoreAboveIndex) {
911 if (mTagStackIndex == mIgnoreAboveIndex) {
912 // We're dealing with the close tag whose matching
913 // open tag had set the mIgnoreAboveIndex value.
914 // Reset mIgnoreAboveIndex before discarding this tag.
915 mIgnoreAboveIndex = (uint32_t)kNotFound;
916 }
917 return NS_OK;
918 }
919
920 MOZ_ASSERT(mOutputManager);
921
922 // End current line if we're ending a block level tag
923 if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
924 // We want the output to end with a new line,
925 // but in preformatted areas like text fields,
926 // we can't emit newlines that weren't there.
927 // So add the newline only in the case of formatted output.
928 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
929 EnsureVerticalSpace(0);
930 } else {
931 mOutputManager->Flush(mCurrentLine);
932 }
933 // We won't want to do anything with these in formatted mode either,
934 // so just return now:
935 return NS_OK;
936 }
937
938 // Keep this in sync with DoOpenContainer!
939 if (!DoOutput()) {
940 return NS_OK;
941 }
942
943 if (aTag == nsGkAtoms::tr) {
944 PopBool(mHasWrittenCellsForRow);
945 // Should always end a line, but get no more whitespace
946 if (mFloatingLines < 0) mFloatingLines = 0;
947 mLineBreakDue = true;
948 } else if (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
949 mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
950 // Items that should always end a line, but get no more whitespace
951 if (mFloatingLines < 0) mFloatingLines = 0;
952 mLineBreakDue = true;
953 } else if (aTag == nsGkAtoms::pre) {
954 mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
955 mLineBreakDue = true;
956 } else if (aTag == nsGkAtoms::ul) {
957 mOutputManager->Flush(mCurrentLine);
958 mCurrentLine.mIndentation.mLength -= kIndentSizeList;
959 --mULCount;
960 if (!IsInOlOrUl()) {
961 mFloatingLines = 1;
962 mLineBreakDue = true;
963 }
964 } else if (aTag == nsGkAtoms::ol) {
965 mOutputManager->Flush(mCurrentLine); // Doing this after decreasing
966 // OLStackIndex would be wrong.
967 mCurrentLine.mIndentation.mLength -= kIndentSizeList;
968 MOZ_ASSERT(!mOLStack.IsEmpty(), "Wrong OLStack level!");
969 mOLStack.RemoveLastElement();
970 if (!IsInOlOrUl()) {
971 mFloatingLines = 1;
972 mLineBreakDue = true;
973 }
974 } else if (aTag == nsGkAtoms::dl) {
975 mFloatingLines = 1;
976 mLineBreakDue = true;
977 } else if (aTag == nsGkAtoms::dd) {
978 mOutputManager->Flush(mCurrentLine);
979 mCurrentLine.mIndentation.mLength -= kIndentSizeDD;
980 } else if (aTag == nsGkAtoms::span) {
981 NS_ASSERTION(mSpanLevel, "Span level will be negative!");
982 --mSpanLevel;
983 } else if (aTag == nsGkAtoms::div) {
984 if (mFloatingLines < 0) mFloatingLines = 0;
985 mLineBreakDue = true;
986 } else if (aTag == nsGkAtoms::blockquote) {
987 mOutputManager->Flush(mCurrentLine); // Is this needed?
988
989 // Pop
990 bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
991
992 if (isInCiteBlockquote) {
993 NS_ASSERTION(mCurrentLine.mCiteQuoteLevel,
994 "CiteQuote level will be negative!");
995 mCurrentLine.mCiteQuoteLevel--;
996 mFloatingLines = 0;
997 mHasWrittenCiteBlockquote = true;
998 } else {
999 mCurrentLine.mIndentation.mLength -= kTabSize;
1000 mFloatingLines = 1;
1001 }
1002 mLineBreakDue = true;
1003 } else if (aTag == nsGkAtoms::q) {
1004 Write(u"\""_ns);
1005 } else if (IsCssBlockLevelElement(mElement)) {
1006 // All other blocks get 1 vertical space after them
1007 // in formatted mode, otherwise 0.
1008 // This is hard. Sometimes 0 is a better number, but
1009 // how to know?
1010 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
1011 EnsureVerticalSpace(1);
1012 } else {
1013 if (mFloatingLines < 0) mFloatingLines = 0;
1014 mLineBreakDue = true;
1015 }
1016 }
1017
1018 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
1019 CloseContainerForOutputFormatted(aTag);
1020 }
1021
1022 return NS_OK;
1023 }
1024
CloseContainerForOutputFormatted(const nsAtom * aTag)1025 void nsPlainTextSerializer::CloseContainerForOutputFormatted(
1026 const nsAtom* aTag) {
1027 const bool currentNodeIsConverted = IsCurrentNodeConverted();
1028
1029 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
1030 aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
1031 using HeaderStrategy = Settings::HeaderStrategy;
1032 if ((mSettings.GetHeaderStrategy() ==
1033 HeaderStrategy::kIndentIncreasedWithHeaderLevel) ||
1034 (mSettings.GetHeaderStrategy() ==
1035 HeaderStrategy::kNumberHeadingsAndIndentSlightly)) {
1036 mCurrentLine.mIndentation.mLength -= kIndentSizeHeaders;
1037 }
1038 if (mSettings.GetHeaderStrategy() ==
1039 HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
1040 for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
1041 // for h(x), run x-1 times
1042 mCurrentLine.mIndentation.mLength -= kIndentIncrementHeaders;
1043 }
1044 }
1045 EnsureVerticalSpace(1);
1046 } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
1047 nsAutoString url;
1048 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) &&
1049 !url.IsEmpty()) {
1050 nsAutoString temp;
1051 temp.AssignLiteral(" <");
1052 temp += url;
1053 temp.Append(char16_t('>'));
1054 Write(temp);
1055 }
1056 } else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) &&
1057 mSettings.GetStructs() && !currentNodeIsConverted) {
1058 Write(kSpace);
1059 } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
1060 !currentNodeIsConverted) {
1061 Write(u"|"_ns);
1062 } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
1063 mSettings.GetStructs() && !currentNodeIsConverted) {
1064 Write(u"*"_ns);
1065 } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
1066 mSettings.GetStructs() && !currentNodeIsConverted) {
1067 Write(u"/"_ns);
1068 } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
1069 !currentNodeIsConverted) {
1070 Write(u"_"_ns);
1071 }
1072 }
1073
MustSuppressLeaf() const1074 bool nsPlainTextSerializer::MustSuppressLeaf() const {
1075 if (mIgnoredChildNodeLevel > 0) {
1076 return true;
1077 }
1078
1079 if ((mTagStackIndex > 1 &&
1080 mTagStack[mTagStackIndex - 2] == nsGkAtoms::select) ||
1081 (mTagStackIndex > 0 &&
1082 mTagStack[mTagStackIndex - 1] == nsGkAtoms::select)) {
1083 // Don't output the contents of SELECT elements;
1084 // Might be nice, eventually, to output just the selected element.
1085 // Read more in bug 31994.
1086 return true;
1087 }
1088
1089 return false;
1090 }
1091
DoAddText()1092 void nsPlainTextSerializer::DoAddText() { DoAddText(true, u""_ns); }
1093
DoAddText(bool aIsLineBreak,const nsAString & aText)1094 void nsPlainTextSerializer::DoAddText(bool aIsLineBreak,
1095 const nsAString& aText) {
1096 // If we don't want any output, just return
1097 if (!DoOutput()) {
1098 return;
1099 }
1100
1101 if (!aIsLineBreak) {
1102 // Make sure to reset this, since it's no longer true.
1103 mHasWrittenCiteBlockquote = false;
1104 }
1105
1106 if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1107
1108 if (MustSuppressLeaf()) {
1109 return;
1110 }
1111
1112 if (aIsLineBreak) {
1113 // The only times we want to pass along whitespace from the original
1114 // html source are if we're forced into preformatted mode via flags,
1115 // or if we're prettyprinting and we're inside a <pre>.
1116 // Otherwise, either we're collapsing to minimal text, or we're
1117 // prettyprinting to mimic the html format, and in neither case
1118 // does the formatting of the html source help us.
1119 if (mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) ||
1120 (mPreFormattedMail && !mSettings.GetWrapColumn()) ||
1121 IsElementPreformatted()) {
1122 EnsureVerticalSpace(mEmptyLines + 1);
1123 } else if (!mInWhitespace) {
1124 Write(kSpace);
1125 mInWhitespace = true;
1126 }
1127 return;
1128 }
1129
1130 Write(aText);
1131 }
1132
CreateLineOfDashes(nsAString & aResult,const uint32_t aWrapColumn)1133 void CreateLineOfDashes(nsAString& aResult, const uint32_t aWrapColumn) {
1134 MOZ_ASSERT(aResult.IsEmpty());
1135
1136 const uint32_t width = (aWrapColumn > 0 ? aWrapColumn : 25);
1137 while (aResult.Length() < width) {
1138 aResult.Append(char16_t('-'));
1139 }
1140 }
1141
DoAddLeaf(const nsAtom * aTag)1142 nsresult nsPlainTextSerializer::DoAddLeaf(const nsAtom* aTag) {
1143 mPreformattedBlockBoundary = false;
1144
1145 if (!DoOutput()) {
1146 return NS_OK;
1147 }
1148
1149 if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1150
1151 if (MustSuppressLeaf()) {
1152 return NS_OK;
1153 }
1154
1155 if (aTag == nsGkAtoms::br) {
1156 // Another egregious editor workaround, see bug 38194:
1157 // ignore the bogus br tags that the editor sticks here and there.
1158 // FYI: `brElement` may be `nullptr` if the element is <br> element
1159 // of non-HTML element.
1160 // XXX Do we need to call `EnsureVerticalSpace()` when the <br> element
1161 // is not an HTML element?
1162 HTMLBRElement* brElement = HTMLBRElement::FromNodeOrNull(mElement);
1163 if (!brElement || !brElement->IsPaddingForEmptyLastLine()) {
1164 EnsureVerticalSpace(mEmptyLines + 1);
1165 }
1166 } else if (aTag == nsGkAtoms::hr &&
1167 mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
1168 EnsureVerticalSpace(0);
1169
1170 // Make a line of dashes as wide as the wrap width
1171 // XXX honoring percentage would be nice
1172 nsAutoString line;
1173 CreateLineOfDashes(line, mSettings.GetWrapColumn());
1174 Write(line);
1175
1176 EnsureVerticalSpace(0);
1177 } else if (aTag == nsGkAtoms::img) {
1178 /* Output (in decreasing order of preference)
1179 alt, title or nothing */
1180 // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1181 nsAutoString imageDescription;
1182 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, imageDescription))) {
1183 // If the alt attribute has an empty value (|alt=""|), output nothing
1184 } else if (NS_SUCCEEDED(
1185 GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
1186 !imageDescription.IsEmpty()) {
1187 imageDescription = u" ["_ns + imageDescription + u"] "_ns;
1188 }
1189
1190 Write(imageDescription);
1191 }
1192
1193 return NS_OK;
1194 }
1195
1196 /**
1197 * Adds as many newline as necessary to get |aNumberOfRows| empty lines
1198 *
1199 * aNumberOfRows = -1 : Being in the middle of some line of text
1200 * aNumberOfRows = 0 : Being at the start of a line
1201 * aNumberOfRows = n>0 : Having n empty lines before the current line.
1202 */
EnsureVerticalSpace(const int32_t aNumberOfRows)1203 void nsPlainTextSerializer::EnsureVerticalSpace(const int32_t aNumberOfRows) {
1204 // If we have something in the indent we probably want to output
1205 // it and it's not included in the count for empty lines so we don't
1206 // realize that we should start a new line.
1207 if (aNumberOfRows >= 0 && !mCurrentLine.mIndentation.mHeader.IsEmpty()) {
1208 EndLine(false);
1209 mInWhitespace = true;
1210 }
1211
1212 while (mEmptyLines < aNumberOfRows) {
1213 EndLine(false);
1214 mInWhitespace = true;
1215 }
1216 mLineBreakDue = false;
1217 mFloatingLines = -1;
1218 }
1219
Flush(CurrentLine & aCurrentLine)1220 void nsPlainTextSerializer::OutputManager::Flush(CurrentLine& aCurrentLine) {
1221 if (!aCurrentLine.mContent.IsEmpty()) {
1222 aCurrentLine.MaybeReplaceNbspsInContent(mFlags);
1223
1224 Append(aCurrentLine, StripTrailingWhitespaces::kNo);
1225
1226 aCurrentLine.ResetContentAndIndentationHeader();
1227 }
1228 }
1229
IsSpaceStuffable(const char16_t * s)1230 static bool IsSpaceStuffable(const char16_t* s) {
1231 return (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1232 NS_strncmp(s, u"From ", 5) == 0);
1233 }
1234
MaybeWrapAndOutputCompleteLines()1235 void nsPlainTextSerializer::MaybeWrapAndOutputCompleteLines() {
1236 if (!mSettings.MayWrap()) {
1237 return;
1238 }
1239
1240 const uint32_t prefixwidth = mCurrentLine.DeterminePrefixWidth();
1241
1242 // The width of the line as it will appear on the screen (approx.).
1243 uint32_t currentLineContentWidth =
1244 GetUnicharStringWidth(mCurrentLine.mContent);
1245
1246 // Yes, wrap!
1247 // The "+4" is to avoid wrap lines that only would be a couple
1248 // of letters too long. We give this bonus only if the
1249 // wrapcolumn is more than 20.
1250 const uint32_t wrapColumn = mSettings.GetWrapColumn();
1251 uint32_t bonuswidth = (wrapColumn > 20) ? 4 : 0;
1252
1253 while (currentLineContentWidth + prefixwidth > wrapColumn + bonuswidth) {
1254 const int32_t goodSpace = mCurrentLine.FindWrapIndexForContent(
1255 wrapColumn, currentLineContentWidth, mLineBreaker);
1256
1257 const int32_t contentLength = mCurrentLine.mContent.Length();
1258 if ((goodSpace < contentLength) && (goodSpace > 0)) {
1259 // Found a place to break
1260
1261 // -1 (trim a char at the break position)
1262 // only if the line break was a space.
1263 nsAutoString restOfContent;
1264 if (nsCRT::IsAsciiSpace(mCurrentLine.mContent.CharAt(goodSpace))) {
1265 mCurrentLine.mContent.Right(restOfContent,
1266 contentLength - goodSpace - 1);
1267 } else {
1268 mCurrentLine.mContent.Right(restOfContent, contentLength - goodSpace);
1269 }
1270 // if breaker was U+0020, it has to consider for delsp=yes support
1271 const bool breakBySpace = mCurrentLine.mContent.CharAt(goodSpace) == ' ';
1272 mCurrentLine.mContent.Truncate(goodSpace);
1273 EndLine(true, breakBySpace);
1274 mCurrentLine.mContent.Truncate();
1275 // Space stuff new line?
1276 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1277 if (!restOfContent.IsEmpty() && IsSpaceStuffable(restOfContent.get()) &&
1278 mCurrentLine.mCiteQuoteLevel ==
1279 0 // We space-stuff quoted lines anyway
1280 ) {
1281 // Space stuffing a la RFC 2646 (format=flowed).
1282 mCurrentLine.mContent.Append(char16_t(' '));
1283 // XXX doesn't seem to work correctly for ' '
1284 }
1285 }
1286 mCurrentLine.mContent.Append(restOfContent);
1287 currentLineContentWidth = GetUnicharStringWidth(mCurrentLine.mContent);
1288 mEmptyLines = -1;
1289 } else {
1290 // Nothing to do. Hopefully we get more data later
1291 // to use for a place to break line
1292 break;
1293 }
1294 }
1295 }
1296
1297 /**
1298 * This function adds a piece of text to the current stored line. If we are
1299 * wrapping text and the stored line will become too long, a suitable
1300 * location to wrap will be found and the line that's complete will be
1301 * output.
1302 */
AddToLine(const char16_t * aLineFragment,int32_t aLineFragmentLength)1303 void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
1304 int32_t aLineFragmentLength) {
1305 if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1306
1307 if (mCurrentLine.mContent.IsEmpty()) {
1308 if (0 == aLineFragmentLength) {
1309 return;
1310 }
1311
1312 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1313 if (IsSpaceStuffable(aLineFragment) &&
1314 mCurrentLine.mCiteQuoteLevel ==
1315 0 // We space-stuff quoted lines anyway
1316 ) {
1317 // Space stuffing a la RFC 2646 (format=flowed).
1318 mCurrentLine.mContent.Append(char16_t(' '));
1319 }
1320 }
1321 mEmptyLines = -1;
1322 }
1323
1324 mCurrentLine.mContent.Append(aLineFragment, aLineFragmentLength);
1325
1326 MaybeWrapAndOutputCompleteLines();
1327 }
1328
1329 // The signature separator (RFC 2646).
1330 const char kSignatureSeparator[] = "-- ";
1331
1332 // The OpenPGP dash-escaped signature separator in inline
1333 // signed messages according to the OpenPGP standard (RFC 2440).
1334 const char kDashEscapedSignatureSeparator[] = "- -- ";
1335
IsSignatureSeparator(const nsAString & aString)1336 static bool IsSignatureSeparator(const nsAString& aString) {
1337 return aString.EqualsLiteral(kSignatureSeparator) ||
1338 aString.EqualsLiteral(kDashEscapedSignatureSeparator);
1339 }
1340
1341 /**
1342 * Outputs the contents of mCurrentLine.mContent, and resets line
1343 * specific variables. Also adds an indentation and prefix if there is one
1344 * specified. Strips ending spaces from the line if it isn't preformatted.
1345 */
EndLine(bool aSoftLineBreak,bool aBreakBySpace)1346 void nsPlainTextSerializer::EndLine(bool aSoftLineBreak, bool aBreakBySpace) {
1347 if (aSoftLineBreak && mCurrentLine.mContent.IsEmpty()) {
1348 // No meaning
1349 return;
1350 }
1351
1352 /* In non-preformatted mode, remove spaces from the end of the line for
1353 * format=flowed compatibility. Don't do this for these special cases:
1354 * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1355 * "- -- ", the OpenPGP dash-escaped signature separator in inline
1356 * signed messages according to the OpenPGP standard (RFC 2440).
1357 */
1358 if (!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) &&
1359 (aSoftLineBreak || !IsSignatureSeparator(mCurrentLine.mContent))) {
1360 mCurrentLine.mContent.Trim(" ", false, true, false);
1361 }
1362
1363 if (aSoftLineBreak &&
1364 mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
1365 (mCurrentLine.mIndentation.mLength == 0)) {
1366 // Add the soft part of the soft linebreak (RFC 2646 4.1)
1367 // We only do this when there is no indentation since format=flowed
1368 // lines and indentation doesn't work well together.
1369
1370 // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1371 // add twice space.
1372 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatDelSp) &&
1373 aBreakBySpace) {
1374 mCurrentLine.mContent.AppendLiteral(" ");
1375 } else {
1376 mCurrentLine.mContent.Append(char16_t(' '));
1377 }
1378 }
1379
1380 if (aSoftLineBreak) {
1381 mEmptyLines = 0;
1382 } else {
1383 // Hard break
1384 if (mCurrentLine.HasContentOrIndentationHeader()) {
1385 mEmptyLines = 0;
1386 } else {
1387 mEmptyLines++;
1388 }
1389 }
1390
1391 MOZ_ASSERT(mOutputManager);
1392
1393 mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());
1394
1395 // If we don't have anything "real" to output we have to
1396 // make sure the indent doesn't end in a space since that
1397 // would trick a format=flowed-aware receiver.
1398 mOutputManager->Append(mCurrentLine,
1399 OutputManager::StripTrailingWhitespaces::kMaybe);
1400 mOutputManager->AppendLineBreak();
1401 mCurrentLine.ResetContentAndIndentationHeader();
1402 mInWhitespace = true;
1403 mLineBreakDue = false;
1404 mFloatingLines = -1;
1405 }
1406
1407 /**
1408 * Creates the calculated and stored indent and text in the indentation. That is
1409 * quote chars and numbers for numbered lists and such.
1410 */
CreateQuotesAndIndent(nsAString & aResult) const1411 void nsPlainTextSerializer::CurrentLine::CreateQuotesAndIndent(
1412 nsAString& aResult) const {
1413 // Put the mail quote "> " chars in, if appropriate:
1414 if (mCiteQuoteLevel > 0) {
1415 nsAutoString quotes;
1416 for (int i = 0; i < mCiteQuoteLevel; i++) {
1417 quotes.Append(char16_t('>'));
1418 }
1419 if (!mContent.IsEmpty()) {
1420 /* Better don't output a space here, if the line is empty,
1421 in case a receiving format=flowed-aware UA thinks, this were a flowed
1422 line, which it isn't - it's just empty. (Flowed lines may be joined
1423 with the following one, so the empty line may be lost completely.) */
1424 quotes.Append(char16_t(' '));
1425 }
1426 aResult = quotes;
1427 }
1428
1429 // Indent if necessary
1430 int32_t indentwidth = mIndentation.mLength - mIndentation.mHeader.Length();
1431 if (indentwidth > 0 && HasContentOrIndentationHeader()
1432 // Don't make empty lines look flowed
1433 ) {
1434 nsAutoString spaces;
1435 for (int i = 0; i < indentwidth; ++i) spaces.Append(char16_t(' '));
1436 aResult += spaces;
1437 }
1438
1439 if (!mIndentation.mHeader.IsEmpty()) {
1440 aResult += mIndentation.mHeader;
1441 }
1442 }
1443
/**
 * Tells whether |c| is a line feed, a carriage return, a blank (U+0020) or
 * a horizontal tab.
 */
static bool IsLineFeedCarriageReturnBlankOrTab(char16_t c) {
  switch (c) {
    case '\n':
    case '\r':
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
1447
ReplaceVisiblyTrailingNbsps(nsAString & aString)1448 static void ReplaceVisiblyTrailingNbsps(nsAString& aString) {
1449 const int32_t totLen = aString.Length();
1450 for (int32_t i = totLen - 1; i >= 0; i--) {
1451 char16_t c = aString[i];
1452 if (IsLineFeedCarriageReturnBlankOrTab(c)) {
1453 continue;
1454 }
1455 if (kNBSP == c) {
1456 aString.Replace(i, 1, ' ');
1457 } else {
1458 break;
1459 }
1460 }
1461 }
1462
/**
 * Outputs |aString| without any wrapping: the string is split at each '\n'
 * or '\r' (a "\r\n" pair counts as one break), and every part is appended to
 * the current line and handed to the output manager right away, followed by
 * a line break if the part was terminated by one.
 *
 * mInWhitespace and mEmptyLines are updated for every part, and the current
 * line's content and indentation header are reset after each part.
 */
void nsPlainTextSerializer::ConvertToLinesAndOutput(const nsAString& aString) {
  const int32_t totLen = aString.Length();
  int32_t newline{0};

  // Put the mail quote "> " chars in, if appropriate.
  // Have to put it in before every line.
  // `bol` is the index at which the part being processed begins.
  int32_t bol = 0;
  while (bol < totLen) {
    bool outputLineBreak = false;
    // Stays true while only ' ' has been seen before the next line break
    // (or the end of the string).
    bool spacesOnly = true;

    // Find one of '\n' or '\r' using iterators since nsAString
    // doesn't have the old FindCharInSet function.
    nsAString::const_iterator iter;
    aString.BeginReading(iter);
    nsAString::const_iterator done_searching;
    aString.EndReading(done_searching);
    iter.advance(bol);
    int32_t new_newline = bol;
    newline = kNotFound;
    while (iter != done_searching) {
      if ('\n' == *iter || '\r' == *iter) {
        newline = new_newline;
        break;
      }
      if (' ' != *iter) {
        spacesOnly = false;
      }
      ++new_newline;
      ++iter;
    }

    // Done searching. If a line break was found, `iter` still points at it;
    // this is relied upon by the CRLF check below.
    nsAutoString stringpart;
    if (newline == kNotFound) {
      // No new lines: the part is the whole rest of the string.
      stringpart.Assign(Substring(aString, bol, totLen - bol));
      if (!stringpart.IsEmpty()) {
        char16_t lastchar = stringpart.Last();
        mInWhitespace = IsLineFeedCarriageReturnBlankOrTab(lastchar);
      }
      mEmptyLines = -1;
      bol = totLen;
    } else {
      // There is a newline: the part ends right before it.
      stringpart.Assign(Substring(aString, bol, newline - bol));
      mInWhitespace = true;
      outputLineBreak = true;
      mEmptyLines = 0;
      bol = newline + 1;
      if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
        // There was a CRLF in the input. This used to be illegal and
        // stripped by the parser. Apparently not anymore. Let's skip
        // over the LF.
        bol++;
      }
    }

    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
      // Trailing spaces are trimmed for format=flowed, except for quoted
      // lines and signature separators, and except for a final part that
      // consists of spaces only.
      if ((outputLineBreak || !spacesOnly) &&  // bugs 261467,125928
          !IsQuotedLine(stringpart) && !IsSignatureSeparator(stringpart)) {
        stringpart.Trim(" ", false, true, true);
      }
      // Space-stuff the part if needed (quoted lines are left alone here).
      if (IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart)) {
        mCurrentLine.mContent.Append(char16_t(' '));
      }
    }
    mCurrentLine.mContent.Append(stringpart);

    mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());

    // Output the part as is; trailing whitespace is not stripped here.
    mOutputManager->Append(mCurrentLine,
                           OutputManager::StripTrailingWhitespaces::kNo);
    if (outputLineBreak) {
      mOutputManager->AppendLineBreak();
    }

    mCurrentLine.ResetContentAndIndentationHeader();
  }

#ifdef DEBUG_wrapping
  printf("No wrapping: newline is %d, totLen is %d\n", newline, totLen);
#endif
}
1547
1548 /**
1549 * Write a string. This is the highlevel function to use to get text output.
1550 * By using AddToLine, Output, EndLine and other functions it handles quotation,
1551 * line wrapping, indentation, whitespace compression and other things.
1552 */
Write(const nsAString & aStr)1553 void nsPlainTextSerializer::Write(const nsAString& aStr) {
1554 // XXX Copy necessary to use nsString methods and gain
1555 // access to underlying buffer
1556 nsAutoString str(aStr);
1557
1558 #ifdef DEBUG_wrapping
1559 printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
1560 mSettings.GetWrapColumn());
1561 #endif
1562
1563 const int32_t totLen = str.Length();
1564
1565 // If the string is empty, do nothing:
1566 if (totLen <= 0) return;
1567
1568 // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1569 // to be cut off along with usual spaces if required. (bug #125928)
1570 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1571 ReplaceVisiblyTrailingNbsps(str);
1572 }
1573
1574 // We have two major codepaths here. One that does preformatted text and one
1575 // that does normal formatted text. The one for preformatted text calls
1576 // Output directly while the other code path goes through AddToLine.
1577 if ((mPreFormattedMail && !mSettings.GetWrapColumn()) ||
1578 (IsElementPreformatted() && !mPreFormattedMail) ||
1579 (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
1580 // No intelligent wrapping.
1581
1582 // This mustn't be mixed with intelligent wrapping without clearing
1583 // the mCurrentLine.mContent buffer before!!!
1584 NS_ASSERTION(mCurrentLine.mContent.IsEmpty() ||
1585 (IsElementPreformatted() && !mPreFormattedMail),
1586 "Mixed wrapping data and nonwrapping data on the same line");
1587 MOZ_ASSERT(mOutputManager);
1588
1589 if (!mCurrentLine.mContent.IsEmpty()) {
1590 mOutputManager->Flush(mCurrentLine);
1591 }
1592
1593 ConvertToLinesAndOutput(str);
1594 return;
1595 }
1596
1597 // Intelligent handling of text
1598 // If needed, strip out all "end of lines"
1599 // and multiple whitespace between words
1600 int32_t nextpos;
1601 const char16_t* offsetIntoBuffer = nullptr;
1602
1603 int32_t bol = 0;
1604 while (bol < totLen) { // Loop over lines
1605 // Find a place where we may have to do whitespace compression
1606 nextpos = str.FindCharInSet(" \t\n\r", bol);
1607 #ifdef DEBUG_wrapping
1608 nsAutoString remaining;
1609 str.Right(remaining, totLen - bol);
1610 foo = ToNewCString(remaining);
1611 // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, "
1612 // "string = '%s'\n", bol, nextpos, totLen, foo);
1613 free(foo);
1614 #endif
1615
1616 if (nextpos == kNotFound) {
1617 // The rest of the string
1618 offsetIntoBuffer = str.get() + bol;
1619 AddToLine(offsetIntoBuffer, totLen - bol);
1620 bol = totLen;
1621 mInWhitespace = false;
1622 } else {
1623 // There's still whitespace left in the string
1624 if (nextpos != 0 && (nextpos + 1) < totLen) {
1625 offsetIntoBuffer = str.get() + nextpos;
1626 // skip '\n' if it is between CJ chars
1627 if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
1628 IS_CJ_CHAR(offsetIntoBuffer[1])) {
1629 offsetIntoBuffer = str.get() + bol;
1630 AddToLine(offsetIntoBuffer, nextpos - bol);
1631 bol = nextpos + 1;
1632 continue;
1633 }
1634 }
1635 // If we're already in whitespace and not preformatted, just skip it:
1636 if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
1637 !mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
1638 // Skip whitespace
1639 bol++;
1640 continue;
1641 }
1642
1643 if (nextpos == bol) {
1644 // Note that we are in whitespace.
1645 mInWhitespace = true;
1646 offsetIntoBuffer = str.get() + nextpos;
1647 AddToLine(offsetIntoBuffer, 1);
1648 bol++;
1649 continue;
1650 }
1651
1652 mInWhitespace = true;
1653
1654 offsetIntoBuffer = str.get() + bol;
1655 if (mPreFormattedMail ||
1656 mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
1657 // Preserve the real whitespace character
1658 nextpos++;
1659 AddToLine(offsetIntoBuffer, nextpos - bol);
1660 bol = nextpos;
1661 } else {
1662 // Replace the whitespace with a space
1663 AddToLine(offsetIntoBuffer, nextpos - bol);
1664 AddToLine(kSpace.get(), 1);
1665 bol = nextpos + 1; // Let's eat the whitespace
1666 }
1667 }
1668 } // Continue looping over the string
1669 }
1670
1671 /**
1672 * Gets the value of an attribute in a string. If the function returns
1673 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1674 */
GetAttributeValue(const nsAtom * aName,nsString & aValueRet) const1675 nsresult nsPlainTextSerializer::GetAttributeValue(const nsAtom* aName,
1676 nsString& aValueRet) const {
1677 if (mElement) {
1678 if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
1679 return NS_OK;
1680 }
1681 }
1682
1683 return NS_ERROR_NOT_AVAILABLE;
1684 }
1685
1686 /**
1687 * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1688 * In this case, we should ignore it.
1689 */
IsCurrentNodeConverted() const1690 bool nsPlainTextSerializer::IsCurrentNodeConverted() const {
1691 nsAutoString value;
1692 nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1693 return (NS_SUCCEEDED(rv) && (value.EqualsIgnoreCase("moz-txt", 7) ||
1694 value.EqualsIgnoreCase("\"moz-txt", 8)));
1695 }
1696
1697 // static
GetIdForContent(nsIContent * aContent)1698 nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) {
1699 if (!aContent->IsHTMLElement()) {
1700 return nullptr;
1701 }
1702
1703 nsAtom* localName = aContent->NodeInfo()->NameAtom();
1704 return localName->IsStatic() ? localName : nullptr;
1705 }
1706
IsElementPreformatted() const1707 bool nsPlainTextSerializer::IsElementPreformatted() const {
1708 return !mPreformatStack.empty() && mPreformatStack.top();
1709 }
1710
IsElementPreformatted(Element * aElement)1711 bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
1712 RefPtr<ComputedStyle> computedStyle =
1713 nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1714 if (computedStyle) {
1715 const nsStyleText* textStyle = computedStyle->StyleText();
1716 return textStyle->WhiteSpaceOrNewlineIsSignificant();
1717 }
1718 // Fall back to looking at the tag, in case there is no style information.
1719 return GetIdForContent(aElement) == nsGkAtoms::pre;
1720 }
1721
IsCssBlockLevelElement(Element * aElement)1722 bool nsPlainTextSerializer::IsCssBlockLevelElement(Element* aElement) {
1723 RefPtr<ComputedStyle> computedStyle =
1724 nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1725 if (computedStyle) {
1726 const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay();
1727 return displayStyle->IsBlockOutsideStyle();
1728 }
1729 // Fall back to looking at the tag, in case there is no style information.
1730 return nsContentUtils::IsHTMLBlockLevelElement(aElement);
1731 }
1732
1733 /**
1734 * This method is required only to identify LI's inside OL.
1735 * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1736 */
IsInOL() const1737 bool nsPlainTextSerializer::IsInOL() const {
1738 int32_t i = mTagStackIndex;
1739 while (--i >= 0) {
1740 if (mTagStack[i] == nsGkAtoms::ol) return true;
1741 if (mTagStack[i] == nsGkAtoms::ul) {
1742 // If a UL is reached first, LI belongs the UL nested in OL.
1743 return false;
1744 }
1745 }
1746 // We may reach here for orphan LI's.
1747 return false;
1748 }
1749
IsInOlOrUl() const1750 bool nsPlainTextSerializer::IsInOlOrUl() const {
1751 return (mULCount > 0) || !mOLStack.IsEmpty();
1752 }
1753
1754 /*
1755 @return 0 = no header, 1 = h1, ..., 6 = h6
1756 */
HeaderLevel(const nsAtom * aTag)1757 int32_t HeaderLevel(const nsAtom* aTag) {
1758 if (aTag == nsGkAtoms::h1) {
1759 return 1;
1760 }
1761 if (aTag == nsGkAtoms::h2) {
1762 return 2;
1763 }
1764 if (aTag == nsGkAtoms::h3) {
1765 return 3;
1766 }
1767 if (aTag == nsGkAtoms::h4) {
1768 return 4;
1769 }
1770 if (aTag == nsGkAtoms::h5) {
1771 return 5;
1772 }
1773 if (aTag == nsGkAtoms::h6) {
1774 return 6;
1775 }
1776 return 0;
1777 }
1778
1779 /*
1780 * This is an implementation of GetUnicharWidth() and
1781 * GetUnicharStringWidth() as defined in
1782 * "The Single UNIX Specification, Version 2, The Open Group, 1997"
1783 * <http://www.UNIX-systems.org/online.html>
1784 *
1785 * Markus Kuhn -- 2000-02-08 -- public domain
1786 *
1787 * Minor alterations to fit Mozilla's data types by Daniel Bratell
1788 */
1789
1790 /* These functions define the column width of an ISO 10646 character
1791 * as follows:
1792 *
1793 * - The null character (U+0000) has a column width of 0.
1794 *
1795 * - Other C0/C1 control characters and DEL will lead to a return
1796 * value of -1.
1797 *
1798 * - Non-spacing and enclosing combining characters (general
1799 * category code Mn or Me in the Unicode database) have a
1800 * column width of 0.
1801 *
1802 * - Spacing characters in the East Asian Wide (W) or East Asian
1803 * FullWidth (F) category as defined in Unicode Technical
1804 * Report #11 have a column width of 2.
1805 *
1806 * - All remaining characters (including all printable
1807 * ISO 8859-1 and WGL4 characters, Unicode control characters,
1808 * etc.) have a column width of 1.
1809 *
1810 * This implementation assumes that wchar_t characters are encoded
1811 * in ISO 10646.
1812 */
1813
1814 namespace {
1815
1816 struct interval {
1817 uint16_t first;
1818 uint16_t last;
1819 };
1820
1821 struct CombiningComparator {
1822 const char16_t mUcs;
CombiningComparator__anon1c77cadb0111::CombiningComparator1823 explicit CombiningComparator(char16_t aUcs) : mUcs(aUcs) {}
operator ()__anon1c77cadb0111::CombiningComparator1824 int operator()(const interval& combining) const {
1825 if (mUcs > combining.last) return 1;
1826 if (mUcs < combining.first) return -1;
1827
1828 MOZ_ASSERT(combining.first <= mUcs);
1829 MOZ_ASSERT(mUcs <= combining.last);
1830 return 0;
1831 }
1832 };
1833
1834 } // namespace
1835
GetUnicharWidth(char16_t ucs)1836 int32_t GetUnicharWidth(char16_t ucs) {
1837 /* sorted list of non-overlapping intervals of non-spacing characters */
1838 static const interval combining[] = {
1839 {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486}, {0x0488, 0x0489},
1840 {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF},
1841 {0x05C1, 0x05C2}, {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
1842 {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
1843 {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
1844 {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963},
1845 {0x0981, 0x0981}, {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
1846 {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
1847 {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A82},
1848 {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD},
1849 {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
1850 {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0},
1851 {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
1852 {0x0C55, 0x0C56}, {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
1853 {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4},
1854 {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
1855 {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
1856 {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
1857 {0x0F71, 0x0F7E}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97},
1858 {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
1859 {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059}, {0x17B7, 0x17BD},
1860 {0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x18A9, 0x18A9}, {0x20D0, 0x20E3},
1861 {0x302A, 0x302F}, {0x3099, 0x309A}, {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}};
1862
1863 /* test for 8-bit control characters */
1864 if (ucs == 0) return 0;
1865 if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) return -1;
1866
1867 /* first quick check for Latin-1 etc. characters */
1868 if (ucs < combining[0].first) return 1;
1869
1870 /* binary search in table of non-spacing characters */
1871 size_t idx;
1872 if (BinarySearchIf(combining, 0, ArrayLength(combining),
1873 CombiningComparator(ucs), &idx)) {
1874 return 0;
1875 }
1876
1877 /* if we arrive here, ucs is not a combining or C0/C1 control character */
1878
1879 /* fast test for majority of non-wide scripts */
1880 if (ucs < 0x1100) return 1;
1881
1882 return 1 +
1883 ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
1884 (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
1885 ucs != 0x303f) || /* CJK ... Yi */
1886 (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
1887 (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
1888 (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
1889 (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
1890 (ucs >= 0xffe0 && ucs <= 0xffe6));
1891 }
1892
GetUnicharStringWidth(const nsString & aString)1893 int32_t GetUnicharStringWidth(const nsString& aString) {
1894 const char16_t* pwcs = aString.get();
1895 int32_t n = aString.Length();
1896
1897 int32_t w, width = 0;
1898
1899 for (; *pwcs && n-- > 0; pwcs++)
1900 if ((w = GetUnicharWidth(*pwcs)) < 0)
1901 ++width; // Taking 1 as the width of non-printable character, for bug#
1902 // 94475.
1903 else
1904 width += w;
1905
1906 return width;
1907 }
1908