1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 /*
8  * nsIContentSerializer implementation that can be used with an
9  * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
10  * (eg for copy/paste as plaintext).
11  */
12 
13 #include "nsPlainTextSerializer.h"
14 
15 #include <limits>
16 
17 #include "nsPrintfCString.h"
18 #include "nsDebug.h"
19 #include "nsGkAtoms.h"
20 #include "nsNameSpaceManager.h"
21 #include "nsTextFragment.h"
22 #include "nsContentUtils.h"
23 #include "nsReadableUtils.h"
24 #include "nsUnicharUtils.h"
25 #include "nsCRT.h"
26 #include "mozilla/EditorUtils.h"
27 #include "mozilla/dom/CharacterData.h"
28 #include "mozilla/dom/Element.h"
29 #include "mozilla/dom/HTMLBRElement.h"
30 #include "mozilla/dom/Text.h"
31 #include "mozilla/Preferences.h"
32 #include "mozilla/StaticPrefs_converter.h"
33 #include "mozilla/BinarySearch.h"
34 #include "nsComputedDOMStyle.h"
35 
36 namespace mozilla {
37 class Encoding;
38 }
39 
40 using namespace mozilla;
41 using namespace mozilla::dom;
42 
43 #define PREF_STRUCTS "converter.html2txt.structs"
44 #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
45 
46 static const int32_t kTabSize = 4;
47 static const int32_t kIndentSizeHeaders =
48     2; /* Indention of h1, if
49         mHeaderStrategy = kIndentIncreasedWithHeaderLevel
50         or = kNumberHeadingsAndIndentSlightly. Indention of
51         other headers is derived from that. */
52 static const int32_t kIndentIncrementHeaders =
53     2; /* If mHeaderStrategy = kIndentIncreasedWithHeaderLevel,
54    indent h(x+1) this many
55    columns more than h(x) */
56 static const int32_t kIndentSizeList = kTabSize;
57 // Indention of non-first lines of ul and ol
58 static const int32_t kIndentSizeDD = kTabSize;  // Indention of <dd>
59 static const char16_t kNBSP = 160;
60 static const char16_t kSPACE = ' ';
61 
62 constexpr int32_t kNoFlags = 0;
63 
64 static int32_t HeaderLevel(const nsAtom* aTag);
65 static int32_t GetUnicharWidth(char16_t ucs);
66 static int32_t GetUnicharStringWidth(const nsString& aString);
67 
68 // Someday may want to make this non-const:
69 static const uint32_t TagStackSize = 500;
70 
// Reference counting for nsPlainTextSerializer, using the cycle-collecting
// AddRef/Release implementations.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)

// Interface map: the serializer is reachable as nsIContentSerializer and
// nsISupports.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
  NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
  NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END

// mElement is the only member registered with the cycle collector here.
NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer, mElement)
80 
81 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
82   RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
83   it.forget(aSerializer);
84   return NS_OK;
85 }
86 
87 // @param aFlags As defined in nsIDocumentEncoder.idl.
DetermineLineBreak(const int32_t aFlags,nsAString & aLineBreak)88 static void DetermineLineBreak(const int32_t aFlags, nsAString& aLineBreak) {
89   // Set the line break character:
90   if ((aFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
91       (aFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
92     // Windows
93     aLineBreak.AssignLiteral(u"\r\n");
94   } else if (aFlags & nsIDocumentEncoder::OutputCRLineBreak) {
95     // Mac
96     aLineBreak.AssignLiteral(u"\r");
97   } else if (aFlags & nsIDocumentEncoder::OutputLFLineBreak) {
98     // Unix/DOM
99     aLineBreak.AssignLiteral(u"\n");
100   } else {
101     // Platform/default
102     aLineBreak.AssignLiteral(NS_ULINEBREAK);
103   }
104 }
105 
MaybeReplaceNbspsInContent(const int32_t aFlags)106 void nsPlainTextSerializer::CurrentLine::MaybeReplaceNbspsInContent(
107     const int32_t aFlags) {
108   if (!(aFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
109     // First, replace all nbsp characters with spaces,
110     // which the unicode encoder won't do for us.
111     mContent.ReplaceChar(kNBSP, kSPACE);
112   }
113 }
114 
ResetContentAndIndentationHeader()115 void nsPlainTextSerializer::CurrentLine::ResetContentAndIndentationHeader() {
116   mContent.Truncate();
117   mIndentation.mHeader.Truncate();
118 }
119 
FindWrapIndexForContent(const uint32_t aWrapColumn,const uint32_t aContentWidth,mozilla::intl::LineBreaker * aLineBreaker) const120 int32_t nsPlainTextSerializer::CurrentLine::FindWrapIndexForContent(
121     const uint32_t aWrapColumn, const uint32_t aContentWidth,
122     mozilla::intl::LineBreaker* aLineBreaker) const {
123   MOZ_ASSERT(aContentWidth < std::numeric_limits<int32_t>::max());
124   MOZ_ASSERT(static_cast<int32_t>(aContentWidth) ==
125              GetUnicharStringWidth(mContent));
126 
127   const uint32_t prefixwidth = DeterminePrefixWidth();
128   int32_t goodSpace = mContent.Length();
129 
130   if (aLineBreaker) {
131     // We go from the end removing one letter at a time until
132     // we have a reasonable width
133     uint32_t width = aContentWidth;
134     while (goodSpace > 0 && (width + prefixwidth > aWrapColumn)) {
135       goodSpace--;
136       width -= GetUnicharWidth(mContent[goodSpace]);
137     }
138 
139     goodSpace++;
140 
141     goodSpace =
142         aLineBreaker->Prev(mContent.get(), mContent.Length(), goodSpace);
143     if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
144         nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace - 1))) {
145       --goodSpace;  // adjust the position since line breaker returns a
146                     // position next to space
147     }
148   } else {
149     // In this case we don't want strings, especially CJK-ones, to be split.
150     // See
151     // https://bugzilla.mozilla.org/show_bug.cgi?id=333064 for more
152     // information.
153 
154     if (mContent.IsEmpty() || aWrapColumn < prefixwidth) {
155       goodSpace = NS_LINEBREAKER_NEED_MORE_TEXT;
156     } else {
157       goodSpace =
158           std::min<int32_t>(aWrapColumn - prefixwidth, mContent.Length() - 1);
159       while (goodSpace >= 0 &&
160              !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
161         goodSpace--;
162       }
163     }
164   }
165 
166   if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
167     // If we didn't find a good place to break, accept long line and
168     // try to find another place to break
169     goodSpace =
170         (prefixwidth > aWrapColumn + 1) ? 1 : aWrapColumn - prefixwidth + 1;
171     if (aLineBreaker) {
172       if ((uint32_t)goodSpace < mContent.Length())
173         goodSpace =
174             aLineBreaker->Next(mContent.get(), mContent.Length(), goodSpace);
175       if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
176         goodSpace = mContent.Length();
177     } else {
178       // In this case we don't want strings, especially CJK-ones, to be
179       // split. See
180       // https://bugzilla.mozilla.org/show_bug.cgi?id=333064 for more
181       // information.
182       goodSpace = (prefixwidth > aWrapColumn) ? 1 : aWrapColumn - prefixwidth;
183       const int32_t contentLength = mContent.Length();
184       while (goodSpace < contentLength &&
185              !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
186         goodSpace++;
187       }
188     }
189   }
190   return goodSpace;
191 }
192 
OutputManager(const int32_t aFlags,nsAString & aOutput)193 nsPlainTextSerializer::OutputManager::OutputManager(const int32_t aFlags,
194                                                     nsAString& aOutput)
195     : mFlags{aFlags}, mOutput{aOutput}, mAtFirstColumn{true} {
196   MOZ_ASSERT(aOutput.IsEmpty());
197 
198   DetermineLineBreak(mFlags, mLineBreak);
199 }
200 
Append(const CurrentLine & aCurrentLine,const StripTrailingWhitespaces aStripTrailingWhitespaces)201 void nsPlainTextSerializer::OutputManager::Append(
202     const CurrentLine& aCurrentLine,
203     const StripTrailingWhitespaces aStripTrailingWhitespaces) {
204   if (IsAtFirstColumn()) {
205     nsAutoString quotesAndIndent;
206     aCurrentLine.CreateQuotesAndIndent(quotesAndIndent);
207 
208     if ((aStripTrailingWhitespaces == StripTrailingWhitespaces::kMaybe)) {
209       const bool stripTrailingSpaces = aCurrentLine.mContent.IsEmpty();
210       if (stripTrailingSpaces) {
211         quotesAndIndent.Trim(" ", false, true, false);
212       }
213     }
214 
215     Append(quotesAndIndent);
216   }
217 
218   Append(aCurrentLine.mContent);
219 }
220 
Append(const nsAString & aString)221 void nsPlainTextSerializer::OutputManager::Append(const nsAString& aString) {
222   if (!aString.IsEmpty()) {
223     mOutput.Append(aString);
224     mAtFirstColumn = false;
225   }
226 }
227 
AppendLineBreak()228 void nsPlainTextSerializer::OutputManager::AppendLineBreak() {
229   mOutput.Append(mLineBreak);
230   mAtFirstColumn = true;
231 }
232 
GetOutputLength() const233 uint32_t nsPlainTextSerializer::OutputManager::GetOutputLength() const {
234   return mOutput.Length();
235 }
236 
nsPlainTextSerializer()237 nsPlainTextSerializer::nsPlainTextSerializer()
238     : mFloatingLines(-1),
239       mLineBreakDue(false),
240       kSpace(u" "_ns)  // Init of "constant"
241 {
242   mHeadLevel = 0;
243   mHasWrittenCiteBlockquote = false;
244   mSpanLevel = 0;
245   for (int32_t i = 0; i <= 6; i++) {
246     mHeaderCounter[i] = 0;
247   }
248 
249   // Flow
250   mEmptyLines = 1;  // The start of the document is an "empty line" in itself,
251   mInWhitespace = false;
252   mPreFormattedMail = false;
253 
254   mPreformattedBlockBoundary = false;
255 
256   // initialize the tag stack to zero:
257   // The stack only ever contains pointers to static atoms, so they don't
258   // need refcounting.
259   mTagStack = new const nsAtom*[TagStackSize];
260   mTagStackIndex = 0;
261   mIgnoreAboveIndex = (uint32_t)kNotFound;
262 
263   mULCount = 0;
264 
265   mIgnoredChildNodeLevel = 0;
266 }
267 
~nsPlainTextSerializer()268 nsPlainTextSerializer::~nsPlainTextSerializer() {
269   delete[] mTagStack;
270   NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
271 }
272 
273 nsPlainTextSerializer::Settings::HeaderStrategy
Convert(const int32_t aPrefHeaderStrategy)274 nsPlainTextSerializer::Settings::Convert(const int32_t aPrefHeaderStrategy) {
275   HeaderStrategy result{HeaderStrategy::kIndentIncreasedWithHeaderLevel};
276 
277   switch (aPrefHeaderStrategy) {
278     case 0: {
279       result = HeaderStrategy::kNoIndentation;
280       break;
281     }
282     case 1: {
283       result = HeaderStrategy::kIndentIncreasedWithHeaderLevel;
284       break;
285     }
286     case 2: {
287       result = HeaderStrategy::kNumberHeadingsAndIndentSlightly;
288       break;
289     }
290     default: {
291       NS_WARNING(
292           nsPrintfCString("Header strategy pref contains undefined value: %i",
293                           aPrefHeaderStrategy)
294               .get());
295     }
296   }
297 
298   return result;
299 }
300 
301 const int32_t kDefaultHeaderStrategy = 1;
302 
Init(const int32_t aFlags,const uint32_t aWrapColumn)303 void nsPlainTextSerializer::Settings::Init(const int32_t aFlags,
304                                            const uint32_t aWrapColumn) {
305   mFlags = aFlags;
306 
307   if (mFlags & nsIDocumentEncoder::OutputFormatted) {
308     // Get some prefs that controls how we do formatted output
309     mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
310 
311     int32_t headerStrategy =
312         Preferences::GetInt(PREF_HEADER_STRATEGY, kDefaultHeaderStrategy);
313     mHeaderStrategy = Convert(headerStrategy);
314   }
315 
316   mWithRubyAnnotation = StaticPrefs::converter_html2txt_always_include_ruby() ||
317                         (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);
318 
319   // XXX We should let the caller decide whether to do this or not
320   mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
321 
322   mWrapColumn = aWrapColumn;
323 }
324 
325 NS_IMETHODIMP
Init(const uint32_t aFlags,uint32_t aWrapColumn,const Encoding * aEncoding,bool aIsCopying,bool aIsWholeDocument,bool * aNeedsPreformatScanning,nsAString & aOutput)326 nsPlainTextSerializer::Init(const uint32_t aFlags, uint32_t aWrapColumn,
327                             const Encoding* aEncoding, bool aIsCopying,
328                             bool aIsWholeDocument,
329                             bool* aNeedsPreformatScanning, nsAString& aOutput) {
330 #ifdef DEBUG
331   // Check if the major control flags are set correctly.
332   if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
333     NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
334                  "If you want format=flowed, you must combine it with "
335                  "nsIDocumentEncoder::OutputFormatted");
336   }
337 
338   if (aFlags & nsIDocumentEncoder::OutputFormatted) {
339     NS_ASSERTION(
340         !(aFlags & nsIDocumentEncoder::OutputPreformatted),
341         "Can't do formatted and preformatted output at the same time!");
342   }
343 #endif
344   MOZ_ASSERT(!(aFlags & nsIDocumentEncoder::OutputFormatDelSp) ||
345              (aFlags & nsIDocumentEncoder::OutputFormatFlowed));
346 
347   *aNeedsPreformatScanning = true;
348   mSettings.Init(aFlags, aWrapColumn);
349   mOutputManager.emplace(mSettings.GetFlags(), aOutput);
350 
351   if (mSettings.MayWrap() && mSettings.MayBreakLines()) {
352     mLineBreaker = nsContentUtils::LineBreaker();
353   }
354 
355   mLineBreakDue = false;
356   mFloatingLines = -1;
357 
358   mPreformattedBlockBoundary = false;
359 
360   MOZ_ASSERT(mOLStack.IsEmpty());
361 
362   return NS_OK;
363 }
364 
GetLastBool(const nsTArray<bool> & aStack)365 bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
366   uint32_t size = aStack.Length();
367   if (size == 0) {
368     return false;
369   }
370   return aStack.ElementAt(size - 1);
371 }
372 
SetLastBool(nsTArray<bool> & aStack,bool aValue)373 void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
374   uint32_t size = aStack.Length();
375   if (size > 0) {
376     aStack.ElementAt(size - 1) = aValue;
377   } else {
378     NS_ERROR("There is no \"Last\" value");
379   }
380 }
381 
PushBool(nsTArray<bool> & aStack,bool aValue)382 void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
383   aStack.AppendElement(bool(aValue));
384 }
385 
PopBool(nsTArray<bool> & aStack)386 bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
387   return aStack.Length() ? aStack.PopLastElement() : false;
388 }
389 
IsIgnorableRubyAnnotation(const nsAtom * aTag) const390 bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(
391     const nsAtom* aTag) const {
392   if (mSettings.GetWithRubyAnnotation()) {
393     return false;
394   }
395 
396   return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt ||
397          aTag == nsGkAtoms::rtc;
398 }
399 
400 // Return true if aElement has 'display:none' or if we just don't know.
IsDisplayNone(Element * aElement)401 static bool IsDisplayNone(Element* aElement) {
402   RefPtr<ComputedStyle> computedStyle =
403       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
404   return !computedStyle ||
405          computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
406 }
407 
IsIgnorableScriptOrStyle(Element * aElement)408 static bool IsIgnorableScriptOrStyle(Element* aElement) {
409   return aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style) &&
410          IsDisplayNone(aElement);
411 }
412 
413 NS_IMETHODIMP
AppendText(nsIContent * aText,int32_t aStartOffset,int32_t aEndOffset)414 nsPlainTextSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
415                                   int32_t aEndOffset) {
416   if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
417     return NS_OK;
418   }
419 
420   NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
421   if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;
422 
423   NS_ENSURE_ARG(aText);
424 
425   nsresult rv = NS_OK;
426 
427   nsIContent* content = aText;
428   const nsTextFragment* frag;
429   if (!content || !(frag = content->GetText())) {
430     return NS_ERROR_FAILURE;
431   }
432 
433   int32_t fragLength = frag->GetLength();
434   int32_t endoffset =
435       (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
436   NS_ASSERTION(aStartOffset <= endoffset,
437                "A start offset is beyond the end of the text fragment!");
438 
439   int32_t length = endoffset - aStartOffset;
440   if (length <= 0) {
441     return NS_OK;
442   }
443 
444   nsAutoString textstr;
445   if (frag->Is2b()) {
446     textstr.Assign(frag->Get2b() + aStartOffset, length);
447   } else {
448     // AssignASCII is for 7-bit character only, so don't use it
449     const char* data = frag->Get1b();
450     CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
451   }
452 
453   // Mask the text if the text node is in a password field.
454   if (content->HasFlag(NS_MAYBE_MASKED)) {
455     EditorUtils::MaskString(textstr, content->AsText(), 0, aStartOffset);
456   }
457 
458   // We have to split the string across newlines
459   // to match parser behavior
460   int32_t start = 0;
461   int32_t offset = textstr.FindCharInSet("\n\r");
462   while (offset != kNotFound) {
463     if (offset > start) {
464       // Pass in the line
465       DoAddText(false, Substring(textstr, start, offset - start));
466     }
467 
468     // Pass in a newline
469     DoAddText();
470 
471     start = offset + 1;
472     offset = textstr.FindCharInSet("\n\r", start);
473   }
474 
475   // Consume the last bit of the string if there's any left
476   if (start < length) {
477     if (start) {
478       DoAddText(false, Substring(textstr, start, length - start));
479     } else {
480       DoAddText(false, textstr);
481     }
482   }
483 
484   return rv;
485 }
486 
487 NS_IMETHODIMP
AppendCDATASection(nsIContent * aCDATASection,int32_t aStartOffset,int32_t aEndOffset)488 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
489                                           int32_t aStartOffset,
490                                           int32_t aEndOffset) {
491   return AppendText(aCDATASection, aStartOffset, aEndOffset);
492 }
493 
494 NS_IMETHODIMP
ScanElementForPreformat(Element * aElement)495 nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
496   mPreformatStack.push(IsElementPreformatted(aElement));
497   return NS_OK;
498 }
499 
500 NS_IMETHODIMP
ForgetElementForPreformat(Element * aElement)501 nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
502   MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
503                      "Tried to pop without previous push.");
504   mPreformatStack.pop();
505   return NS_OK;
506 }
507 
508 NS_IMETHODIMP
AppendElementStart(Element * aElement,Element * aOriginalElement)509 nsPlainTextSerializer::AppendElementStart(Element* aElement,
510                                           Element* aOriginalElement) {
511   NS_ENSURE_ARG(aElement);
512 
513   mElement = aElement;
514 
515   nsresult rv;
516   nsAtom* id = GetIdForContent(mElement);
517 
518   bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
519 
520   if (isContainer) {
521     rv = DoOpenContainer(id);
522   } else {
523     rv = DoAddLeaf(id);
524   }
525 
526   mElement = nullptr;
527 
528   if (id == nsGkAtoms::head) {
529     ++mHeadLevel;
530   }
531 
532   return rv;
533 }
534 
535 NS_IMETHODIMP
AppendElementEnd(Element * aElement,Element * aOriginalElement)536 nsPlainTextSerializer::AppendElementEnd(Element* aElement,
537                                         Element* aOriginalElement) {
538   NS_ENSURE_ARG(aElement);
539 
540   mElement = aElement;
541 
542   nsresult rv;
543   nsAtom* id = GetIdForContent(mElement);
544 
545   bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
546 
547   rv = NS_OK;
548   if (isContainer) {
549     rv = DoCloseContainer(id);
550   }
551 
552   mElement = nullptr;
553 
554   if (id == nsGkAtoms::head) {
555     NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
556     --mHeadLevel;
557   }
558 
559   return rv;
560 }
561 
562 NS_IMETHODIMP
FlushAndFinish()563 nsPlainTextSerializer::FlushAndFinish() {
564   MOZ_ASSERT(mOutputManager);
565 
566   mOutputManager->Flush(mCurrentLine);
567   return Finish();
568 }
569 
570 NS_IMETHODIMP
Finish()571 nsPlainTextSerializer::Finish() {
572   mOutputManager.reset();
573 
574   return NS_OK;
575 }
576 
577 NS_IMETHODIMP
GetOutputLength(uint32_t & aLength) const578 nsPlainTextSerializer::GetOutputLength(uint32_t& aLength) const {
579   MOZ_ASSERT(mOutputManager);
580 
581   aLength = mOutputManager->GetOutputLength();
582 
583   return NS_OK;
584 }
585 
586 NS_IMETHODIMP
AppendDocumentStart(Document * aDocument)587 nsPlainTextSerializer::AppendDocumentStart(Document* aDocument) {
588   return NS_OK;
589 }
590 
591 constexpr int32_t kOlStackDummyValue = 0;
592 
DoOpenContainer(const nsAtom * aTag)593 nsresult nsPlainTextSerializer::DoOpenContainer(const nsAtom* aTag) {
594   if (IsIgnorableRubyAnnotation(aTag)) {
595     // Ignorable ruby annotation shouldn't be replaced by a placeholder
596     // character, neither any of its descendants.
597     mIgnoredChildNodeLevel++;
598     return NS_OK;
599   }
600   if (IsIgnorableScriptOrStyle(mElement)) {
601     mIgnoredChildNodeLevel++;
602     return NS_OK;
603   }
604 
605   if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
606     if (mPreformattedBlockBoundary && DoOutput()) {
607       // Should always end a line, but get no more whitespace
608       if (mFloatingLines < 0) mFloatingLines = 0;
609       mLineBreakDue = true;
610     }
611     mPreformattedBlockBoundary = false;
612   }
613 
614   if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
615     // Raw means raw.  Don't even think about doing anything fancy
616     // here like indenting, adding line breaks or any other
617     // characters such as list item bullets, quote characters
618     // around <q>, etc.
619 
620     return NS_OK;
621   }
622 
623   if (mTagStackIndex < TagStackSize) {
624     mTagStack[mTagStackIndex++] = aTag;
625   }
626 
627   if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
628     return NS_OK;
629   }
630 
631   // Reset this so that <blockquote type=cite> doesn't affect the whitespace
632   // above random <pre>s below it.
633   mHasWrittenCiteBlockquote =
634       mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;
635 
636   bool isInCiteBlockquote = false;
637 
638   // XXX special-case <blockquote type=cite> so that we don't add additional
639   // newlines before the text.
640   if (aTag == nsGkAtoms::blockquote) {
641     nsAutoString value;
642     nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
643     isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
644   }
645 
646   if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);
647 
648   // Check if this tag's content that should not be output
649   if ((aTag == nsGkAtoms::noscript &&
650        !mSettings.HasFlag(nsIDocumentEncoder::OutputNoScriptContent)) ||
651       ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
652        !mSettings.HasFlag(nsIDocumentEncoder::OutputNoFramesContent))) {
653     // Ignore everything that follows the current tag in
654     // question until a matching end tag is encountered.
655     mIgnoreAboveIndex = mTagStackIndex - 1;
656     return NS_OK;
657   }
658 
659   if (aTag == nsGkAtoms::body) {
660     // Try to figure out here whether we have a
661     // preformatted style attribute set by Thunderbird.
662     //
663     // Trigger on the presence of a "pre-wrap" in the
664     // style attribute. That's a very simplistic way to do
665     // it, but better than nothing.
666     nsAutoString style;
667     int32_t whitespace;
668     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
669         (kNotFound != (whitespace = style.Find("white-space:")))) {
670       if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
671 #ifdef DEBUG_preformatted
672         printf("Set mPreFormattedMail based on style pre-wrap\n");
673 #endif
674         mPreFormattedMail = true;
675       } else if (kNotFound != style.Find("pre", true, whitespace)) {
676 #ifdef DEBUG_preformatted
677         printf("Set mPreFormattedMail based on style pre\n");
678 #endif
679         mPreFormattedMail = true;
680       }
681     } else {
682       /* See comment at end of function. */
683       mInWhitespace = true;
684       mPreFormattedMail = false;
685     }
686 
687     return NS_OK;
688   }
689 
690   // Keep this in sync with DoCloseContainer!
691   if (!DoOutput()) {
692     return NS_OK;
693   }
694 
695   if (aTag == nsGkAtoms::p)
696     EnsureVerticalSpace(1);
697   else if (aTag == nsGkAtoms::pre) {
698     if (GetLastBool(mIsInCiteBlockquote))
699       EnsureVerticalSpace(0);
700     else if (mHasWrittenCiteBlockquote) {
701       EnsureVerticalSpace(0);
702       mHasWrittenCiteBlockquote = false;
703     } else
704       EnsureVerticalSpace(1);
705   } else if (aTag == nsGkAtoms::tr) {
706     PushBool(mHasWrittenCellsForRow, false);
707   } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
708     // We must make sure that the content of two table cells get a
709     // space between them.
710 
711     // To make the separation between cells most obvious and
712     // importable, we use a TAB.
713     if (mHasWrittenCellsForRow.IsEmpty()) {
714       // We don't always see a <tr> (nor a <table>) before the <td> if we're
715       // copying part of a table
716       PushBool(mHasWrittenCellsForRow, true);  // will never be popped
717     } else if (GetLastBool(mHasWrittenCellsForRow)) {
718       // Bypass |Write| so that the TAB isn't compressed away.
719       AddToLine(u"\t", 1);
720       mInWhitespace = true;
721     } else {
722       SetLastBool(mHasWrittenCellsForRow, true);
723     }
724   } else if (aTag == nsGkAtoms::ul) {
725     // Indent here to support nested lists, which aren't included in li :-(
726     EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
727     // Must end the current line before we change indention
728     mCurrentLine.mIndentation.mLength += kIndentSizeList;
729     mULCount++;
730   } else if (aTag == nsGkAtoms::ol) {
731     EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
732     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
733       // Must end the current line before we change indention
734       nsAutoString startAttr;
735       int32_t startVal = 1;
736       if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
737         nsresult rv = NS_OK;
738         startVal = startAttr.ToInteger(&rv);
739         if (NS_FAILED(rv)) {
740           startVal = 1;
741         }
742       }
743       mOLStack.AppendElement(startVal);
744     } else {
745       mOLStack.AppendElement(kOlStackDummyValue);
746     }
747     mCurrentLine.mIndentation.mLength += kIndentSizeList;  // see ul
748   } else if (aTag == nsGkAtoms::li &&
749              mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
750     if (mTagStackIndex > 1 && IsInOL()) {
751       if (!mOLStack.IsEmpty()) {
752         nsAutoString valueAttr;
753         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
754           nsresult rv = NS_OK;
755           int32_t valueAttrVal = valueAttr.ToInteger(&rv);
756           if (NS_SUCCEEDED(rv)) {
757             mOLStack.LastElement() = valueAttrVal;
758           }
759         }
760         // This is what nsBulletFrame does for OLs:
761         mCurrentLine.mIndentation.mHeader.AppendInt(mOLStack.LastElement(), 10);
762         mOLStack.LastElement()++;
763       } else {
764         mCurrentLine.mIndentation.mHeader.Append(char16_t('#'));
765       }
766 
767       mCurrentLine.mIndentation.mHeader.Append(char16_t('.'));
768 
769     } else {
770       static const char bulletCharArray[] = "*o+#";
771       uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
772       char bulletChar = bulletCharArray[index % 4];
773       mCurrentLine.mIndentation.mHeader.Append(char16_t(bulletChar));
774     }
775 
776     mCurrentLine.mIndentation.mHeader.Append(char16_t(' '));
777   } else if (aTag == nsGkAtoms::dl) {
778     EnsureVerticalSpace(1);
779   } else if (aTag == nsGkAtoms::dt) {
780     EnsureVerticalSpace(0);
781   } else if (aTag == nsGkAtoms::dd) {
782     EnsureVerticalSpace(0);
783     mCurrentLine.mIndentation.mLength += kIndentSizeDD;
784   } else if (aTag == nsGkAtoms::span) {
785     ++mSpanLevel;
786   } else if (aTag == nsGkAtoms::blockquote) {
787     // Push
788     PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
789     if (isInCiteBlockquote) {
790       EnsureVerticalSpace(0);
791       mCurrentLine.mCiteQuoteLevel++;
792     } else {
793       EnsureVerticalSpace(1);
794       mCurrentLine.mIndentation.mLength +=
795           kTabSize;  // Check for some maximum value?
796     }
797   } else if (aTag == nsGkAtoms::q) {
798     Write(u"\""_ns);
799   }
800 
801   // Else make sure we'll separate block level tags,
802   // even if we're about to leave, before doing any other formatting.
803   else if (IsCssBlockLevelElement(mElement)) {
804     EnsureVerticalSpace(0);
805   }
806 
807   if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
808     OpenContainerForOutputFormatted(aTag);
809   }
810   return NS_OK;
811 }
812 
OpenContainerForOutputFormatted(const nsAtom * aTag)813 void nsPlainTextSerializer::OpenContainerForOutputFormatted(
814     const nsAtom* aTag) {
815   const bool currentNodeIsConverted = IsCurrentNodeConverted();
816 
817   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
818       aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
819     EnsureVerticalSpace(2);
820     if (mSettings.GetHeaderStrategy() ==
821         Settings::HeaderStrategy::kNumberHeadingsAndIndentSlightly) {
822       mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
823       // Caching
824       int32_t level = HeaderLevel(aTag);
825       // Increase counter for current level
826       mHeaderCounter[level]++;
827       // Reset all lower levels
828       int32_t i;
829 
830       for (i = level + 1; i <= 6; i++) {
831         mHeaderCounter[i] = 0;
832       }
833 
834       // Construct numbers
835       nsAutoString leadup;
836       for (i = 1; i <= level; i++) {
837         leadup.AppendInt(mHeaderCounter[i]);
838         leadup.Append(char16_t('.'));
839       }
840       leadup.Append(char16_t(' '));
841       Write(leadup);
842     } else if (mSettings.GetHeaderStrategy() ==
843                Settings::HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
844       mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
845       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
846         // for h(x), run x-1 times
847         mCurrentLine.mIndentation.mLength += kIndentIncrementHeaders;
848       }
849     }
850   } else if (aTag == nsGkAtoms::sup && mSettings.GetStructs() &&
851              !currentNodeIsConverted) {
852     Write(u"^"_ns);
853   } else if (aTag == nsGkAtoms::sub && mSettings.GetStructs() &&
854              !currentNodeIsConverted) {
855     Write(u"_"_ns);
856   } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
857              !currentNodeIsConverted) {
858     Write(u"|"_ns);
859   } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
860              mSettings.GetStructs() && !currentNodeIsConverted) {
861     Write(u"*"_ns);
862   } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
863              mSettings.GetStructs() && !currentNodeIsConverted) {
864     Write(u"/"_ns);
865   } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
866              !currentNodeIsConverted) {
867     Write(u"_"_ns);
868   }
869 
870   /* Container elements are always block elements, so we shouldn't
871      output any whitespace immediately after the container tag even if
872      there's extra whitespace there because the HTML is pretty-printed
873      or something. To ensure that happens, tell the serializer we're
874      already in whitespace so it won't output more. */
875   mInWhitespace = true;
876 }
877 
/**
 * Handles the end of a container element named by |aTag|.
 *
 * Adjusts the ignore counters, the tag stack, indentation and list/quote
 * nesting state, and records which line breaks are due after the element.
 * The per-tag handling is expected to mirror DoOpenContainer (not shown
 * here), so changes to one normally need a matching change in the other.
 *
 * @param aTag  Atom of the element being closed.
 * @return NS_OK on every path.
 */
nsresult nsPlainTextSerializer::DoCloseContainer(const nsAtom* aTag) {
  // Leaving an ignored subtree: only the nesting counter needs updating.
  if (IsIgnorableRubyAnnotation(aTag)) {
    mIgnoredChildNodeLevel--;
    return NS_OK;
  }
  if (IsIgnorableScriptOrStyle(mElement)) {
    mIgnoredChildNodeLevel--;
    return NS_OK;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    if (DoOutput() && IsElementPreformatted() &&
        IsCssBlockLevelElement(mElement)) {
      // If we're closing a preformatted block element, output a line break
      // when we find a new container.
      mPreformattedBlockBoundary = true;
    }
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
    // Raw means raw.  Don't even think about doing anything fancy
    // here like indenting, adding line breaks or any other
    // characters such as list item bullets, quote characters
    // around <q>, etc.

    return NS_OK;
  }

  // Pop this element off the tag stack (never below zero).
  if (mTagStackIndex > 0) {
    --mTagStackIndex;
  }

  // Tags at or above mIgnoreAboveIndex are discarded without any output.
  if (mTagStackIndex >= mIgnoreAboveIndex) {
    if (mTagStackIndex == mIgnoreAboveIndex) {
      // We're dealing with the close tag whose matching
      // open tag had set the mIgnoreAboveIndex value.
      // Reset mIgnoreAboveIndex before discarding this tag.
      mIgnoreAboveIndex = (uint32_t)kNotFound;
    }
    return NS_OK;
  }

  MOZ_ASSERT(mOutputManager);

  // End current line if we're ending a block level tag
  if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
    // We want the output to end with a new line,
    // but in preformatted areas like text fields,
    // we can't emit newlines that weren't there.
    // So add the newline only in the case of formatted output.
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      EnsureVerticalSpace(0);
    } else {
      mOutputManager->Flush(mCurrentLine);
    }
    // We won't want to do anything with these in formatted mode either,
    // so just return now:
    return NS_OK;
  }

  // Keep this in sync with DoOpenContainer!
  if (!DoOutput()) {
    return NS_OK;
  }

  // Per-tag handling. Most branches only record that a line break is due
  // (mLineBreakDue) and how many empty lines should follow
  // (mFloatingLines); the break itself is emitted later by whoever checks
  // mLineBreakDue and calls EnsureVerticalSpace(mFloatingLines).
  if (aTag == nsGkAtoms::tr) {
    PopBool(mHasWrittenCellsForRow);
    // Should always end a line, but get no more whitespace
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    // Items that should always end a line, but get no more whitespace
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::pre) {
    // No empty line after a <pre> when the innermost blockquote is a cite
    // blockquote; one empty line otherwise.
    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::ul) {
    // Flush pending text before the list indentation is removed.
    mOutputManager->Flush(mCurrentLine);
    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
    --mULCount;
    // Only the outermost list is followed by an empty line.
    if (!IsInOlOrUl()) {
      mFloatingLines = 1;
      mLineBreakDue = true;
    }
  } else if (aTag == nsGkAtoms::ol) {
    mOutputManager->Flush(mCurrentLine);  // Doing this after decreasing
                                          // OLStackIndex would be wrong.
    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
    MOZ_ASSERT(!mOLStack.IsEmpty(), "Wrong OLStack level!");
    mOLStack.RemoveLastElement();
    // Only the outermost list is followed by an empty line.
    if (!IsInOlOrUl()) {
      mFloatingLines = 1;
      mLineBreakDue = true;
    }
  } else if (aTag == nsGkAtoms::dl) {
    mFloatingLines = 1;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::dd) {
    // Flush pending text before the <dd> indentation is removed.
    mOutputManager->Flush(mCurrentLine);
    mCurrentLine.mIndentation.mLength -= kIndentSizeDD;
  } else if (aTag == nsGkAtoms::span) {
    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
    --mSpanLevel;
  } else if (aTag == nsGkAtoms::div) {
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::blockquote) {
    mOutputManager->Flush(mCurrentLine);  // Is this needed?

    // Pop
    bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);

    if (isInCiteBlockquote) {
      // Cite blockquotes are tracked by quote level rather than indentation.
      NS_ASSERTION(mCurrentLine.mCiteQuoteLevel,
                   "CiteQuote level will be negative!");
      mCurrentLine.mCiteQuoteLevel--;
      mFloatingLines = 0;
      mHasWrittenCiteBlockquote = true;
    } else {
      // Other blockquotes lose kTabSize columns of indentation on close.
      mCurrentLine.mIndentation.mLength -= kTabSize;
      mFloatingLines = 1;
    }
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::q) {
    // Closing quotation mark for an inline quote.
    Write(u"\""_ns);
  } else if (IsCssBlockLevelElement(mElement)) {
    // All other blocks get 1 vertical space after them
    // in formatted mode, otherwise 0.
    // This is hard. Sometimes 0 is a better number, but
    // how to know?
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      EnsureVerticalSpace(1);
    } else {
      if (mFloatingLines < 0) mFloatingLines = 0;
      mLineBreakDue = true;
    }
  }

  // Formatted output gets extra closing decoration (heading indentation,
  // link URLs, structure markers).
  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    CloseContainerForOutputFormatted(aTag);
  }

  return NS_OK;
}
1024 
CloseContainerForOutputFormatted(const nsAtom * aTag)1025 void nsPlainTextSerializer::CloseContainerForOutputFormatted(
1026     const nsAtom* aTag) {
1027   const bool currentNodeIsConverted = IsCurrentNodeConverted();
1028 
1029   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
1030       aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
1031     using HeaderStrategy = Settings::HeaderStrategy;
1032     if ((mSettings.GetHeaderStrategy() ==
1033          HeaderStrategy::kIndentIncreasedWithHeaderLevel) ||
1034         (mSettings.GetHeaderStrategy() ==
1035          HeaderStrategy::kNumberHeadingsAndIndentSlightly)) {
1036       mCurrentLine.mIndentation.mLength -= kIndentSizeHeaders;
1037     }
1038     if (mSettings.GetHeaderStrategy() ==
1039         HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
1040       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
1041         // for h(x), run x-1 times
1042         mCurrentLine.mIndentation.mLength -= kIndentIncrementHeaders;
1043       }
1044     }
1045     EnsureVerticalSpace(1);
1046   } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
1047     nsAutoString url;
1048     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) &&
1049         !url.IsEmpty()) {
1050       nsAutoString temp;
1051       temp.AssignLiteral(" <");
1052       temp += url;
1053       temp.Append(char16_t('>'));
1054       Write(temp);
1055     }
1056   } else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) &&
1057              mSettings.GetStructs() && !currentNodeIsConverted) {
1058     Write(kSpace);
1059   } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
1060              !currentNodeIsConverted) {
1061     Write(u"|"_ns);
1062   } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
1063              mSettings.GetStructs() && !currentNodeIsConverted) {
1064     Write(u"*"_ns);
1065   } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
1066              mSettings.GetStructs() && !currentNodeIsConverted) {
1067     Write(u"/"_ns);
1068   } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
1069              !currentNodeIsConverted) {
1070     Write(u"_"_ns);
1071   }
1072 }
1073 
MustSuppressLeaf() const1074 bool nsPlainTextSerializer::MustSuppressLeaf() const {
1075   if (mIgnoredChildNodeLevel > 0) {
1076     return true;
1077   }
1078 
1079   if ((mTagStackIndex > 1 &&
1080        mTagStack[mTagStackIndex - 2] == nsGkAtoms::select) ||
1081       (mTagStackIndex > 0 &&
1082        mTagStack[mTagStackIndex - 1] == nsGkAtoms::select)) {
1083     // Don't output the contents of SELECT elements;
1084     // Might be nice, eventually, to output just the selected element.
1085     // Read more in bug 31994.
1086     return true;
1087   }
1088 
1089   return false;
1090 }
1091 
DoAddText()1092 void nsPlainTextSerializer::DoAddText() { DoAddText(true, u""_ns); }
1093 
DoAddText(bool aIsLineBreak,const nsAString & aText)1094 void nsPlainTextSerializer::DoAddText(bool aIsLineBreak,
1095                                       const nsAString& aText) {
1096   // If we don't want any output, just return
1097   if (!DoOutput()) {
1098     return;
1099   }
1100 
1101   if (!aIsLineBreak) {
1102     // Make sure to reset this, since it's no longer true.
1103     mHasWrittenCiteBlockquote = false;
1104   }
1105 
1106   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1107 
1108   if (MustSuppressLeaf()) {
1109     return;
1110   }
1111 
1112   if (aIsLineBreak) {
1113     // The only times we want to pass along whitespace from the original
1114     // html source are if we're forced into preformatted mode via flags,
1115     // or if we're prettyprinting and we're inside a <pre>.
1116     // Otherwise, either we're collapsing to minimal text, or we're
1117     // prettyprinting to mimic the html format, and in neither case
1118     // does the formatting of the html source help us.
1119     if (mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) ||
1120         (mPreFormattedMail && !mSettings.GetWrapColumn()) ||
1121         IsElementPreformatted()) {
1122       EnsureVerticalSpace(mEmptyLines + 1);
1123     } else if (!mInWhitespace) {
1124       Write(kSpace);
1125       mInWhitespace = true;
1126     }
1127     return;
1128   }
1129 
1130   Write(aText);
1131 }
1132 
CreateLineOfDashes(nsAString & aResult,const uint32_t aWrapColumn)1133 void CreateLineOfDashes(nsAString& aResult, const uint32_t aWrapColumn) {
1134   MOZ_ASSERT(aResult.IsEmpty());
1135 
1136   const uint32_t width = (aWrapColumn > 0 ? aWrapColumn : 25);
1137   while (aResult.Length() < width) {
1138     aResult.Append(char16_t('-'));
1139   }
1140 }
1141 
DoAddLeaf(const nsAtom * aTag)1142 nsresult nsPlainTextSerializer::DoAddLeaf(const nsAtom* aTag) {
1143   mPreformattedBlockBoundary = false;
1144 
1145   if (!DoOutput()) {
1146     return NS_OK;
1147   }
1148 
1149   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1150 
1151   if (MustSuppressLeaf()) {
1152     return NS_OK;
1153   }
1154 
1155   if (aTag == nsGkAtoms::br) {
1156     // Another egregious editor workaround, see bug 38194:
1157     // ignore the bogus br tags that the editor sticks here and there.
1158     // FYI: `brElement` may be `nullptr` if the element is <br> element
1159     //      of non-HTML element.
1160     // XXX Do we need to call `EnsureVerticalSpace()` when the <br> element
1161     //     is not an HTML element?
1162     HTMLBRElement* brElement = HTMLBRElement::FromNodeOrNull(mElement);
1163     if (!brElement || !brElement->IsPaddingForEmptyLastLine()) {
1164       EnsureVerticalSpace(mEmptyLines + 1);
1165     }
1166   } else if (aTag == nsGkAtoms::hr &&
1167              mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
1168     EnsureVerticalSpace(0);
1169 
1170     // Make a line of dashes as wide as the wrap width
1171     // XXX honoring percentage would be nice
1172     nsAutoString line;
1173     CreateLineOfDashes(line, mSettings.GetWrapColumn());
1174     Write(line);
1175 
1176     EnsureVerticalSpace(0);
1177   } else if (aTag == nsGkAtoms::img) {
1178     /* Output (in decreasing order of preference)
1179        alt, title or nothing */
1180     // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1181     nsAutoString imageDescription;
1182     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, imageDescription))) {
1183       // If the alt attribute has an empty value (|alt=""|), output nothing
1184     } else if (NS_SUCCEEDED(
1185                    GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
1186                !imageDescription.IsEmpty()) {
1187       imageDescription = u" ["_ns + imageDescription + u"] "_ns;
1188     }
1189 
1190     Write(imageDescription);
1191   }
1192 
1193   return NS_OK;
1194 }
1195 
1196 /**
1197  * Adds as many newline as necessary to get |aNumberOfRows| empty lines
1198  *
1199  * aNumberOfRows = -1    :   Being in the middle of some line of text
1200  * aNumberOfRows =  0    :   Being at the start of a line
1201  * aNumberOfRows =  n>0  :   Having n empty lines before the current line.
1202  */
EnsureVerticalSpace(const int32_t aNumberOfRows)1203 void nsPlainTextSerializer::EnsureVerticalSpace(const int32_t aNumberOfRows) {
1204   // If we have something in the indent we probably want to output
1205   // it and it's not included in the count for empty lines so we don't
1206   // realize that we should start a new line.
1207   if (aNumberOfRows >= 0 && !mCurrentLine.mIndentation.mHeader.IsEmpty()) {
1208     EndLine(false);
1209     mInWhitespace = true;
1210   }
1211 
1212   while (mEmptyLines < aNumberOfRows) {
1213     EndLine(false);
1214     mInWhitespace = true;
1215   }
1216   mLineBreakDue = false;
1217   mFloatingLines = -1;
1218 }
1219 
Flush(CurrentLine & aCurrentLine)1220 void nsPlainTextSerializer::OutputManager::Flush(CurrentLine& aCurrentLine) {
1221   if (!aCurrentLine.mContent.IsEmpty()) {
1222     aCurrentLine.MaybeReplaceNbspsInContent(mFlags);
1223 
1224     Append(aCurrentLine, StripTrailingWhitespaces::kNo);
1225 
1226     aCurrentLine.ResetContentAndIndentationHeader();
1227   }
1228 }
1229 
IsSpaceStuffable(const char16_t * s)1230 static bool IsSpaceStuffable(const char16_t* s) {
1231   return (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1232           NS_strncmp(s, u"From ", 5) == 0);
1233 }
1234 
MaybeWrapAndOutputCompleteLines()1235 void nsPlainTextSerializer::MaybeWrapAndOutputCompleteLines() {
1236   if (!mSettings.MayWrap()) {
1237     return;
1238   }
1239 
1240   const uint32_t prefixwidth = mCurrentLine.DeterminePrefixWidth();
1241 
1242   // The width of the line as it will appear on the screen (approx.).
1243   uint32_t currentLineContentWidth =
1244       GetUnicharStringWidth(mCurrentLine.mContent);
1245 
1246   // Yes, wrap!
1247   // The "+4" is to avoid wrap lines that only would be a couple
1248   // of letters too long. We give this bonus only if the
1249   // wrapcolumn is more than 20.
1250   const uint32_t wrapColumn = mSettings.GetWrapColumn();
1251   uint32_t bonuswidth = (wrapColumn > 20) ? 4 : 0;
1252 
1253   while (currentLineContentWidth + prefixwidth > wrapColumn + bonuswidth) {
1254     const int32_t goodSpace = mCurrentLine.FindWrapIndexForContent(
1255         wrapColumn, currentLineContentWidth, mLineBreaker);
1256 
1257     const int32_t contentLength = mCurrentLine.mContent.Length();
1258     if ((goodSpace < contentLength) && (goodSpace > 0)) {
1259       // Found a place to break
1260 
1261       // -1 (trim a char at the break position)
1262       // only if the line break was a space.
1263       nsAutoString restOfContent;
1264       if (nsCRT::IsAsciiSpace(mCurrentLine.mContent.CharAt(goodSpace))) {
1265         mCurrentLine.mContent.Right(restOfContent,
1266                                     contentLength - goodSpace - 1);
1267       } else {
1268         mCurrentLine.mContent.Right(restOfContent, contentLength - goodSpace);
1269       }
1270       // if breaker was U+0020, it has to consider for delsp=yes support
1271       const bool breakBySpace = mCurrentLine.mContent.CharAt(goodSpace) == ' ';
1272       mCurrentLine.mContent.Truncate(goodSpace);
1273       EndLine(true, breakBySpace);
1274       mCurrentLine.mContent.Truncate();
1275       // Space stuff new line?
1276       if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1277         if (!restOfContent.IsEmpty() && IsSpaceStuffable(restOfContent.get()) &&
1278             mCurrentLine.mCiteQuoteLevel ==
1279                 0  // We space-stuff quoted lines anyway
1280         ) {
1281           // Space stuffing a la RFC 2646 (format=flowed).
1282           mCurrentLine.mContent.Append(char16_t(' '));
1283           // XXX doesn't seem to work correctly for ' '
1284         }
1285       }
1286       mCurrentLine.mContent.Append(restOfContent);
1287       currentLineContentWidth = GetUnicharStringWidth(mCurrentLine.mContent);
1288       mEmptyLines = -1;
1289     } else {
1290       // Nothing to do. Hopefully we get more data later
1291       // to use for a place to break line
1292       break;
1293     }
1294   }
1295 }
1296 
1297 /**
1298  * This function adds a piece of text to the current stored line. If we are
1299  * wrapping text and the stored line will become too long, a suitable
1300  * location to wrap will be found and the line that's complete will be
1301  * output.
1302  */
AddToLine(const char16_t * aLineFragment,int32_t aLineFragmentLength)1303 void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
1304                                       int32_t aLineFragmentLength) {
1305   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1306 
1307   if (mCurrentLine.mContent.IsEmpty()) {
1308     if (0 == aLineFragmentLength) {
1309       return;
1310     }
1311 
1312     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1313       if (IsSpaceStuffable(aLineFragment) &&
1314           mCurrentLine.mCiteQuoteLevel ==
1315               0  // We space-stuff quoted lines anyway
1316       ) {
1317         // Space stuffing a la RFC 2646 (format=flowed).
1318         mCurrentLine.mContent.Append(char16_t(' '));
1319       }
1320     }
1321     mEmptyLines = -1;
1322   }
1323 
1324   mCurrentLine.mContent.Append(aLineFragment, aLineFragmentLength);
1325 
1326   MaybeWrapAndOutputCompleteLines();
1327 }
1328 
1329 // The signature separator (RFC 2646).
1330 const char kSignatureSeparator[] = "-- ";
1331 
1332 // The OpenPGP dash-escaped signature separator in inline
1333 // signed messages according to the OpenPGP standard (RFC 2440).
1334 const char kDashEscapedSignatureSeparator[] = "- -- ";
1335 
IsSignatureSeparator(const nsAString & aString)1336 static bool IsSignatureSeparator(const nsAString& aString) {
1337   return aString.EqualsLiteral(kSignatureSeparator) ||
1338          aString.EqualsLiteral(kDashEscapedSignatureSeparator);
1339 }
1340 
1341 /**
1342  * Outputs the contents of mCurrentLine.mContent, and resets line
1343  * specific variables. Also adds an indentation and prefix if there is one
1344  * specified. Strips ending spaces from the line if it isn't preformatted.
1345  */
EndLine(bool aSoftLineBreak,bool aBreakBySpace)1346 void nsPlainTextSerializer::EndLine(bool aSoftLineBreak, bool aBreakBySpace) {
1347   if (aSoftLineBreak && mCurrentLine.mContent.IsEmpty()) {
1348     // No meaning
1349     return;
1350   }
1351 
1352   /* In non-preformatted mode, remove spaces from the end of the line for
1353    * format=flowed compatibility. Don't do this for these special cases:
1354    * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1355    * "- -- ", the OpenPGP dash-escaped signature separator in inline
1356    * signed messages according to the OpenPGP standard (RFC 2440).
1357    */
1358   if (!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) &&
1359       (aSoftLineBreak || !IsSignatureSeparator(mCurrentLine.mContent))) {
1360     mCurrentLine.mContent.Trim(" ", false, true, false);
1361   }
1362 
1363   if (aSoftLineBreak &&
1364       mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
1365       (mCurrentLine.mIndentation.mLength == 0)) {
1366     // Add the soft part of the soft linebreak (RFC 2646 4.1)
1367     // We only do this when there is no indentation since format=flowed
1368     // lines and indentation doesn't work well together.
1369 
1370     // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1371     // add twice space.
1372     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatDelSp) &&
1373         aBreakBySpace) {
1374       mCurrentLine.mContent.AppendLiteral("  ");
1375     } else {
1376       mCurrentLine.mContent.Append(char16_t(' '));
1377     }
1378   }
1379 
1380   if (aSoftLineBreak) {
1381     mEmptyLines = 0;
1382   } else {
1383     // Hard break
1384     if (mCurrentLine.HasContentOrIndentationHeader()) {
1385       mEmptyLines = 0;
1386     } else {
1387       mEmptyLines++;
1388     }
1389   }
1390 
1391   MOZ_ASSERT(mOutputManager);
1392 
1393   mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());
1394 
1395   // If we don't have anything "real" to output we have to
1396   // make sure the indent doesn't end in a space since that
1397   // would trick a format=flowed-aware receiver.
1398   mOutputManager->Append(mCurrentLine,
1399                          OutputManager::StripTrailingWhitespaces::kMaybe);
1400   mOutputManager->AppendLineBreak();
1401   mCurrentLine.ResetContentAndIndentationHeader();
1402   mInWhitespace = true;
1403   mLineBreakDue = false;
1404   mFloatingLines = -1;
1405 }
1406 
1407 /**
1408  * Creates the calculated and stored indent and text in the indentation. That is
1409  * quote chars and numbers for numbered lists and such.
1410  */
CreateQuotesAndIndent(nsAString & aResult) const1411 void nsPlainTextSerializer::CurrentLine::CreateQuotesAndIndent(
1412     nsAString& aResult) const {
1413   // Put the mail quote "> " chars in, if appropriate:
1414   if (mCiteQuoteLevel > 0) {
1415     nsAutoString quotes;
1416     for (int i = 0; i < mCiteQuoteLevel; i++) {
1417       quotes.Append(char16_t('>'));
1418     }
1419     if (!mContent.IsEmpty()) {
1420       /* Better don't output a space here, if the line is empty,
1421          in case a receiving format=flowed-aware UA thinks, this were a flowed
1422          line, which it isn't - it's just empty. (Flowed lines may be joined
1423          with the following one, so the empty line may be lost completely.) */
1424       quotes.Append(char16_t(' '));
1425     }
1426     aResult = quotes;
1427   }
1428 
1429   // Indent if necessary
1430   int32_t indentwidth = mIndentation.mLength - mIndentation.mHeader.Length();
1431   if (indentwidth > 0 && HasContentOrIndentationHeader()
1432       // Don't make empty lines look flowed
1433   ) {
1434     nsAutoString spaces;
1435     for (int i = 0; i < indentwidth; ++i) spaces.Append(char16_t(' '));
1436     aResult += spaces;
1437   }
1438 
1439   if (!mIndentation.mHeader.IsEmpty()) {
1440     aResult += mIndentation.mHeader;
1441   }
1442 }
1443 
/** Returns true if |c| is '\n', '\r', ' ' or '\t'. */
static bool IsLineFeedCarriageReturnBlankOrTab(char16_t c) {
  switch (c) {
    case u'\n':
    case u'\r':
    case u' ':
    case u'\t':
      return true;
    default:
      return false;
  }
}
1447 
ReplaceVisiblyTrailingNbsps(nsAString & aString)1448 static void ReplaceVisiblyTrailingNbsps(nsAString& aString) {
1449   const int32_t totLen = aString.Length();
1450   for (int32_t i = totLen - 1; i >= 0; i--) {
1451     char16_t c = aString[i];
1452     if (IsLineFeedCarriageReturnBlankOrTab(c)) {
1453       continue;
1454     }
1455     if (kNBSP == c) {
1456       aString.Replace(i, 1, ' ');
1457     } else {
1458       break;
1459     }
1460   }
1461 }
1462 
/**
 * Outputs |aString| without any wrapping: the string is split at '\n',
 * '\r' and "\r\n", and each piece is appended to the current line and
 * handed to the output manager, followed by a line break if the piece was
 * terminated by one.
 *
 * Updates mInWhitespace and mEmptyLines to reflect the last piece written.
 *
 * @param aString  The text to output.
 */
void nsPlainTextSerializer::ConvertToLinesAndOutput(const nsAString& aString) {
  const int32_t totLen = aString.Length();
  // Index of the line terminator found in the current pass, or kNotFound.
  int32_t newline{0};

  // Put the mail quote "> " chars in, if appropriate.
  // Have to put it in before every line.
  int32_t bol = 0;  // index of the beginning of the current line
  while (bol < totLen) {
    bool outputLineBreak = false;
    // Stays true only if every character before the terminator is ' '.
    bool spacesOnly = true;

    // Find one of '\n' or '\r' using iterators since nsAString
    // doesn't have the old FindCharInSet function.
    nsAString::const_iterator iter;
    aString.BeginReading(iter);
    nsAString::const_iterator done_searching;
    aString.EndReading(done_searching);
    iter.advance(bol);
    int32_t new_newline = bol;
    newline = kNotFound;
    while (iter != done_searching) {
      if ('\n' == *iter || '\r' == *iter) {
        // Leave |iter| on the terminator; the CRLF check below needs it.
        newline = new_newline;
        break;
      }
      if (' ' != *iter) {
        spacesOnly = false;
      }
      ++new_newline;
      ++iter;
    }

    // Done searching
    nsAutoString stringpart;
    if (newline == kNotFound) {
      // No new lines.
      stringpart.Assign(Substring(aString, bol, totLen - bol));
      if (!stringpart.IsEmpty()) {
        char16_t lastchar = stringpart.Last();
        mInWhitespace = IsLineFeedCarriageReturnBlankOrTab(lastchar);
      }
      // The output now ends in the middle of a line.
      mEmptyLines = -1;
      bol = totLen;
    } else {
      // There is a newline
      stringpart.Assign(Substring(aString, bol, newline - bol));
      mInWhitespace = true;
      outputLineBreak = true;
      mEmptyLines = 0;
      bol = newline + 1;
      // |bol < totLen| guarantees there is a character after the '\r', so
      // advancing |iter| here stays inside the string.
      if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
        // There was a CRLF in the input. This used to be illegal and
        // stripped by the parser. Apparently not anymore. Let's skip
        // over the LF.
        bol++;
      }
    }

    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
      // Strip trailing spaces, except from quoted lines, signature
      // separators and an unterminated piece consisting only of spaces.
      if ((outputLineBreak || !spacesOnly) &&  // bugs 261467,125928
          !IsQuotedLine(stringpart) && !IsSignatureSeparator(stringpart)) {
        stringpart.Trim(" ", false, true, true);
      }
      // Space-stuff the piece by putting one space in front of it.
      if (IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart)) {
        mCurrentLine.mContent.Append(char16_t(' '));
      }
    }
    mCurrentLine.mContent.Append(stringpart);

    mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());

    mOutputManager->Append(mCurrentLine,
                           OutputManager::StripTrailingWhitespaces::kNo);
    if (outputLineBreak) {
      mOutputManager->AppendLineBreak();
    }

    mCurrentLine.ResetContentAndIndentationHeader();
  }

#ifdef DEBUG_wrapping
  printf("No wrapping: newline is %d, totLen is %d\n", newline, totLen);
#endif
}
1547 
1548 /**
1549  * Write a string. This is the highlevel function to use to get text output.
1550  * By using AddToLine, Output, EndLine and other functions it handles quotation,
1551  * line wrapping, indentation, whitespace compression and other things.
1552  */
Write(const nsAString & aStr)1553 void nsPlainTextSerializer::Write(const nsAString& aStr) {
1554   // XXX Copy necessary to use nsString methods and gain
1555   // access to underlying buffer
1556   nsAutoString str(aStr);
1557 
1558 #ifdef DEBUG_wrapping
1559   printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
1560          mSettings.GetWrapColumn());
1561 #endif
1562 
1563   const int32_t totLen = str.Length();
1564 
1565   // If the string is empty, do nothing:
1566   if (totLen <= 0) return;
1567 
1568   // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1569   // to be cut off along with usual spaces if required. (bug #125928)
1570   if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1571     ReplaceVisiblyTrailingNbsps(str);
1572   }
1573 
1574   // We have two major codepaths here. One that does preformatted text and one
1575   // that does normal formatted text. The one for preformatted text calls
1576   // Output directly while the other code path goes through AddToLine.
1577   if ((mPreFormattedMail && !mSettings.GetWrapColumn()) ||
1578       (IsElementPreformatted() && !mPreFormattedMail) ||
1579       (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
1580     // No intelligent wrapping.
1581 
1582     // This mustn't be mixed with intelligent wrapping without clearing
1583     // the mCurrentLine.mContent buffer before!!!
1584     NS_ASSERTION(mCurrentLine.mContent.IsEmpty() ||
1585                      (IsElementPreformatted() && !mPreFormattedMail),
1586                  "Mixed wrapping data and nonwrapping data on the same line");
1587     MOZ_ASSERT(mOutputManager);
1588 
1589     if (!mCurrentLine.mContent.IsEmpty()) {
1590       mOutputManager->Flush(mCurrentLine);
1591     }
1592 
1593     ConvertToLinesAndOutput(str);
1594     return;
1595   }
1596 
1597   // Intelligent handling of text
1598   // If needed, strip out all "end of lines"
1599   // and multiple whitespace between words
1600   int32_t nextpos;
1601   const char16_t* offsetIntoBuffer = nullptr;
1602 
1603   int32_t bol = 0;
1604   while (bol < totLen) {  // Loop over lines
1605     // Find a place where we may have to do whitespace compression
1606     nextpos = str.FindCharInSet(" \t\n\r", bol);
1607 #ifdef DEBUG_wrapping
1608     nsAutoString remaining;
1609     str.Right(remaining, totLen - bol);
1610     foo = ToNewCString(remaining);
1611     // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, "
1612     //        "string = '%s'\n", bol, nextpos, totLen, foo);
1613     free(foo);
1614 #endif
1615 
1616     if (nextpos == kNotFound) {
1617       // The rest of the string
1618       offsetIntoBuffer = str.get() + bol;
1619       AddToLine(offsetIntoBuffer, totLen - bol);
1620       bol = totLen;
1621       mInWhitespace = false;
1622     } else {
1623       // There's still whitespace left in the string
1624       if (nextpos != 0 && (nextpos + 1) < totLen) {
1625         offsetIntoBuffer = str.get() + nextpos;
1626         // skip '\n' if it is between CJ chars
1627         if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
1628             IS_CJ_CHAR(offsetIntoBuffer[1])) {
1629           offsetIntoBuffer = str.get() + bol;
1630           AddToLine(offsetIntoBuffer, nextpos - bol);
1631           bol = nextpos + 1;
1632           continue;
1633         }
1634       }
1635       // If we're already in whitespace and not preformatted, just skip it:
1636       if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
1637           !mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
1638         // Skip whitespace
1639         bol++;
1640         continue;
1641       }
1642 
1643       if (nextpos == bol) {
1644         // Note that we are in whitespace.
1645         mInWhitespace = true;
1646         offsetIntoBuffer = str.get() + nextpos;
1647         AddToLine(offsetIntoBuffer, 1);
1648         bol++;
1649         continue;
1650       }
1651 
1652       mInWhitespace = true;
1653 
1654       offsetIntoBuffer = str.get() + bol;
1655       if (mPreFormattedMail ||
1656           mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
1657         // Preserve the real whitespace character
1658         nextpos++;
1659         AddToLine(offsetIntoBuffer, nextpos - bol);
1660         bol = nextpos;
1661       } else {
1662         // Replace the whitespace with a space
1663         AddToLine(offsetIntoBuffer, nextpos - bol);
1664         AddToLine(kSpace.get(), 1);
1665         bol = nextpos + 1;  // Let's eat the whitespace
1666       }
1667     }
1668   }  // Continue looping over the string
1669 }
1670 
1671 /**
1672  * Gets the value of an attribute in a string. If the function returns
1673  * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1674  */
GetAttributeValue(const nsAtom * aName,nsString & aValueRet) const1675 nsresult nsPlainTextSerializer::GetAttributeValue(const nsAtom* aName,
1676                                                   nsString& aValueRet) const {
1677   if (mElement) {
1678     if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
1679       return NS_OK;
1680     }
1681   }
1682 
1683   return NS_ERROR_NOT_AVAILABLE;
1684 }
1685 
1686 /**
1687  * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1688  * In this case, we should ignore it.
1689  */
IsCurrentNodeConverted() const1690 bool nsPlainTextSerializer::IsCurrentNodeConverted() const {
1691   nsAutoString value;
1692   nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1693   return (NS_SUCCEEDED(rv) && (value.EqualsIgnoreCase("moz-txt", 7) ||
1694                                value.EqualsIgnoreCase("\"moz-txt", 8)));
1695 }
1696 
1697 // static
GetIdForContent(nsIContent * aContent)1698 nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) {
1699   if (!aContent->IsHTMLElement()) {
1700     return nullptr;
1701   }
1702 
1703   nsAtom* localName = aContent->NodeInfo()->NameAtom();
1704   return localName->IsStatic() ? localName : nullptr;
1705 }
1706 
IsElementPreformatted() const1707 bool nsPlainTextSerializer::IsElementPreformatted() const {
1708   return !mPreformatStack.empty() && mPreformatStack.top();
1709 }
1710 
IsElementPreformatted(Element * aElement)1711 bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
1712   RefPtr<ComputedStyle> computedStyle =
1713       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1714   if (computedStyle) {
1715     const nsStyleText* textStyle = computedStyle->StyleText();
1716     return textStyle->WhiteSpaceOrNewlineIsSignificant();
1717   }
1718   // Fall back to looking at the tag, in case there is no style information.
1719   return GetIdForContent(aElement) == nsGkAtoms::pre;
1720 }
1721 
IsCssBlockLevelElement(Element * aElement)1722 bool nsPlainTextSerializer::IsCssBlockLevelElement(Element* aElement) {
1723   RefPtr<ComputedStyle> computedStyle =
1724       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1725   if (computedStyle) {
1726     const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay();
1727     return displayStyle->IsBlockOutsideStyle();
1728   }
1729   // Fall back to looking at the tag, in case there is no style information.
1730   return nsContentUtils::IsHTMLBlockLevelElement(aElement);
1731 }
1732 
1733 /**
1734  * This method is required only to identify LI's inside OL.
1735  * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1736  */
IsInOL() const1737 bool nsPlainTextSerializer::IsInOL() const {
1738   int32_t i = mTagStackIndex;
1739   while (--i >= 0) {
1740     if (mTagStack[i] == nsGkAtoms::ol) return true;
1741     if (mTagStack[i] == nsGkAtoms::ul) {
1742       // If a UL is reached first, LI belongs the UL nested in OL.
1743       return false;
1744     }
1745   }
1746   // We may reach here for orphan LI's.
1747   return false;
1748 }
1749 
IsInOlOrUl() const1750 bool nsPlainTextSerializer::IsInOlOrUl() const {
1751   return (mULCount > 0) || !mOLStack.IsEmpty();
1752 }
1753 
1754 /*
1755   @return 0 = no header, 1 = h1, ..., 6 = h6
1756 */
HeaderLevel(const nsAtom * aTag)1757 int32_t HeaderLevel(const nsAtom* aTag) {
1758   if (aTag == nsGkAtoms::h1) {
1759     return 1;
1760   }
1761   if (aTag == nsGkAtoms::h2) {
1762     return 2;
1763   }
1764   if (aTag == nsGkAtoms::h3) {
1765     return 3;
1766   }
1767   if (aTag == nsGkAtoms::h4) {
1768     return 4;
1769   }
1770   if (aTag == nsGkAtoms::h5) {
1771     return 5;
1772   }
1773   if (aTag == nsGkAtoms::h6) {
1774     return 6;
1775   }
1776   return 0;
1777 }
1778 
1779 /*
 * GetUnicharWidth() and GetUnicharStringWidth() are implementations of
 * wcwidth() and wcswidth() as defined in
 * "The Single UNIX Specification, Version 2, The Open Group, 1997"
 * <http://www.UNIX-systems.org/online.html>
1784  *
1785  * Markus Kuhn -- 2000-02-08 -- public domain
1786  *
1787  * Minor alterations to fit Mozilla's data types by Daniel Bratell
1788  */
1789 
1790 /* These functions define the column width of an ISO 10646 character
1791  * as follows:
1792  *
1793  *    - The null character (U+0000) has a column width of 0.
1794  *
1795  *    - Other C0/C1 control characters and DEL will lead to a return
1796  *      value of -1.
1797  *
1798  *    - Non-spacing and enclosing combining characters (general
1799  *      category code Mn or Me in the Unicode database) have a
1800  *      column width of 0.
1801  *
1802  *    - Spacing characters in the East Asian Wide (W) or East Asian
1803  *      FullWidth (F) category as defined in Unicode Technical
1804  *      Report #11 have a column width of 2.
1805  *
1806  *    - All remaining characters (including all printable
1807  *      ISO 8859-1 and WGL4 characters, Unicode control characters,
1808  *      etc.) have a column width of 1.
1809  *
 * The original implementation assumed that wchar_t characters are encoded
 * in ISO 10646; this adaptation takes char16_t code units instead.
 */
1813 
1814 namespace {
1815 
1816 struct interval {
1817   uint16_t first;
1818   uint16_t last;
1819 };
1820 
1821 struct CombiningComparator {
1822   const char16_t mUcs;
CombiningComparator__anon1c77cadb0111::CombiningComparator1823   explicit CombiningComparator(char16_t aUcs) : mUcs(aUcs) {}
operator ()__anon1c77cadb0111::CombiningComparator1824   int operator()(const interval& combining) const {
1825     if (mUcs > combining.last) return 1;
1826     if (mUcs < combining.first) return -1;
1827 
1828     MOZ_ASSERT(combining.first <= mUcs);
1829     MOZ_ASSERT(mUcs <= combining.last);
1830     return 0;
1831   }
1832 };
1833 
1834 }  // namespace
1835 
GetUnicharWidth(char16_t ucs)1836 int32_t GetUnicharWidth(char16_t ucs) {
1837   /* sorted list of non-overlapping intervals of non-spacing characters */
1838   static const interval combining[] = {
1839       {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486}, {0x0488, 0x0489},
1840       {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF},
1841       {0x05C1, 0x05C2}, {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
1842       {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
1843       {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
1844       {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963},
1845       {0x0981, 0x0981}, {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
1846       {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
1847       {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A82},
1848       {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD},
1849       {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
1850       {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0},
1851       {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
1852       {0x0C55, 0x0C56}, {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
1853       {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4},
1854       {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
1855       {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
1856       {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
1857       {0x0F71, 0x0F7E}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97},
1858       {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
1859       {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059}, {0x17B7, 0x17BD},
1860       {0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x18A9, 0x18A9}, {0x20D0, 0x20E3},
1861       {0x302A, 0x302F}, {0x3099, 0x309A}, {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}};
1862 
1863   /* test for 8-bit control characters */
1864   if (ucs == 0) return 0;
1865   if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) return -1;
1866 
1867   /* first quick check for Latin-1 etc. characters */
1868   if (ucs < combining[0].first) return 1;
1869 
1870   /* binary search in table of non-spacing characters */
1871   size_t idx;
1872   if (BinarySearchIf(combining, 0, ArrayLength(combining),
1873                      CombiningComparator(ucs), &idx)) {
1874     return 0;
1875   }
1876 
1877   /* if we arrive here, ucs is not a combining or C0/C1 control character */
1878 
1879   /* fast test for majority of non-wide scripts */
1880   if (ucs < 0x1100) return 1;
1881 
1882   return 1 +
1883          ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
1884           (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
1885            ucs != 0x303f) ||                  /* CJK ... Yi */
1886           (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
1887           (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
1888           (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
1889           (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
1890           (ucs >= 0xffe0 && ucs <= 0xffe6));
1891 }
1892 
GetUnicharStringWidth(const nsString & aString)1893 int32_t GetUnicharStringWidth(const nsString& aString) {
1894   const char16_t* pwcs = aString.get();
1895   int32_t n = aString.Length();
1896 
1897   int32_t w, width = 0;
1898 
1899   for (; *pwcs && n-- > 0; pwcs++)
1900     if ((w = GetUnicharWidth(*pwcs)) < 0)
1901       ++width;  // Taking 1 as the width of non-printable character, for bug#
1902                 // 94475.
1903     else
1904       width += w;
1905 
1906   return width;
1907 }
1908