1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsTextRunTransformations.h"
8 
9 #include "mozilla/MemoryReporting.h"
10 #include "mozilla/Move.h"
11 
12 #include "nsGkAtoms.h"
13 #include "nsStyleConsts.h"
14 #include "nsStyleContextInlines.h"
15 #include "nsUnicharUtils.h"
16 #include "nsUnicodeProperties.h"
17 #include "nsSpecialCasingData.h"
18 #include "mozilla/gfx/2D.h"
19 #include "nsTextFrameUtils.h"
20 #include "nsIPersistentProperties2.h"
21 #include "GreekCasing.h"
22 #include "IrishCasing.h"
23 
24 using namespace mozilla;
25 using namespace mozilla::gfx;
26 
// Unicode characters needing special casing treatment in tr/az languages
// (and the other languages that GetCasingFor() maps to eLSCB_Turkish).
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I 0x0131

// Greek sigma needs custom handling for the lowercase transform; for details
// see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
// nsCaseTransformTextRunFactory::TransformString(), and bug 740120.
#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
#define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
#define GREEK_SMALL_LETTER_SIGMA 0x03C3
37 
Create(const gfxTextRunFactory::Parameters * aParams,nsTransformingTextRunFactory * aFactory,gfxFontGroup * aFontGroup,const char16_t * aString,uint32_t aLength,const gfx::ShapedTextFlags aFlags,const nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)38 already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
39     const gfxTextRunFactory::Parameters* aParams,
40     nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
41     const char16_t* aString, uint32_t aLength,
42     const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
43     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
44   NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
45                "didn't expect text to be marked as 8-bit here");
46 
47   void* storage =
48       AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
49   if (!storage) {
50     return nullptr;
51   }
52 
53   RefPtr<nsTransformedTextRun> result = new (storage)
54       nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength,
55                            aFlags, aFlags2, Move(aStyles), aOwnsFactory);
56   return result.forget();
57 }
58 
SetCapitalization(uint32_t aStart,uint32_t aLength,bool * aCapitalization)59 void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
60                                              bool* aCapitalization) {
61   if (mCapitalize.IsEmpty()) {
62     if (!mCapitalize.AppendElements(GetLength())) return;
63     memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool));
64   }
65   memcpy(mCapitalize.Elements() + aStart, aCapitalization,
66          aLength * sizeof(bool));
67   mNeedsRebuild = true;
68 }
69 
SetPotentialLineBreaks(Range aRange,const uint8_t * aBreakBefore)70 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
71                                                   const uint8_t* aBreakBefore) {
72   bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore);
73   if (changed) {
74     mNeedsRebuild = true;
75   }
76   return changed;
77 }
78 
SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)79 size_t nsTransformedTextRun::SizeOfExcludingThis(
80     mozilla::MallocSizeOf aMallocSizeOf) {
81   size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
82   total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
83   total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
84   if (mOwnsFactory) {
85     total += aMallocSizeOf(mFactory);
86   }
87   return total;
88 }
89 
SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)90 size_t nsTransformedTextRun::SizeOfIncludingThis(
91     mozilla::MallocSizeOf aMallocSizeOf) {
92   return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
93 }
94 
95 already_AddRefed<nsTransformedTextRun>
MakeTextRun(const char16_t * aString,uint32_t aLength,const gfxTextRunFactory::Parameters * aParams,gfxFontGroup * aFontGroup,gfx::ShapedTextFlags aFlags,nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)96 nsTransformingTextRunFactory::MakeTextRun(
97     const char16_t* aString, uint32_t aLength,
98     const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
99     gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
100     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
101   return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString,
102                                       aLength, aFlags, aFlags2, Move(aStyles),
103                                       aOwnsFactory);
104 }
105 
106 already_AddRefed<nsTransformedTextRun>
MakeTextRun(const uint8_t * aString,uint32_t aLength,const gfxTextRunFactory::Parameters * aParams,gfxFontGroup * aFontGroup,gfx::ShapedTextFlags aFlags,nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)107 nsTransformingTextRunFactory::MakeTextRun(
108     const uint8_t* aString, uint32_t aLength,
109     const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
110     gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
111     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
112   // We'll only have a Unicode code path to minimize the amount of code needed
113   // for these rarely used features
114   NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString),
115                                        aLength);
116   return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
117                      aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2,
118                      Move(aStyles), aOwnsFactory);
119 }
120 
MergeCharactersInTextRun(gfxTextRun * aDest,gfxTextRun * aSrc,const bool * aCharsToMerge,const bool * aDeletedChars)121 void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
122                               const bool* aCharsToMerge,
123                               const bool* aDeletedChars) {
124   aDest->ResetGlyphRuns();
125 
126   gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc));
127   uint32_t offset = 0;
128   AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs;
129   while (iter.NextRun()) {
130     const gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
131     nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false,
132                                      run->mOrientation);
133     if (NS_FAILED(rv)) return;
134 
135     bool anyMissing = false;
136     uint32_t mergeRunStart = iter.GetStringStart();
137     const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
138     gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
139     uint32_t stringEnd = iter.GetStringEnd();
140     for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
141       const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
142       if (g.IsSimpleGlyph()) {
143         if (!anyMissing) {
144           gfxTextRun::DetailedGlyph details;
145           details.mGlyphID = g.GetSimpleGlyph();
146           details.mAdvance = g.GetSimpleAdvance();
147           glyphs.AppendElement(details);
148         }
149       } else {
150         if (g.IsMissing()) {
151           anyMissing = true;
152           glyphs.Clear();
153         }
154         if (g.GetGlyphCount() > 0) {
155           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
156         }
157       }
158 
159       if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
160         // next char is supposed to merge with current, so loop without
161         // writing current merged glyph to the destination
162         continue;
163       }
164 
165       // If the start of the merge run is actually a character that should
166       // have been merged with the previous character (this can happen
167       // if there's a font change in the middle of a case-mapped character,
168       // that decomposed into a sequence of base+diacritics, for example),
169       // just discard the entire merge run. See comment at start of this
170       // function.
171       NS_WARNING_ASSERTION(
172           !aCharsToMerge[mergeRunStart],
173           "unable to merge across a glyph run boundary, glyph(s) discarded");
174       if (!aCharsToMerge[mergeRunStart]) {
175         if (anyMissing) {
176           mergedGlyph.SetMissing(glyphs.Length());
177         } else {
178           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
179                                  mergedGlyph.IsLigatureGroupStart(),
180                                  glyphs.Length());
181         }
182         aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
183         ++offset;
184 
185         while (offset < aDest->GetLength() && aDeletedChars[offset]) {
186           aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr);
187         }
188       }
189 
190       glyphs.Clear();
191       anyMissing = false;
192       mergeRunStart = k + 1;
193       if (mergeRunStart < stringEnd) {
194         mergedGlyph = srcGlyphs[mergeRunStart];
195       }
196     }
197     NS_ASSERTION(glyphs.Length() == 0,
198                  "Leftover glyphs, don't request merging of the last character "
199                  "with its next!");
200   }
201   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
202 }
203 
GetParametersForInner(nsTransformedTextRun * aTextRun,gfx::ShapedTextFlags * aFlags,DrawTarget * aRefDrawTarget)204 gfxTextRunFactory::Parameters GetParametersForInner(
205     nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
206     DrawTarget* aRefDrawTarget) {
207   gfxTextRunFactory::Parameters params = {
208       aRefDrawTarget, nullptr, nullptr,
209       nullptr,        0,       aTextRun->GetAppUnitsPerDevUnit()};
210   *aFlags = aTextRun->GetFlags();
211   return params;
212 }
213 
// Some languages have special casing conventions that differ from the
// default Unicode mappings.
// The enum values here are named for well-known exemplar languages that
// exhibit the behavior in question; multiple lang tags may map to the
// same setting here, if the behavior is shared by other languages.
// GetCasingFor() performs the mapping from a language atom to one of these
// values.
enum LanguageSpecificCasingBehavior {
  eLSCB_None,    // default non-lang-specific behavior
  eLSCB_Dutch,   // treat "ij" digraph as a unit for capitalization
  eLSCB_Greek,   // strip accent when uppercasing Greek vowels
  eLSCB_Irish,   // keep prefix letters as lowercase when uppercasing Irish
  eLSCB_Turkish  // preserve dotted/dotless-i distinction in uppercase
};
226 
GetCasingFor(const nsAtom * aLang)227 static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
228   if (!aLang) {
229     return eLSCB_None;
230   }
231   if (aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az ||
232       aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh ||
233       aLang == nsGkAtoms::tt) {
234     return eLSCB_Turkish;
235   }
236   if (aLang == nsGkAtoms::nl) {
237     return eLSCB_Dutch;
238   }
239   if (aLang == nsGkAtoms::el) {
240     return eLSCB_Greek;
241   }
242   if (aLang == nsGkAtoms::ga) {
243     return eLSCB_Irish;
244   }
245 
246   // Is there a region subtag we should ignore?
247   nsAtomString langStr(const_cast<nsAtom*>(aLang));
248   int index = langStr.FindChar('-');
249   if (index > 0) {
250     langStr.Truncate(index);
251     RefPtr<nsAtom> truncatedLang = NS_Atomize(langStr);
252     return GetCasingFor(truncatedLang);
253   }
254 
255   return eLSCB_None;
256 }
257 
TransformString(const nsAString & aString,nsString & aConvertedString,bool aAllUppercase,const nsAtom * aLanguage,nsTArray<bool> & aCharsToMergeArray,nsTArray<bool> & aDeletedCharsArray,const nsTransformedTextRun * aTextRun,uint32_t aOffsetInTextRun,nsTArray<uint8_t> * aCanBreakBeforeArray,nsTArray<RefPtr<nsTransformedCharStyle>> * aStyleArray)258 bool nsCaseTransformTextRunFactory::TransformString(
259     const nsAString& aString, nsString& aConvertedString, bool aAllUppercase,
260     const nsAtom* aLanguage, nsTArray<bool>& aCharsToMergeArray,
261     nsTArray<bool>& aDeletedCharsArray, const nsTransformedTextRun* aTextRun,
262     uint32_t aOffsetInTextRun, nsTArray<uint8_t>* aCanBreakBeforeArray,
263     nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
264   bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
265   MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
266              "text run must be provided to use aux output arrays");
267 
268   uint32_t length = aString.Length();
269   const char16_t* str = aString.BeginReading();
270 
271   bool mergeNeeded = false;
272 
273   bool capitalizeDutchIJ = false;
274   bool prevIsLetter = false;
275   bool ntPrefix = false;  // true immediately after a word-initial 'n' or 't'
276                           // when doing Irish lowercasing
277   uint32_t sigmaIndex = uint32_t(-1);
278   nsUGenCategory cat;
279 
280   uint8_t style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : 0;
281   bool forceNonFullWidth = false;
282   const nsAtom* lang = aLanguage;
283 
284   LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
285   mozilla::GreekCasing::State greekState;
286   mozilla::IrishCasing::State irishState;
287   uint32_t irishMark = uint32_t(-1);  // location of possible prefix letter(s)
288                                       // in the output string
289   uint32_t irishMarkSrc =
290       uint32_t(-1);                   // corresponding location in source
291                                       // string (may differ from output due to
292                                       // expansions like eszet -> 'SS')
293   uint32_t greekMark = uint32_t(-1);  // location of uppercase ETA that may need
294                                       // tonos added (if it is disjunctive eta)
295   const char16_t kGreekUpperEta = 0x0397;
296 
297   for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
298     uint32_t ch = str[i];
299 
300     RefPtr<nsTransformedCharStyle> charStyle;
301     if (aTextRun) {
302       charStyle = aTextRun->mStyles[aOffsetInTextRun];
303       style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
304                             : charStyle->mTextTransform;
305       forceNonFullWidth = charStyle->mForceNonFullWidth;
306 
307       nsAtom* newLang =
308           charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
309       if (lang != newLang) {
310         lang = newLang;
311         languageSpecificCasing = GetCasingFor(lang);
312         greekState.Reset();
313         irishState.Reset();
314         irishMark = uint32_t(-1);
315         irishMarkSrc = uint32_t(-1);
316         greekMark = uint32_t(-1);
317       }
318     }
319 
320     int extraChars = 0;
321     const mozilla::unicode::MultiCharMapping* mcm;
322     bool inhibitBreakBefore = false;  // have we just deleted preceding hyphen?
323 
324     if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 &&
325         NS_IS_LOW_SURROGATE(str[i + 1])) {
326       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
327     }
328 
329     switch (style) {
330       case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
331         if (languageSpecificCasing == eLSCB_Turkish) {
332           if (ch == 'I') {
333             ch = LATIN_SMALL_LETTER_DOTLESS_I;
334             prevIsLetter = true;
335             sigmaIndex = uint32_t(-1);
336             break;
337           }
338           if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
339             ch = 'i';
340             prevIsLetter = true;
341             sigmaIndex = uint32_t(-1);
342             break;
343           }
344         }
345 
346         cat = mozilla::unicode::GetGenCategory(ch);
347 
348         if (languageSpecificCasing == eLSCB_Irish &&
349             cat == nsUGenCategory::kLetter) {
350           // See bug 1018805 for Irish lowercasing requirements
351           if (!prevIsLetter && (ch == 'n' || ch == 't')) {
352             ntPrefix = true;
353           } else {
354             if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
355               aConvertedString.Append('-');
356               ++extraChars;
357             }
358             ntPrefix = false;
359           }
360         } else {
361           ntPrefix = false;
362         }
363 
364         // Special lowercasing behavior for Greek Sigma: note that this is
365         // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
366         // *not* a language-specific mapping; it applies regardless of the
367         // language of the element.
368         //
369         // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
370         // (i.e. the non-final form) whenever there is a following letter, or
371         // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
372         // followed by a LETTER); and to FINAL SIGMA when it is preceded by
373         // another letter but not followed by one.
374         //
375         // To implement the context-sensitive nature of this mapping, we keep
376         // track of whether the previous character was a letter. If not, CAPITAL
377         // SIGMA will map directly to SMALL SIGMA. If the previous character
378         // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
379         // position in the converted string; if we then encounter another
380         // letter, that FINAL SIGMA is replaced with a standard SMALL SIGMA.
381 
382         // If sigmaIndex is not -1, it marks where we have provisionally mapped
383         // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
384         // need to change it to SMALL SIGMA.
385         if (sigmaIndex != uint32_t(-1)) {
386           if (cat == nsUGenCategory::kLetter) {
387             aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
388           }
389         }
390 
391         if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
392           // If preceding char was a letter, map to FINAL instead of SMALL,
393           // and note where it occurred by setting sigmaIndex; we'll change it
394           // to standard SMALL SIGMA later if another letter follows
395           if (prevIsLetter) {
396             ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
397             sigmaIndex = aConvertedString.Length();
398           } else {
399             // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
400             // to SMALL SIGMA
401             ch = GREEK_SMALL_LETTER_SIGMA;
402             sigmaIndex = uint32_t(-1);
403           }
404           prevIsLetter = true;
405           break;
406         }
407 
408         // ignore diacritics for the purpose of contextual sigma mapping;
409         // otherwise, reset prevIsLetter appropriately and clear the
410         // sigmaIndex marker
411         if (cat != nsUGenCategory::kMark) {
412           prevIsLetter = (cat == nsUGenCategory::kLetter);
413           sigmaIndex = uint32_t(-1);
414         }
415 
416         mcm = mozilla::unicode::SpecialLower(ch);
417         if (mcm) {
418           int j = 0;
419           while (j < 2 && mcm->mMappedChars[j + 1]) {
420             aConvertedString.Append(mcm->mMappedChars[j]);
421             ++extraChars;
422             ++j;
423           }
424           ch = mcm->mMappedChars[j];
425           break;
426         }
427 
428         ch = ToLowerCase(ch);
429         break;
430 
431       case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
432         if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
433           ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
434           break;
435         }
436 
437         if (languageSpecificCasing == eLSCB_Greek) {
438           bool markEta;
439           bool updateEta;
440           ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
441                                                updateEta);
442           if (markEta) {
443             greekMark = aConvertedString.Length();
444           } else if (updateEta) {
445             // Remove the TONOS from an uppercase ETA-TONOS that turned out
446             // not to be disjunctive-eta.
447             MOZ_ASSERT(aConvertedString.Length() > 0 &&
448                            greekMark < aConvertedString.Length(),
449                        "bad greekMark!");
450             aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
451             greekMark = uint32_t(-1);
452           }
453           break;
454         }
455 
456         if (languageSpecificCasing == eLSCB_Irish) {
457           bool mark;
458           uint8_t action;
459           ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
460           if (mark) {
461             irishMark = aConvertedString.Length();
462             irishMarkSrc = i;
463             break;
464           } else if (action) {
465             nsString& str = aConvertedString;  // shorthand
466             switch (action) {
467               case 1:
468                 // lowercase a single prefix letter
469                 NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(),
470                              "bad irishMark!");
471                 str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
472                 irishMark = uint32_t(-1);
473                 irishMarkSrc = uint32_t(-1);
474                 break;
475               case 2:
476                 // lowercase two prefix letters (immediately before current pos)
477                 NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2,
478                              "bad irishMark!");
479                 str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
480                 str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
481                 irishMark = uint32_t(-1);
482                 irishMarkSrc = uint32_t(-1);
483                 break;
484               case 3:
485                 // lowercase one prefix letter, and delete following hyphen
486                 // (which must be the immediately-preceding char)
487                 NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2,
488                              "bad irishMark!");
489                 MOZ_ASSERT(
490                     irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
491                     "failed to set irishMarks");
492                 str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
493                 aDeletedCharsArray[irishMarkSrc + 1] = true;
494                 // Remove the trailing entries (corresponding to the deleted
495                 // hyphen) from the auxiliary arrays.
496                 aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1);
497                 if (auxiliaryOutputArrays) {
498                   aStyleArray->SetLength(aStyleArray->Length() - 1);
499                   aCanBreakBeforeArray->SetLength(
500                       aCanBreakBeforeArray->Length() - 1);
501                   inhibitBreakBefore = true;
502                 }
503                 mergeNeeded = true;
504                 irishMark = uint32_t(-1);
505                 irishMarkSrc = uint32_t(-1);
506                 break;
507             }
508             // ch has been set to the uppercase for current char;
509             // No need to check for SpecialUpper here as none of the characters
510             // that could trigger an Irish casing action have special mappings.
511             break;
512           }
513           // If we didn't have any special action to perform, fall through
514           // to check for special uppercase (ß)
515         }
516 
517         mcm = mozilla::unicode::SpecialUpper(ch);
518         if (mcm) {
519           int j = 0;
520           while (j < 2 && mcm->mMappedChars[j + 1]) {
521             aConvertedString.Append(mcm->mMappedChars[j]);
522             ++extraChars;
523             ++j;
524           }
525           ch = mcm->mMappedChars[j];
526           break;
527         }
528 
529         ch = ToUpperCase(ch);
530         break;
531 
532       case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
533         if (aTextRun) {
534           if (capitalizeDutchIJ && ch == 'j') {
535             ch = 'J';
536             capitalizeDutchIJ = false;
537             break;
538           }
539           capitalizeDutchIJ = false;
540           if (aOffsetInTextRun < aTextRun->mCapitalize.Length() &&
541               aTextRun->mCapitalize[aOffsetInTextRun]) {
542             if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
543               ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
544               break;
545             }
546             if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
547               ch = 'I';
548               capitalizeDutchIJ = true;
549               break;
550             }
551 
552             mcm = mozilla::unicode::SpecialTitle(ch);
553             if (mcm) {
554               int j = 0;
555               while (j < 2 && mcm->mMappedChars[j + 1]) {
556                 aConvertedString.Append(mcm->mMappedChars[j]);
557                 ++extraChars;
558                 ++j;
559               }
560               ch = mcm->mMappedChars[j];
561               break;
562             }
563 
564             ch = ToTitleCase(ch);
565           }
566         }
567         break;
568 
569       case NS_STYLE_TEXT_TRANSFORM_FULL_WIDTH:
570         ch = mozilla::unicode::GetFullWidth(ch);
571         break;
572 
573       default:
574         break;
575     }
576 
577     if (forceNonFullWidth) {
578       ch = mozilla::unicode::GetFullWidthInverse(ch);
579     }
580 
581     if (ch == uint32_t(-1)) {
582       aDeletedCharsArray.AppendElement(true);
583       mergeNeeded = true;
584     } else {
585       aDeletedCharsArray.AppendElement(false);
586       aCharsToMergeArray.AppendElement(false);
587       if (auxiliaryOutputArrays) {
588         aStyleArray->AppendElement(charStyle);
589         aCanBreakBeforeArray->AppendElement(
590             inhibitBreakBefore
591                 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
592                 : aTextRun->CanBreakBefore(aOffsetInTextRun));
593       }
594 
595       if (IS_IN_BMP(ch)) {
596         aConvertedString.Append(ch);
597       } else {
598         aConvertedString.Append(H_SURROGATE(ch));
599         aConvertedString.Append(L_SURROGATE(ch));
600         i++;
601         aOffsetInTextRun++;
602         aDeletedCharsArray.AppendElement(
603             true);  // not exactly deleted, but the
604                     // trailing surrogate is skipped
605         ++extraChars;
606       }
607 
608       while (extraChars-- > 0) {
609         mergeNeeded = true;
610         aCharsToMergeArray.AppendElement(true);
611         if (auxiliaryOutputArrays) {
612           aStyleArray->AppendElement(charStyle);
613           aCanBreakBeforeArray->AppendElement(
614               gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
615         }
616       }
617     }
618   }
619 
620   return mergeNeeded;
621 }
622 
RebuildTextRun(nsTransformedTextRun * aTextRun,DrawTarget * aRefDrawTarget,gfxMissingFontRecorder * aMFR)623 void nsCaseTransformTextRunFactory::RebuildTextRun(
624     nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
625     gfxMissingFontRecorder* aMFR) {
626   nsAutoString convertedString;
627   AutoTArray<bool, 50> charsToMergeArray;
628   AutoTArray<bool, 50> deletedCharsArray;
629   AutoTArray<uint8_t, 50> canBreakBeforeArray;
630   AutoTArray<RefPtr<nsTransformedCharStyle>, 50> styleArray;
631 
632   bool mergeNeeded =
633       TransformString(aTextRun->mString, convertedString, mAllUppercase,
634                       nullptr, charsToMergeArray, deletedCharsArray, aTextRun,
635                       0, &canBreakBeforeArray, &styleArray);
636 
637   gfx::ShapedTextFlags flags;
638   gfxTextRunFactory::Parameters innerParams =
639       GetParametersForInner(aTextRun, &flags, aRefDrawTarget);
640   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
641 
642   RefPtr<nsTransformedTextRun> transformedChild;
643   RefPtr<gfxTextRun> cachedChild;
644   gfxTextRun* child;
645 
646   if (mInnerTransformingTextRunFactory) {
647     transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
648         convertedString.BeginReading(), convertedString.Length(), &innerParams,
649         fontGroup, flags, nsTextFrameUtils::Flags(), Move(styleArray), false);
650     child = transformedChild.get();
651   } else {
652     cachedChild = fontGroup->MakeTextRun(
653         convertedString.BeginReading(), convertedString.Length(), &innerParams,
654         flags, nsTextFrameUtils::Flags(), aMFR);
655     child = cachedChild.get();
656   }
657   if (!child) return;
658   // Copy potential linebreaks into child so they're preserved
659   // (and also child will be shaped appropriately)
660   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
661                "Dropped characters or break-before values somewhere!");
662   gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length()));
663   child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements());
664   if (transformedChild) {
665     transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR);
666   }
667 
668   if (mergeNeeded) {
669     // Now merge multiple characters into one multi-glyph character as required
670     // and deal with skipping deleted accent chars
671     NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
672                  "source length mismatch");
673     NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
674                  "destination length mismatch");
675     MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
676                              deletedCharsArray.Elements());
677   } else {
678     // No merging to do, so just copy; this produces a more optimized textrun.
679     // We can't steal the data because the child may be cached and stealing
680     // the data would break the cache.
681     aTextRun->ResetGlyphRuns();
682     aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);
683   }
684 }
685