1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsTextRunTransformations.h"
8 
9 #include <utility>
10 
11 #include "GreekCasing.h"
12 #include "IrishCasing.h"
13 #include "mozilla/ComputedStyleInlines.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/TextEditor.h"
16 #include "mozilla/gfx/2D.h"
17 #include "nsGkAtoms.h"
18 #include "nsSpecialCasingData.h"
19 #include "nsStyleConsts.h"
20 #include "nsTextFrameUtils.h"
21 #include "nsUnicharUtils.h"
22 #include "nsUnicodeProperties.h"
23 
24 using namespace mozilla;
25 using namespace mozilla::gfx;
26 
27 // Unicode characters needing special casing treatment in tr/az languages
28 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
29 #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131
30 
// Greek sigma needs custom handling for the lowercase transform; for details
// see comments under "case StyleTextTransformCase::Lowercase" within
// nsCaseTransformTextRunFactory::TransformString(), and bug 740120.
34 #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
35 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
36 #define GREEK_SMALL_LETTER_SIGMA 0x03C3
37 
Create(const gfxTextRunFactory::Parameters * aParams,nsTransformingTextRunFactory * aFactory,gfxFontGroup * aFontGroup,const char16_t * aString,uint32_t aLength,const gfx::ShapedTextFlags aFlags,const nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)38 already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
39     const gfxTextRunFactory::Parameters* aParams,
40     nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
41     const char16_t* aString, uint32_t aLength,
42     const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
43     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
44   NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
45                "didn't expect text to be marked as 8-bit here");
46 
47   void* storage =
48       AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
49   if (!storage) {
50     return nullptr;
51   }
52 
53   RefPtr<nsTransformedTextRun> result = new (storage)
54       nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength,
55                            aFlags, aFlags2, std::move(aStyles), aOwnsFactory);
56   return result.forget();
57 }
58 
SetCapitalization(uint32_t aStart,uint32_t aLength,bool * aCapitalization)59 void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
60                                              bool* aCapitalization) {
61   if (mCapitalize.IsEmpty()) {
62     // XXX(Bug 1631371) Check if this should use a fallible operation as it
63     // pretended earlier.
64     mCapitalize.AppendElements(GetLength());
65     memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool));
66   }
67   memcpy(mCapitalize.Elements() + aStart, aCapitalization,
68          aLength * sizeof(bool));
69   mNeedsRebuild = true;
70 }
71 
SetPotentialLineBreaks(Range aRange,const uint8_t * aBreakBefore)72 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
73                                                   const uint8_t* aBreakBefore) {
74   bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore);
75   if (changed) {
76     mNeedsRebuild = true;
77   }
78   return changed;
79 }
80 
SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)81 size_t nsTransformedTextRun::SizeOfExcludingThis(
82     mozilla::MallocSizeOf aMallocSizeOf) {
83   size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
84   total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
85   total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
86   if (mOwnsFactory) {
87     total += aMallocSizeOf(mFactory);
88   }
89   return total;
90 }
91 
SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)92 size_t nsTransformedTextRun::SizeOfIncludingThis(
93     mozilla::MallocSizeOf aMallocSizeOf) {
94   return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
95 }
96 
97 already_AddRefed<nsTransformedTextRun>
MakeTextRun(const char16_t * aString,uint32_t aLength,const gfxTextRunFactory::Parameters * aParams,gfxFontGroup * aFontGroup,gfx::ShapedTextFlags aFlags,nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)98 nsTransformingTextRunFactory::MakeTextRun(
99     const char16_t* aString, uint32_t aLength,
100     const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
101     gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
102     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
103   return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString,
104                                       aLength, aFlags, aFlags2,
105                                       std::move(aStyles), aOwnsFactory);
106 }
107 
108 already_AddRefed<nsTransformedTextRun>
MakeTextRun(const uint8_t * aString,uint32_t aLength,const gfxTextRunFactory::Parameters * aParams,gfxFontGroup * aFontGroup,gfx::ShapedTextFlags aFlags,nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)109 nsTransformingTextRunFactory::MakeTextRun(
110     const uint8_t* aString, uint32_t aLength,
111     const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
112     gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
113     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
114   // We'll only have a Unicode code path to minimize the amount of code needed
115   // for these rarely used features
116   NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString),
117                                        aLength);
118   return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
119                      aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2,
120                      std::move(aStyles), aOwnsFactory);
121 }
122 
MergeCharactersInTextRun(gfxTextRun * aDest,gfxTextRun * aSrc,const bool * aCharsToMerge,const bool * aDeletedChars)123 void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
124                               const bool* aCharsToMerge,
125                               const bool* aDeletedChars) {
126   aDest->ResetGlyphRuns();
127 
128   gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc));
129   uint32_t offset = 0;
130   AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs;
131   const gfxTextRun::CompressedGlyph continuationGlyph =
132       gfxTextRun::CompressedGlyph::MakeComplex(false, false, 0);
133   while (iter.NextRun()) {
134     const gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
135     aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false,
136                        run->mOrientation, run->mIsCJK);
137 
138     bool anyMissing = false;
139     uint32_t mergeRunStart = iter.GetStringStart();
140     const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
141     gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
142     uint32_t stringEnd = iter.GetStringEnd();
143     for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
144       const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
145       if (g.IsSimpleGlyph()) {
146         if (!anyMissing) {
147           gfxTextRun::DetailedGlyph details;
148           details.mGlyphID = g.GetSimpleGlyph();
149           details.mAdvance = g.GetSimpleAdvance();
150           glyphs.AppendElement(details);
151         }
152       } else {
153         if (g.IsMissing()) {
154           anyMissing = true;
155           glyphs.Clear();
156         }
157         if (g.GetGlyphCount() > 0) {
158           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
159         }
160       }
161 
162       if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
163         // next char is supposed to merge with current, so loop without
164         // writing current merged glyph to the destination
165         continue;
166       }
167 
168       // If the start of the merge run is actually a character that should
169       // have been merged with the previous character (this can happen
170       // if there's a font change in the middle of a case-mapped character,
171       // that decomposed into a sequence of base+diacritics, for example),
172       // just discard the entire merge run. See comment at start of this
173       // function.
174       NS_WARNING_ASSERTION(
175           !aCharsToMerge[mergeRunStart],
176           "unable to merge across a glyph run boundary, glyph(s) discarded");
177       if (!aCharsToMerge[mergeRunStart]) {
178         if (anyMissing) {
179           mergedGlyph.SetMissing(glyphs.Length());
180         } else {
181           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
182                                  mergedGlyph.IsLigatureGroupStart(),
183                                  glyphs.Length());
184         }
185         aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
186         ++offset;
187 
188         while (offset < aDest->GetLength() && aDeletedChars[offset]) {
189           aDest->SetGlyphs(offset++, continuationGlyph, nullptr);
190         }
191       }
192 
193       glyphs.Clear();
194       anyMissing = false;
195       mergeRunStart = k + 1;
196       if (mergeRunStart < stringEnd) {
197         mergedGlyph = srcGlyphs[mergeRunStart];
198       }
199     }
200     NS_ASSERTION(glyphs.Length() == 0,
201                  "Leftover glyphs, don't request merging of the last character "
202                  "with its next!");
203   }
204   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
205 }
206 
GetParametersForInner(nsTransformedTextRun * aTextRun,gfx::ShapedTextFlags * aFlags,DrawTarget * aRefDrawTarget)207 gfxTextRunFactory::Parameters GetParametersForInner(
208     nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
209     DrawTarget* aRefDrawTarget) {
210   gfxTextRunFactory::Parameters params = {
211       aRefDrawTarget, nullptr, nullptr,
212       nullptr,        0,       aTextRun->GetAppUnitsPerDevUnit()};
213   *aFlags = aTextRun->GetFlags();
214   return params;
215 }
216 
// Certain languages follow casing conventions that depart from the default
// Unicode mappings. Each enumerator below is named after a well-known
// exemplar language that shows the behavior in question; several lang tags
// may map to the same enumerator when other languages share that behavior.
enum LanguageSpecificCasingBehavior {
  // default non-lang-specific behavior
  eLSCB_None,
  // treat "ij" digraph as a unit for capitalization
  eLSCB_Dutch,
  // strip accent when uppercasing Greek vowels
  eLSCB_Greek,
  // keep prefix letters as lowercase when uppercasing Irish
  eLSCB_Irish,
  // preserve dotted/dotless-i distinction in uppercase
  eLSCB_Turkish,
  // retain dot on lowercase i/j when an accent is present
  eLSCB_Lithuanian
};
230 
GetCasingFor(const nsAtom * aLang)231 static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
232   if (!aLang) {
233     return eLSCB_None;
234   }
235   if (aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az ||
236       aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh ||
237       aLang == nsGkAtoms::tt) {
238     return eLSCB_Turkish;
239   }
240   if (aLang == nsGkAtoms::nl) {
241     return eLSCB_Dutch;
242   }
243   if (aLang == nsGkAtoms::el) {
244     return eLSCB_Greek;
245   }
246   if (aLang == nsGkAtoms::ga) {
247     return eLSCB_Irish;
248   }
249   if (aLang == nsGkAtoms::lt_) {
250     return eLSCB_Lithuanian;
251   }
252 
253   // Is there a region subtag we should ignore?
254   nsAtomString langStr(const_cast<nsAtom*>(aLang));
255   int index = langStr.FindChar('-');
256   if (index > 0) {
257     langStr.Truncate(index);
258     RefPtr<nsAtom> truncatedLang = NS_Atomize(langStr);
259     return GetCasingFor(truncatedLang);
260   }
261 
262   return eLSCB_None;
263 }
264 
TransformString(const nsAString & aString,nsString & aConvertedString,bool aAllUppercase,bool aCaseTransformsOnly,const nsAtom * aLanguage,nsTArray<bool> & aCharsToMergeArray,nsTArray<bool> & aDeletedCharsArray,const nsTransformedTextRun * aTextRun,uint32_t aOffsetInTextRun,nsTArray<uint8_t> * aCanBreakBeforeArray,nsTArray<RefPtr<nsTransformedCharStyle>> * aStyleArray)265 bool nsCaseTransformTextRunFactory::TransformString(
266     const nsAString& aString, nsString& aConvertedString, bool aAllUppercase,
267     bool aCaseTransformsOnly, const nsAtom* aLanguage,
268     nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray,
269     const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun,
270     nsTArray<uint8_t>* aCanBreakBeforeArray,
271     nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
272   bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
273   MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
274              "text run must be provided to use aux output arrays");
275 
276   uint32_t length = aString.Length();
277   const char16_t* str = aString.BeginReading();
278   const char16_t kPasswordMask = TextEditor::PasswordMask();
279 
280   bool mergeNeeded = false;
281 
282   bool capitalizeDutchIJ = false;
283   bool prevIsLetter = false;
284   bool ntPrefix = false;  // true immediately after a word-initial 'n' or 't'
285                           // when doing Irish lowercasing
286   bool seenSoftDotted = false;  // true immediately after an I or J that is
287                                 // converted to lowercase in Lithuanian mode
288   uint32_t sigmaIndex = uint32_t(-1);
289   nsUGenCategory cat;
290 
291   StyleTextTransform style =
292       aAllUppercase ? StyleTextTransform{StyleTextTransformCase::Uppercase,
293                                          StyleTextTransformOther()}
294                     : StyleTextTransform::None();
295   bool forceNonFullWidth = false;
296   const nsAtom* lang = aLanguage;
297 
298   LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
299   mozilla::GreekCasing::State greekState;
300   mozilla::IrishCasing::State irishState;
301   uint32_t irishMark = uint32_t(-1);  // location of possible prefix letter(s)
302                                       // in the output string
303   uint32_t irishMarkSrc = uint32_t(-1);  // corresponding location in source
304                                          // string (may differ from output due
305                                          // to expansions like eszet -> 'SS')
306   uint32_t greekMark = uint32_t(-1);  // location of uppercase ETA that may need
307                                       // tonos added (if it is disjunctive eta)
308   const char16_t kGreekUpperEta = 0x0397;
309 
310   for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
311     uint32_t ch = str[i];
312 
313     RefPtr<nsTransformedCharStyle> charStyle;
314     if (aTextRun) {
315       charStyle = aTextRun->mStyles[aOffsetInTextRun];
316       style = aAllUppercase
317                   ? StyleTextTransform{StyleTextTransformCase::Uppercase,
318                                        StyleTextTransformOther()}
319                   : charStyle->mTextTransform;
320       forceNonFullWidth = charStyle->mForceNonFullWidth;
321 
322       nsAtom* newLang =
323           charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
324       if (lang != newLang) {
325         lang = newLang;
326         languageSpecificCasing = GetCasingFor(lang);
327         greekState.Reset();
328         irishState.Reset();
329         irishMark = uint32_t(-1);
330         irishMarkSrc = uint32_t(-1);
331         greekMark = uint32_t(-1);
332       }
333     }
334 
335     bool maskPassword = charStyle && charStyle->mMaskPassword;
336     int extraChars = 0;
337     const mozilla::unicode::MultiCharMapping* mcm;
338     bool inhibitBreakBefore = false;  // have we just deleted preceding hyphen?
339 
340     if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
341       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
342     }
343 
344     // Skip case transform if we're masking current character.
345     if (!maskPassword) {
346       switch (style.case_) {
347         case StyleTextTransformCase::None:
348           break;
349 
350         case StyleTextTransformCase::Lowercase:
351           if (languageSpecificCasing == eLSCB_Turkish) {
352             if (ch == 'I') {
353               ch = LATIN_SMALL_LETTER_DOTLESS_I;
354               prevIsLetter = true;
355               sigmaIndex = uint32_t(-1);
356               break;
357             }
358             if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
359               ch = 'i';
360               prevIsLetter = true;
361               sigmaIndex = uint32_t(-1);
362               break;
363             }
364           }
365 
366           if (languageSpecificCasing == eLSCB_Lithuanian) {
367             // clang-format off
368             /* From SpecialCasing.txt:
369              * # Introduce an explicit dot above when lowercasing capital I's and J's
370              * # whenever there are more accents above.
371              * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
372              *
373              * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
374              * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
375              * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
376              * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
377              * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
378              * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
379              */
380             // clang-format on
381             if (ch == 'I' || ch == 'J' || ch == 0x012E) {
382               ch = ToLowerCase(ch);
383               prevIsLetter = true;
384               seenSoftDotted = true;
385               sigmaIndex = uint32_t(-1);
386               break;
387             }
388             if (ch == 0x00CC) {
389               aConvertedString.Append('i');
390               aConvertedString.Append(0x0307);
391               extraChars += 2;
392               ch = 0x0300;
393               prevIsLetter = true;
394               seenSoftDotted = false;
395               sigmaIndex = uint32_t(-1);
396               break;
397             }
398             if (ch == 0x00CD) {
399               aConvertedString.Append('i');
400               aConvertedString.Append(0x0307);
401               extraChars += 2;
402               ch = 0x0301;
403               prevIsLetter = true;
404               seenSoftDotted = false;
405               sigmaIndex = uint32_t(-1);
406               break;
407             }
408             if (ch == 0x0128) {
409               aConvertedString.Append('i');
410               aConvertedString.Append(0x0307);
411               extraChars += 2;
412               ch = 0x0303;
413               prevIsLetter = true;
414               seenSoftDotted = false;
415               sigmaIndex = uint32_t(-1);
416               break;
417             }
418           }
419 
420           cat = mozilla::unicode::GetGenCategory(ch);
421 
422           if (languageSpecificCasing == eLSCB_Irish &&
423               cat == nsUGenCategory::kLetter) {
424             // See bug 1018805 for Irish lowercasing requirements
425             if (!prevIsLetter && (ch == 'n' || ch == 't')) {
426               ntPrefix = true;
427             } else {
428               if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
429                 aConvertedString.Append('-');
430                 ++extraChars;
431               }
432               ntPrefix = false;
433             }
434           } else {
435             ntPrefix = false;
436           }
437 
438           if (seenSoftDotted && cat == nsUGenCategory::kMark) {
439             // The seenSoftDotted flag will only be set in Lithuanian mode.
440             if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
441               aConvertedString.Append(0x0307);
442               ++extraChars;
443             }
444           }
445           seenSoftDotted = false;
446 
447           // Special lowercasing behavior for Greek Sigma: note that this is
448           // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
449           // *not* a language-specific mapping; it applies regardless of the
450           // language of the element.
451           //
452           // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
453           // (i.e. the non-final form) whenever there is a following letter, or
454           // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
455           // followed by a LETTER); and to FINAL SIGMA when it is preceded by
456           // another letter but not followed by one.
457           //
458           // To implement the context-sensitive nature of this mapping, we keep
459           // track of whether the previous character was a letter. If not,
460           // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
461           // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
462           // record the position in the converted string; if we then encounter
463           // another letter, that FINAL SIGMA is replaced with a standard
464           // SMALL SIGMA.
465 
466           // If sigmaIndex is not -1, it marks where we have provisionally
467           // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
468           // letter, we need to change it to SMALL SIGMA.
469           if (sigmaIndex != uint32_t(-1)) {
470             if (cat == nsUGenCategory::kLetter) {
471               aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
472             }
473           }
474 
475           if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
476             // If preceding char was a letter, map to FINAL instead of SMALL,
477             // and note where it occurred by setting sigmaIndex; we'll change
478             // it to standard SMALL SIGMA later if another letter follows
479             if (prevIsLetter) {
480               ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
481               sigmaIndex = aConvertedString.Length();
482             } else {
483               // CAPITAL SIGMA not preceded by a letter is unconditionally
484               // mapped to SMALL SIGMA
485               ch = GREEK_SMALL_LETTER_SIGMA;
486               sigmaIndex = uint32_t(-1);
487             }
488             prevIsLetter = true;
489             break;
490           }
491 
492           // ignore diacritics for the purpose of contextual sigma mapping;
493           // otherwise, reset prevIsLetter appropriately and clear the
494           // sigmaIndex marker
495           if (cat != nsUGenCategory::kMark) {
496             prevIsLetter = (cat == nsUGenCategory::kLetter);
497             sigmaIndex = uint32_t(-1);
498           }
499 
500           mcm = mozilla::unicode::SpecialLower(ch);
501           if (mcm) {
502             int j = 0;
503             while (j < 2 && mcm->mMappedChars[j + 1]) {
504               aConvertedString.Append(mcm->mMappedChars[j]);
505               ++extraChars;
506               ++j;
507             }
508             ch = mcm->mMappedChars[j];
509             break;
510           }
511 
512           ch = ToLowerCase(ch);
513           break;
514 
515         case StyleTextTransformCase::Uppercase:
516           if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
517             ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
518             break;
519           }
520 
521           if (languageSpecificCasing == eLSCB_Greek) {
522             bool markEta;
523             bool updateEta;
524             ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
525                                                  updateEta);
526             if (markEta) {
527               greekMark = aConvertedString.Length();
528             } else if (updateEta) {
529               // Remove the TONOS from an uppercase ETA-TONOS that turned out
530               // not to be disjunctive-eta.
531               MOZ_ASSERT(aConvertedString.Length() > 0 &&
532                              greekMark < aConvertedString.Length(),
533                          "bad greekMark!");
534               aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
535               greekMark = uint32_t(-1);
536             }
537             break;
538           }
539 
540           if (languageSpecificCasing == eLSCB_Lithuanian) {
541             /*
542              * # Remove DOT ABOVE after "i" with upper or titlecase
543              *
544              * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
545              */
546             if (ch == 'i' || ch == 'j' || ch == 0x012F) {
547               seenSoftDotted = true;
548               ch = ToTitleCase(ch);
549               break;
550             }
551             if (seenSoftDotted) {
552               seenSoftDotted = false;
553               if (ch == 0x0307) {
554                 ch = uint32_t(-1);
555                 break;
556               }
557             }
558           }
559 
560           if (languageSpecificCasing == eLSCB_Irish) {
561             bool mark;
562             uint8_t action;
563             ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
564             if (mark) {
565               irishMark = aConvertedString.Length();
566               irishMarkSrc = i;
567               break;
568             } else if (action) {
569               nsString& str = aConvertedString;  // shorthand
570               switch (action) {
571                 case 1:
572                   // lowercase a single prefix letter
573                   NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(),
574                                "bad irishMark!");
575                   str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
576                   irishMark = uint32_t(-1);
577                   irishMarkSrc = uint32_t(-1);
578                   break;
579                 case 2:
580                   // lowercase two prefix letters (immediately before current
581                   // pos)
582                   NS_ASSERTION(
583                       str.Length() >= 2 && irishMark == str.Length() - 2,
584                       "bad irishMark!");
585                   str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
586                   str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
587                   irishMark = uint32_t(-1);
588                   irishMarkSrc = uint32_t(-1);
589                   break;
590                 case 3:
591                   // lowercase one prefix letter, and delete following hyphen
592                   // (which must be the immediately-preceding char)
593                   NS_ASSERTION(
594                       str.Length() >= 2 && irishMark == str.Length() - 2,
595                       "bad irishMark!");
596                   MOZ_ASSERT(
597                       irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
598                       "failed to set irishMarks");
599                   str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
600                   aDeletedCharsArray[irishMarkSrc + 1] = true;
601                   // Remove the trailing entries (corresponding to the deleted
602                   // hyphen) from the auxiliary arrays.
603                   aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1);
604                   if (auxiliaryOutputArrays) {
605                     aStyleArray->SetLength(aStyleArray->Length() - 1);
606                     aCanBreakBeforeArray->SetLength(
607                         aCanBreakBeforeArray->Length() - 1);
608                     inhibitBreakBefore = true;
609                   }
610                   mergeNeeded = true;
611                   irishMark = uint32_t(-1);
612                   irishMarkSrc = uint32_t(-1);
613                   break;
614               }
615               // ch has been set to the uppercase for current char;
616               // No need to check for SpecialUpper here as none of the
617               // characters that could trigger an Irish casing action have
618               // special mappings.
619               break;
620             }
621             // If we didn't have any special action to perform, fall through
622             // to check for special uppercase (ß)
623           }
624 
625           mcm = mozilla::unicode::SpecialUpper(ch);
626           if (mcm) {
627             int j = 0;
628             while (j < 2 && mcm->mMappedChars[j + 1]) {
629               aConvertedString.Append(mcm->mMappedChars[j]);
630               ++extraChars;
631               ++j;
632             }
633             ch = mcm->mMappedChars[j];
634             break;
635           }
636 
637           // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
638           // lack of widespread font support for the corresponding Mtavruli
639           // characters at this time (July 2018).
640           // This condition is to be removed once the major platforms ship with
641           // fonts that support U+1C90..1CBF.
642           if (ch < 0x10D0 || ch > 0x10FF) {
643             ch = ToUpperCase(ch);
644           }
645           break;
646 
647         case StyleTextTransformCase::Capitalize:
648           if (aTextRun) {
649             if (capitalizeDutchIJ && ch == 'j') {
650               ch = 'J';
651               capitalizeDutchIJ = false;
652               break;
653             }
654             capitalizeDutchIJ = false;
655             if (aOffsetInTextRun < aTextRun->mCapitalize.Length() &&
656                 aTextRun->mCapitalize[aOffsetInTextRun]) {
657               if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
658                 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
659                 break;
660               }
661               if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
662                 ch = 'I';
663                 capitalizeDutchIJ = true;
664                 break;
665               }
666               if (languageSpecificCasing == eLSCB_Lithuanian) {
667                 /*
668                  * # Remove DOT ABOVE after "i" with upper or titlecase
669                  *
670                  * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
671                  */
672                 if (ch == 'i' || ch == 'j' || ch == 0x012F) {
673                   seenSoftDotted = true;
674                   ch = ToTitleCase(ch);
675                   break;
676                 }
677                 if (seenSoftDotted) {
678                   seenSoftDotted = false;
679                   if (ch == 0x0307) {
680                     ch = uint32_t(-1);
681                     break;
682                   }
683                 }
684               }
685 
686               mcm = mozilla::unicode::SpecialTitle(ch);
687               if (mcm) {
688                 int j = 0;
689                 while (j < 2 && mcm->mMappedChars[j + 1]) {
690                   aConvertedString.Append(mcm->mMappedChars[j]);
691                   ++extraChars;
692                   ++j;
693                 }
694                 ch = mcm->mMappedChars[j];
695                 break;
696               }
697 
698               ch = ToTitleCase(ch);
699             }
700           }
701           break;
702 
703         default:
704           MOZ_ASSERT_UNREACHABLE("all cases should be handled");
705           break;
706       }
707 
708       if (!aCaseTransformsOnly) {
709         if (!forceNonFullWidth &&
710             (style.other_ & StyleTextTransformOther::FULL_WIDTH)) {
711           ch = mozilla::unicode::GetFullWidth(ch);
712         }
713 
714         if (style.other_ & StyleTextTransformOther::FULL_SIZE_KANA) {
715           // clang-format off
716           static const uint16_t kSmallKanas[] = {
717               // ぁ   ぃ      ぅ      ぇ      ぉ      っ      ゃ      ゅ      ょ
718               0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
719               // ゎ   ゕ      ゖ
720               0x308E, 0x3095, 0x3096,
721               // ァ   ィ      ゥ      ェ      ォ      ッ      ャ      ュ      ョ
722               0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
723               // ヮ   ヵ      ヶ      ㇰ      ㇱ      ㇲ      ㇳ      ㇴ      ㇵ
724               0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
725               // ㇶ   ㇷ      ㇸ      ㇹ      ㇺ      ㇻ      ㇼ      ㇽ      ㇾ
726               0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
727               // ㇿ
728               0x31FF,
729               // ァ    ィ       ゥ       ェ       ォ       ャ       ュ       ョ       ッ
730               0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F};
731           static const uint16_t kFullSizeKanas[] = {
732               // あ   い      う      え      お      つ      や      ゆ      よ
733               0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
734               // わ   か      け
735               0x308F, 0x304B, 0x3051,
736               // ア   イ      ウ      エ      オ      ツ      ヤ      ユ      ヨ
737               0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
738               // ワ   カ      ケ      ク      シ      ス      ト      ヌ      ハ
739               0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
740               // ヒ   フ      ヘ      ホ      ム      ラ      リ      ル      レ
741               0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
742               // ロ
743               0x30ED,
744               // ア    イ       ウ       エ       オ       ヤ       ユ       ヨ        ツ
745               0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82};
746           // clang-format on
747 
748           size_t index;
749           const uint16_t len = MOZ_ARRAY_LENGTH(kSmallKanas);
750           if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) {
751             ch = kFullSizeKanas[index];
752           }
753         }
754       }
755 
756       if (forceNonFullWidth) {
757         ch = mozilla::unicode::GetFullWidthInverse(ch);
758       }
759     }
760 
761     if (ch == uint32_t(-1)) {
762       aDeletedCharsArray.AppendElement(true);
763       mergeNeeded = true;
764     } else {
765       aDeletedCharsArray.AppendElement(false);
766       aCharsToMergeArray.AppendElement(false);
767       if (auxiliaryOutputArrays) {
768         aStyleArray->AppendElement(charStyle);
769         aCanBreakBeforeArray->AppendElement(
770             inhibitBreakBefore
771                 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
772                 : aTextRun->CanBreakBefore(aOffsetInTextRun));
773       }
774 
775       if (IS_IN_BMP(ch)) {
776         aConvertedString.Append(maskPassword ? kPasswordMask : ch);
777       } else {
778         if (maskPassword) {
779           aConvertedString.Append(kPasswordMask);
780           // TODO: We should show a password mask for a surrogate pair later.
781           aConvertedString.Append(kPasswordMask);
782         } else {
783           aConvertedString.Append(H_SURROGATE(ch));
784           aConvertedString.Append(L_SURROGATE(ch));
785         }
786         ++extraChars;
787         ++i;
788         ++aOffsetInTextRun;
789         // Skip the trailing surrogate.
790         aDeletedCharsArray.AppendElement(true);
791       }
792 
793       while (extraChars-- > 0) {
794         mergeNeeded = true;
795         aCharsToMergeArray.AppendElement(true);
796         if (auxiliaryOutputArrays) {
797           aStyleArray->AppendElement(charStyle);
798           aCanBreakBeforeArray->AppendElement(
799               gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
800         }
801       }
802     }
803   }
804 
805   return mergeNeeded;
806 }
807 
RebuildTextRun(nsTransformedTextRun * aTextRun,DrawTarget * aRefDrawTarget,gfxMissingFontRecorder * aMFR)808 void nsCaseTransformTextRunFactory::RebuildTextRun(
809     nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
810     gfxMissingFontRecorder* aMFR) {
811   nsAutoString convertedString;
812   AutoTArray<bool, 50> charsToMergeArray;
813   AutoTArray<bool, 50> deletedCharsArray;
814   AutoTArray<uint8_t, 50> canBreakBeforeArray;
815   AutoTArray<RefPtr<nsTransformedCharStyle>, 50> styleArray;
816 
817   bool mergeNeeded = TransformString(
818       aTextRun->mString, convertedString, mAllUppercase,
819       /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray,
820       deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &styleArray);
821 
822   gfx::ShapedTextFlags flags;
823   gfxTextRunFactory::Parameters innerParams =
824       GetParametersForInner(aTextRun, &flags, aRefDrawTarget);
825   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
826 
827   RefPtr<nsTransformedTextRun> transformedChild;
828   RefPtr<gfxTextRun> cachedChild;
829   gfxTextRun* child;
830 
831   if (mInnerTransformingTextRunFactory) {
832     transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
833         convertedString.BeginReading(), convertedString.Length(), &innerParams,
834         fontGroup, flags, nsTextFrameUtils::Flags(), std::move(styleArray),
835         false);
836     child = transformedChild.get();
837   } else {
838     cachedChild = fontGroup->MakeTextRun(
839         convertedString.BeginReading(), convertedString.Length(), &innerParams,
840         flags, nsTextFrameUtils::Flags(), aMFR);
841     child = cachedChild.get();
842   }
843   if (!child) {
844     return;
845   }
846   // Copy potential linebreaks into child so they're preserved
847   // (and also child will be shaped appropriately)
848   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
849                "Dropped characters or break-before values somewhere!");
850   gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length()));
851   child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements());
852   if (transformedChild) {
853     transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR);
854   }
855 
856   if (mergeNeeded) {
857     // Now merge multiple characters into one multi-glyph character as required
858     // and deal with skipping deleted accent chars
859     NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
860                  "source length mismatch");
861     NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
862                  "destination length mismatch");
863     MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
864                              deletedCharsArray.Elements());
865   } else {
866     // No merging to do, so just copy; this produces a more optimized textrun.
867     // We can't steal the data because the child may be cached and stealing
868     // the data would break the cache.
869     aTextRun->ResetGlyphRuns();
870     aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);
871   }
872 }
873