1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "nsTextRunTransformations.h"
8
9 #include <utility>
10
11 #include "GreekCasing.h"
12 #include "IrishCasing.h"
13 #include "mozilla/ComputedStyleInlines.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/TextEditor.h"
16 #include "mozilla/gfx/2D.h"
17 #include "nsGkAtoms.h"
18 #include "nsSpecialCasingData.h"
19 #include "nsStyleConsts.h"
20 #include "nsTextFrameUtils.h"
21 #include "nsUnicharUtils.h"
22 #include "nsUnicodeProperties.h"
23
24 using namespace mozilla;
25 using namespace mozilla::gfx;
26
27 // Unicode characters needing special casing treatment in tr/az languages
28 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
29 #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131
30
// Greek sigma needs custom handling for the lowercase transform; for details
// see comments under "case StyleTextTransformCase::Lowercase" within
// nsCaseTransformTextRunFactory::TransformString(), and bug 740120.
34 #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
35 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
36 #define GREEK_SMALL_LETTER_SIGMA 0x03C3
37
Create(const gfxTextRunFactory::Parameters * aParams,nsTransformingTextRunFactory * aFactory,gfxFontGroup * aFontGroup,const char16_t * aString,uint32_t aLength,const gfx::ShapedTextFlags aFlags,const nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)38 already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
39 const gfxTextRunFactory::Parameters* aParams,
40 nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
41 const char16_t* aString, uint32_t aLength,
42 const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
43 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
44 NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
45 "didn't expect text to be marked as 8-bit here");
46
47 void* storage =
48 AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
49 if (!storage) {
50 return nullptr;
51 }
52
53 RefPtr<nsTransformedTextRun> result = new (storage)
54 nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength,
55 aFlags, aFlags2, std::move(aStyles), aOwnsFactory);
56 return result.forget();
57 }
58
SetCapitalization(uint32_t aStart,uint32_t aLength,bool * aCapitalization)59 void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
60 bool* aCapitalization) {
61 if (mCapitalize.IsEmpty()) {
62 // XXX(Bug 1631371) Check if this should use a fallible operation as it
63 // pretended earlier.
64 mCapitalize.AppendElements(GetLength());
65 memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool));
66 }
67 memcpy(mCapitalize.Elements() + aStart, aCapitalization,
68 aLength * sizeof(bool));
69 mNeedsRebuild = true;
70 }
71
SetPotentialLineBreaks(Range aRange,const uint8_t * aBreakBefore)72 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
73 const uint8_t* aBreakBefore) {
74 bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore);
75 if (changed) {
76 mNeedsRebuild = true;
77 }
78 return changed;
79 }
80
SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)81 size_t nsTransformedTextRun::SizeOfExcludingThis(
82 mozilla::MallocSizeOf aMallocSizeOf) {
83 size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
84 total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
85 total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
86 if (mOwnsFactory) {
87 total += aMallocSizeOf(mFactory);
88 }
89 return total;
90 }
91
SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)92 size_t nsTransformedTextRun::SizeOfIncludingThis(
93 mozilla::MallocSizeOf aMallocSizeOf) {
94 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
95 }
96
97 already_AddRefed<nsTransformedTextRun>
MakeTextRun(const char16_t * aString,uint32_t aLength,const gfxTextRunFactory::Parameters * aParams,gfxFontGroup * aFontGroup,gfx::ShapedTextFlags aFlags,nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)98 nsTransformingTextRunFactory::MakeTextRun(
99 const char16_t* aString, uint32_t aLength,
100 const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
101 gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
102 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
103 return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString,
104 aLength, aFlags, aFlags2,
105 std::move(aStyles), aOwnsFactory);
106 }
107
108 already_AddRefed<nsTransformedTextRun>
MakeTextRun(const uint8_t * aString,uint32_t aLength,const gfxTextRunFactory::Parameters * aParams,gfxFontGroup * aFontGroup,gfx::ShapedTextFlags aFlags,nsTextFrameUtils::Flags aFlags2,nsTArray<RefPtr<nsTransformedCharStyle>> && aStyles,bool aOwnsFactory)109 nsTransformingTextRunFactory::MakeTextRun(
110 const uint8_t* aString, uint32_t aLength,
111 const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
112 gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
113 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
114 // We'll only have a Unicode code path to minimize the amount of code needed
115 // for these rarely used features
116 NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString),
117 aLength);
118 return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
119 aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2,
120 std::move(aStyles), aOwnsFactory);
121 }
122
MergeCharactersInTextRun(gfxTextRun * aDest,gfxTextRun * aSrc,const bool * aCharsToMerge,const bool * aDeletedChars)123 void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
124 const bool* aCharsToMerge,
125 const bool* aDeletedChars) {
126 aDest->ResetGlyphRuns();
127
128 gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc));
129 uint32_t offset = 0;
130 AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs;
131 const gfxTextRun::CompressedGlyph continuationGlyph =
132 gfxTextRun::CompressedGlyph::MakeComplex(false, false, 0);
133 while (iter.NextRun()) {
134 const gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
135 aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false,
136 run->mOrientation, run->mIsCJK);
137
138 bool anyMissing = false;
139 uint32_t mergeRunStart = iter.GetStringStart();
140 const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
141 gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
142 uint32_t stringEnd = iter.GetStringEnd();
143 for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
144 const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
145 if (g.IsSimpleGlyph()) {
146 if (!anyMissing) {
147 gfxTextRun::DetailedGlyph details;
148 details.mGlyphID = g.GetSimpleGlyph();
149 details.mAdvance = g.GetSimpleAdvance();
150 glyphs.AppendElement(details);
151 }
152 } else {
153 if (g.IsMissing()) {
154 anyMissing = true;
155 glyphs.Clear();
156 }
157 if (g.GetGlyphCount() > 0) {
158 glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
159 }
160 }
161
162 if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
163 // next char is supposed to merge with current, so loop without
164 // writing current merged glyph to the destination
165 continue;
166 }
167
168 // If the start of the merge run is actually a character that should
169 // have been merged with the previous character (this can happen
170 // if there's a font change in the middle of a case-mapped character,
171 // that decomposed into a sequence of base+diacritics, for example),
172 // just discard the entire merge run. See comment at start of this
173 // function.
174 NS_WARNING_ASSERTION(
175 !aCharsToMerge[mergeRunStart],
176 "unable to merge across a glyph run boundary, glyph(s) discarded");
177 if (!aCharsToMerge[mergeRunStart]) {
178 if (anyMissing) {
179 mergedGlyph.SetMissing(glyphs.Length());
180 } else {
181 mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
182 mergedGlyph.IsLigatureGroupStart(),
183 glyphs.Length());
184 }
185 aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
186 ++offset;
187
188 while (offset < aDest->GetLength() && aDeletedChars[offset]) {
189 aDest->SetGlyphs(offset++, continuationGlyph, nullptr);
190 }
191 }
192
193 glyphs.Clear();
194 anyMissing = false;
195 mergeRunStart = k + 1;
196 if (mergeRunStart < stringEnd) {
197 mergedGlyph = srcGlyphs[mergeRunStart];
198 }
199 }
200 NS_ASSERTION(glyphs.Length() == 0,
201 "Leftover glyphs, don't request merging of the last character "
202 "with its next!");
203 }
204 NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
205 }
206
GetParametersForInner(nsTransformedTextRun * aTextRun,gfx::ShapedTextFlags * aFlags,DrawTarget * aRefDrawTarget)207 gfxTextRunFactory::Parameters GetParametersForInner(
208 nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
209 DrawTarget* aRefDrawTarget) {
210 gfxTextRunFactory::Parameters params = {
211 aRefDrawTarget, nullptr, nullptr,
212 nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()};
213 *aFlags = aTextRun->GetFlags();
214 return params;
215 }
216
// Casing conventions that deviate from the default Unicode mappings for
// particular languages.
// Each value is named after a well-known exemplar language showing the
// behavior; several language tags may map to the same value when they share
// that behavior.
enum LanguageSpecificCasingBehavior {
  // default non-lang-specific behavior
  eLSCB_None,
  // treat "ij" digraph as a unit for capitalization
  eLSCB_Dutch,
  // strip accent when uppercasing Greek vowels
  eLSCB_Greek,
  // keep prefix letters as lowercase when uppercasing Irish
  eLSCB_Irish,
  // preserve dotted/dotless-i distinction in uppercase
  eLSCB_Turkish,
  // retain dot on lowercase i/j when an accent is present
  eLSCB_Lithuanian
};
230
GetCasingFor(const nsAtom * aLang)231 static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
232 if (!aLang) {
233 return eLSCB_None;
234 }
235 if (aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az ||
236 aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh ||
237 aLang == nsGkAtoms::tt) {
238 return eLSCB_Turkish;
239 }
240 if (aLang == nsGkAtoms::nl) {
241 return eLSCB_Dutch;
242 }
243 if (aLang == nsGkAtoms::el) {
244 return eLSCB_Greek;
245 }
246 if (aLang == nsGkAtoms::ga) {
247 return eLSCB_Irish;
248 }
249 if (aLang == nsGkAtoms::lt_) {
250 return eLSCB_Lithuanian;
251 }
252
253 // Is there a region subtag we should ignore?
254 nsAtomString langStr(const_cast<nsAtom*>(aLang));
255 int index = langStr.FindChar('-');
256 if (index > 0) {
257 langStr.Truncate(index);
258 RefPtr<nsAtom> truncatedLang = NS_Atomize(langStr);
259 return GetCasingFor(truncatedLang);
260 }
261
262 return eLSCB_None;
263 }
264
TransformString(const nsAString & aString,nsString & aConvertedString,bool aAllUppercase,bool aCaseTransformsOnly,const nsAtom * aLanguage,nsTArray<bool> & aCharsToMergeArray,nsTArray<bool> & aDeletedCharsArray,const nsTransformedTextRun * aTextRun,uint32_t aOffsetInTextRun,nsTArray<uint8_t> * aCanBreakBeforeArray,nsTArray<RefPtr<nsTransformedCharStyle>> * aStyleArray)265 bool nsCaseTransformTextRunFactory::TransformString(
266 const nsAString& aString, nsString& aConvertedString, bool aAllUppercase,
267 bool aCaseTransformsOnly, const nsAtom* aLanguage,
268 nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray,
269 const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun,
270 nsTArray<uint8_t>* aCanBreakBeforeArray,
271 nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
272 bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
273 MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
274 "text run must be provided to use aux output arrays");
275
276 uint32_t length = aString.Length();
277 const char16_t* str = aString.BeginReading();
278 const char16_t kPasswordMask = TextEditor::PasswordMask();
279
280 bool mergeNeeded = false;
281
282 bool capitalizeDutchIJ = false;
283 bool prevIsLetter = false;
284 bool ntPrefix = false; // true immediately after a word-initial 'n' or 't'
285 // when doing Irish lowercasing
286 bool seenSoftDotted = false; // true immediately after an I or J that is
287 // converted to lowercase in Lithuanian mode
288 uint32_t sigmaIndex = uint32_t(-1);
289 nsUGenCategory cat;
290
291 StyleTextTransform style =
292 aAllUppercase ? StyleTextTransform{StyleTextTransformCase::Uppercase,
293 StyleTextTransformOther()}
294 : StyleTextTransform::None();
295 bool forceNonFullWidth = false;
296 const nsAtom* lang = aLanguage;
297
298 LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
299 mozilla::GreekCasing::State greekState;
300 mozilla::IrishCasing::State irishState;
301 uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s)
302 // in the output string
303 uint32_t irishMarkSrc = uint32_t(-1); // corresponding location in source
304 // string (may differ from output due
305 // to expansions like eszet -> 'SS')
306 uint32_t greekMark = uint32_t(-1); // location of uppercase ETA that may need
307 // tonos added (if it is disjunctive eta)
308 const char16_t kGreekUpperEta = 0x0397;
309
310 for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
311 uint32_t ch = str[i];
312
313 RefPtr<nsTransformedCharStyle> charStyle;
314 if (aTextRun) {
315 charStyle = aTextRun->mStyles[aOffsetInTextRun];
316 style = aAllUppercase
317 ? StyleTextTransform{StyleTextTransformCase::Uppercase,
318 StyleTextTransformOther()}
319 : charStyle->mTextTransform;
320 forceNonFullWidth = charStyle->mForceNonFullWidth;
321
322 nsAtom* newLang =
323 charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
324 if (lang != newLang) {
325 lang = newLang;
326 languageSpecificCasing = GetCasingFor(lang);
327 greekState.Reset();
328 irishState.Reset();
329 irishMark = uint32_t(-1);
330 irishMarkSrc = uint32_t(-1);
331 greekMark = uint32_t(-1);
332 }
333 }
334
335 bool maskPassword = charStyle && charStyle->mMaskPassword;
336 int extraChars = 0;
337 const mozilla::unicode::MultiCharMapping* mcm;
338 bool inhibitBreakBefore = false; // have we just deleted preceding hyphen?
339
340 if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
341 ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
342 }
343
344 // Skip case transform if we're masking current character.
345 if (!maskPassword) {
346 switch (style.case_) {
347 case StyleTextTransformCase::None:
348 break;
349
350 case StyleTextTransformCase::Lowercase:
351 if (languageSpecificCasing == eLSCB_Turkish) {
352 if (ch == 'I') {
353 ch = LATIN_SMALL_LETTER_DOTLESS_I;
354 prevIsLetter = true;
355 sigmaIndex = uint32_t(-1);
356 break;
357 }
358 if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
359 ch = 'i';
360 prevIsLetter = true;
361 sigmaIndex = uint32_t(-1);
362 break;
363 }
364 }
365
366 if (languageSpecificCasing == eLSCB_Lithuanian) {
367 // clang-format off
368 /* From SpecialCasing.txt:
369 * # Introduce an explicit dot above when lowercasing capital I's and J's
370 * # whenever there are more accents above.
371 * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
372 *
373 * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
374 * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
375 * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
376 * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
377 * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
378 * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
379 */
380 // clang-format on
381 if (ch == 'I' || ch == 'J' || ch == 0x012E) {
382 ch = ToLowerCase(ch);
383 prevIsLetter = true;
384 seenSoftDotted = true;
385 sigmaIndex = uint32_t(-1);
386 break;
387 }
388 if (ch == 0x00CC) {
389 aConvertedString.Append('i');
390 aConvertedString.Append(0x0307);
391 extraChars += 2;
392 ch = 0x0300;
393 prevIsLetter = true;
394 seenSoftDotted = false;
395 sigmaIndex = uint32_t(-1);
396 break;
397 }
398 if (ch == 0x00CD) {
399 aConvertedString.Append('i');
400 aConvertedString.Append(0x0307);
401 extraChars += 2;
402 ch = 0x0301;
403 prevIsLetter = true;
404 seenSoftDotted = false;
405 sigmaIndex = uint32_t(-1);
406 break;
407 }
408 if (ch == 0x0128) {
409 aConvertedString.Append('i');
410 aConvertedString.Append(0x0307);
411 extraChars += 2;
412 ch = 0x0303;
413 prevIsLetter = true;
414 seenSoftDotted = false;
415 sigmaIndex = uint32_t(-1);
416 break;
417 }
418 }
419
420 cat = mozilla::unicode::GetGenCategory(ch);
421
422 if (languageSpecificCasing == eLSCB_Irish &&
423 cat == nsUGenCategory::kLetter) {
424 // See bug 1018805 for Irish lowercasing requirements
425 if (!prevIsLetter && (ch == 'n' || ch == 't')) {
426 ntPrefix = true;
427 } else {
428 if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
429 aConvertedString.Append('-');
430 ++extraChars;
431 }
432 ntPrefix = false;
433 }
434 } else {
435 ntPrefix = false;
436 }
437
438 if (seenSoftDotted && cat == nsUGenCategory::kMark) {
439 // The seenSoftDotted flag will only be set in Lithuanian mode.
440 if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
441 aConvertedString.Append(0x0307);
442 ++extraChars;
443 }
444 }
445 seenSoftDotted = false;
446
447 // Special lowercasing behavior for Greek Sigma: note that this is
448 // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
449 // *not* a language-specific mapping; it applies regardless of the
450 // language of the element.
451 //
452 // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
453 // (i.e. the non-final form) whenever there is a following letter, or
454 // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
455 // followed by a LETTER); and to FINAL SIGMA when it is preceded by
456 // another letter but not followed by one.
457 //
458 // To implement the context-sensitive nature of this mapping, we keep
459 // track of whether the previous character was a letter. If not,
460 // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
461 // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
462 // record the position in the converted string; if we then encounter
463 // another letter, that FINAL SIGMA is replaced with a standard
464 // SMALL SIGMA.
465
466 // If sigmaIndex is not -1, it marks where we have provisionally
467 // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
468 // letter, we need to change it to SMALL SIGMA.
469 if (sigmaIndex != uint32_t(-1)) {
470 if (cat == nsUGenCategory::kLetter) {
471 aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
472 }
473 }
474
475 if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
476 // If preceding char was a letter, map to FINAL instead of SMALL,
477 // and note where it occurred by setting sigmaIndex; we'll change
478 // it to standard SMALL SIGMA later if another letter follows
479 if (prevIsLetter) {
480 ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
481 sigmaIndex = aConvertedString.Length();
482 } else {
483 // CAPITAL SIGMA not preceded by a letter is unconditionally
484 // mapped to SMALL SIGMA
485 ch = GREEK_SMALL_LETTER_SIGMA;
486 sigmaIndex = uint32_t(-1);
487 }
488 prevIsLetter = true;
489 break;
490 }
491
492 // ignore diacritics for the purpose of contextual sigma mapping;
493 // otherwise, reset prevIsLetter appropriately and clear the
494 // sigmaIndex marker
495 if (cat != nsUGenCategory::kMark) {
496 prevIsLetter = (cat == nsUGenCategory::kLetter);
497 sigmaIndex = uint32_t(-1);
498 }
499
500 mcm = mozilla::unicode::SpecialLower(ch);
501 if (mcm) {
502 int j = 0;
503 while (j < 2 && mcm->mMappedChars[j + 1]) {
504 aConvertedString.Append(mcm->mMappedChars[j]);
505 ++extraChars;
506 ++j;
507 }
508 ch = mcm->mMappedChars[j];
509 break;
510 }
511
512 ch = ToLowerCase(ch);
513 break;
514
515 case StyleTextTransformCase::Uppercase:
516 if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
517 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
518 break;
519 }
520
521 if (languageSpecificCasing == eLSCB_Greek) {
522 bool markEta;
523 bool updateEta;
524 ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
525 updateEta);
526 if (markEta) {
527 greekMark = aConvertedString.Length();
528 } else if (updateEta) {
529 // Remove the TONOS from an uppercase ETA-TONOS that turned out
530 // not to be disjunctive-eta.
531 MOZ_ASSERT(aConvertedString.Length() > 0 &&
532 greekMark < aConvertedString.Length(),
533 "bad greekMark!");
534 aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
535 greekMark = uint32_t(-1);
536 }
537 break;
538 }
539
540 if (languageSpecificCasing == eLSCB_Lithuanian) {
541 /*
542 * # Remove DOT ABOVE after "i" with upper or titlecase
543 *
544 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
545 */
546 if (ch == 'i' || ch == 'j' || ch == 0x012F) {
547 seenSoftDotted = true;
548 ch = ToTitleCase(ch);
549 break;
550 }
551 if (seenSoftDotted) {
552 seenSoftDotted = false;
553 if (ch == 0x0307) {
554 ch = uint32_t(-1);
555 break;
556 }
557 }
558 }
559
560 if (languageSpecificCasing == eLSCB_Irish) {
561 bool mark;
562 uint8_t action;
563 ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
564 if (mark) {
565 irishMark = aConvertedString.Length();
566 irishMarkSrc = i;
567 break;
568 } else if (action) {
569 nsString& str = aConvertedString; // shorthand
570 switch (action) {
571 case 1:
572 // lowercase a single prefix letter
573 NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(),
574 "bad irishMark!");
575 str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
576 irishMark = uint32_t(-1);
577 irishMarkSrc = uint32_t(-1);
578 break;
579 case 2:
580 // lowercase two prefix letters (immediately before current
581 // pos)
582 NS_ASSERTION(
583 str.Length() >= 2 && irishMark == str.Length() - 2,
584 "bad irishMark!");
585 str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
586 str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
587 irishMark = uint32_t(-1);
588 irishMarkSrc = uint32_t(-1);
589 break;
590 case 3:
591 // lowercase one prefix letter, and delete following hyphen
592 // (which must be the immediately-preceding char)
593 NS_ASSERTION(
594 str.Length() >= 2 && irishMark == str.Length() - 2,
595 "bad irishMark!");
596 MOZ_ASSERT(
597 irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
598 "failed to set irishMarks");
599 str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
600 aDeletedCharsArray[irishMarkSrc + 1] = true;
601 // Remove the trailing entries (corresponding to the deleted
602 // hyphen) from the auxiliary arrays.
603 aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1);
604 if (auxiliaryOutputArrays) {
605 aStyleArray->SetLength(aStyleArray->Length() - 1);
606 aCanBreakBeforeArray->SetLength(
607 aCanBreakBeforeArray->Length() - 1);
608 inhibitBreakBefore = true;
609 }
610 mergeNeeded = true;
611 irishMark = uint32_t(-1);
612 irishMarkSrc = uint32_t(-1);
613 break;
614 }
615 // ch has been set to the uppercase for current char;
616 // No need to check for SpecialUpper here as none of the
617 // characters that could trigger an Irish casing action have
618 // special mappings.
619 break;
620 }
621 // If we didn't have any special action to perform, fall through
622 // to check for special uppercase (ß)
623 }
624
625 mcm = mozilla::unicode::SpecialUpper(ch);
626 if (mcm) {
627 int j = 0;
628 while (j < 2 && mcm->mMappedChars[j + 1]) {
629 aConvertedString.Append(mcm->mMappedChars[j]);
630 ++extraChars;
631 ++j;
632 }
633 ch = mcm->mMappedChars[j];
634 break;
635 }
636
637 // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
638 // lack of widespread font support for the corresponding Mtavruli
639 // characters at this time (July 2018).
640 // This condition is to be removed once the major platforms ship with
641 // fonts that support U+1C90..1CBF.
642 if (ch < 0x10D0 || ch > 0x10FF) {
643 ch = ToUpperCase(ch);
644 }
645 break;
646
647 case StyleTextTransformCase::Capitalize:
648 if (aTextRun) {
649 if (capitalizeDutchIJ && ch == 'j') {
650 ch = 'J';
651 capitalizeDutchIJ = false;
652 break;
653 }
654 capitalizeDutchIJ = false;
655 if (aOffsetInTextRun < aTextRun->mCapitalize.Length() &&
656 aTextRun->mCapitalize[aOffsetInTextRun]) {
657 if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
658 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
659 break;
660 }
661 if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
662 ch = 'I';
663 capitalizeDutchIJ = true;
664 break;
665 }
666 if (languageSpecificCasing == eLSCB_Lithuanian) {
667 /*
668 * # Remove DOT ABOVE after "i" with upper or titlecase
669 *
670 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
671 */
672 if (ch == 'i' || ch == 'j' || ch == 0x012F) {
673 seenSoftDotted = true;
674 ch = ToTitleCase(ch);
675 break;
676 }
677 if (seenSoftDotted) {
678 seenSoftDotted = false;
679 if (ch == 0x0307) {
680 ch = uint32_t(-1);
681 break;
682 }
683 }
684 }
685
686 mcm = mozilla::unicode::SpecialTitle(ch);
687 if (mcm) {
688 int j = 0;
689 while (j < 2 && mcm->mMappedChars[j + 1]) {
690 aConvertedString.Append(mcm->mMappedChars[j]);
691 ++extraChars;
692 ++j;
693 }
694 ch = mcm->mMappedChars[j];
695 break;
696 }
697
698 ch = ToTitleCase(ch);
699 }
700 }
701 break;
702
703 default:
704 MOZ_ASSERT_UNREACHABLE("all cases should be handled");
705 break;
706 }
707
708 if (!aCaseTransformsOnly) {
709 if (!forceNonFullWidth &&
710 (style.other_ & StyleTextTransformOther::FULL_WIDTH)) {
711 ch = mozilla::unicode::GetFullWidth(ch);
712 }
713
714 if (style.other_ & StyleTextTransformOther::FULL_SIZE_KANA) {
715 // clang-format off
716 static const uint16_t kSmallKanas[] = {
717 // ぁ ぃ ぅ ぇ ぉ っ ゃ ゅ ょ
718 0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
719 // ゎ ゕ ゖ
720 0x308E, 0x3095, 0x3096,
721 // ァ ィ ゥ ェ ォ ッ ャ ュ ョ
722 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
723 // ヮ ヵ ヶ ㇰ ㇱ ㇲ ㇳ ㇴ ㇵ
724 0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
725 // ㇶ ㇷ ㇸ ㇹ ㇺ ㇻ ㇼ ㇽ ㇾ
726 0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
727 // ㇿ
728 0x31FF,
729 // ァ ィ ゥ ェ ォ ャ ュ ョ ッ
730 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F};
731 static const uint16_t kFullSizeKanas[] = {
732 // あ い う え お つ や ゆ よ
733 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
734 // わ か け
735 0x308F, 0x304B, 0x3051,
736 // ア イ ウ エ オ ツ ヤ ユ ヨ
737 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
738 // ワ カ ケ ク シ ス ト ヌ ハ
739 0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
740 // ヒ フ ヘ ホ ム ラ リ ル レ
741 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
742 // ロ
743 0x30ED,
744 // ア イ ウ エ オ ヤ ユ ヨ ツ
745 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82};
746 // clang-format on
747
748 size_t index;
749 const uint16_t len = MOZ_ARRAY_LENGTH(kSmallKanas);
750 if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) {
751 ch = kFullSizeKanas[index];
752 }
753 }
754 }
755
756 if (forceNonFullWidth) {
757 ch = mozilla::unicode::GetFullWidthInverse(ch);
758 }
759 }
760
761 if (ch == uint32_t(-1)) {
762 aDeletedCharsArray.AppendElement(true);
763 mergeNeeded = true;
764 } else {
765 aDeletedCharsArray.AppendElement(false);
766 aCharsToMergeArray.AppendElement(false);
767 if (auxiliaryOutputArrays) {
768 aStyleArray->AppendElement(charStyle);
769 aCanBreakBeforeArray->AppendElement(
770 inhibitBreakBefore
771 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
772 : aTextRun->CanBreakBefore(aOffsetInTextRun));
773 }
774
775 if (IS_IN_BMP(ch)) {
776 aConvertedString.Append(maskPassword ? kPasswordMask : ch);
777 } else {
778 if (maskPassword) {
779 aConvertedString.Append(kPasswordMask);
780 // TODO: We should show a password mask for a surrogate pair later.
781 aConvertedString.Append(kPasswordMask);
782 } else {
783 aConvertedString.Append(H_SURROGATE(ch));
784 aConvertedString.Append(L_SURROGATE(ch));
785 }
786 ++extraChars;
787 ++i;
788 ++aOffsetInTextRun;
789 // Skip the trailing surrogate.
790 aDeletedCharsArray.AppendElement(true);
791 }
792
793 while (extraChars-- > 0) {
794 mergeNeeded = true;
795 aCharsToMergeArray.AppendElement(true);
796 if (auxiliaryOutputArrays) {
797 aStyleArray->AppendElement(charStyle);
798 aCanBreakBeforeArray->AppendElement(
799 gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
800 }
801 }
802 }
803 }
804
805 return mergeNeeded;
806 }
807
RebuildTextRun(nsTransformedTextRun * aTextRun,DrawTarget * aRefDrawTarget,gfxMissingFontRecorder * aMFR)808 void nsCaseTransformTextRunFactory::RebuildTextRun(
809 nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
810 gfxMissingFontRecorder* aMFR) {
811 nsAutoString convertedString;
812 AutoTArray<bool, 50> charsToMergeArray;
813 AutoTArray<bool, 50> deletedCharsArray;
814 AutoTArray<uint8_t, 50> canBreakBeforeArray;
815 AutoTArray<RefPtr<nsTransformedCharStyle>, 50> styleArray;
816
817 bool mergeNeeded = TransformString(
818 aTextRun->mString, convertedString, mAllUppercase,
819 /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray,
820 deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &styleArray);
821
822 gfx::ShapedTextFlags flags;
823 gfxTextRunFactory::Parameters innerParams =
824 GetParametersForInner(aTextRun, &flags, aRefDrawTarget);
825 gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
826
827 RefPtr<nsTransformedTextRun> transformedChild;
828 RefPtr<gfxTextRun> cachedChild;
829 gfxTextRun* child;
830
831 if (mInnerTransformingTextRunFactory) {
832 transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
833 convertedString.BeginReading(), convertedString.Length(), &innerParams,
834 fontGroup, flags, nsTextFrameUtils::Flags(), std::move(styleArray),
835 false);
836 child = transformedChild.get();
837 } else {
838 cachedChild = fontGroup->MakeTextRun(
839 convertedString.BeginReading(), convertedString.Length(), &innerParams,
840 flags, nsTextFrameUtils::Flags(), aMFR);
841 child = cachedChild.get();
842 }
843 if (!child) {
844 return;
845 }
846 // Copy potential linebreaks into child so they're preserved
847 // (and also child will be shaped appropriately)
848 NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
849 "Dropped characters or break-before values somewhere!");
850 gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length()));
851 child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements());
852 if (transformedChild) {
853 transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR);
854 }
855
856 if (mergeNeeded) {
857 // Now merge multiple characters into one multi-glyph character as required
858 // and deal with skipping deleted accent chars
859 NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
860 "source length mismatch");
861 NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
862 "destination length mismatch");
863 MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
864 deletedCharsArray.Elements());
865 } else {
866 // No merging to do, so just copy; this produces a more optimized textrun.
867 // We can't steal the data because the child may be cached and stealing
868 // the data would break the cache.
869 aTextRun->ResetGlyphRuns();
870 aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);
871 }
872 }
873