/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef builtin_intl_SharedIntlData_h
8 #define builtin_intl_SharedIntlData_h
9 
#include "mozilla/MemoryReporting.h"
#include "mozilla/UniquePtr.h"

#include <stddef.h>
#include <stdint.h>

#include "js/AllocPolicy.h"
#include "js/CharacterEncoding.h"
#include "js/GCAPI.h"
#include "js/GCHashTable.h"
#include "js/Result.h"
#include "js/RootingAPI.h"
#include "js/Utility.h"
#include "vm/StringType.h"
23 
// Forward declaration only: this header refers to DateTimePatternGenerator
// solely through pointers, so the full definition is not needed here.
namespace mozilla::intl {
class DateTimePatternGenerator;
}  // namespace mozilla::intl
27 
28 namespace js {
29 
// Forward declaration; this header only uses ArrayObject as a pointer return
// type (see SharedIntlData::availableLocalesOf).
class ArrayObject;
31 
32 namespace intl {
33 
34 /**
35  * This deleter class exists so that mozilla::intl::DateTimePatternGenerator
36  * can be a forward declaration, but still be used inside of a UniquePtr.
37  */
38 class DateTimePatternGeneratorDeleter {
39  public:
40   void operator()(mozilla::intl::DateTimePatternGenerator* ptr);
41 };
42 
43 /**
44  * Stores Intl data which can be shared across compartments (but not contexts).
45  *
46  * Used for data which is expensive when computed repeatedly or is not
47  * available through ICU.
48  */
49 class SharedIntlData {
50   struct LinearStringLookup {
51     union {
52       const JS::Latin1Char* latin1Chars;
53       const char16_t* twoByteChars;
54     };
55     bool isLatin1;
56     size_t length;
57     JS::AutoCheckCannotGC nogc;
58     HashNumber hash = 0;
59 
LinearStringLookupLinearStringLookup60     explicit LinearStringLookup(JSLinearString* string)
61         : isLatin1(string->hasLatin1Chars()), length(string->length()) {
62       if (isLatin1) {
63         latin1Chars = string->latin1Chars(nogc);
64       } else {
65         twoByteChars = string->twoByteChars(nogc);
66       }
67     }
68 
LinearStringLookupLinearStringLookup69     LinearStringLookup(const char* chars, size_t length)
70         : isLatin1(true), length(length) {
71       latin1Chars = reinterpret_cast<const JS::Latin1Char*>(chars);
72     }
73   };
74 
75  public:
76   /**
77    * Information tracking the set of the supported time zone names, derived
78    * from the IANA time zone database <https://www.iana.org/time-zones>.
79    *
80    * There are two kinds of IANA time zone names: Zone and Link (denoted as
81    * such in database source files). Zone names are the canonical, preferred
82    * name for a time zone, e.g. Asia/Kolkata. Link names simply refer to
83    * target Zone names for their meaning, e.g. Asia/Calcutta targets
84    * Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a
85    * sense of deprecation: some Link names also exist partly for convenience,
86    * e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC.
87    *
88    * Two data sources determine the time zone names we support: those ICU
89    * supports and IANA's zone information.
90    *
91    * Unfortunately the names ICU and IANA support, and their Link
92    * relationships from name to target, aren't identical, so we can't simply
93    * implicitly trust ICU's name handling. We must perform various
94    * preprocessing of user-provided zone names and post-processing of
95    * ICU-provided zone names to implement ECMA-402's IANA-consistent behavior.
96    *
97    * Also see <https://ssl.icu-project.org/trac/ticket/12044> and
98    * <http://unicode.org/cldr/trac/ticket/9892>.
99    */
100 
101   using TimeZoneName = JSAtom*;
102 
103   struct TimeZoneHasher {
104     struct Lookup : LinearStringLookup {
105       explicit Lookup(JSLinearString* timeZone);
106     };
107 
hashTimeZoneHasher108     static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
109     static bool match(TimeZoneName key, const Lookup& lookup);
110   };
111 
112   using TimeZoneSet =
113       GCHashSet<TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
114   using TimeZoneMap =
115       GCHashMap<TimeZoneName, TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
116 
117  private:
118   /**
119    * As a threshold matter, available time zones are those time zones ICU
120    * supports, via ucal_openTimeZones. But ICU supports additional non-IANA
121    * time zones described in intl/icu/source/tools/tzcode/icuzones (listed in
122    * IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards
123    * compatibility purposes. This set consists of ICU's supported time zones,
124    * minus all backwards-compatibility time zones.
125    */
126   TimeZoneSet availableTimeZones;
127 
128   /**
129    * IANA treats some time zone names as Zones, that ICU instead treats as
130    * Links. For example, IANA considers "America/Indiana/Indianapolis" to be
131    * a Zone and "America/Fort_Wayne" a Link that targets it, but ICU
132    * considers the former a Link that targets "America/Indianapolis" (which
133    * IANA treats as a Link).
134    *
135    * ECMA-402 requires that we respect IANA data, so if we're asked to
136    * canonicalize a time zone name in this set, we must *not* return ICU's
137    * canonicalization.
138    */
139   TimeZoneSet ianaZonesTreatedAsLinksByICU;
140 
141   /**
142    * IANA treats some time zone names as Links to one target, that ICU
143    * instead treats as either Zones, or Links to different targets. An
144    * example of the former is "Asia/Calcutta, which IANA assigns the target
145    * "Asia/Kolkata" but ICU considers its own Zone. An example of the latter
146    * is "America/Virgin", which IANA assigns the target
147    * "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas".
148    *
149    * ECMA-402 requires that we respect IANA data, so if we're asked to
150    * canonicalize a time zone name that's a key in this map, we *must* return
151    * the corresponding value and *must not* return ICU's canonicalization.
152    */
153   TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU;
154 
155   bool timeZoneDataInitialized = false;
156 
157   /**
158    * Precomputes the available time zone names, because it's too expensive to
159    * call ucal_openTimeZones() repeatedly.
160    */
161   bool ensureTimeZones(JSContext* cx);
162 
163  public:
164   /**
165    * Returns the validated time zone name in |result|. If the input time zone
166    * isn't a valid IANA time zone name, |result| remains unchanged.
167    */
168   bool validateTimeZoneName(JSContext* cx, JS::Handle<JSString*> timeZone,
169                             JS::MutableHandle<JSAtom*> result);
170 
171   /**
172    * Returns the canonical time zone name in |result|. If no canonical name
173    * was found, |result| remains unchanged.
174    *
175    * This method only handles time zones which are canonicalized differently
176    * by ICU when compared to IANA.
177    */
178   bool tryCanonicalizeTimeZoneConsistentWithIANA(
179       JSContext* cx, JS::Handle<JSString*> timeZone,
180       JS::MutableHandle<JSAtom*> result);
181 
182   /**
183    * Returns an iterator over all available time zones supported by ICU. The
184    * returned time zone names aren't canonicalized.
185    */
186   JS::Result<TimeZoneSet::Iterator> availableTimeZonesIteration(JSContext* cx);
187 
188  private:
189   using Locale = JSAtom*;
190 
191   struct LocaleHasher {
192     struct Lookup : LinearStringLookup {
193       explicit Lookup(JSLinearString* locale);
194       Lookup(const char* chars, size_t length);
195     };
196 
hashLocaleHasher197     static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
198     static bool match(Locale key, const Lookup& lookup);
199   };
200 
201   using LocaleSet = GCHashSet<Locale, LocaleHasher, SystemAllocPolicy>;
202 
203   // Set of supported locales for all Intl service constructors except Collator,
204   // which uses its own set.
205   //
206   // UDateFormat:
207   // udat_[count,get]Available() return the same results as their
208   // uloc_[count,get]Available() counterparts.
209   //
210   // UNumberFormatter:
211   // unum_[count,get]Available() return the same results as their
212   // uloc_[count,get]Available() counterparts.
213   //
214   // UListFormatter, UPluralRules, and URelativeDateTimeFormatter:
215   // We're going to use ULocale availableLocales as per ICU recommendation:
216   // https://unicode-org.atlassian.net/browse/ICU-12756
217   LocaleSet supportedLocales;
218 
219   // ucol_[count,get]Available() return different results compared to
220   // uloc_[count,get]Available(), we can't use |supportedLocales| here.
221   LocaleSet collatorSupportedLocales;
222 
223   bool supportedLocalesInitialized = false;
224 
225   // CountAvailable and GetAvailable describe the signatures used for ICU API
226   // to determine available locales for various functionality.
227   using CountAvailable = int32_t (*)();
228   using GetAvailable = const char* (*)(int32_t localeIndex);
229 
230   template <class AvailableLocales>
231   static bool getAvailableLocales(JSContext* cx, LocaleSet& locales,
232                                   const AvailableLocales& availableLocales);
233 
234   /**
235    * Precomputes the available locales sets.
236    */
237   bool ensureSupportedLocales(JSContext* cx);
238 
239  public:
240   enum class SupportedLocaleKind {
241     Collator,
242     DateTimeFormat,
243     DisplayNames,
244     ListFormat,
245     NumberFormat,
246     PluralRules,
247     RelativeTimeFormat
248   };
249 
250   /**
251    * Sets |supported| to true if |locale| is supported by the requested Intl
252    * service constructor. Otherwise sets |supported| to false.
253    */
254   [[nodiscard]] bool isSupportedLocale(JSContext* cx, SupportedLocaleKind kind,
255                                        JS::Handle<JSString*> locale,
256                                        bool* supported);
257 
258   /**
259    * Returns all available locales for |kind|.
260    */
261   ArrayObject* availableLocalesOf(JSContext* cx, SupportedLocaleKind kind);
262 
263  private:
264   /**
265    * The case first parameter (BCP47 key "kf") allows to switch the order of
266    * upper- and lower-case characters. ICU doesn't directly provide an API
267    * to query the default case first value of a given locale, but instead
268    * requires to instantiate a collator object and then query the case first
269    * attribute (UCOL_CASE_FIRST).
270    * To avoid instantiating an additional collator object whenever we need
271    * to retrieve the default case first value of a specific locale, we
272    * compute the default case first value for every supported locale only
273    * once and then keep a list of all locales which don't use the default
274    * case first setting.
275    * There is almost no difference between lower-case first and when case
276    * first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to
277    * track locales which use upper-case first as their default setting.
278    *
279    * Instantiating collator objects for each available locale is slow
280    * (bug 1527879), therefore we're hardcoding the two locales using upper-case
281    * first ("da" (Danish) and "mt" (Maltese)) and only assert in debug-mode
282    * these two locales match the upper-case first locales returned by ICU. A
283    * system-ICU may support a different set of locales, therefore we're always
284    * calling into ICU to find the upper-case first locales in that case.
285    */
286 
287 #if DEBUG || MOZ_SYSTEM_ICU
288   LocaleSet upperCaseFirstLocales;
289 
290   bool upperCaseFirstInitialized = false;
291 
292   /**
293    * Precomputes the available locales which use upper-case first sorting.
294    */
295   bool ensureUpperCaseFirstLocales(JSContext* cx);
296 #endif
297 
298  public:
299   /**
300    * Sets |isUpperFirst| to true if |locale| sorts upper-case characters
301    * before lower-case characters.
302    */
303   bool isUpperCaseFirst(JSContext* cx, JS::Handle<JSString*> locale,
304                         bool* isUpperFirst);
305 
306  private:
307   using UniqueDateTimePatternGenerator =
308       mozilla::UniquePtr<mozilla::intl::DateTimePatternGenerator,
309                          DateTimePatternGeneratorDeleter>;
310 
311   UniqueDateTimePatternGenerator dateTimePatternGenerator;
312   JS::UniqueChars dateTimePatternGeneratorLocale;
313 
314  public:
315   /**
316    * Get a non-owned cached instance of the DateTimePatternGenerator, which is
317    * expensive to instantiate.
318    *
319    * See: https://bugzilla.mozilla.org/show_bug.cgi?id=1549578
320    */
321   mozilla::intl::DateTimePatternGenerator* getDateTimePatternGenerator(
322       JSContext* cx, const char* locale);
323 
324  public:
325   void destroyInstance();
326 
327   void trace(JSTracer* trc);
328 
329   size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
330 };
331 
332 }  // namespace intl
333 
334 }  // namespace js
335 
336 #endif /* builtin_intl_SharedIntlData_h */
337