1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif  // V8_INTL_SUPPORT
8 
9 #ifndef V8_OBJECTS_INTL_OBJECTS_H_
10 #define V8_OBJECTS_INTL_OBJECTS_H_
11 
#include <algorithm>
#include <cstring>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "src/base/timezone-cache.h"
#include "src/objects/contexts.h"
#include "src/objects/managed.h"
#include "src/objects/objects.h"
#include "unicode/locid.h"
#include "unicode/uversion.h"
23 
24 #define V8_MINIMUM_ICU_VERSION 67
25 
26 namespace U_ICU_NAMESPACE {
27 class BreakIterator;
28 class Collator;
29 class FormattedValue;
30 class UnicodeString;
31 }  // namespace U_ICU_NAMESPACE
32 
33 namespace v8 {
34 namespace internal {
35 
36 template <typename T>
37 class Handle;
38 class JSCollator;
39 
40 class Intl {
41  public:
42   enum class BoundFunctionContextSlot {
43     kBoundFunction = Context::MIN_CONTEXT_SLOTS,
44     kLength
45   };
46 
47   // Build a set of ICU locales from a list of Locales. If there is a locale
48   // with a script tag then the locales also include a locale without the
49   // script; eg, pa_Guru_IN (language=Panjabi, script=Gurmukhi, country-India)
50   // would include pa_IN.
51   static std::set<std::string> BuildLocaleSet(
52       const std::vector<std::string>& locales, const char* path,
53       const char* validate_key);
54 
55   static Maybe<std::string> ToLanguageTag(const icu::Locale& locale);
56 
57   // Get the name of the numbering system from locale.
58   // ICU doesn't expose numbering system in any way, so we have to assume that
59   // for given locale NumberingSystem constructor produces the same digits as
60   // NumberFormat/Calendar would.
61   static std::string GetNumberingSystem(const icu::Locale& icu_locale);
62 
63   static V8_WARN_UNUSED_RESULT MaybeHandle<JSObject> SupportedLocalesOf(
64       Isolate* isolate, const char* method,
65       const std::set<std::string>& available_locales, Handle<Object> locales_in,
66       Handle<Object> options_in);
67 
68   // ECMA402 9.2.10. GetOption( options, property, type, values, fallback)
69   // ecma402/#sec-getoption
70   //
71   // This is specialized for the case when type is string.
72   //
73   // Instead of passing undefined for the values argument as the spec
74   // defines, pass in an empty vector.
75   //
76   // Returns true if options object has the property and stores the
77   // result in value. Returns false if the value is not found. The
78   // caller is required to use fallback value appropriately in this
79   // case.
80   //
81   // service is a string denoting the type of Intl object; used when
82   // printing the error message.
83   V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static Maybe<bool> GetStringOption(
84       Isolate* isolate, Handle<JSReceiver> options, const char* property,
85       std::vector<const char*> values, const char* service,
86       std::unique_ptr<char[]>* result);
87 
88   // A helper template to get string from option into a enum.
89   // The enum in the enum_values is the corresponding value to the strings
90   // in the str_values. If the option does not contains name,
91   // default_value will be return.
92   template <typename T>
GetStringOption(Isolate * isolate,Handle<JSReceiver> options,const char * name,const char * method,const std::vector<const char * > & str_values,const std::vector<T> & enum_values,T default_value)93   V8_WARN_UNUSED_RESULT static Maybe<T> GetStringOption(
94       Isolate* isolate, Handle<JSReceiver> options, const char* name,
95       const char* method, const std::vector<const char*>& str_values,
96       const std::vector<T>& enum_values, T default_value) {
97     DCHECK_EQ(str_values.size(), enum_values.size());
98     std::unique_ptr<char[]> cstr;
99     Maybe<bool> found = Intl::GetStringOption(isolate, options, name,
100                                               str_values, method, &cstr);
101     MAYBE_RETURN(found, Nothing<T>());
102     if (found.FromJust()) {
103       DCHECK_NOT_NULL(cstr.get());
104       for (size_t i = 0; i < str_values.size(); i++) {
105         if (strcmp(cstr.get(), str_values[i]) == 0) {
106           return Just(enum_values[i]);
107         }
108       }
109       UNREACHABLE();
110     }
111     return Just(default_value);
112   }
113 
114   // ECMA402 9.2.10. GetOption( options, property, type, values, fallback)
115   // ecma402/#sec-getoption
116   //
117   // This is specialized for the case when type is boolean.
118   //
119   // Returns true if options object has the property and stores the
120   // result in value. Returns false if the value is not found. The
121   // caller is required to use fallback value appropriately in this
122   // case.
123   //
124   // service is a string denoting the type of Intl object; used when
125   // printing the error message.
126   V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static Maybe<bool> GetBoolOption(
127       Isolate* isolate, Handle<JSReceiver> options, const char* property,
128       const char* service, bool* result);
129 
130   V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static Maybe<int> GetNumberOption(
131       Isolate* isolate, Handle<JSReceiver> options, Handle<String> property,
132       int min, int max, int fallback);
133 
134   // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist
135   // {only_return_one_result} is an optimization for callers that only
136   // care about the first result.
137   static Maybe<std::vector<std::string>> CanonicalizeLocaleList(
138       Isolate* isolate, Handle<Object> locales,
139       bool only_return_one_result = false);
140 
141   // ecma-402 #sec-intl.getcanonicallocales
142   V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> GetCanonicalLocales(
143       Isolate* isolate, Handle<Object> locales);
144 
145   // For locale sensitive functions
146   V8_WARN_UNUSED_RESULT static MaybeHandle<String> StringLocaleConvertCase(
147       Isolate* isolate, Handle<String> s, bool is_upper,
148       Handle<Object> locales);
149 
150   V8_WARN_UNUSED_RESULT static MaybeHandle<String> ConvertToUpper(
151       Isolate* isolate, Handle<String> s);
152 
153   V8_WARN_UNUSED_RESULT static MaybeHandle<String> ConvertToLower(
154       Isolate* isolate, Handle<String> s);
155 
156   V8_WARN_UNUSED_RESULT static MaybeHandle<Object> StringLocaleCompare(
157       Isolate* isolate, Handle<String> s1, Handle<String> s2,
158       Handle<Object> locales, Handle<Object> options, const char* method);
159 
160   V8_WARN_UNUSED_RESULT static Handle<Object> CompareStrings(
161       Isolate* isolate, const icu::Collator& collator, Handle<String> s1,
162       Handle<String> s2);
163 
164   // ecma402/#sup-properties-of-the-number-prototype-object
165   V8_WARN_UNUSED_RESULT static MaybeHandle<String> NumberToLocaleString(
166       Isolate* isolate, Handle<Object> num, Handle<Object> locales,
167       Handle<Object> options, const char* method);
168 
169   // ecma402/#sec-setnfdigitoptions
170   struct NumberFormatDigitOptions {
171     int minimum_integer_digits;
172     int minimum_fraction_digits;
173     int maximum_fraction_digits;
174     int minimum_significant_digits;
175     int maximum_significant_digits;
176   };
177   V8_WARN_UNUSED_RESULT static Maybe<NumberFormatDigitOptions>
178   SetNumberFormatDigitOptions(Isolate* isolate, Handle<JSReceiver> options,
179                               int mnfd_default, int mxfd_default,
180                               bool notation_is_compact);
181 
182   // Helper funciton to convert a UnicodeString to a Handle<String>
183   V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString(
184       Isolate* isolate, const icu::UnicodeString& string);
185 
186   // Helper function to convert a substring of UnicodeString to a Handle<String>
187   V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString(
188       Isolate* isolate, const icu::UnicodeString& string, int32_t begin,
189       int32_t end);
190 
191   // Helper function to convert a FormattedValue to String
192   V8_WARN_UNUSED_RESULT static MaybeHandle<String> FormattedToString(
193       Isolate* isolate, const icu::FormattedValue& formatted);
194 
195   // Helper function to convert number field id to type string.
196   static Handle<String> NumberFieldToType(Isolate* isolate,
197                                           Handle<Object> numeric_obj,
198                                           int32_t field_id);
199 
200   // A helper function to implement formatToParts which add element to array as
201   // $array[$index] = { type: $field_type_string, value: $value }
202   static void AddElement(Isolate* isolate, Handle<JSArray> array, int index,
203                          Handle<String> field_type_string,
204                          Handle<String> value);
205 
206   // A helper function to implement formatToParts which add element to array as
207   // $array[$index] = {
208   //   type: $field_type_string, value: $value,
209   //   $additional_property_name: $additional_property_value
210   // }
211   static void AddElement(Isolate* isolate, Handle<JSArray> array, int index,
212                          Handle<String> field_type_string, Handle<String> value,
213                          Handle<String> additional_property_name,
214                          Handle<String> additional_property_value);
215 
216   // In ECMA 402 v1, Intl constructors supported a mode of operation
217   // where calling them with an existing object as a receiver would
218   // transform the receiver into the relevant Intl instance with all
219   // internal slots. In ECMA 402 v2, this capability was removed, to
220   // avoid adding internal slots on existing objects. In ECMA 402 v3,
221   // the capability was re-added as "normative optional" in a mode
222   // which chains the underlying Intl instance on any object, when the
223   // constructor is called
224   //
225   // See ecma402/#legacy-constructor.
226   V8_WARN_UNUSED_RESULT static MaybeHandle<Object> LegacyUnwrapReceiver(
227       Isolate* isolate, Handle<JSReceiver> receiver,
228       Handle<JSFunction> constructor, bool has_initialized_slot);
229 
230   // enum for "localeMatcher" option: shared by many Intl objects.
231   enum class MatcherOption { kBestFit, kLookup };
232 
233   // Shared function to read the "localeMatcher" option.
234   V8_WARN_UNUSED_RESULT static Maybe<MatcherOption> GetLocaleMatcher(
235       Isolate* isolate, Handle<JSReceiver> options, const char* method);
236 
237   // Shared function to read the "numberingSystem" option.
238   V8_WARN_UNUSED_RESULT static Maybe<bool> GetNumberingSystem(
239       Isolate* isolate, Handle<JSReceiver> options, const char* method,
240       std::unique_ptr<char[]>* result);
241 
242   // Check the calendar is valid or not for that locale.
243   static bool IsValidCalendar(const icu::Locale& locale,
244                               const std::string& value);
245 
246   // Check the collation is valid or not for that locale.
247   static bool IsValidCollation(const icu::Locale& locale,
248                                const std::string& value);
249 
250   // Check the numberingSystem is valid.
251   static bool IsValidNumberingSystem(const std::string& value);
252 
253   // Check the calendar is well formed.
254   static bool IsWellFormedCalendar(const std::string& value);
255 
256   // Check the currency is well formed.
257   static bool IsWellFormedCurrency(const std::string& value);
258 
259   struct ResolvedLocale {
260     std::string locale;
261     icu::Locale icu_locale;
262     std::map<std::string, std::string> extensions;
263   };
264 
265   static Maybe<ResolvedLocale> ResolveLocale(
266       Isolate* isolate, const std::set<std::string>& available_locales,
267       const std::vector<std::string>& requested_locales, MatcherOption options,
268       const std::set<std::string>& relevant_extension_keys);
269 
270   // A helper template to implement the GetAvailableLocales
271   // Usage in src/objects/js-XXX.cc
272   // const std::set<std::string>& JSXxx::GetAvailableLocales() {
273   //   static base::LazyInstance<Intl::AvailableLocales<icu::YYY>>::type
274   //       available_locales = LAZY_INSTANCE_INITIALIZER;
275   //   return available_locales.Pointer()->Get();
276   // }
277 
278   struct SkipResourceCheck {
keySkipResourceCheck279     static const char* key() { return nullptr; }
pathSkipResourceCheck280     static const char* path() { return nullptr; }
281   };
282 
283   template <typename C = SkipResourceCheck>
284   class AvailableLocales {
285    public:
AvailableLocales()286     AvailableLocales() {
287       UErrorCode status = U_ZERO_ERROR;
288       UEnumeration* uenum =
289           uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status);
290       DCHECK(U_SUCCESS(status));
291 
292       std::vector<std::string> all_locales;
293       const char* loc;
294       while ((loc = uenum_next(uenum, nullptr, &status)) != nullptr) {
295         DCHECK(U_SUCCESS(status));
296         std::string locstr(loc);
297         std::replace(locstr.begin(), locstr.end(), '_', '-');
298         // Handle special case
299         if (locstr == "en-US-POSIX") locstr = "en-US-u-va-posix";
300         all_locales.push_back(locstr);
301       }
302       uenum_close(uenum);
303 
304       set_ = Intl::BuildLocaleSet(all_locales, C::path(), C::key());
305     }
Get()306     const std::set<std::string>& Get() const { return set_; }
307 
308    private:
309     std::set<std::string> set_;
310   };
311 
312   // Utility function to set text to BreakIterator.
313   static Handle<Managed<icu::UnicodeString>> SetTextToBreakIterator(
314       Isolate* isolate, Handle<String> text,
315       icu::BreakIterator* break_iterator);
316 
317   // ecma262 #sec-string.prototype.normalize
318   V8_WARN_UNUSED_RESULT static MaybeHandle<String> Normalize(
319       Isolate* isolate, Handle<String> string, Handle<Object> form_input);
320   static base::TimezoneCache* CreateTimeZoneCache();
321 
322   // Convert a Handle<String> to icu::UnicodeString
323   static icu::UnicodeString ToICUUnicodeString(Isolate* isolate,
324                                                Handle<String> string);
325 
326   static const uint8_t* ToLatin1LowerTable();
327 
328   static String ConvertOneByteToLower(String src, String dst);
329 
330   static const std::set<std::string>& GetAvailableLocales();
331 
332   static const std::set<std::string>& GetAvailableLocalesForDateFormat();
333 };
334 
335 }  // namespace internal
336 }  // namespace v8
337 
338 #endif  // V8_OBJECTS_INTL_OBJECTS_H_
339