1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "ui/base/l10n/l10n_util.h"
6
7 #include <algorithm>
8 #include <cstdlib>
9 #include <iterator>
10 #include <memory>
11 #include <string>
12
13 #include "base/check_op.h"
14 #include "base/command_line.h"
15 #include "base/compiler_specific.h"
16 #include "base/files/file_util.h"
17 #include "base/i18n/file_util_icu.h"
18 #include "base/i18n/message_formatter.h"
19 #include "base/i18n/number_formatting.h"
20 #include "base/i18n/rtl.h"
21 #include "base/i18n/string_compare.h"
22 #include "base/lazy_instance.h"
23 #include "base/notreached.h"
24 #include "base/stl_util.h"
25 #include "base/strings/string_number_conversions.h"
26 #include "base/strings/string_split.h"
27 #include "base/strings/string_util.h"
28 #include "base/strings/stringprintf.h"
29 #include "base/strings/sys_string_conversions.h"
30 #include "base/strings/utf_string_conversions.h"
31 #include "build/build_config.h"
32 #include "third_party/icu/source/common/unicode/rbbi.h"
33 #include "third_party/icu/source/common/unicode/uloc.h"
34 #include "ui/base/l10n/l10n_util_collator.h"
35 #include "ui/base/resource/resource_bundle.h"
36 #include "ui/base/ui_base_paths.h"
37
38 #if defined(OS_ANDROID)
39 #include "base/android/locale_utils.h"
40 #include "ui/base/l10n/l10n_util_android.h"
41 #endif
42
43 #if defined(USE_GLIB)
44 #include <glib.h>
45 #endif
46
47 #if defined(OS_WIN)
48 #include "base/logging.h"
49 #include "ui/base/l10n/l10n_util_win.h"
50 #endif // OS_WIN
51
52 namespace {
53
// Language codes that can be offered as Accept-Language choices. This table
// is walked by GetAcceptLanguagesForLocale(), GetAcceptLanguages() and
// IsLanguageAccepted(), and is exposed to tests through
// GetAcceptLanguageListForTesting() / GetAcceptLanguageListSizeForTesting().
// Entries are currently listed in alphabetical order of their code; the
// trailing comment on each line gives the English name of the language.
static const char* const kAcceptLanguageList[] = {
    "af",              // Afrikaans
    "am",              // Amharic
    "an",              // Aragonese
    "ar",              // Arabic
    "ast",             // Asturian
    "az",              // Azerbaijani
    "be",              // Belarusian
    "bg",              // Bulgarian
    "bh",              // Bihari
    "bn",              // Bengali
    "br",              // Breton
    "bs",              // Bosnian
    "ca",              // Catalan
    "ceb",             // Cebuano
    "ckb",             // Kurdish (Arabic), Sorani
    "co",              // Corsican
    "cs",              // Czech
    "cy",              // Welsh
    "da",              // Danish
    "de",              // German
    "de-AT",           // German (Austria)
    "de-CH",           // German (Switzerland)
    "de-DE",           // German (Germany)
    "de-LI",           // German (Liechtenstein)
    "el",              // Greek
    "en",              // English
    "en-AU",           // English (Australia)
    "en-CA",           // English (Canada)
    "en-GB",           // English (UK)
    "en-GB-oxendict",  // English (UK, OED spelling)
    "en-IN",           // English (India)
    "en-NZ",           // English (New Zealand)
    "en-US",           // English (US)
    "en-ZA",           // English (South Africa)
    "eo",              // Esperanto
    // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
    // Spanish speaking countries?
    "es",      // Spanish
    "es-419",  // Spanish (Latin America)
    "es-AR",   // Spanish (Argentina)
    "es-CL",   // Spanish (Chile)
    "es-CO",   // Spanish (Colombia)
    "es-CR",   // Spanish (Costa Rica)
    "es-ES",   // Spanish (Spain)
    "es-HN",   // Spanish (Honduras)
    "es-MX",   // Spanish (Mexico)
    "es-PE",   // Spanish (Peru)
    "es-US",   // Spanish (US)
    "es-UY",   // Spanish (Uruguay)
    "es-VE",   // Spanish (Venezuela)
    "et",      // Estonian
    "eu",      // Basque
    "fa",      // Persian
    "fi",      // Finnish
    "fil",     // Filipino
    "fo",      // Faroese
    "fr",      // French
    "fr-CA",   // French (Canada)
    "fr-CH",   // French (Switzerland)
    "fr-FR",   // French (France)
    "fy",      // Frisian
    "ga",      // Irish
    "gd",      // Scots Gaelic
    "gl",      // Galician
    "gn",      // Guarani
    "gu",      // Gujarati
    "ha",      // Hausa
    "haw",     // Hawaiian
    "he",      // Hebrew
    "hi",      // Hindi
    "hmn",     // Hmong
    "hr",      // Croatian
    "ht",      // Haitian Creole
    "hu",      // Hungarian
    "hy",      // Armenian
    "ia",      // Interlingua
    "id",      // Indonesian
    "ig",      // Igbo
    "is",      // Icelandic
    "it",      // Italian
    "it-CH",   // Italian (Switzerland)
    "it-IT",   // Italian (Italy)
    "ja",      // Japanese
    "jv",      // Javanese
    "ka",      // Georgian
    "kk",      // Kazakh
    "km",      // Cambodian
    "kn",      // Kannada
    "ko",      // Korean
    "ku",      // Kurdish
    "ky",      // Kyrgyz
    "la",      // Latin
    "lb",      // Luxembourgish
    "ln",      // Lingala
    "lo",      // Laothian
    "lt",      // Lithuanian
    "lv",      // Latvian
    "mg",      // Malagasy
    "mi",      // Maori
    "mk",      // Macedonian
    "ml",      // Malayalam
    "mn",      // Mongolian
    "mo",      // Moldavian
    "mr",      // Marathi
    "ms",      // Malay
    "mt",      // Maltese
    "my",      // Burmese
    "nb",      // Norwegian (Bokmal)
    "ne",      // Nepali
    "nl",      // Dutch
    "nn",      // Norwegian (Nynorsk)
    "no",      // Norwegian
    "ny",      // Nyanja
    "oc",      // Occitan
    "om",      // Oromo
    "or",      // Oriya
    "pa",      // Punjabi
    "pl",      // Polish
    "ps",      // Pashto
    "pt",      // Portuguese
    "pt-BR",   // Portuguese (Brazil)
    "pt-PT",   // Portuguese (Portugal)
    "qu",      // Quechua
    "rm",      // Romansh
    "ro",      // Romanian
    "ru",      // Russian
    "sd",      // Sindhi
    "sh",      // Serbo-Croatian
    "si",      // Sinhalese
    "sk",      // Slovak
    "sl",      // Slovenian
    "sm",      // Samoan
    "sn",      // Shona
    "so",      // Somali
    "sq",      // Albanian
    "sr",      // Serbian
    "st",      // Sesotho
    "su",      // Sundanese
    "sv",      // Swedish
    "sw",      // Swahili
    "ta",      // Tamil
    "te",      // Telugu
    "tg",      // Tajik
    "th",      // Thai
    "ti",      // Tigrinya
    "tk",      // Turkmen
    "to",      // Tonga
    "tr",      // Turkish
    "tt",      // Tatar
    "tw",      // Twi
    "ug",      // Uighur
    "uk",      // Ukrainian
    "ur",      // Urdu
    "uz",      // Uzbek
    "vi",      // Vietnamese
    "wa",      // Walloon
    "xh",      // Xhosa
    "yi",      // Yiddish
    "yo",      // Yoruba
    "zh",      // Chinese
    "zh-CN",   // Chinese (China)
    "zh-HK",   // Chinese (Hong Kong)
    "zh-TW",   // Chinese (Taiwan)
    "zu",      // Zulu
};
220
221 // Returns true if |locale_name| has an alias in the ICU data file.
IsDuplicateName(const std::string & locale_name)222 bool IsDuplicateName(const std::string& locale_name) {
223 static const char* const kDuplicateNames[] = {
224 "ar_001",
225 "en",
226 "en_001",
227 "en_150",
228 "pt", // pt-BR and pt-PT are used.
229 "zh",
230 "zh_hans_cn",
231 "zh_hant_hk",
232 "zh_hant_mo",
233 "zh_hans_sg",
234 "zh_hant_tw"
235 };
236
237 // Skip all the es_Foo other than es_419 for now.
238 if (base::StartsWith(locale_name, "es_",
239 base::CompareCase::INSENSITIVE_ASCII)) {
240 return !base::EndsWith(locale_name, "419", base::CompareCase::SENSITIVE);
241 }
242 for (const char* duplicate_name : kDuplicateNames) {
243 if (base::EqualsCaseInsensitiveASCII(duplicate_name, locale_name))
244 return true;
245 }
246 return false;
247 }
248
249 // We added 30+ minimally populated locales with only a few entries
250 // (exemplar character set, script, writing direction and its own
251 // lanaguage name). These locales have to be distinguished from the
252 // fully populated locales to which Chrome is localized.
IsLocalePartiallyPopulated(const std::string & locale_name)253 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
254 // For partially populated locales, even the translation for "English"
255 // is not available. A more robust/elegant way to check is to add a special
256 // field (say, 'isPartial' to our version of ICU locale files) and
257 // check its value, but this hack seems to work well.
258 return !l10n_util::IsLocaleNameTranslated("en", locale_name);
259 }
260
261 #if !defined(OS_APPLE)
IsLocaleAvailable(const std::string & locale)262 bool IsLocaleAvailable(const std::string& locale) {
263 // If locale has any illegal characters in it, we don't want to try to
264 // load it because it may be pointing outside the locale data file directory.
265 if (!base::i18n::IsFilenameLegal(base::ASCIIToUTF16(locale)))
266 return false;
267
268 // IsLocalePartiallyPopulated() can be called here for an early return w/o
269 // checking the resource availability below. It'd help when Chrome is run
270 // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
271 // but it'd slow down the start up time a little bit for locales Chrome is
272 // localized to. So, we don't call it here.
273 return ui::ResourceBundle::LocaleDataPakExists(locale);
274 }
275 #endif
276
277 // On Linux, the text layout engine Pango determines paragraph directionality
278 // by looking at the first strongly-directional character in the text. This
279 // means text such as "Google Chrome foo bar..." will be layed out LTR even
280 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
281 // cases.
AdjustParagraphDirectionality(base::string16 * paragraph)282 void AdjustParagraphDirectionality(base::string16* paragraph) {
283 #if defined(OS_POSIX) && !defined(OS_APPLE) && !defined(OS_ANDROID)
284 if (base::i18n::IsRTL() &&
285 base::i18n::StringContainsStrongRTLChars(*paragraph)) {
286 paragraph->insert(0, 1,
287 static_cast<base::char16>(base::i18n::kRightToLeftMark));
288 }
289 #endif
290 }
291
292 struct AvailableLocalesTraits
293 : base::internal::DestructorAtExitLazyInstanceTraits<
294 std::vector<std::string>> {
New__anon2c2479320111::AvailableLocalesTraits295 static std::vector<std::string>* New(void* instance) {
296 std::vector<std::string>* locales =
297 base::internal::DestructorAtExitLazyInstanceTraits<
298 std::vector<std::string>>::New(instance);
299 int num_locales = uloc_countAvailable();
300 for (int i = 0; i < num_locales; ++i) {
301 std::string locale_name = uloc_getAvailable(i);
302 // Filter out the names that have aliases.
303 if (IsDuplicateName(locale_name))
304 continue;
305 // Filter out locales for which we have only partially populated data
306 // and to which Chrome is not localized.
307 if (IsLocalePartiallyPopulated(locale_name))
308 continue;
309 // Normalize underscores to hyphens because that's what our locale files
310 // use.
311 std::replace(locale_name.begin(), locale_name.end(), '_', '-');
312
313 // Map the Chinese locale names over to zh-CN and zh-TW.
314 if (base::LowerCaseEqualsASCII(locale_name, "zh-hans")) {
315 locale_name = "zh-CN";
316 } else if (base::LowerCaseEqualsASCII(locale_name, "zh-hant")) {
317 locale_name = "zh-TW";
318 }
319 locales->push_back(locale_name);
320 }
321
322 return locales;
323 }
324 };
325
// LazyInstance holding the locale list produced by
// AvailableLocalesTraits::New(); it is read through GetAvailableLocales().
base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
    g_available_locales = LAZY_INSTANCE_INITIALIZER;
328
329 } // namespace
330
331 namespace l10n_util {
332
// Returns the part of |locale| that precedes the first hyphen, i.e. the
// language subtag of a hyphen-separated locale such as "en-US". A locale
// without a hyphen is returned unchanged.
std::string GetLanguage(const std::string& locale) {
  // find() yields npos when there is no hyphen, and substr() treats an npos
  // count as "to the end of the string".
  return locale.substr(0, locale.find('-'));
}
337
338 // TODO(jshin): revamp this function completely to use a more sytematic
339 // and generic locale fallback based on ICU/CLDR.
CheckAndResolveLocale(const std::string & locale,std::string * resolved_locale)340 bool CheckAndResolveLocale(const std::string& locale,
341 std::string* resolved_locale) {
342 #if !defined(OS_APPLE)
343 if (IsLocaleAvailable(locale)) {
344 *resolved_locale = locale;
345 return true;
346 }
347
348 // If there's a variant, skip over it so we can try without the region
349 // code. For example, ca_ES@valencia should cause us to try ca@valencia
350 // before ca.
351 std::string::size_type variant_pos = locale.find('@');
352 if (variant_pos != std::string::npos)
353 return false;
354
355 // If the locale matches language but not country, use that instead.
356 // TODO(jungshik) : Nothing is done about languages that Chrome
357 // does not support but available on Windows. We fall
358 // back to en-US in GetApplicationLocale so that it's a not critical,
359 // but we can do better.
360 const std::string lang(GetLanguage(locale));
361 if (lang.size() < locale.size()) {
362 std::string region(locale, lang.size() + 1);
363 std::string tmp_locale(lang);
364 // Map es-RR other than es-ES to es-419 (Chrome's Latin American
365 // Spanish locale).
366 if (base::LowerCaseEqualsASCII(lang, "es") &&
367 !base::LowerCaseEqualsASCII(region, "es")) {
368 tmp_locale.append("-419");
369 } else if (base::LowerCaseEqualsASCII(lang, "pt")) {
370 // Map pt-RR other than pt-BR to pt-PT. Note that "pt" by itself maps to
371 // pt-BR (logic below).
372 tmp_locale.append("-PT");
373 } else if (base::LowerCaseEqualsASCII(lang, "zh")) {
374 // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
375 if (base::LowerCaseEqualsASCII(region, "hk") ||
376 base::LowerCaseEqualsASCII(region, "mo")) { // Macao
377 tmp_locale.append("-TW");
378 } else {
379 tmp_locale.append("-CN");
380 }
381 } else if (base::LowerCaseEqualsASCII(lang, "en")) {
382 // Map Liberian and Filipino English to US English, and everything
383 // else to British English.
384 // TODO(jungshik): en-CA may have to change sides once
385 // we have OS locale separate from app locale (Chrome's UI language).
386 if (base::LowerCaseEqualsASCII(region, "lr") ||
387 base::LowerCaseEqualsASCII(region, "ph")) {
388 tmp_locale.append("-US");
389 } else {
390 tmp_locale.append("-GB");
391 }
392 }
393 if (IsLocaleAvailable(tmp_locale)) {
394 resolved_locale->swap(tmp_locale);
395 return true;
396 }
397 }
398
399 // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
400 // Note that pt-RR is mapped to pt-PT above, but we want pt -> pt-BR here.
401 struct {
402 const char* source;
403 const char* dest;
404 } static constexpr kAliasMap[] = {
405 {"en", "en-US"}, {"iw", "he"}, {"no", "nb"},
406 {"pt", "pt-BR"}, {"tl", "fil"}, {"zh", "zh-CN"},
407 };
408 for (const auto& alias : kAliasMap) {
409 if (base::LowerCaseEqualsASCII(lang, alias.source)) {
410 std::string tmp_locale(alias.dest);
411 if (IsLocaleAvailable(tmp_locale)) {
412 resolved_locale->swap(tmp_locale);
413 return true;
414 }
415 }
416 }
417 #else
418 NOTIMPLEMENTED();
419 #endif // !defined(OS_APPLE)
420
421 return false;
422 }
423
#if defined(OS_APPLE)
// Picks the application locale on Apple platforms. The first non-empty value
// wins, in this order: the locale override (Cocoa for the browser), the
// |pref_locale| passed in, and finally "en-US".
std::string GetApplicationLocaleInternalMac(const std::string& pref_locale) {
  const std::string override_locale = l10n_util::GetLocaleOverride();
  if (!override_locale.empty())
    return override_locale;

  if (!pref_locale.empty())
    return pref_locale;

  // The above should handle all of the cases Chrome normally hits, but some
  // unit tests need something to fall back to.
  return "en-US";
}
#endif
440
441 #if !defined(OS_APPLE)
GetApplicationLocaleInternalNonMac(const std::string & pref_locale)442 std::string GetApplicationLocaleInternalNonMac(const std::string& pref_locale) {
443 std::string resolved_locale;
444 std::vector<std::string> candidates;
445
446 // We only use --lang and the app pref on Windows. On Linux, we only
447 // look at the LC_*/LANG environment variables. We do, however, pass --lang
448 // to renderer and plugin processes so they know what language the parent
449 // process decided to use.
450
451 #if defined(OS_WIN)
452 // First, try the preference value.
453 if (!pref_locale.empty())
454 candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));
455
456 // Next, try the overridden locale.
457 const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
458 if (!languages.empty()) {
459 candidates.reserve(candidates.size() + languages.size());
460 std::transform(languages.begin(), languages.end(),
461 std::back_inserter(candidates),
462 &base::i18n::GetCanonicalLocale);
463 } else {
464 // If no override was set, defer to ICU
465 candidates.push_back(base::i18n::GetConfiguredLocale());
466 }
467 #elif defined(OS_ANDROID)
468 // Try pref_locale first.
469 if (!pref_locale.empty())
470 candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));
471
472 // On Android, query java.util.Locale for the default locale.
473 candidates.push_back(base::android::GetDefaultLocaleString());
474 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS)
475 // GLib implements correct environment variable parsing with
476 // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
477 // We used to use our custom parsing code along with ICU for this purpose.
478 // If we have a port that does not depend on GTK, we have to
479 // restore our custom code for that port.
480 const char* const* languages = g_get_language_names();
481 DCHECK(languages); // A valid pointer is guaranteed.
482 DCHECK(*languages); // At least one entry, "C", is guaranteed.
483
484 for (; *languages; ++languages) {
485 candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
486 }
487 #else
488 // By default, use the application locale preference. This applies to ChromeOS
489 // and linux systems without glib.
490 if (!pref_locale.empty())
491 candidates.push_back(pref_locale);
492 #endif // defined(OS_WIN)
493
494 std::vector<std::string>::const_iterator i = candidates.begin();
495 for (; i != candidates.end(); ++i) {
496 if (CheckAndResolveLocale(*i, &resolved_locale)) {
497 return resolved_locale;
498 }
499 }
500
501 // Fallback on en-US.
502 const std::string fallback_locale("en-US");
503 if (IsLocaleAvailable(fallback_locale))
504 return fallback_locale;
505
506 return std::string();
507 }
508 #endif // !defined(OS_APPLE)
509
GetApplicationLocaleInternal(const std::string & pref_locale)510 std::string GetApplicationLocaleInternal(const std::string& pref_locale) {
511 #if defined(OS_APPLE)
512 return GetApplicationLocaleInternalMac(pref_locale);
513 #else
514 return GetApplicationLocaleInternalNonMac(pref_locale);
515 #endif
516 }
517
GetApplicationLocale(const std::string & pref_locale,bool set_icu_locale)518 std::string GetApplicationLocale(const std::string& pref_locale,
519 bool set_icu_locale) {
520 const std::string locale = GetApplicationLocaleInternal(pref_locale);
521 if (set_icu_locale && !locale.empty())
522 base::i18n::SetICUDefaultLocale(locale);
523 return locale;
524 }
525
GetApplicationLocale(const std::string & pref_locale)526 std::string GetApplicationLocale(const std::string& pref_locale) {
527 return GetApplicationLocale(pref_locale, true /* set_icu_locale */);
528 }
529
IsLocaleNameTranslated(const char * locale,const std::string & display_locale)530 bool IsLocaleNameTranslated(const char* locale,
531 const std::string& display_locale) {
532 base::string16 display_name =
533 l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
534 // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
535 // uloc_getDisplayName returns the actual translation or the default
536 // value (locale code), we have to rely on this hack to tell whether
537 // the translation is available or not. If ICU doesn't have a translated
538 // name for this locale, GetDisplayNameForLocale will just return the
539 // locale code.
540 return !base::IsStringASCII(display_name) ||
541 base::UTF16ToASCII(display_name) != locale;
542 }
543
GetDisplayNameForLocale(const std::string & locale,const std::string & display_locale,bool is_for_ui,bool disallow_default)544 base::string16 GetDisplayNameForLocale(const std::string& locale,
545 const std::string& display_locale,
546 bool is_for_ui,
547 bool disallow_default) {
548 std::string locale_code = locale;
549 // Internally, we use the language code of zh-CN and zh-TW, but we want the
550 // display names to be Chinese (Simplified) and Chinese (Traditional) instead
551 // of Chinese (China) and Chinese (Taiwan).
552 // Translate uses "tl" (Tagalog) to mean "fil" (Filipino) until Google
553 // translate is changed to understand "fil". Make "tl" alias to "fil".
554 if (locale_code == "zh-CN")
555 locale_code = "zh-Hans";
556 else if (locale_code == "zh-TW")
557 locale_code = "zh-Hant";
558 else if (locale_code == "tl")
559 locale_code = "fil";
560 else if (locale_code == "mo")
561 locale_code = "ro-MD";
562
563 base::string16 display_name;
564 #if defined(OS_IOS)
565 // Use the Foundation API to get the localized display name, removing the need
566 // for the ICU data file to include this data.
567 display_name = GetDisplayNameForLocale(locale_code, display_locale);
568 #else
569 #if defined(OS_ANDROID)
570 // Use Java API to get locale display name so that we can remove most of
571 // the lang data from icu data to reduce binary size, except for zh-Hans and
572 // zh-Hant because the current Android Java API doesn't support scripts.
573 // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
574 // Android Java API supports scripts.
575 if (!base::StartsWith(locale_code, "zh-Han", base::CompareCase::SENSITIVE)) {
576 display_name = GetDisplayNameForLocale(locale_code, display_locale);
577 } else
578 #endif // defined(OS_ANDROID)
579 {
580 UErrorCode error = U_ZERO_ERROR;
581 const int kBufferSize = 1024;
582
583 int actual_size;
584 // For Country code in ICU64 we need to call uloc_getDisplayCountry
585 if (locale_code[0] == '-' || locale_code[0] == '_') {
586 actual_size = uloc_getDisplayCountry(
587 locale_code.c_str(), display_locale.c_str(),
588 base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
589 } else {
590 actual_size = uloc_getDisplayName(
591 locale_code.c_str(), display_locale.c_str(),
592 base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
593 }
594 if (disallow_default && U_USING_DEFAULT_WARNING == error)
595 return base::string16();
596 DCHECK(U_SUCCESS(error));
597 display_name.resize(actual_size);
598 }
599 #endif // defined(OS_IOS)
600
601 // Add directional markup so parentheses are properly placed.
602 if (is_for_ui && base::i18n::IsRTL())
603 base::i18n::AdjustStringForLocaleDirection(&display_name);
604 return display_name;
605 }
606
GetDisplayNameForCountry(const std::string & country_code,const std::string & display_locale)607 base::string16 GetDisplayNameForCountry(const std::string& country_code,
608 const std::string& display_locale) {
609 return GetDisplayNameForLocale("_" + country_code, display_locale, false);
610 }
611
// Returns a copy of |locale| in which every hyphen has been replaced by an
// underscore. All other characters, and the length, are left as they are.
std::string NormalizeLocale(const std::string& locale) {
  std::string underscored;
  underscored.reserve(locale.size());
  for (const char ch : locale)
    underscored.push_back(ch == '-' ? '_' : ch);
  return underscored;
}
618
GetParentLocales(const std::string & current_locale,std::vector<std::string> * parent_locales)619 void GetParentLocales(const std::string& current_locale,
620 std::vector<std::string>* parent_locales) {
621 std::string locale(NormalizeLocale(current_locale));
622
623 const int kNameCapacity = 256;
624 char parent[kNameCapacity];
625 base::strlcpy(parent, locale.c_str(), kNameCapacity);
626 parent_locales->push_back(parent);
627 UErrorCode err = U_ZERO_ERROR;
628 while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
629 if (U_FAILURE(err))
630 break;
631 parent_locales->push_back(parent);
632 }
633 }
634
IsValidLocaleSyntax(const std::string & locale)635 bool IsValidLocaleSyntax(const std::string& locale) {
636 // Check that the length is plausible.
637 if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
638 return false;
639
640 // Strip off the part after an '@' sign, which might contain keywords,
641 // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
642 // We don't validate that part much, just check that there's at least one
643 // equals sign in a plausible place. Normalize the prefix so that hyphens
644 // are changed to underscores.
645 std::string prefix = NormalizeLocale(locale);
646 size_t split_point = locale.find("@");
647 if (split_point != std::string::npos) {
648 std::string keywords = locale.substr(split_point + 1);
649 prefix = locale.substr(0, split_point);
650
651 size_t equals_loc = keywords.find("=");
652 if (equals_loc == 0 || equals_loc == std::string::npos ||
653 equals_loc > keywords.size() - 2) {
654 return false;
655 }
656 }
657
658 // Check that all characters before the at-sign are alphanumeric or
659 // underscore.
660 for (char ch : prefix) {
661 if (!base::IsAsciiAlpha(ch) && !base::IsAsciiDigit(ch) && ch != '_')
662 return false;
663 }
664
665 // Check that the initial token (before the first hyphen/underscore)
666 // is 1 - 3 alphabetical characters (a language tag).
667 for (size_t i = 0; i < prefix.size(); i++) {
668 char ch = prefix[i];
669 if (ch == '_') {
670 if (i < 1 || i > 3)
671 return false;
672 break;
673 }
674 if (!base::IsAsciiAlpha(ch))
675 return false;
676 }
677
678 // Check that the all tokens after the initial token are 1 - 8 characters.
679 // (Tokenize/StringTokenizer don't work here, they collapse multiple
680 // delimiters into one.)
681 int token_len = 0;
682 int token_index = 0;
683 for (char ch : prefix) {
684 if (ch != '_') {
685 token_len++;
686 continue;
687 }
688
689 if (token_index > 0 && (token_len < 1 || token_len > 8)) {
690 return false;
691 }
692 token_index++;
693 token_len = 0;
694 }
695 if (token_index == 0 && (token_len < 1 || token_len > 3))
696 return false;
697 if (token_len < 1 || token_len > 8)
698 return false;
699
700 return true;
701 }
702
GetStringUTF8(int message_id)703 std::string GetStringUTF8(int message_id) {
704 return base::UTF16ToUTF8(GetStringUTF16(message_id));
705 }
706
GetStringUTF16(int message_id)707 base::string16 GetStringUTF16(int message_id) {
708 ui::ResourceBundle& rb = ui::ResourceBundle::GetSharedInstance();
709 base::string16 str = rb.GetLocalizedString(message_id);
710 AdjustParagraphDirectionality(&str);
711
712 return str;
713 }
714
GetStringFUTF16(int message_id,const std::vector<base::string16> & replacements,std::vector<size_t> * offsets)715 base::string16 GetStringFUTF16(int message_id,
716 const std::vector<base::string16>& replacements,
717 std::vector<size_t>* offsets) {
718 // TODO(tc): We could save a string copy if we got the raw string as
719 // a StringPiece and were able to call ReplaceStringPlaceholders with
720 // a StringPiece format string and base::string16 substitution strings. In
721 // practice, the strings should be relatively short.
722 ui::ResourceBundle& rb = ui::ResourceBundle::GetSharedInstance();
723 const base::string16& format_string = rb.GetLocalizedString(message_id);
724
725 #if DCHECK_IS_ON()
726 // Make sure every replacement string is being used, so we don't just
727 // silently fail to insert one. If |offsets| is non-NULL, then don't do this
728 // check as the code may simply want to find the placeholders rather than
729 // actually replacing them.
730 if (!offsets) {
731 // $9 is the highest allowed placeholder.
732 for (size_t i = 0; i < 9; ++i) {
733 bool placeholder_should_exist = replacements.size() > i;
734
735 base::string16 placeholder = base::ASCIIToUTF16("$");
736 placeholder += (L'1' + i);
737 size_t pos = format_string.find(placeholder);
738 if (placeholder_should_exist) {
739 DCHECK_NE(std::string::npos, pos) << " Didn't find a " << placeholder
740 << " placeholder in "
741 << format_string;
742 } else {
743 DCHECK_EQ(std::string::npos, pos) << " Unexpectedly found a "
744 << placeholder << " placeholder in "
745 << format_string;
746 }
747 }
748 }
749 #endif
750
751 base::string16 formatted = base::ReplaceStringPlaceholders(
752 format_string, replacements, offsets);
753 AdjustParagraphDirectionality(&formatted);
754
755 return formatted;
756 }
757
GetStringFUTF8(int message_id,const base::string16 & a)758 std::string GetStringFUTF8(int message_id,
759 const base::string16& a) {
760 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
761 }
762
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b)763 std::string GetStringFUTF8(int message_id,
764 const base::string16& a,
765 const base::string16& b) {
766 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
767 }
768
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c)769 std::string GetStringFUTF8(int message_id,
770 const base::string16& a,
771 const base::string16& b,
772 const base::string16& c) {
773 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
774 }
775
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c,const base::string16 & d)776 std::string GetStringFUTF8(int message_id,
777 const base::string16& a,
778 const base::string16& b,
779 const base::string16& c,
780 const base::string16& d) {
781 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
782 }
783
// Positional-argument flavours of GetStringFUTF16(): each overload packs its
// one to five replacement strings into a vector, in argument order, and
// calls the vector-based overload with no offsets requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a) {
  const std::vector<base::string16> substitutions(1, a);
  return GetStringFUTF16(message_id, substitutions, nullptr);
}

base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b) {
  const std::vector<base::string16> substitutions{a, b};
  return GetStringFUTF16(message_id, substitutions, nullptr);
}

base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c) {
  const std::vector<base::string16> substitutions{a, b, c};
  return GetStringFUTF16(message_id, substitutions, nullptr);
}

base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d) {
  const std::vector<base::string16> substitutions{a, b, c, d};
  return GetStringFUTF16(message_id, substitutions, nullptr);
}

base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d,
                               const base::string16& e) {
  const std::vector<base::string16> substitutions{a, b, c, d, e};
  return GetStringFUTF16(message_id, substitutions, nullptr);
}
822
// Formats |message_id| with the single replacement |a| and stores in
// |*offset| the first offset reported by the vector-based overload.
// |offset| must not be null, and exactly one offset is expected (both are
// DCHECKed).
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               size_t* offset) {
  DCHECK(offset);
  std::vector<size_t> offsets;
  base::string16 formatted = GetStringFUTF16(
      message_id, std::vector<base::string16>(1, a), &offsets);
  DCHECK_EQ(1u, offsets.size());
  *offset = offsets.front();
  return formatted;
}
834
// Formats |message_id| with the two replacements |a| and |b|, forwarding
// |offsets| unchanged to the vector-based overload.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               std::vector<size_t>* offsets) {
  const std::vector<base::string16> substitutions{a, b};
  return GetStringFUTF16(message_id, substitutions, offsets);
}
842
GetStringFUTF16Int(int message_id,int a)843 base::string16 GetStringFUTF16Int(int message_id, int a) {
844 return GetStringFUTF16(message_id, base::FormatNumber(a));
845 }
846
GetStringFUTF16Int(int message_id,int64_t a)847 base::string16 GetStringFUTF16Int(int message_id, int64_t a) {
848 return GetStringFUTF16(message_id, base::FormatNumber(a));
849 }
850
GetPluralStringFUTF16(int message_id,int number)851 base::string16 GetPluralStringFUTF16(int message_id, int number) {
852 return base::i18n::MessageFormatter::FormatWithNumberedArgs(
853 GetStringUTF16(message_id), number);
854 }
855
GetPluralStringFUTF8(int message_id,int number)856 std::string GetPluralStringFUTF8(int message_id, int number) {
857 return base::UTF16ToUTF8(GetPluralStringFUTF16(message_id, number));
858 }
859
GetSingleOrMultipleStringUTF16(int message_id,bool is_multiple)860 base::string16 GetSingleOrMultipleStringUTF16(int message_id,
861 bool is_multiple) {
862 return base::i18n::MessageFormatter::FormatWithNumberedArgs(
863 GetStringUTF16(message_id), is_multiple ? "multiple" : "single");
864 }
865
// Sorts |strings| in place for |locale| by delegating to
// SortVectorWithStringKey().
void SortStrings16(const std::string& locale,
                   std::vector<base::string16>* strings) {
  // NOTE(review): the trailing |false| is presumably the helper's
  // stable-sort switch -- confirm against l10n_util_collator.h.
  SortVectorWithStringKey(locale, strings, false);
}
870
GetAvailableLocales()871 const std::vector<std::string>& GetAvailableLocales() {
872 return g_available_locales.Get();
873 }
874
GetAcceptLanguagesForLocale(const std::string & display_locale,std::vector<std::string> * locale_codes)875 void GetAcceptLanguagesForLocale(const std::string& display_locale,
876 std::vector<std::string>* locale_codes) {
877 for (const char* accept_language : kAcceptLanguageList) {
878 if (!l10n_util::IsLocaleNameTranslated(accept_language, display_locale)) {
879 // TODO(jungshik) : Put them at the end of the list with language codes
880 // enclosed by brackets instead of skipping.
881 continue;
882 }
883 locale_codes->push_back(accept_language);
884 }
885 }
886
GetAcceptLanguages(std::vector<std::string> * locale_codes)887 void GetAcceptLanguages(std::vector<std::string>* locale_codes) {
888 for (const char* accept_language : kAcceptLanguageList) {
889 locale_codes->push_back(accept_language);
890 }
891 }
892
IsLanguageAccepted(const std::string & display_locale,const std::string & locale)893 bool IsLanguageAccepted(const std::string& display_locale,
894 const std::string& locale) {
895 for (const char* accept_language : kAcceptLanguageList) {
896 if (accept_language == locale &&
897 l10n_util::IsLocaleNameTranslated(locale.c_str(), display_locale)) {
898 return true;
899 }
900 }
901 return false;
902 }
903
GetLocalizedContentsWidthInPixels(int pixel_resource_id)904 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
905 int width = 0;
906 base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
907 DCHECK_GT(width, 0);
908 return width;
909 }
910
GetAcceptLanguageListForTesting()911 const char* const* GetAcceptLanguageListForTesting() {
912 return kAcceptLanguageList;
913 }
914
GetAcceptLanguageListSizeForTesting()915 size_t GetAcceptLanguageListSizeForTesting() {
916 return base::size(kAcceptLanguageList);
917 }
918
919 } // namespace l10n_util
920