1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "ui/base/l10n/l10n_util.h"
6
7 #include <algorithm>
8 #include <cstdlib>
9 #include <iterator>
10 #include <memory>
11 #include <string>
12
13 #include "base/command_line.h"
14 #include "base/compiler_specific.h"
15 #include "base/files/file_util.h"
16 #include "base/i18n/file_util_icu.h"
17 #include "base/i18n/message_formatter.h"
18 #include "base/i18n/number_formatting.h"
19 #include "base/i18n/rtl.h"
20 #include "base/i18n/string_compare.h"
21 #include "base/lazy_instance.h"
22 #include "base/stl_util.h"
23 #include "base/strings/string_number_conversions.h"
24 #include "base/strings/string_split.h"
25 #include "base/strings/string_util.h"
26 #include "base/strings/stringprintf.h"
27 #include "base/strings/sys_string_conversions.h"
28 #include "base/strings/utf_string_conversions.h"
29 #include "build/build_config.h"
30 #include "third_party/icu/source/common/unicode/rbbi.h"
31 #include "third_party/icu/source/common/unicode/uloc.h"
32 #include "ui/base/l10n/l10n_util_collator.h"
33 #include "ui/base/resource/resource_bundle.h"
34 #include "ui/base/ui_base_paths.h"
35
36 #if defined(OS_ANDROID)
37 #include "base/android/locale_utils.h"
38 #include "ui/base/l10n/l10n_util_android.h"
39 #endif
40
41 #if defined(USE_GLIB)
42 #include <glib.h>
43 #endif
44
45 #if defined(OS_WIN)
46 #include "ui/base/l10n/l10n_util_win.h"
47 #endif // OS_WIN
48
49 namespace {
50
// Language codes consulted by GetAcceptLanguagesForLocale() and
// IsLanguageAccepted() below. Each entry is a language code, optionally
// followed by a hyphen and a region code; the trailing comment gives the
// English name of the language.
static const char* const kAcceptLanguageList[] = {
  "af",     // Afrikaans
  "am",     // Amharic
  "an",     // Aragonese
  "ar",     // Arabic
  "ast",    // Asturian
  "az",     // Azerbaijani
  "be",     // Belarusian
  "bg",     // Bulgarian
  "bh",     // Bihari
  "bn",     // Bengali
  "br",     // Breton
  "bs",     // Bosnian
  "ca",     // Catalan
  "ceb",    // Cebuano
  "ckb",    // Kurdish (Arabic), Sorani
  "co",     // Corsican
  "cs",     // Czech
  "cy",     // Welsh
  "da",     // Danish
  "de",     // German
  "de-AT",  // German (Austria)
  "de-CH",  // German (Switzerland)
  "de-DE",  // German (Germany)
  "de-LI",  // German (Liechtenstein)
  "el",     // Greek
  "en",     // English
  "en-AU",  // English (Australia)
  "en-CA",  // English (Canada)
  "en-GB",  // English (UK)
  "en-IN",  // English (India)
  "en-NZ",  // English (New Zealand)
  "en-US",  // English (US)
  "en-ZA",  // English (South Africa)
  "eo",     // Esperanto
  // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
  // Spanish speaking countries?
  "es",     // Spanish
  "es-419", // Spanish (Latin America)
  "es-AR",  // Spanish (Argentina)
  "es-CL",  // Spanish (Chile)
  "es-CO",  // Spanish (Colombia)
  "es-CR",  // Spanish (Costa Rica)
  "es-ES",  // Spanish (Spain)
  "es-HN",  // Spanish (Honduras)
  "es-MX",  // Spanish (Mexico)
  "es-PE",  // Spanish (Peru)
  "es-US",  // Spanish (US)
  "es-UY",  // Spanish (Uruguay)
  "es-VE",  // Spanish (Venezuela)
  "et",     // Estonian
  "eu",     // Basque
  "fa",     // Persian
  "fi",     // Finnish
  "fil",    // Filipino
  "fo",     // Faroese
  "fr",     // French
  "fr-CA",  // French (Canada)
  "fr-CH",  // French (Switzerland)
  "fr-FR",  // French (France)
  "fy",     // Frisian
  "ga",     // Irish
  "gd",     // Scots Gaelic
  "gl",     // Galician
  "gn",     // Guarani
  "gu",     // Gujarati
  "ha",     // Hausa
  "haw",    // Hawaiian
  "he",     // Hebrew
  "hi",     // Hindi
  "hmn",    // Hmong
  "hr",     // Croatian
  "ht",     // Haitian Creole
  "hu",     // Hungarian
  "hy",     // Armenian
  "ia",     // Interlingua
  "id",     // Indonesian
  "ig",     // Igbo
  "is",     // Icelandic
  "it",     // Italian
  "it-CH",  // Italian (Switzerland)
  "it-IT",  // Italian (Italy)
  "ja",     // Japanese
  "jv",     // Javanese
  "ka",     // Georgian
  "kk",     // Kazakh
  "km",     // Cambodian
  "kn",     // Kannada
  "ko",     // Korean
  "ku",     // Kurdish
  "ky",     // Kyrgyz
  "la",     // Latin
  "lb",     // Luxembourgish
  "ln",     // Lingala
  "lo",     // Laothian
  "lt",     // Lithuanian
  "lv",     // Latvian
  "mg",     // Malagasy
  "mi",     // Maori
  "mk",     // Macedonian
  "ml",     // Malayalam
  "mn",     // Mongolian
  "mo",     // Moldavian
  "mr",     // Marathi
  "ms",     // Malay
  "mt",     // Maltese
  "my",     // Burmese
  "nb",     // Norwegian (Bokmal)
  "ne",     // Nepali
  "nl",     // Dutch
  "nn",     // Norwegian (Nynorsk)
  "no",     // Norwegian
  "ny",     // Nyanja
  "oc",     // Occitan
  "om",     // Oromo
  "or",     // Oriya
  "pa",     // Punjabi
  "pl",     // Polish
  "ps",     // Pashto
  "pt",     // Portuguese
  "pt-BR",  // Portuguese (Brazil)
  "pt-PT",  // Portuguese (Portugal)
  "qu",     // Quechua
  "rm",     // Romansh
  "ro",     // Romanian
  "ru",     // Russian
  "sd",     // Sindhi
  "sh",     // Serbo-Croatian
  "si",     // Sinhalese
  "sk",     // Slovak
  "sl",     // Slovenian
  "sm",     // Samoan
  "sn",     // Shona
  "so",     // Somali
  "sq",     // Albanian
  "sr",     // Serbian
  "st",     // Sesotho
  "su",     // Sundanese
  "sv",     // Swedish
  "sw",     // Swahili
  "ta",     // Tamil
  "te",     // Telugu
  "tg",     // Tajik
  "th",     // Thai
  "ti",     // Tigrinya
  "tk",     // Turkmen
  "to",     // Tonga
  "tr",     // Turkish
  "tt",     // Tatar
  "tw",     // Twi
  "ug",     // Uighur
  "uk",     // Ukrainian
  "ur",     // Urdu
  "uz",     // Uzbek
  "vi",     // Vietnamese
  "wa",     // Walloon
  "xh",     // Xhosa
  "yi",     // Yiddish
  "yo",     // Yoruba
  "zh",     // Chinese
  "zh-CN",  // Chinese (China)
  "zh-HK",  // Chinese (Hong Kong)
  "zh-TW",  // Chinese (Taiwan)
  "zu",     // Zulu
};
216
217 // Returns true if |locale_name| has an alias in the ICU data file.
IsDuplicateName(const std::string & locale_name)218 bool IsDuplicateName(const std::string& locale_name) {
219 static const char* const kDuplicateNames[] = {
220 "ar_001",
221 "en",
222 "en_001",
223 "en_150",
224 "pt", // pt-BR and pt-PT are used.
225 "zh",
226 "zh_hans_cn",
227 "zh_hant_hk",
228 "zh_hant_mo",
229 "zh_hans_sg",
230 "zh_hant_tw"
231 };
232
233 // Skip all the es_Foo other than es_419 for now.
234 if (base::StartsWith(locale_name, "es_",
235 base::CompareCase::INSENSITIVE_ASCII)) {
236 return !base::EndsWith(locale_name, "419", base::CompareCase::SENSITIVE);
237 }
238 for (const char* duplicate_name : kDuplicateNames) {
239 if (base::EqualsCaseInsensitiveASCII(duplicate_name, locale_name))
240 return true;
241 }
242 return false;
243 }
244
245 // We added 30+ minimally populated locales with only a few entries
246 // (exemplar character set, script, writing direction and its own
247 // lanaguage name). These locales have to be distinguished from the
248 // fully populated locales to which Chrome is localized.
IsLocalePartiallyPopulated(const std::string & locale_name)249 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
250 // For partially populated locales, even the translation for "English"
251 // is not available. A more robust/elegant way to check is to add a special
252 // field (say, 'isPartial' to our version of ICU locale files) and
253 // check its value, but this hack seems to work well.
254 return !l10n_util::IsLocaleNameTranslated("en", locale_name);
255 }
256
257 #if !defined(OS_MACOSX) || defined(TOOLKIT_QT)
IsLocaleAvailable(const std::string & locale)258 bool IsLocaleAvailable(const std::string& locale) {
259 // If locale has any illegal characters in it, we don't want to try to
260 // load it because it may be pointing outside the locale data file directory.
261 if (!base::i18n::IsFilenameLegal(base::ASCIIToUTF16(locale)))
262 return false;
263
264 // IsLocalePartiallyPopulated() can be called here for an early return w/o
265 // checking the resource availability below. It'd help when Chrome is run
266 // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
267 // but it'd slow down the start up time a little bit for locales Chrome is
268 // localized to. So, we don't call it here.
269 if (!l10n_util::IsLocaleSupportedByOS(locale))
270 return false;
271
272 return ui::ResourceBundle::LocaleDataPakExists(locale);
273 }
274 #endif
275
276 // On Linux, the text layout engine Pango determines paragraph directionality
277 // by looking at the first strongly-directional character in the text. This
278 // means text such as "Google Chrome foo bar..." will be layed out LTR even
279 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
280 // cases.
AdjustParagraphDirectionality(base::string16 * paragraph)281 void AdjustParagraphDirectionality(base::string16* paragraph) {
282 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
283 if (base::i18n::IsRTL() &&
284 base::i18n::StringContainsStrongRTLChars(*paragraph)) {
285 paragraph->insert(0, 1,
286 static_cast<base::char16>(base::i18n::kRightToLeftMark));
287 }
288 #endif
289 }
290
291 struct AvailableLocalesTraits
292 : base::internal::DestructorAtExitLazyInstanceTraits<
293 std::vector<std::string>> {
New__anon0a8ebb6d0111::AvailableLocalesTraits294 static std::vector<std::string>* New(void* instance) {
295 std::vector<std::string>* locales =
296 base::internal::DestructorAtExitLazyInstanceTraits<
297 std::vector<std::string>>::New(instance);
298 int num_locales = uloc_countAvailable();
299 for (int i = 0; i < num_locales; ++i) {
300 std::string locale_name = uloc_getAvailable(i);
301 // Filter out the names that have aliases.
302 if (IsDuplicateName(locale_name))
303 continue;
304 // Filter out locales for which we have only partially populated data
305 // and to which Chrome is not localized.
306 if (IsLocalePartiallyPopulated(locale_name))
307 continue;
308 if (!l10n_util::IsLocaleSupportedByOS(locale_name))
309 continue;
310 // Normalize underscores to hyphens because that's what our locale files
311 // use.
312 std::replace(locale_name.begin(), locale_name.end(), '_', '-');
313
314 // Map the Chinese locale names over to zh-CN and zh-TW.
315 if (base::LowerCaseEqualsASCII(locale_name, "zh-hans")) {
316 locale_name = "zh-CN";
317 } else if (base::LowerCaseEqualsASCII(locale_name, "zh-hant")) {
318 locale_name = "zh-TW";
319 }
320 locales->push_back(locale_name);
321 }
322
323 return locales;
324 }
325 };
326
// Lazily constructed list of available locales; AvailableLocalesTraits::New()
// populates it. Exposed to callers through l10n_util::GetAvailableLocales().
base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
    g_available_locales = LAZY_INSTANCE_INITIALIZER;
329
330 } // namespace
331
332 namespace l10n_util {
333
// Returns the part of |locale| that precedes its first '-'. When |locale|
// contains no hyphen the whole string is returned unchanged.
std::string GetLanguage(const std::string& locale) {
  // find() yields npos when there is no hyphen, and substr(0, npos) is the
  // entire string.
  return locale.substr(0, locale.find('-'));
}
338
339 // TODO(jshin): revamp this function completely to use a more sytematic
340 // and generic locale fallback based on ICU/CLDR.
CheckAndResolveLocale(const std::string & locale,std::string * resolved_locale)341 bool CheckAndResolveLocale(const std::string& locale,
342 std::string* resolved_locale) {
343 #if !defined(OS_MACOSX) || defined(TOOLKIT_QT)
344 if (IsLocaleAvailable(locale)) {
345 *resolved_locale = locale;
346 return true;
347 }
348
349 // If there's a variant, skip over it so we can try without the region
350 // code. For example, ca_ES@valencia should cause us to try ca@valencia
351 // before ca.
352 std::string::size_type variant_pos = locale.find('@');
353 if (variant_pos != std::string::npos)
354 return false;
355
356 // If the locale matches language but not country, use that instead.
357 // TODO(jungshik) : Nothing is done about languages that Chrome
358 // does not support but available on Windows. We fall
359 // back to en-US in GetApplicationLocale so that it's a not critical,
360 // but we can do better.
361 const std::string lang(GetLanguage(locale));
362 if (lang.size() < locale.size()) {
363 std::string region(locale, lang.size() + 1);
364 std::string tmp_locale(lang);
365 // Map es-RR other than es-ES to es-419 (Chrome's Latin American
366 // Spanish locale).
367 if (base::LowerCaseEqualsASCII(lang, "es") &&
368 !base::LowerCaseEqualsASCII(region, "es")) {
369 tmp_locale.append("-419");
370 } else if (base::LowerCaseEqualsASCII(lang, "pt")) {
371 // Map pt-RR other than pt-BR to pt-PT. Note that "pt" by itself maps to
372 // pt-BR (logic below).
373 tmp_locale.append("-PT");
374 } else if (base::LowerCaseEqualsASCII(lang, "zh")) {
375 // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
376 if (base::LowerCaseEqualsASCII(region, "hk") ||
377 base::LowerCaseEqualsASCII(region, "mo")) { // Macao
378 tmp_locale.append("-TW");
379 } else {
380 tmp_locale.append("-CN");
381 }
382 } else if (base::LowerCaseEqualsASCII(lang, "en")) {
383 // Map Australian, Canadian, Indian, New Zealand and South African
384 // English to British English for now.
385 // TODO(jungshik): en-CA may have to change sides once
386 // we have OS locale separate from app locale (Chrome's UI language).
387 if (base::LowerCaseEqualsASCII(region, "au") ||
388 base::LowerCaseEqualsASCII(region, "ca") ||
389 base::LowerCaseEqualsASCII(region, "in") ||
390 base::LowerCaseEqualsASCII(region, "nz") ||
391 base::LowerCaseEqualsASCII(region, "za")) {
392 tmp_locale.append("-GB");
393 } else {
394 tmp_locale.append("-US");
395 }
396 }
397 if (IsLocaleAvailable(tmp_locale)) {
398 resolved_locale->swap(tmp_locale);
399 return true;
400 }
401 }
402
403 // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
404 // Note that pt-RR is mapped to pt-PT above, but we want pt -> pt-BR here.
405 struct {
406 const char* source;
407 const char* dest;
408 } static constexpr kAliasMap[] = {
409 {"en", "en-US"}, {"iw", "he"}, {"no", "nb"},
410 {"pt", "pt-BR"}, {"tl", "fil"}, {"zh", "zh-CN"},
411 };
412 for (const auto& alias : kAliasMap) {
413 if (base::LowerCaseEqualsASCII(lang, alias.source)) {
414 std::string tmp_locale(alias.dest);
415 if (IsLocaleAvailable(tmp_locale)) {
416 resolved_locale->swap(tmp_locale);
417 return true;
418 }
419 }
420 }
421 #else
422 NOTIMPLEMENTED();
423 #endif // !defined(OS_MACOSX)
424
425 return false;
426 }
427
#if defined(OS_MACOSX) && !defined(TOOLKIT_QT)
// Picks the application locale on Mac, in order of preference: the locale
// override (Cocoa for the browser), then |pref_locale|, then "en-US".
std::string GetApplicationLocaleInternalMac(const std::string& pref_locale) {
  const std::string override_locale = l10n_util::GetLocaleOverride();
  if (!override_locale.empty())
    return override_locale;

  if (!pref_locale.empty())
    return pref_locale;

  // The cases above cover everything Chrome normally hits, but some unit
  // tests need something to fall back to.
  return "en-US";
}
#endif
444
445 #if !defined(OS_MACOSX) || defined(TOOLKIT_QT)
GetApplicationLocaleInternalNonMac(const std::string & pref_locale)446 std::string GetApplicationLocaleInternalNonMac(const std::string& pref_locale) {
447 std::string resolved_locale;
448 std::vector<std::string> candidates;
449
450 // We only use --lang and the app pref on Windows. On Linux, we only
451 // look at the LC_*/LANG environment variables. We do, however, pass --lang
452 // to renderer and plugin processes so they know what language the parent
453 // process decided to use.
454
455 #if defined(OS_WIN)
456 // First, try the preference value.
457 if (!pref_locale.empty())
458 candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));
459
460 // Next, try the overridden locale.
461 const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
462 if (!languages.empty()) {
463 candidates.reserve(candidates.size() + languages.size());
464 std::transform(languages.begin(), languages.end(),
465 std::back_inserter(candidates),
466 &base::i18n::GetCanonicalLocale);
467 } else {
468 // If no override was set, defer to ICU
469 candidates.push_back(base::i18n::GetConfiguredLocale());
470 }
471 #elif defined(OS_ANDROID)
472 // Try pref_locale first.
473 if (!pref_locale.empty())
474 candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));
475
476 // On Android, query java.util.Locale for the default locale.
477 candidates.push_back(base::android::GetDefaultLocaleString());
478 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS) && !defined(TOOLKIT_QT)
479 // GLib implements correct environment variable parsing with
480 // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
481 // We used to use our custom parsing code along with ICU for this purpose.
482 // If we have a port that does not depend on GTK, we have to
483 // restore our custom code for that port.
484 const char* const* languages = g_get_language_names();
485 DCHECK(languages); // A valid pointer is guaranteed.
486 DCHECK(*languages); // At least one entry, "C", is guaranteed.
487
488 for (; *languages; ++languages) {
489 candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
490 }
491 #else
492 // By default, use the application locale preference. This applies to ChromeOS
493 // and linux systems without glib.
494 if (!pref_locale.empty())
495 candidates.push_back(pref_locale);
496 #endif // defined(OS_WIN)
497
498 std::vector<std::string>::const_iterator i = candidates.begin();
499 for (; i != candidates.end(); ++i) {
500 if (CheckAndResolveLocale(*i, &resolved_locale)) {
501 return resolved_locale;
502 }
503 }
504
505 // Fallback on en-US.
506 const std::string fallback_locale("en-US");
507 if (IsLocaleAvailable(fallback_locale))
508 return fallback_locale;
509
510 return std::string();
511 }
512 #endif // !defined(OS_MACOSX)
513
GetApplicationLocaleInternal(const std::string & pref_locale)514 std::string GetApplicationLocaleInternal(const std::string& pref_locale) {
515 #if defined(OS_MACOSX) && !defined(TOOLKIT_QT)
516 return GetApplicationLocaleInternalMac(pref_locale);
517 #else
518 return GetApplicationLocaleInternalNonMac(pref_locale);
519 #endif
520 }
521
GetApplicationLocale(const std::string & pref_locale,bool set_icu_locale)522 std::string GetApplicationLocale(const std::string& pref_locale,
523 bool set_icu_locale) {
524 const std::string locale = GetApplicationLocaleInternal(pref_locale);
525 if (set_icu_locale && !locale.empty())
526 base::i18n::SetICUDefaultLocale(locale);
527 return locale;
528 }
529
GetApplicationLocale(const std::string & pref_locale)530 std::string GetApplicationLocale(const std::string& pref_locale) {
531 return GetApplicationLocale(pref_locale, true /* set_icu_locale */);
532 }
533
IsLocaleNameTranslated(const char * locale,const std::string & display_locale)534 bool IsLocaleNameTranslated(const char* locale,
535 const std::string& display_locale) {
536 base::string16 display_name =
537 l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
538 // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
539 // uloc_getDisplayName returns the actual translation or the default
540 // value (locale code), we have to rely on this hack to tell whether
541 // the translation is available or not. If ICU doesn't have a translated
542 // name for this locale, GetDisplayNameForLocale will just return the
543 // locale code.
544 return !base::IsStringASCII(display_name) ||
545 base::UTF16ToASCII(display_name) != locale;
546 }
547
GetDisplayNameForLocale(const std::string & locale,const std::string & display_locale,bool is_for_ui,bool disallow_default)548 base::string16 GetDisplayNameForLocale(const std::string& locale,
549 const std::string& display_locale,
550 bool is_for_ui,
551 bool disallow_default) {
552 std::string locale_code = locale;
553 // Internally, we use the language code of zh-CN and zh-TW, but we want the
554 // display names to be Chinese (Simplified) and Chinese (Traditional) instead
555 // of Chinese (China) and Chinese (Taiwan).
556 // Translate uses "tl" (Tagalog) to mean "fil" (Filipino) until Google
557 // translate is changed to understand "fil". Make "tl" alias to "fil".
558 if (locale_code == "zh-CN")
559 locale_code = "zh-Hans";
560 else if (locale_code == "zh-TW")
561 locale_code = "zh-Hant";
562 else if (locale_code == "tl")
563 locale_code = "fil";
564 else if (locale_code == "mo")
565 locale_code = "ro-MD";
566
567 base::string16 display_name;
568 #if defined(OS_IOS)
569 // Use the Foundation API to get the localized display name, removing the need
570 // for the ICU data file to include this data.
571 display_name = GetDisplayNameForLocale(locale_code, display_locale);
572 #else
573 #if defined(OS_ANDROID)
574 // Use Java API to get locale display name so that we can remove most of
575 // the lang data from icu data to reduce binary size, except for zh-Hans and
576 // zh-Hant because the current Android Java API doesn't support scripts.
577 // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
578 // Android Java API supports scripts.
579 if (!base::StartsWith(locale_code, "zh-Han", base::CompareCase::SENSITIVE)) {
580 display_name = GetDisplayNameForLocale(locale_code, display_locale);
581 } else
582 #endif // defined(OS_ANDROID)
583 {
584 UErrorCode error = U_ZERO_ERROR;
585 const int kBufferSize = 1024;
586
587 int actual_size;
588 // For Country code in ICU64 we need to call uloc_getDisplayCountry
589 if (locale_code[0] == '-' || locale_code[0] == '_') {
590 actual_size = uloc_getDisplayCountry(
591 locale_code.c_str(), display_locale.c_str(),
592 base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
593 } else {
594 actual_size = uloc_getDisplayName(
595 locale_code.c_str(), display_locale.c_str(),
596 base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
597 }
598 if (disallow_default && U_USING_DEFAULT_WARNING == error)
599 return base::string16();
600 DCHECK(U_SUCCESS(error));
601 display_name.resize(actual_size);
602 }
603 #endif // defined(OS_IOS)
604
605 // Add directional markup so parentheses are properly placed.
606 if (is_for_ui && base::i18n::IsRTL())
607 base::i18n::AdjustStringForLocaleDirection(&display_name);
608 return display_name;
609 }
610
GetDisplayNameForCountry(const std::string & country_code,const std::string & display_locale)611 base::string16 GetDisplayNameForCountry(const std::string& country_code,
612 const std::string& display_locale) {
613 return GetDisplayNameForLocale("_" + country_code, display_locale, false);
614 }
615
// Returns a copy of |locale| in which every '-' has been replaced by '_'.
// All other characters are copied through unchanged.
std::string NormalizeLocale(const std::string& locale) {
  std::string normalized_locale;
  normalized_locale.reserve(locale.size());
  for (const char ch : locale)
    normalized_locale.push_back(ch == '-' ? '_' : ch);
  return normalized_locale;
}
622
GetParentLocales(const std::string & current_locale,std::vector<std::string> * parent_locales)623 void GetParentLocales(const std::string& current_locale,
624 std::vector<std::string>* parent_locales) {
625 std::string locale(NormalizeLocale(current_locale));
626
627 const int kNameCapacity = 256;
628 char parent[kNameCapacity];
629 base::strlcpy(parent, locale.c_str(), kNameCapacity);
630 parent_locales->push_back(parent);
631 UErrorCode err = U_ZERO_ERROR;
632 while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
633 if (U_FAILURE(err))
634 break;
635 parent_locales->push_back(parent);
636 }
637 }
638
IsValidLocaleSyntax(const std::string & locale)639 bool IsValidLocaleSyntax(const std::string& locale) {
640 // Check that the length is plausible.
641 if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
642 return false;
643
644 // Strip off the part after an '@' sign, which might contain keywords,
645 // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
646 // We don't validate that part much, just check that there's at least one
647 // equals sign in a plausible place. Normalize the prefix so that hyphens
648 // are changed to underscores.
649 std::string prefix = NormalizeLocale(locale);
650 size_t split_point = locale.find("@");
651 if (split_point != std::string::npos) {
652 std::string keywords = locale.substr(split_point + 1);
653 prefix = locale.substr(0, split_point);
654
655 size_t equals_loc = keywords.find("=");
656 if (equals_loc == 0 || equals_loc == std::string::npos ||
657 equals_loc > keywords.size() - 2) {
658 return false;
659 }
660 }
661
662 // Check that all characters before the at-sign are alphanumeric or
663 // underscore.
664 for (char ch : prefix) {
665 if (!base::IsAsciiAlpha(ch) && !base::IsAsciiDigit(ch) && ch != '_')
666 return false;
667 }
668
669 // Check that the initial token (before the first hyphen/underscore)
670 // is 1 - 3 alphabetical characters (a language tag).
671 for (size_t i = 0; i < prefix.size(); i++) {
672 char ch = prefix[i];
673 if (ch == '_') {
674 if (i < 1 || i > 3)
675 return false;
676 break;
677 }
678 if (!base::IsAsciiAlpha(ch))
679 return false;
680 }
681
682 // Check that the all tokens after the initial token are 1 - 8 characters.
683 // (Tokenize/StringTokenizer don't work here, they collapse multiple
684 // delimiters into one.)
685 int token_len = 0;
686 int token_index = 0;
687 for (char ch : prefix) {
688 if (ch != '_') {
689 token_len++;
690 continue;
691 }
692
693 if (token_index > 0 && (token_len < 1 || token_len > 8)) {
694 return false;
695 }
696 token_index++;
697 token_len = 0;
698 }
699 if (token_index == 0 && (token_len < 1 || token_len > 3))
700 return false;
701 if (token_len < 1 || token_len > 8)
702 return false;
703
704 return true;
705 }
706
GetStringUTF8(int message_id)707 std::string GetStringUTF8(int message_id) {
708 return base::UTF16ToUTF8(GetStringUTF16(message_id));
709 }
710
GetStringUTF16(int message_id)711 base::string16 GetStringUTF16(int message_id) {
712 ui::ResourceBundle& rb = ui::ResourceBundle::GetSharedInstance();
713 base::string16 str = rb.GetLocalizedString(message_id);
714 AdjustParagraphDirectionality(&str);
715
716 return str;
717 }
718
GetStringFUTF16(int message_id,const std::vector<base::string16> & replacements,std::vector<size_t> * offsets)719 base::string16 GetStringFUTF16(int message_id,
720 const std::vector<base::string16>& replacements,
721 std::vector<size_t>* offsets) {
722 // TODO(tc): We could save a string copy if we got the raw string as
723 // a StringPiece and were able to call ReplaceStringPlaceholders with
724 // a StringPiece format string and base::string16 substitution strings. In
725 // practice, the strings should be relatively short.
726 ui::ResourceBundle& rb = ui::ResourceBundle::GetSharedInstance();
727 const base::string16& format_string = rb.GetLocalizedString(message_id);
728
729 #ifndef NDEBUG
730 // Make sure every replacement string is being used, so we don't just
731 // silently fail to insert one. If |offsets| is non-NULL, then don't do this
732 // check as the code may simply want to find the placeholders rather than
733 // actually replacing them.
734 if (!offsets) {
735 // $9 is the highest allowed placeholder.
736 for (size_t i = 0; i < 9; ++i) {
737 bool placeholder_should_exist = replacements.size() > i;
738
739 base::string16 placeholder = base::ASCIIToUTF16("$");
740 placeholder += (L'1' + i);
741 size_t pos = format_string.find(placeholder);
742 if (placeholder_should_exist) {
743 DCHECK_NE(std::string::npos, pos) << " Didn't find a " << placeholder
744 << " placeholder in "
745 << format_string;
746 } else {
747 DCHECK_EQ(std::string::npos, pos) << " Unexpectedly found a "
748 << placeholder << " placeholder in "
749 << format_string;
750 }
751 }
752 }
753 #endif
754
755 base::string16 formatted = base::ReplaceStringPlaceholders(
756 format_string, replacements, offsets);
757 AdjustParagraphDirectionality(&formatted);
758
759 return formatted;
760 }
761
GetStringFUTF8(int message_id,const base::string16 & a)762 std::string GetStringFUTF8(int message_id,
763 const base::string16& a) {
764 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
765 }
766
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b)767 std::string GetStringFUTF8(int message_id,
768 const base::string16& a,
769 const base::string16& b) {
770 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
771 }
772
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c)773 std::string GetStringFUTF8(int message_id,
774 const base::string16& a,
775 const base::string16& b,
776 const base::string16& c) {
777 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
778 }
779
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c,const base::string16 & d)780 std::string GetStringFUTF8(int message_id,
781 const base::string16& a,
782 const base::string16& b,
783 const base::string16& c,
784 const base::string16& d) {
785 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
786 }
787
// Formats the string for |message_id| with the single replacement |a|. No
// offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a) {
  const std::vector<base::string16> replacements(1, a);
  return GetStringFUTF16(message_id, replacements, nullptr);
}
793
// Formats the string for |message_id| with the replacements |a| and |b| by
// delegating to the (a, b, offsets) overload with null offsets.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b) {
  std::vector<size_t>* const no_offsets = nullptr;
  return GetStringFUTF16(message_id, a, b, no_offsets);
}
799
// Formats the string for |message_id| with the replacements |a|, |b| and
// |c|, in that order. No offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c) {
  std::vector<base::string16> replacements;
  replacements.reserve(3);
  replacements.push_back(a);
  replacements.push_back(b);
  replacements.push_back(c);
  return GetStringFUTF16(message_id, replacements, nullptr);
}
807
// Formats the string for |message_id| with the replacements |a| through |d|,
// in that order. No offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d) {
  std::vector<base::string16> replacements;
  replacements.reserve(4);
  replacements.push_back(a);
  replacements.push_back(b);
  replacements.push_back(c);
  replacements.push_back(d);
  return GetStringFUTF16(message_id, replacements, nullptr);
}
816
// Formats the string for |message_id| with the replacements |a| through |e|,
// in that order. No offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d,
                               const base::string16& e) {
  std::vector<base::string16> replacements;
  replacements.reserve(5);
  replacements.push_back(a);
  replacements.push_back(b);
  replacements.push_back(c);
  replacements.push_back(d);
  replacements.push_back(e);
  return GetStringFUTF16(message_id, replacements, nullptr);
}
826
// Formats the string for |message_id| with the single replacement |a| and
// stores in |*offset| the one offset reported by the vector overload.
// |offset| must not be null.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               size_t* offset) {
  DCHECK(offset);
  const std::vector<base::string16> replacements(1, a);
  std::vector<size_t> offsets;
  base::string16 result = GetStringFUTF16(message_id, replacements, &offsets);
  DCHECK_EQ(1u, offsets.size());
  *offset = offsets.front();
  return result;
}
838
// Formats the string for |message_id| with the replacements |a| and |b|,
// forwarding |offsets| (which may be null) to the vector overload.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               std::vector<size_t>* offsets) {
  std::vector<base::string16> replacements;
  replacements.reserve(2);
  replacements.push_back(a);
  replacements.push_back(b);
  return GetStringFUTF16(message_id, replacements, offsets);
}
846
GetStringFUTF16Int(int message_id,int a)847 base::string16 GetStringFUTF16Int(int message_id, int a) {
848 return GetStringFUTF16(message_id, base::FormatNumber(a));
849 }
850
GetStringFUTF16Int(int message_id,int64_t a)851 base::string16 GetStringFUTF16Int(int message_id, int64_t a) {
852 return GetStringFUTF16(message_id, base::FormatNumber(a));
853 }
854
GetPluralStringFUTF16(int message_id,int number)855 base::string16 GetPluralStringFUTF16(int message_id, int number) {
856 return base::i18n::MessageFormatter::FormatWithNumberedArgs(
857 GetStringUTF16(message_id), number);
858 }
859
GetPluralStringFUTF8(int message_id,int number)860 std::string GetPluralStringFUTF8(int message_id, int number) {
861 return base::UTF16ToUTF8(GetPluralStringFUTF16(message_id, number));
862 }
863
GetSingleOrMultipleStringUTF16(int message_id,bool is_multiple)864 base::string16 GetSingleOrMultipleStringUTF16(int message_id,
865 bool is_multiple) {
866 return base::i18n::MessageFormatter::FormatWithNumberedArgs(
867 GetStringUTF16(message_id), is_multiple ? "multiple" : "single");
868 }
869
SortStrings16(const std::string & locale,std::vector<base::string16> * strings)870 void SortStrings16(const std::string& locale,
871 std::vector<base::string16>* strings) {
872 SortVectorWithStringKey(locale, strings, false);
873 }
874
GetAvailableLocales()875 const std::vector<std::string>& GetAvailableLocales() {
876 return g_available_locales.Get();
877 }
878
GetAcceptLanguagesForLocale(const std::string & display_locale,std::vector<std::string> * locale_codes)879 void GetAcceptLanguagesForLocale(const std::string& display_locale,
880 std::vector<std::string>* locale_codes) {
881 for (const char* accept_language : kAcceptLanguageList) {
882 if (!l10n_util::IsLocaleNameTranslated(accept_language, display_locale)) {
883 // TODO(jungshik) : Put them at the end of the list with language codes
884 // enclosed by brackets instead of skipping.
885 continue;
886 }
887 locale_codes->push_back(accept_language);
888 }
889 }
890
IsLanguageAccepted(const std::string & display_locale,const std::string & locale)891 bool IsLanguageAccepted(const std::string& display_locale,
892 const std::string& locale) {
893 for (const char* accept_language : kAcceptLanguageList) {
894 if (accept_language == locale &&
895 l10n_util::IsLocaleNameTranslated(locale.c_str(), display_locale)) {
896 return true;
897 }
898 }
899 return false;
900 }
901
GetLocalizedContentsWidthInPixels(int pixel_resource_id)902 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
903 int width = 0;
904 base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
905 DCHECK_GT(width, 0);
906 return width;
907 }
908
GetAcceptLanguageListForTesting()909 const char* const* GetAcceptLanguageListForTesting() {
910 return kAcceptLanguageList;
911 }
912
GetAcceptLanguageListSizeForTesting()913 size_t GetAcceptLanguageListSizeForTesting() {
914 return base::size(kAcceptLanguageList);
915 }
916
917 } // namespace l10n_util
918