1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "ui/base/l10n/l10n_util.h"
6 
7 #include <algorithm>
8 #include <cstdlib>
9 #include <iterator>
10 #include <memory>
11 #include <string>
12 
13 #include "base/check_op.h"
14 #include "base/command_line.h"
15 #include "base/compiler_specific.h"
16 #include "base/files/file_util.h"
17 #include "base/i18n/file_util_icu.h"
18 #include "base/i18n/message_formatter.h"
19 #include "base/i18n/number_formatting.h"
20 #include "base/i18n/rtl.h"
21 #include "base/i18n/string_compare.h"
22 #include "base/lazy_instance.h"
23 #include "base/notreached.h"
24 #include "base/stl_util.h"
25 #include "base/strings/string_number_conversions.h"
26 #include "base/strings/string_split.h"
27 #include "base/strings/string_util.h"
28 #include "base/strings/stringprintf.h"
29 #include "base/strings/sys_string_conversions.h"
30 #include "base/strings/utf_string_conversions.h"
31 #include "build/build_config.h"
32 #include "third_party/icu/source/common/unicode/rbbi.h"
33 #include "third_party/icu/source/common/unicode/uloc.h"
34 #include "ui/base/l10n/l10n_util_collator.h"
35 #include "ui/base/resource/resource_bundle.h"
36 #include "ui/base/ui_base_paths.h"
37 
38 #if defined(OS_ANDROID)
39 #include "base/android/locale_utils.h"
40 #include "ui/base/l10n/l10n_util_android.h"
41 #endif
42 
43 #if defined(USE_GLIB)
44 #include <glib.h>
45 #endif
46 
47 #if defined(OS_WIN)
48 #include "base/logging.h"
49 #include "ui/base/l10n/l10n_util_win.h"
50 #endif  // OS_WIN
51 
52 namespace {
53 
// Language and locale codes, one per entry, each annotated with the English
// name of the language (and of the region or variant, where the code carries
// one). The entries are currently kept in alphabetical order.
// NOTE(review): judging by its name this table presumably backs the
// Accept-Language helpers - confirm against its users.
static const char* const kAcceptLanguageList[] = {
    "af",              // Afrikaans
    "am",              // Amharic
    "an",              // Aragonese
    "ar",              // Arabic
    "ast",             // Asturian
    "az",              // Azerbaijani
    "be",              // Belarusian
    "bg",              // Bulgarian
    "bh",              // Bihari
    "bn",              // Bengali
    "br",              // Breton
    "bs",              // Bosnian
    "ca",              // Catalan
    "ceb",             // Cebuano
    "ckb",             // Kurdish (Arabic), Sorani
    "co",              // Corsican
    "cs",              // Czech
    "cy",              // Welsh
    "da",              // Danish
    "de",              // German
    "de-AT",           // German (Austria)
    "de-CH",           // German (Switzerland)
    "de-DE",           // German (Germany)
    "de-LI",           // German (Liechtenstein)
    "el",              // Greek
    "en",              // English
    "en-AU",           // English (Australia)
    "en-CA",           // English (Canada)
    "en-GB",           // English (UK)
    "en-GB-oxendict",  // English (UK, OED spelling)
    "en-IN",           // English (India)
    "en-NZ",           // English (New Zealand)
    "en-US",           // English (US)
    "en-ZA",           // English (South Africa)
    "eo",              // Esperanto
    // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
    // Spanish speaking countries?
    "es",      // Spanish
    "es-419",  // Spanish (Latin America)
    "es-AR",   // Spanish (Argentina)
    "es-CL",   // Spanish (Chile)
    "es-CO",   // Spanish (Colombia)
    "es-CR",   // Spanish (Costa Rica)
    "es-ES",   // Spanish (Spain)
    "es-HN",   // Spanish (Honduras)
    "es-MX",   // Spanish (Mexico)
    "es-PE",   // Spanish (Peru)
    "es-US",   // Spanish (US)
    "es-UY",   // Spanish (Uruguay)
    "es-VE",   // Spanish (Venezuela)
    "et",      // Estonian
    "eu",      // Basque
    "fa",      // Persian
    "fi",      // Finnish
    "fil",     // Filipino
    "fo",      // Faroese
    "fr",      // French
    "fr-CA",   // French (Canada)
    "fr-CH",   // French (Switzerland)
    "fr-FR",   // French (France)
    "fy",      // Frisian
    "ga",      // Irish
    "gd",      // Scots Gaelic
    "gl",      // Galician
    "gn",      // Guarani
    "gu",      // Gujarati
    "ha",      // Hausa
    "haw",     // Hawaiian
    "he",      // Hebrew
    "hi",      // Hindi
    "hmn",     // Hmong
    "hr",      // Croatian
    "ht",      // Haitian Creole
    "hu",      // Hungarian
    "hy",      // Armenian
    "ia",      // Interlingua
    "id",      // Indonesian
    "ig",      // Igbo
    "is",      // Icelandic
    "it",      // Italian
    "it-CH",   // Italian (Switzerland)
    "it-IT",   // Italian (Italy)
    "ja",      // Japanese
    "jv",      // Javanese
    "ka",      // Georgian
    "kk",      // Kazakh
    "km",      // Cambodian
    "kn",      // Kannada
    "ko",      // Korean
    "ku",      // Kurdish
    "ky",      // Kyrgyz
    "la",      // Latin
    "lb",      // Luxembourgish
    "ln",      // Lingala
    "lo",      // Laothian
    "lt",      // Lithuanian
    "lv",      // Latvian
    "mg",      // Malagasy
    "mi",      // Maori
    "mk",      // Macedonian
    "ml",      // Malayalam
    "mn",      // Mongolian
    "mo",      // Moldavian
    "mr",      // Marathi
    "ms",      // Malay
    "mt",      // Maltese
    "my",      // Burmese
    "nb",      // Norwegian (Bokmal)
    "ne",      // Nepali
    "nl",      // Dutch
    "nn",      // Norwegian (Nynorsk)
    "no",      // Norwegian
    "ny",      // Nyanja
    "oc",      // Occitan
    "om",      // Oromo
    "or",      // Oriya
    "pa",      // Punjabi
    "pl",      // Polish
    "ps",      // Pashto
    "pt",      // Portuguese
    "pt-BR",   // Portuguese (Brazil)
    "pt-PT",   // Portuguese (Portugal)
    "qu",      // Quechua
    "rm",      // Romansh
    "ro",      // Romanian
    "ru",      // Russian
    "sd",      // Sindhi
    "sh",      // Serbo-Croatian
    "si",      // Sinhalese
    "sk",      // Slovak
    "sl",      // Slovenian
    "sm",      // Samoan
    "sn",      // Shona
    "so",      // Somali
    "sq",      // Albanian
    "sr",      // Serbian
    "st",      // Sesotho
    "su",      // Sundanese
    "sv",      // Swedish
    "sw",      // Swahili
    "ta",      // Tamil
    "te",      // Telugu
    "tg",      // Tajik
    "th",      // Thai
    "ti",      // Tigrinya
    "tk",      // Turkmen
    "to",      // Tonga
    "tr",      // Turkish
    "tt",      // Tatar
    "tw",      // Twi
    "ug",      // Uighur
    "uk",      // Ukrainian
    "ur",      // Urdu
    "uz",      // Uzbek
    "vi",      // Vietnamese
    "wa",      // Walloon
    "xh",      // Xhosa
    "yi",      // Yiddish
    "yo",      // Yoruba
    "zh",      // Chinese
    "zh-CN",   // Chinese (China)
    "zh-HK",   // Chinese (Hong Kong)
    "zh-TW",   // Chinese (Taiwan)
    "zu",      // Zulu
};
220 
221 // Returns true if |locale_name| has an alias in the ICU data file.
IsDuplicateName(const std::string & locale_name)222 bool IsDuplicateName(const std::string& locale_name) {
223   static const char* const kDuplicateNames[] = {
224     "ar_001",
225     "en",
226     "en_001",
227     "en_150",
228     "pt",  // pt-BR and pt-PT are used.
229     "zh",
230     "zh_hans_cn",
231     "zh_hant_hk",
232     "zh_hant_mo",
233     "zh_hans_sg",
234     "zh_hant_tw"
235   };
236 
237   // Skip all the es_Foo other than es_419 for now.
238   if (base::StartsWith(locale_name, "es_",
239                        base::CompareCase::INSENSITIVE_ASCII)) {
240     return !base::EndsWith(locale_name, "419", base::CompareCase::SENSITIVE);
241   }
242   for (const char* duplicate_name : kDuplicateNames) {
243     if (base::EqualsCaseInsensitiveASCII(duplicate_name, locale_name))
244       return true;
245   }
246   return false;
247 }
248 
249 // We added 30+ minimally populated locales with only a few entries
250 // (exemplar character set, script, writing direction and its own
251 // lanaguage name). These locales have to be distinguished from the
252 // fully populated locales to which Chrome is localized.
IsLocalePartiallyPopulated(const std::string & locale_name)253 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
254   // For partially populated locales, even the translation for "English"
255   // is not available. A more robust/elegant way to check is to add a special
256   // field (say, 'isPartial' to our version of ICU locale files) and
257   // check its value, but this hack seems to work well.
258   return !l10n_util::IsLocaleNameTranslated("en", locale_name);
259 }
260 
261 #if !defined(OS_APPLE)
IsLocaleAvailable(const std::string & locale)262 bool IsLocaleAvailable(const std::string& locale) {
263   // If locale has any illegal characters in it, we don't want to try to
264   // load it because it may be pointing outside the locale data file directory.
265   if (!base::i18n::IsFilenameLegal(base::ASCIIToUTF16(locale)))
266     return false;
267 
268   // IsLocalePartiallyPopulated() can be called here for an early return w/o
269   // checking the resource availability below. It'd help when Chrome is run
270   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
271   // but it'd slow down the start up time a little bit for locales Chrome is
272   // localized to. So, we don't call it here.
273   return ui::ResourceBundle::LocaleDataPakExists(locale);
274 }
275 #endif
276 
277 // On Linux, the text layout engine Pango determines paragraph directionality
278 // by looking at the first strongly-directional character in the text. This
279 // means text such as "Google Chrome foo bar..." will be layed out LTR even
280 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
281 // cases.
AdjustParagraphDirectionality(base::string16 * paragraph)282 void AdjustParagraphDirectionality(base::string16* paragraph) {
283 #if defined(OS_POSIX) && !defined(OS_APPLE) && !defined(OS_ANDROID)
284   if (base::i18n::IsRTL() &&
285       base::i18n::StringContainsStrongRTLChars(*paragraph)) {
286     paragraph->insert(0, 1,
287                       static_cast<base::char16>(base::i18n::kRightToLeftMark));
288   }
289 #endif
290 }
291 
292 struct AvailableLocalesTraits
293     : base::internal::DestructorAtExitLazyInstanceTraits<
294           std::vector<std::string>> {
New__anon2c2479320111::AvailableLocalesTraits295   static std::vector<std::string>* New(void* instance) {
296     std::vector<std::string>* locales =
297         base::internal::DestructorAtExitLazyInstanceTraits<
298             std::vector<std::string>>::New(instance);
299     int num_locales = uloc_countAvailable();
300     for (int i = 0; i < num_locales; ++i) {
301       std::string locale_name = uloc_getAvailable(i);
302       // Filter out the names that have aliases.
303       if (IsDuplicateName(locale_name))
304         continue;
305       // Filter out locales for which we have only partially populated data
306       // and to which Chrome is not localized.
307       if (IsLocalePartiallyPopulated(locale_name))
308         continue;
309       // Normalize underscores to hyphens because that's what our locale files
310       // use.
311       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
312 
313       // Map the Chinese locale names over to zh-CN and zh-TW.
314       if (base::LowerCaseEqualsASCII(locale_name, "zh-hans")) {
315         locale_name = "zh-CN";
316       } else if (base::LowerCaseEqualsASCII(locale_name, "zh-hant")) {
317         locale_name = "zh-TW";
318       }
319       locales->push_back(locale_name);
320     }
321 
322     return locales;
323   }
324 };
325 
// LazyInstance holding the filtered list of ICU locale names; its contents are
// produced by AvailableLocalesTraits::New().
base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
    g_available_locales = LAZY_INSTANCE_INITIALIZER;
328 
329 }  // namespace
330 
331 namespace l10n_util {
332 
// Returns the part of |locale| that precedes its first hyphen, or all of
// |locale| when it contains no hyphen. Underscores are not treated as
// separators.
std::string GetLanguage(const std::string& locale) {
  // find() yields npos when there is no hyphen, and substr(0, npos) is the
  // whole string.
  return locale.substr(0, locale.find('-'));
}
337 
338 // TODO(jshin): revamp this function completely to use a more sytematic
339 // and generic locale fallback based on ICU/CLDR.
CheckAndResolveLocale(const std::string & locale,std::string * resolved_locale)340 bool CheckAndResolveLocale(const std::string& locale,
341                            std::string* resolved_locale) {
342 #if !defined(OS_APPLE)
343   if (IsLocaleAvailable(locale)) {
344     *resolved_locale = locale;
345     return true;
346   }
347 
348   // If there's a variant, skip over it so we can try without the region
349   // code.  For example, ca_ES@valencia should cause us to try ca@valencia
350   // before ca.
351   std::string::size_type variant_pos = locale.find('@');
352   if (variant_pos != std::string::npos)
353     return false;
354 
355   // If the locale matches language but not country, use that instead.
356   // TODO(jungshik) : Nothing is done about languages that Chrome
357   // does not support but available on Windows. We fall
358   // back to en-US in GetApplicationLocale so that it's a not critical,
359   // but we can do better.
360   const std::string lang(GetLanguage(locale));
361   if (lang.size() < locale.size()) {
362     std::string region(locale, lang.size() + 1);
363     std::string tmp_locale(lang);
364     // Map es-RR other than es-ES to es-419 (Chrome's Latin American
365     // Spanish locale).
366     if (base::LowerCaseEqualsASCII(lang, "es") &&
367         !base::LowerCaseEqualsASCII(region, "es")) {
368       tmp_locale.append("-419");
369     } else if (base::LowerCaseEqualsASCII(lang, "pt")) {
370       // Map pt-RR other than pt-BR to pt-PT. Note that "pt" by itself maps to
371       // pt-BR (logic below).
372       tmp_locale.append("-PT");
373     } else if (base::LowerCaseEqualsASCII(lang, "zh")) {
374       // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
375       if (base::LowerCaseEqualsASCII(region, "hk") ||
376           base::LowerCaseEqualsASCII(region, "mo")) {  // Macao
377         tmp_locale.append("-TW");
378       } else {
379         tmp_locale.append("-CN");
380       }
381     } else if (base::LowerCaseEqualsASCII(lang, "en")) {
382       // Map Liberian and Filipino English to US English, and everything
383       // else to British English.
384       // TODO(jungshik): en-CA may have to change sides once
385       // we have OS locale separate from app locale (Chrome's UI language).
386       if (base::LowerCaseEqualsASCII(region, "lr") ||
387           base::LowerCaseEqualsASCII(region, "ph")) {
388         tmp_locale.append("-US");
389       } else {
390         tmp_locale.append("-GB");
391       }
392     }
393     if (IsLocaleAvailable(tmp_locale)) {
394       resolved_locale->swap(tmp_locale);
395       return true;
396     }
397   }
398 
399   // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
400   // Note that pt-RR is mapped to pt-PT above, but we want pt -> pt-BR here.
401   struct {
402     const char* source;
403     const char* dest;
404   } static constexpr kAliasMap[] = {
405       {"en", "en-US"}, {"iw", "he"},  {"no", "nb"},
406       {"pt", "pt-BR"}, {"tl", "fil"}, {"zh", "zh-CN"},
407   };
408   for (const auto& alias : kAliasMap) {
409     if (base::LowerCaseEqualsASCII(lang, alias.source)) {
410       std::string tmp_locale(alias.dest);
411       if (IsLocaleAvailable(tmp_locale)) {
412         resolved_locale->swap(tmp_locale);
413         return true;
414       }
415     }
416   }
417 #else
418   NOTIMPLEMENTED();
419 #endif  // !defined(OS_APPLE)
420 
421   return false;
422 }
423 
424 #if defined(OS_APPLE)
GetApplicationLocaleInternalMac(const std::string & pref_locale)425 std::string GetApplicationLocaleInternalMac(const std::string& pref_locale) {
426   // Use any override (Cocoa for the browser), otherwise use the preference
427   // passed to the function.
428   std::string app_locale = l10n_util::GetLocaleOverride();
429   if (app_locale.empty())
430     app_locale = pref_locale;
431 
432   // The above should handle all of the cases Chrome normally hits, but for some
433   // unit tests, we need something to fall back too.
434   if (app_locale.empty())
435     app_locale = "en-US";
436 
437   return app_locale;
438 }
439 #endif
440 
441 #if !defined(OS_APPLE)
GetApplicationLocaleInternalNonMac(const std::string & pref_locale)442 std::string GetApplicationLocaleInternalNonMac(const std::string& pref_locale) {
443   std::string resolved_locale;
444   std::vector<std::string> candidates;
445 
446   // We only use --lang and the app pref on Windows.  On Linux, we only
447   // look at the LC_*/LANG environment variables.  We do, however, pass --lang
448   // to renderer and plugin processes so they know what language the parent
449   // process decided to use.
450 
451 #if defined(OS_WIN)
452   // First, try the preference value.
453   if (!pref_locale.empty())
454     candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));
455 
456   // Next, try the overridden locale.
457   const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
458   if (!languages.empty()) {
459     candidates.reserve(candidates.size() + languages.size());
460     std::transform(languages.begin(), languages.end(),
461                    std::back_inserter(candidates),
462                    &base::i18n::GetCanonicalLocale);
463   } else {
464     // If no override was set, defer to ICU
465     candidates.push_back(base::i18n::GetConfiguredLocale());
466   }
467 #elif defined(OS_ANDROID)
468   // Try pref_locale first.
469   if (!pref_locale.empty())
470     candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));
471 
472   // On Android, query java.util.Locale for the default locale.
473   candidates.push_back(base::android::GetDefaultLocaleString());
474 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS)
475   // GLib implements correct environment variable parsing with
476   // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
477   // We used to use our custom parsing code along with ICU for this purpose.
478   // If we have a port that does not depend on GTK, we have to
479   // restore our custom code for that port.
480   const char* const* languages = g_get_language_names();
481   DCHECK(languages);  // A valid pointer is guaranteed.
482   DCHECK(*languages);  // At least one entry, "C", is guaranteed.
483 
484   for (; *languages; ++languages) {
485     candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
486   }
487 #else
488   // By default, use the application locale preference. This applies to ChromeOS
489   // and linux systems without glib.
490   if (!pref_locale.empty())
491     candidates.push_back(pref_locale);
492 #endif  // defined(OS_WIN)
493 
494   std::vector<std::string>::const_iterator i = candidates.begin();
495   for (; i != candidates.end(); ++i) {
496     if (CheckAndResolveLocale(*i, &resolved_locale)) {
497       return resolved_locale;
498     }
499   }
500 
501   // Fallback on en-US.
502   const std::string fallback_locale("en-US");
503   if (IsLocaleAvailable(fallback_locale))
504     return fallback_locale;
505 
506   return std::string();
507 }
508 #endif  // !defined(OS_APPLE)
509 
GetApplicationLocaleInternal(const std::string & pref_locale)510 std::string GetApplicationLocaleInternal(const std::string& pref_locale) {
511 #if defined(OS_APPLE)
512   return GetApplicationLocaleInternalMac(pref_locale);
513 #else
514   return GetApplicationLocaleInternalNonMac(pref_locale);
515 #endif
516 }
517 
GetApplicationLocale(const std::string & pref_locale,bool set_icu_locale)518 std::string GetApplicationLocale(const std::string& pref_locale,
519                                  bool set_icu_locale) {
520   const std::string locale = GetApplicationLocaleInternal(pref_locale);
521   if (set_icu_locale && !locale.empty())
522     base::i18n::SetICUDefaultLocale(locale);
523   return locale;
524 }
525 
GetApplicationLocale(const std::string & pref_locale)526 std::string GetApplicationLocale(const std::string& pref_locale) {
527   return GetApplicationLocale(pref_locale, true /* set_icu_locale */);
528 }
529 
IsLocaleNameTranslated(const char * locale,const std::string & display_locale)530 bool IsLocaleNameTranslated(const char* locale,
531                             const std::string& display_locale) {
532   base::string16 display_name =
533       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
534   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
535   // uloc_getDisplayName returns the actual translation or the default
536   // value (locale code), we have to rely on this hack to tell whether
537   // the translation is available or not.  If ICU doesn't have a translated
538   // name for this locale, GetDisplayNameForLocale will just return the
539   // locale code.
540   return !base::IsStringASCII(display_name) ||
541       base::UTF16ToASCII(display_name) != locale;
542 }
543 
GetDisplayNameForLocale(const std::string & locale,const std::string & display_locale,bool is_for_ui,bool disallow_default)544 base::string16 GetDisplayNameForLocale(const std::string& locale,
545                                        const std::string& display_locale,
546                                        bool is_for_ui,
547                                        bool disallow_default) {
548   std::string locale_code = locale;
549   // Internally, we use the language code of zh-CN and zh-TW, but we want the
550   // display names to be Chinese (Simplified) and Chinese (Traditional) instead
551   // of Chinese (China) and Chinese (Taiwan).
552   // Translate uses "tl" (Tagalog) to mean "fil" (Filipino) until Google
553   // translate is changed to understand "fil". Make "tl" alias to "fil".
554   if (locale_code == "zh-CN")
555     locale_code = "zh-Hans";
556   else if (locale_code == "zh-TW")
557     locale_code = "zh-Hant";
558   else if (locale_code == "tl")
559     locale_code = "fil";
560   else if (locale_code == "mo")
561     locale_code = "ro-MD";
562 
563   base::string16 display_name;
564 #if defined(OS_IOS)
565   // Use the Foundation API to get the localized display name, removing the need
566   // for the ICU data file to include this data.
567   display_name = GetDisplayNameForLocale(locale_code, display_locale);
568 #else
569 #if defined(OS_ANDROID)
570   // Use Java API to get locale display name so that we can remove most of
571   // the lang data from icu data to reduce binary size, except for zh-Hans and
572   // zh-Hant because the current Android Java API doesn't support scripts.
573   // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
574   // Android Java API supports scripts.
575   if (!base::StartsWith(locale_code, "zh-Han", base::CompareCase::SENSITIVE)) {
576     display_name = GetDisplayNameForLocale(locale_code, display_locale);
577   } else
578 #endif  // defined(OS_ANDROID)
579   {
580     UErrorCode error = U_ZERO_ERROR;
581     const int kBufferSize = 1024;
582 
583     int actual_size;
584     // For Country code in ICU64 we need to call uloc_getDisplayCountry
585     if (locale_code[0] == '-' || locale_code[0] == '_') {
586       actual_size = uloc_getDisplayCountry(
587           locale_code.c_str(), display_locale.c_str(),
588           base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
589     } else {
590       actual_size = uloc_getDisplayName(
591           locale_code.c_str(), display_locale.c_str(),
592           base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
593     }
594     if (disallow_default && U_USING_DEFAULT_WARNING == error)
595       return base::string16();
596     DCHECK(U_SUCCESS(error));
597     display_name.resize(actual_size);
598   }
599 #endif  // defined(OS_IOS)
600 
601   // Add directional markup so parentheses are properly placed.
602   if (is_for_ui && base::i18n::IsRTL())
603     base::i18n::AdjustStringForLocaleDirection(&display_name);
604   return display_name;
605 }
606 
GetDisplayNameForCountry(const std::string & country_code,const std::string & display_locale)607 base::string16 GetDisplayNameForCountry(const std::string& country_code,
608                                         const std::string& display_locale) {
609   return GetDisplayNameForLocale("_" + country_code, display_locale, false);
610 }
611 
// Returns a copy of |locale| in which every hyphen has been replaced by an
// underscore. All other characters, and the length, are unchanged.
std::string NormalizeLocale(const std::string& locale) {
  std::string normalized;
  normalized.reserve(locale.size());
  for (const char ch : locale)
    normalized.push_back(ch == '-' ? '_' : ch);
  return normalized;
}
618 
GetParentLocales(const std::string & current_locale,std::vector<std::string> * parent_locales)619 void GetParentLocales(const std::string& current_locale,
620                       std::vector<std::string>* parent_locales) {
621   std::string locale(NormalizeLocale(current_locale));
622 
623   const int kNameCapacity = 256;
624   char parent[kNameCapacity];
625   base::strlcpy(parent, locale.c_str(), kNameCapacity);
626   parent_locales->push_back(parent);
627   UErrorCode err = U_ZERO_ERROR;
628   while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
629     if (U_FAILURE(err))
630       break;
631     parent_locales->push_back(parent);
632   }
633 }
634 
IsValidLocaleSyntax(const std::string & locale)635 bool IsValidLocaleSyntax(const std::string& locale) {
636   // Check that the length is plausible.
637   if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
638     return false;
639 
640   // Strip off the part after an '@' sign, which might contain keywords,
641   // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
642   // We don't validate that part much, just check that there's at least one
643   // equals sign in a plausible place. Normalize the prefix so that hyphens
644   // are changed to underscores.
645   std::string prefix = NormalizeLocale(locale);
646   size_t split_point = locale.find("@");
647   if (split_point != std::string::npos) {
648     std::string keywords = locale.substr(split_point + 1);
649     prefix = locale.substr(0, split_point);
650 
651     size_t equals_loc = keywords.find("=");
652     if (equals_loc == 0 || equals_loc == std::string::npos ||
653         equals_loc > keywords.size() - 2) {
654       return false;
655     }
656   }
657 
658   // Check that all characters before the at-sign are alphanumeric or
659   // underscore.
660   for (char ch : prefix) {
661     if (!base::IsAsciiAlpha(ch) && !base::IsAsciiDigit(ch) && ch != '_')
662       return false;
663   }
664 
665   // Check that the initial token (before the first hyphen/underscore)
666   // is 1 - 3 alphabetical characters (a language tag).
667   for (size_t i = 0; i < prefix.size(); i++) {
668     char ch = prefix[i];
669     if (ch == '_') {
670       if (i < 1 || i > 3)
671         return false;
672       break;
673     }
674     if (!base::IsAsciiAlpha(ch))
675       return false;
676   }
677 
678   // Check that the all tokens after the initial token are 1 - 8 characters.
679   // (Tokenize/StringTokenizer don't work here, they collapse multiple
680   // delimiters into one.)
681   int token_len = 0;
682   int token_index = 0;
683   for (char ch : prefix) {
684     if (ch != '_') {
685       token_len++;
686       continue;
687     }
688 
689     if (token_index > 0 && (token_len < 1 || token_len > 8)) {
690       return false;
691     }
692     token_index++;
693     token_len = 0;
694   }
695   if (token_index == 0 && (token_len < 1 || token_len > 3))
696     return false;
697   if (token_len < 1 || token_len > 8)
698     return false;
699 
700   return true;
701 }
702 
GetStringUTF8(int message_id)703 std::string GetStringUTF8(int message_id) {
704   return base::UTF16ToUTF8(GetStringUTF16(message_id));
705 }
706 
GetStringUTF16(int message_id)707 base::string16 GetStringUTF16(int message_id) {
708   ui::ResourceBundle& rb = ui::ResourceBundle::GetSharedInstance();
709   base::string16 str = rb.GetLocalizedString(message_id);
710   AdjustParagraphDirectionality(&str);
711 
712   return str;
713 }
714 
GetStringFUTF16(int message_id,const std::vector<base::string16> & replacements,std::vector<size_t> * offsets)715 base::string16 GetStringFUTF16(int message_id,
716                                const std::vector<base::string16>& replacements,
717                                std::vector<size_t>* offsets) {
718   // TODO(tc): We could save a string copy if we got the raw string as
719   // a StringPiece and were able to call ReplaceStringPlaceholders with
720   // a StringPiece format string and base::string16 substitution strings.  In
721   // practice, the strings should be relatively short.
722   ui::ResourceBundle& rb = ui::ResourceBundle::GetSharedInstance();
723   const base::string16& format_string = rb.GetLocalizedString(message_id);
724 
725 #if DCHECK_IS_ON()
726   // Make sure every replacement string is being used, so we don't just
727   // silently fail to insert one. If |offsets| is non-NULL, then don't do this
728   // check as the code may simply want to find the placeholders rather than
729   // actually replacing them.
730   if (!offsets) {
731     // $9 is the highest allowed placeholder.
732     for (size_t i = 0; i < 9; ++i) {
733       bool placeholder_should_exist = replacements.size() > i;
734 
735       base::string16 placeholder = base::ASCIIToUTF16("$");
736       placeholder += (L'1' + i);
737       size_t pos = format_string.find(placeholder);
738       if (placeholder_should_exist) {
739         DCHECK_NE(std::string::npos, pos) << " Didn't find a " << placeholder
740                                           << " placeholder in "
741                                           << format_string;
742       } else {
743         DCHECK_EQ(std::string::npos, pos) << " Unexpectedly found a "
744                                           << placeholder << " placeholder in "
745                                           << format_string;
746       }
747     }
748   }
749 #endif
750 
751   base::string16 formatted = base::ReplaceStringPlaceholders(
752       format_string, replacements, offsets);
753   AdjustParagraphDirectionality(&formatted);
754 
755   return formatted;
756 }
757 
GetStringFUTF8(int message_id,const base::string16 & a)758 std::string GetStringFUTF8(int message_id,
759                            const base::string16& a) {
760   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
761 }
762 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b)763 std::string GetStringFUTF8(int message_id,
764                            const base::string16& a,
765                            const base::string16& b) {
766   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
767 }
768 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c)769 std::string GetStringFUTF8(int message_id,
770                            const base::string16& a,
771                            const base::string16& b,
772                            const base::string16& c) {
773   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
774 }
775 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c,const base::string16 & d)776 std::string GetStringFUTF8(int message_id,
777                            const base::string16& a,
778                            const base::string16& b,
779                            const base::string16& c,
780                            const base::string16& d) {
781   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
782 }
783 
// One-replacement convenience overload. Wraps |a| in a single-element vector
// and forwards to the vector-based GetStringFUTF16(), passing a null offsets
// pointer.
base::string16 GetStringFUTF16(int message_id, const base::string16& a) {
  std::vector<base::string16> args(1, a);
  return GetStringFUTF16(message_id, args, nullptr);
}
789 
// Two-replacement convenience overload. Delegates to the overload that also
// takes a std::vector<size_t>* and hands it a null pointer.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b) {
  // A typed null pointer makes it explicit that the (a, b, offsets) overload
  // is the one being called, not the three-string overload.
  std::vector<size_t>* const no_offsets = nullptr;
  return GetStringFUTF16(message_id, a, b, no_offsets);
}
795 
// Three-replacement convenience overload. Collects |a|, |b| and |c| (in that
// order) into a vector and forwards to the vector-based GetStringFUTF16(),
// passing a null offsets pointer.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c) {
  std::vector<base::string16> args{a, b, c};
  return GetStringFUTF16(message_id, args, nullptr);
}
803 
// Four-replacement convenience overload. Collects |a|, |b|, |c| and |d| (in
// that order) into a vector and forwards to the vector-based
// GetStringFUTF16(), passing a null offsets pointer.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d) {
  std::vector<base::string16> args{a, b, c, d};
  return GetStringFUTF16(message_id, args, nullptr);
}
812 
// Five-replacement convenience overload. Collects |a| through |e| (in that
// order) into a vector and forwards to the vector-based GetStringFUTF16(),
// passing a null offsets pointer.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d,
                               const base::string16& e) {
  std::vector<base::string16> args{a, b, c, d, e};
  return GetStringFUTF16(message_id, args, nullptr);
}
822 
// Formats |message_id| with the single replacement |a| and stores in
// |*offset| the first offset reported by the vector-based GetStringFUTF16().
// |offset| must not be null (DCHECKed). A second DCHECK verifies that exactly
// one offset was reported.
// NOTE(review): when DCHECKs are compiled out and no offset is reported, the
// read below touches an empty vector - confirm every caller's message
// contains the placeholder.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               size_t* offset) {
  DCHECK(offset);
  std::vector<base::string16> args{a};
  std::vector<size_t> found_offsets;
  base::string16 formatted = GetStringFUTF16(message_id, args, &found_offsets);
  DCHECK_EQ(1u, found_offsets.size());
  *offset = found_offsets.front();
  return formatted;
}
834 
// Two-replacement overload that also forwards the caller's |offsets| pointer.
// Collects |a| and |b| (in that order) into a vector and passes it, together
// with |offsets| unchanged, to the vector-based GetStringFUTF16().
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               std::vector<size_t>* offsets) {
  std::vector<base::string16> args{a, b};
  return GetStringFUTF16(message_id, args, offsets);
}
842 
GetStringFUTF16Int(int message_id,int a)843 base::string16 GetStringFUTF16Int(int message_id, int a) {
844   return GetStringFUTF16(message_id, base::FormatNumber(a));
845 }
846 
GetStringFUTF16Int(int message_id,int64_t a)847 base::string16 GetStringFUTF16Int(int message_id, int64_t a) {
848   return GetStringFUTF16(message_id, base::FormatNumber(a));
849 }
850 
GetPluralStringFUTF16(int message_id,int number)851 base::string16 GetPluralStringFUTF16(int message_id, int number) {
852   return base::i18n::MessageFormatter::FormatWithNumberedArgs(
853       GetStringUTF16(message_id), number);
854 }
855 
GetPluralStringFUTF8(int message_id,int number)856 std::string GetPluralStringFUTF8(int message_id, int number) {
857   return base::UTF16ToUTF8(GetPluralStringFUTF16(message_id, number));
858 }
859 
GetSingleOrMultipleStringUTF16(int message_id,bool is_multiple)860 base::string16 GetSingleOrMultipleStringUTF16(int message_id,
861                                                bool is_multiple) {
862   return base::i18n::MessageFormatter::FormatWithNumberedArgs(
863       GetStringUTF16(message_id), is_multiple ? "multiple" : "single");
864 }
865 
SortStrings16(const std::string & locale,std::vector<base::string16> * strings)866 void SortStrings16(const std::string& locale,
867                    std::vector<base::string16>* strings) {
868   SortVectorWithStringKey(locale, strings, false);
869 }
870 
GetAvailableLocales()871 const std::vector<std::string>& GetAvailableLocales() {
872   return g_available_locales.Get();
873 }
874 
GetAcceptLanguagesForLocale(const std::string & display_locale,std::vector<std::string> * locale_codes)875 void GetAcceptLanguagesForLocale(const std::string& display_locale,
876                                  std::vector<std::string>* locale_codes) {
877   for (const char* accept_language : kAcceptLanguageList) {
878     if (!l10n_util::IsLocaleNameTranslated(accept_language, display_locale)) {
879       // TODO(jungshik) : Put them at the end of the list with language codes
880       // enclosed by brackets instead of skipping.
881       continue;
882     }
883     locale_codes->push_back(accept_language);
884   }
885 }
886 
GetAcceptLanguages(std::vector<std::string> * locale_codes)887 void GetAcceptLanguages(std::vector<std::string>* locale_codes) {
888   for (const char* accept_language : kAcceptLanguageList) {
889     locale_codes->push_back(accept_language);
890   }
891 }
892 
IsLanguageAccepted(const std::string & display_locale,const std::string & locale)893 bool IsLanguageAccepted(const std::string& display_locale,
894                         const std::string& locale) {
895   for (const char* accept_language : kAcceptLanguageList) {
896     if (accept_language == locale &&
897         l10n_util::IsLocaleNameTranslated(locale.c_str(), display_locale)) {
898       return true;
899     }
900   }
901   return false;
902 }
903 
GetLocalizedContentsWidthInPixels(int pixel_resource_id)904 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
905   int width = 0;
906   base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
907   DCHECK_GT(width, 0);
908   return width;
909 }
910 
GetAcceptLanguageListForTesting()911 const char* const* GetAcceptLanguageListForTesting() {
912   return kAcceptLanguageList;
913 }
914 
GetAcceptLanguageListSizeForTesting()915 size_t GetAcceptLanguageListSizeForTesting() {
916   return base::size(kAcceptLanguageList);
917 }
918 
919 }  // namespace l10n_util
920