1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "ui/base/l10n/l10n_util.h"
6 
7 #include <algorithm>
8 #include <cstdlib>
9 #include <iterator>
10 #include <memory>
11 #include <string>
12 
13 #include "base/command_line.h"
14 #include "base/compiler_specific.h"
15 #include "base/files/file_util.h"
16 #include "base/i18n/file_util_icu.h"
17 #include "base/i18n/message_formatter.h"
18 #include "base/i18n/number_formatting.h"
19 #include "base/i18n/rtl.h"
20 #include "base/i18n/string_compare.h"
21 #include "base/lazy_instance.h"
22 #include "base/stl_util.h"
23 #include "base/strings/string_number_conversions.h"
24 #include "base/strings/string_split.h"
25 #include "base/strings/string_util.h"
26 #include "base/strings/stringprintf.h"
27 #include "base/strings/sys_string_conversions.h"
28 #include "base/strings/utf_string_conversions.h"
29 #include "build/build_config.h"
30 #include "third_party/icu/source/common/unicode/rbbi.h"
31 #include "third_party/icu/source/common/unicode/uloc.h"
32 #include "ui/base/l10n/l10n_util_collator.h"
33 #include "ui/base/resource/resource_bundle.h"
34 #include "ui/base/ui_base_paths.h"
35 
36 #if defined(OS_ANDROID)
37 #include "base/android/locale_utils.h"
38 #include "ui/base/l10n/l10n_util_android.h"
39 #endif
40 
41 #if defined(USE_GLIB)
42 #include <glib.h>
43 #endif
44 
45 #if defined(OS_WIN)
46 #include "ui/base/l10n/l10n_util_win.h"
47 #endif  // OS_WIN
48 
49 namespace {
50 
// Language codes named by this table (judging by the identifier, the set
// offered for Accept-Language; the consumers are outside this excerpt).
// Entries are plain ASCII tags using '-' as the subtag separator and are
// listed in ascending byte order. The table lives in the anonymous namespace,
// so no extra `static` is needed for internal linkage.
constexpr const char* kAcceptLanguageList[] = {
    "af",     // Afrikaans
    "am",     // Amharic
    "an",     // Aragonese
    "ar",     // Arabic
    "ast",    // Asturian
    "az",     // Azerbaijani
    "be",     // Belarusian
    "bg",     // Bulgarian
    "bh",     // Bihari
    "bn",     // Bengali
    "br",     // Breton
    "bs",     // Bosnian
    "ca",     // Catalan
    "ceb",    // Cebuano
    "ckb",    // Kurdish (Arabic), Sorani
    "co",     // Corsican
    "cs",     // Czech
    "cy",     // Welsh
    "da",     // Danish
    "de",     // German
    "de-AT",  // German (Austria)
    "de-CH",  // German (Switzerland)
    "de-DE",  // German (Germany)
    "de-LI",  // German (Liechtenstein)
    "el",     // Greek
    "en",     // English
    "en-AU",  // English (Australia)
    "en-CA",  // English (Canada)
    "en-GB",  // English (UK)
    "en-IN",  // English (India)
    "en-NZ",  // English (New Zealand)
    "en-US",  // English (US)
    "en-ZA",  // English (South Africa)
    "eo",     // Esperanto
    // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
    // Spanish speaking countries?
    "es",      // Spanish
    "es-419",  // Spanish (Latin America)
    "es-AR",   // Spanish (Argentina)
    "es-CL",   // Spanish (Chile)
    "es-CO",   // Spanish (Colombia)
    "es-CR",   // Spanish (Costa Rica)
    "es-ES",   // Spanish (Spain)
    "es-HN",   // Spanish (Honduras)
    "es-MX",   // Spanish (Mexico)
    "es-PE",   // Spanish (Peru)
    "es-US",   // Spanish (US)
    "es-UY",   // Spanish (Uruguay)
    "es-VE",   // Spanish (Venezuela)
    "et",      // Estonian
    "eu",      // Basque
    "fa",      // Persian
    "fi",      // Finnish
    "fil",     // Filipino
    "fo",      // Faroese
    "fr",      // French
    "fr-CA",   // French (Canada)
    "fr-CH",   // French (Switzerland)
    "fr-FR",   // French (France)
    "fy",      // Frisian
    "ga",      // Irish
    "gd",      // Scots Gaelic
    "gl",      // Galician
    "gn",      // Guarani
    "gu",      // Gujarati
    "ha",      // Hausa
    "haw",     // Hawaiian
    "he",      // Hebrew
    "hi",      // Hindi
    "hmn",     // Hmong
    "hr",      // Croatian
    "ht",      // Haitian Creole
    "hu",      // Hungarian
    "hy",      // Armenian
    "ia",      // Interlingua
    "id",      // Indonesian
    "ig",      // Igbo
    "is",      // Icelandic
    "it",      // Italian
    "it-CH",   // Italian (Switzerland)
    "it-IT",   // Italian (Italy)
    "ja",      // Japanese
    "jv",      // Javanese
    "ka",      // Georgian
    "kk",      // Kazakh
    "km",      // Cambodian
    "kn",      // Kannada
    "ko",      // Korean
    "ku",      // Kurdish
    "ky",      // Kyrgyz
    "la",      // Latin
    "lb",      // Luxembourgish
    "ln",      // Lingala
    "lo",      // Laothian
    "lt",      // Lithuanian
    "lv",      // Latvian
    "mg",      // Malagasy
    "mi",      // Maori
    "mk",      // Macedonian
    "ml",      // Malayalam
    "mn",      // Mongolian
    "mo",      // Moldavian
    "mr",      // Marathi
    "ms",      // Malay
    "mt",      // Maltese
    "my",      // Burmese
    "nb",      // Norwegian (Bokmal)
    "ne",      // Nepali
    "nl",      // Dutch
    "nn",      // Norwegian (Nynorsk)
    "no",      // Norwegian
    "ny",      // Nyanja
    "oc",      // Occitan
    "om",      // Oromo
    "or",      // Oriya
    "pa",      // Punjabi
    "pl",      // Polish
    "ps",      // Pashto
    "pt",      // Portuguese
    "pt-BR",   // Portuguese (Brazil)
    "pt-PT",   // Portuguese (Portugal)
    "qu",      // Quechua
    "rm",      // Romansh
    "ro",      // Romanian
    "ru",      // Russian
    "sd",      // Sindhi
    "sh",      // Serbo-Croatian
    "si",      // Sinhalese
    "sk",      // Slovak
    "sl",      // Slovenian
    "sm",      // Samoan
    "sn",      // Shona
    "so",      // Somali
    "sq",      // Albanian
    "sr",      // Serbian
    "st",      // Sesotho
    "su",      // Sundanese
    "sv",      // Swedish
    "sw",      // Swahili
    "ta",      // Tamil
    "te",      // Telugu
    "tg",      // Tajik
    "th",      // Thai
    "ti",      // Tigrinya
    "tk",      // Turkmen
    "to",      // Tonga
    "tr",      // Turkish
    "tt",      // Tatar
    "tw",      // Twi
    "ug",      // Uighur
    "uk",      // Ukrainian
    "ur",      // Urdu
    "uz",      // Uzbek
    "vi",      // Vietnamese
    "wa",      // Walloon
    "xh",      // Xhosa
    "yi",      // Yiddish
    "yo",      // Yoruba
    "zh",      // Chinese
    "zh-CN",   // Chinese (China)
    "zh-HK",   // Chinese (Hong Kong)
    "zh-TW",   // Chinese (Taiwan)
    "zu",      // Zulu
};
216 
217 // Returns true if |locale_name| has an alias in the ICU data file.
IsDuplicateName(const std::string & locale_name)218 bool IsDuplicateName(const std::string& locale_name) {
219   static const char* const kDuplicateNames[] = {
220     "ar_001",
221     "en",
222     "en_001",
223     "en_150",
224     "pt",  // pt-BR and pt-PT are used.
225     "zh",
226     "zh_hans_cn",
227     "zh_hant_hk",
228     "zh_hant_mo",
229     "zh_hans_sg",
230     "zh_hant_tw"
231   };
232 
233   // Skip all the es_Foo other than es_419 for now.
234   if (base::StartsWith(locale_name, "es_",
235                        base::CompareCase::INSENSITIVE_ASCII)) {
236     return !base::EndsWith(locale_name, "419", base::CompareCase::SENSITIVE);
237   }
238   for (const char* duplicate_name : kDuplicateNames) {
239     if (base::EqualsCaseInsensitiveASCII(duplicate_name, locale_name))
240       return true;
241   }
242   return false;
243 }
244 
245 // We added 30+ minimally populated locales with only a few entries
246 // (exemplar character set, script, writing direction and its own
247 // lanaguage name). These locales have to be distinguished from the
248 // fully populated locales to which Chrome is localized.
IsLocalePartiallyPopulated(const std::string & locale_name)249 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
250   // For partially populated locales, even the translation for "English"
251   // is not available. A more robust/elegant way to check is to add a special
252   // field (say, 'isPartial' to our version of ICU locale files) and
253   // check its value, but this hack seems to work well.
254   return !l10n_util::IsLocaleNameTranslated("en", locale_name);
255 }
256 
257 #if !defined(OS_MACOSX) || defined(TOOLKIT_QT)
IsLocaleAvailable(const std::string & locale)258 bool IsLocaleAvailable(const std::string& locale) {
259   // If locale has any illegal characters in it, we don't want to try to
260   // load it because it may be pointing outside the locale data file directory.
261   if (!base::i18n::IsFilenameLegal(base::ASCIIToUTF16(locale)))
262     return false;
263 
264   // IsLocalePartiallyPopulated() can be called here for an early return w/o
265   // checking the resource availability below. It'd help when Chrome is run
266   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
267   // but it'd slow down the start up time a little bit for locales Chrome is
268   // localized to. So, we don't call it here.
269   if (!l10n_util::IsLocaleSupportedByOS(locale))
270     return false;
271 
272   return ui::ResourceBundle::LocaleDataPakExists(locale);
273 }
274 #endif
275 
276 // On Linux, the text layout engine Pango determines paragraph directionality
277 // by looking at the first strongly-directional character in the text. This
278 // means text such as "Google Chrome foo bar..." will be layed out LTR even
279 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
280 // cases.
AdjustParagraphDirectionality(base::string16 * paragraph)281 void AdjustParagraphDirectionality(base::string16* paragraph) {
282 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
283   if (base::i18n::IsRTL() &&
284       base::i18n::StringContainsStrongRTLChars(*paragraph)) {
285     paragraph->insert(0, 1,
286                       static_cast<base::char16>(base::i18n::kRightToLeftMark));
287   }
288 #endif
289 }
290 
291 struct AvailableLocalesTraits
292     : base::internal::DestructorAtExitLazyInstanceTraits<
293           std::vector<std::string>> {
New__anon0a8ebb6d0111::AvailableLocalesTraits294   static std::vector<std::string>* New(void* instance) {
295     std::vector<std::string>* locales =
296         base::internal::DestructorAtExitLazyInstanceTraits<
297             std::vector<std::string>>::New(instance);
298     int num_locales = uloc_countAvailable();
299     for (int i = 0; i < num_locales; ++i) {
300       std::string locale_name = uloc_getAvailable(i);
301       // Filter out the names that have aliases.
302       if (IsDuplicateName(locale_name))
303         continue;
304       // Filter out locales for which we have only partially populated data
305       // and to which Chrome is not localized.
306       if (IsLocalePartiallyPopulated(locale_name))
307         continue;
308       if (!l10n_util::IsLocaleSupportedByOS(locale_name))
309         continue;
310       // Normalize underscores to hyphens because that's what our locale files
311       // use.
312       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
313 
314       // Map the Chinese locale names over to zh-CN and zh-TW.
315       if (base::LowerCaseEqualsASCII(locale_name, "zh-hans")) {
316         locale_name = "zh-CN";
317       } else if (base::LowerCaseEqualsASCII(locale_name, "zh-hant")) {
318         locale_name = "zh-TW";
319       }
320       locales->push_back(locale_name);
321     }
322 
323     return locales;
324   }
325 };
326 
327 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
328     g_available_locales = LAZY_INSTANCE_INITIALIZER;
329 
330 }  // namespace
331 
332 namespace l10n_util {
333 
// Returns the part of |locale| that precedes the first '-', or the whole
// string when it contains no hyphen.
std::string GetLanguage(const std::string& locale) {
  // find() yields npos when there is no hyphen, and substr(0, npos) is the
  // entire string.
  return locale.substr(0, locale.find('-'));
}
338 
339 // TODO(jshin): revamp this function completely to use a more sytematic
340 // and generic locale fallback based on ICU/CLDR.
CheckAndResolveLocale(const std::string & locale,std::string * resolved_locale)341 bool CheckAndResolveLocale(const std::string& locale,
342                            std::string* resolved_locale) {
343 #if !defined(OS_MACOSX) || defined(TOOLKIT_QT)
344   if (IsLocaleAvailable(locale)) {
345     *resolved_locale = locale;
346     return true;
347   }
348 
349   // If there's a variant, skip over it so we can try without the region
350   // code.  For example, ca_ES@valencia should cause us to try ca@valencia
351   // before ca.
352   std::string::size_type variant_pos = locale.find('@');
353   if (variant_pos != std::string::npos)
354     return false;
355 
356   // If the locale matches language but not country, use that instead.
357   // TODO(jungshik) : Nothing is done about languages that Chrome
358   // does not support but available on Windows. We fall
359   // back to en-US in GetApplicationLocale so that it's a not critical,
360   // but we can do better.
361   const std::string lang(GetLanguage(locale));
362   if (lang.size() < locale.size()) {
363     std::string region(locale, lang.size() + 1);
364     std::string tmp_locale(lang);
365     // Map es-RR other than es-ES to es-419 (Chrome's Latin American
366     // Spanish locale).
367     if (base::LowerCaseEqualsASCII(lang, "es") &&
368         !base::LowerCaseEqualsASCII(region, "es")) {
369       tmp_locale.append("-419");
370     } else if (base::LowerCaseEqualsASCII(lang, "pt")) {
371       // Map pt-RR other than pt-BR to pt-PT. Note that "pt" by itself maps to
372       // pt-BR (logic below).
373       tmp_locale.append("-PT");
374     } else if (base::LowerCaseEqualsASCII(lang, "zh")) {
375       // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
376       if (base::LowerCaseEqualsASCII(region, "hk") ||
377           base::LowerCaseEqualsASCII(region, "mo")) {  // Macao
378         tmp_locale.append("-TW");
379       } else {
380         tmp_locale.append("-CN");
381       }
382     } else if (base::LowerCaseEqualsASCII(lang, "en")) {
383       // Map Australian, Canadian, Indian, New Zealand and South African
384       // English to British English for now.
385       // TODO(jungshik): en-CA may have to change sides once
386       // we have OS locale separate from app locale (Chrome's UI language).
387       if (base::LowerCaseEqualsASCII(region, "au") ||
388           base::LowerCaseEqualsASCII(region, "ca") ||
389           base::LowerCaseEqualsASCII(region, "in") ||
390           base::LowerCaseEqualsASCII(region, "nz") ||
391           base::LowerCaseEqualsASCII(region, "za")) {
392         tmp_locale.append("-GB");
393       } else {
394         tmp_locale.append("-US");
395       }
396     }
397     if (IsLocaleAvailable(tmp_locale)) {
398       resolved_locale->swap(tmp_locale);
399       return true;
400     }
401   }
402 
403   // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
404   // Note that pt-RR is mapped to pt-PT above, but we want pt -> pt-BR here.
405   struct {
406     const char* source;
407     const char* dest;
408   } static constexpr kAliasMap[] = {
409       {"en", "en-US"}, {"iw", "he"},  {"no", "nb"},
410       {"pt", "pt-BR"}, {"tl", "fil"}, {"zh", "zh-CN"},
411   };
412   for (const auto& alias : kAliasMap) {
413     if (base::LowerCaseEqualsASCII(lang, alias.source)) {
414       std::string tmp_locale(alias.dest);
415       if (IsLocaleAvailable(tmp_locale)) {
416         resolved_locale->swap(tmp_locale);
417         return true;
418       }
419     }
420   }
421 #else
422   NOTIMPLEMENTED();
423 #endif  // !defined(OS_MACOSX)
424 
425   return false;
426 }
427 
#if defined(OS_MACOSX) && !defined(TOOLKIT_QT)
// Picks the application locale on Mac: the locale override if one is set
// (Cocoa for the browser), else |pref_locale|, else "en-US".
std::string GetApplicationLocaleInternalMac(const std::string& pref_locale) {
  const std::string override_locale = l10n_util::GetLocaleOverride();
  if (!override_locale.empty())
    return override_locale;

  if (!pref_locale.empty())
    return pref_locale;

  // The cases above cover everything Chrome normally hits; some unit tests
  // still need something to fall back to.
  return "en-US";
}
#endif
444 
445 #if !defined(OS_MACOSX) || defined(TOOLKIT_QT)
GetApplicationLocaleInternalNonMac(const std::string & pref_locale)446 std::string GetApplicationLocaleInternalNonMac(const std::string& pref_locale) {
447   std::string resolved_locale;
448   std::vector<std::string> candidates;
449 
450   // We only use --lang and the app pref on Windows.  On Linux, we only
451   // look at the LC_*/LANG environment variables.  We do, however, pass --lang
452   // to renderer and plugin processes so they know what language the parent
453   // process decided to use.
454 
455 #if defined(OS_WIN)
456   // First, try the preference value.
457   if (!pref_locale.empty())
458     candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));
459 
460   // Next, try the overridden locale.
461   const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
462   if (!languages.empty()) {
463     candidates.reserve(candidates.size() + languages.size());
464     std::transform(languages.begin(), languages.end(),
465                    std::back_inserter(candidates),
466                    &base::i18n::GetCanonicalLocale);
467   } else {
468     // If no override was set, defer to ICU
469     candidates.push_back(base::i18n::GetConfiguredLocale());
470   }
471 #elif defined(OS_ANDROID)
472   // Try pref_locale first.
473   if (!pref_locale.empty())
474     candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));
475 
476   // On Android, query java.util.Locale for the default locale.
477   candidates.push_back(base::android::GetDefaultLocaleString());
478 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS) && !defined(TOOLKIT_QT)
479   // GLib implements correct environment variable parsing with
480   // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
481   // We used to use our custom parsing code along with ICU for this purpose.
482   // If we have a port that does not depend on GTK, we have to
483   // restore our custom code for that port.
484   const char* const* languages = g_get_language_names();
485   DCHECK(languages);  // A valid pointer is guaranteed.
486   DCHECK(*languages);  // At least one entry, "C", is guaranteed.
487 
488   for (; *languages; ++languages) {
489     candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
490   }
491 #else
492   // By default, use the application locale preference. This applies to ChromeOS
493   // and linux systems without glib.
494   if (!pref_locale.empty())
495     candidates.push_back(pref_locale);
496 #endif  // defined(OS_WIN)
497 
498   std::vector<std::string>::const_iterator i = candidates.begin();
499   for (; i != candidates.end(); ++i) {
500     if (CheckAndResolveLocale(*i, &resolved_locale)) {
501       return resolved_locale;
502     }
503   }
504 
505   // Fallback on en-US.
506   const std::string fallback_locale("en-US");
507   if (IsLocaleAvailable(fallback_locale))
508     return fallback_locale;
509 
510   return std::string();
511 }
512 #endif  // !defined(OS_MACOSX)
513 
GetApplicationLocaleInternal(const std::string & pref_locale)514 std::string GetApplicationLocaleInternal(const std::string& pref_locale) {
515 #if defined(OS_MACOSX) && !defined(TOOLKIT_QT)
516   return GetApplicationLocaleInternalMac(pref_locale);
517 #else
518   return GetApplicationLocaleInternalNonMac(pref_locale);
519 #endif
520 }
521 
GetApplicationLocale(const std::string & pref_locale,bool set_icu_locale)522 std::string GetApplicationLocale(const std::string& pref_locale,
523                                  bool set_icu_locale) {
524   const std::string locale = GetApplicationLocaleInternal(pref_locale);
525   if (set_icu_locale && !locale.empty())
526     base::i18n::SetICUDefaultLocale(locale);
527   return locale;
528 }
529 
GetApplicationLocale(const std::string & pref_locale)530 std::string GetApplicationLocale(const std::string& pref_locale) {
531   return GetApplicationLocale(pref_locale, true /* set_icu_locale */);
532 }
533 
IsLocaleNameTranslated(const char * locale,const std::string & display_locale)534 bool IsLocaleNameTranslated(const char* locale,
535                             const std::string& display_locale) {
536   base::string16 display_name =
537       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
538   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
539   // uloc_getDisplayName returns the actual translation or the default
540   // value (locale code), we have to rely on this hack to tell whether
541   // the translation is available or not.  If ICU doesn't have a translated
542   // name for this locale, GetDisplayNameForLocale will just return the
543   // locale code.
544   return !base::IsStringASCII(display_name) ||
545       base::UTF16ToASCII(display_name) != locale;
546 }
547 
GetDisplayNameForLocale(const std::string & locale,const std::string & display_locale,bool is_for_ui,bool disallow_default)548 base::string16 GetDisplayNameForLocale(const std::string& locale,
549                                        const std::string& display_locale,
550                                        bool is_for_ui,
551                                        bool disallow_default) {
552   std::string locale_code = locale;
553   // Internally, we use the language code of zh-CN and zh-TW, but we want the
554   // display names to be Chinese (Simplified) and Chinese (Traditional) instead
555   // of Chinese (China) and Chinese (Taiwan).
556   // Translate uses "tl" (Tagalog) to mean "fil" (Filipino) until Google
557   // translate is changed to understand "fil". Make "tl" alias to "fil".
558   if (locale_code == "zh-CN")
559     locale_code = "zh-Hans";
560   else if (locale_code == "zh-TW")
561     locale_code = "zh-Hant";
562   else if (locale_code == "tl")
563     locale_code = "fil";
564   else if (locale_code == "mo")
565     locale_code = "ro-MD";
566 
567   base::string16 display_name;
568 #if defined(OS_IOS)
569   // Use the Foundation API to get the localized display name, removing the need
570   // for the ICU data file to include this data.
571   display_name = GetDisplayNameForLocale(locale_code, display_locale);
572 #else
573 #if defined(OS_ANDROID)
574   // Use Java API to get locale display name so that we can remove most of
575   // the lang data from icu data to reduce binary size, except for zh-Hans and
576   // zh-Hant because the current Android Java API doesn't support scripts.
577   // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
578   // Android Java API supports scripts.
579   if (!base::StartsWith(locale_code, "zh-Han", base::CompareCase::SENSITIVE)) {
580     display_name = GetDisplayNameForLocale(locale_code, display_locale);
581   } else
582 #endif  // defined(OS_ANDROID)
583   {
584     UErrorCode error = U_ZERO_ERROR;
585     const int kBufferSize = 1024;
586 
587     int actual_size;
588     // For Country code in ICU64 we need to call uloc_getDisplayCountry
589     if (locale_code[0] == '-' || locale_code[0] == '_') {
590       actual_size = uloc_getDisplayCountry(
591           locale_code.c_str(), display_locale.c_str(),
592           base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
593     } else {
594       actual_size = uloc_getDisplayName(
595           locale_code.c_str(), display_locale.c_str(),
596           base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
597     }
598     if (disallow_default && U_USING_DEFAULT_WARNING == error)
599       return base::string16();
600     DCHECK(U_SUCCESS(error));
601     display_name.resize(actual_size);
602   }
603 #endif  // defined(OS_IOS)
604 
605   // Add directional markup so parentheses are properly placed.
606   if (is_for_ui && base::i18n::IsRTL())
607     base::i18n::AdjustStringForLocaleDirection(&display_name);
608   return display_name;
609 }
610 
GetDisplayNameForCountry(const std::string & country_code,const std::string & display_locale)611 base::string16 GetDisplayNameForCountry(const std::string& country_code,
612                                         const std::string& display_locale) {
613   return GetDisplayNameForLocale("_" + country_code, display_locale, false);
614 }
615 
// Returns a copy of |locale| with every '-' replaced by '_'. The length and
// all other characters are unchanged.
std::string NormalizeLocale(const std::string& locale) {
  std::string normalized;
  normalized.reserve(locale.size());
  for (const char ch : locale)
    normalized.push_back(ch == '-' ? '_' : ch);
  return normalized;
}
622 
GetParentLocales(const std::string & current_locale,std::vector<std::string> * parent_locales)623 void GetParentLocales(const std::string& current_locale,
624                       std::vector<std::string>* parent_locales) {
625   std::string locale(NormalizeLocale(current_locale));
626 
627   const int kNameCapacity = 256;
628   char parent[kNameCapacity];
629   base::strlcpy(parent, locale.c_str(), kNameCapacity);
630   parent_locales->push_back(parent);
631   UErrorCode err = U_ZERO_ERROR;
632   while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
633     if (U_FAILURE(err))
634       break;
635     parent_locales->push_back(parent);
636   }
637 }
638 
IsValidLocaleSyntax(const std::string & locale)639 bool IsValidLocaleSyntax(const std::string& locale) {
640   // Check that the length is plausible.
641   if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
642     return false;
643 
644   // Strip off the part after an '@' sign, which might contain keywords,
645   // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
646   // We don't validate that part much, just check that there's at least one
647   // equals sign in a plausible place. Normalize the prefix so that hyphens
648   // are changed to underscores.
649   std::string prefix = NormalizeLocale(locale);
650   size_t split_point = locale.find("@");
651   if (split_point != std::string::npos) {
652     std::string keywords = locale.substr(split_point + 1);
653     prefix = locale.substr(0, split_point);
654 
655     size_t equals_loc = keywords.find("=");
656     if (equals_loc == 0 || equals_loc == std::string::npos ||
657         equals_loc > keywords.size() - 2) {
658       return false;
659     }
660   }
661 
662   // Check that all characters before the at-sign are alphanumeric or
663   // underscore.
664   for (char ch : prefix) {
665     if (!base::IsAsciiAlpha(ch) && !base::IsAsciiDigit(ch) && ch != '_')
666       return false;
667   }
668 
669   // Check that the initial token (before the first hyphen/underscore)
670   // is 1 - 3 alphabetical characters (a language tag).
671   for (size_t i = 0; i < prefix.size(); i++) {
672     char ch = prefix[i];
673     if (ch == '_') {
674       if (i < 1 || i > 3)
675         return false;
676       break;
677     }
678     if (!base::IsAsciiAlpha(ch))
679       return false;
680   }
681 
682   // Check that the all tokens after the initial token are 1 - 8 characters.
683   // (Tokenize/StringTokenizer don't work here, they collapse multiple
684   // delimiters into one.)
685   int token_len = 0;
686   int token_index = 0;
687   for (char ch : prefix) {
688     if (ch != '_') {
689       token_len++;
690       continue;
691     }
692 
693     if (token_index > 0 && (token_len < 1 || token_len > 8)) {
694       return false;
695     }
696     token_index++;
697     token_len = 0;
698   }
699   if (token_index == 0 && (token_len < 1 || token_len > 3))
700     return false;
701   if (token_len < 1 || token_len > 8)
702     return false;
703 
704   return true;
705 }
706 
GetStringUTF8(int message_id)707 std::string GetStringUTF8(int message_id) {
708   return base::UTF16ToUTF8(GetStringUTF16(message_id));
709 }
710 
GetStringUTF16(int message_id)711 base::string16 GetStringUTF16(int message_id) {
712   ui::ResourceBundle& rb = ui::ResourceBundle::GetSharedInstance();
713   base::string16 str = rb.GetLocalizedString(message_id);
714   AdjustParagraphDirectionality(&str);
715 
716   return str;
717 }
718 
GetStringFUTF16(int message_id,const std::vector<base::string16> & replacements,std::vector<size_t> * offsets)719 base::string16 GetStringFUTF16(int message_id,
720                                const std::vector<base::string16>& replacements,
721                                std::vector<size_t>* offsets) {
722   // TODO(tc): We could save a string copy if we got the raw string as
723   // a StringPiece and were able to call ReplaceStringPlaceholders with
724   // a StringPiece format string and base::string16 substitution strings.  In
725   // practice, the strings should be relatively short.
726   ui::ResourceBundle& rb = ui::ResourceBundle::GetSharedInstance();
727   const base::string16& format_string = rb.GetLocalizedString(message_id);
728 
729 #ifndef NDEBUG
730   // Make sure every replacement string is being used, so we don't just
731   // silently fail to insert one. If |offsets| is non-NULL, then don't do this
732   // check as the code may simply want to find the placeholders rather than
733   // actually replacing them.
734   if (!offsets) {
735     // $9 is the highest allowed placeholder.
736     for (size_t i = 0; i < 9; ++i) {
737       bool placeholder_should_exist = replacements.size() > i;
738 
739       base::string16 placeholder = base::ASCIIToUTF16("$");
740       placeholder += (L'1' + i);
741       size_t pos = format_string.find(placeholder);
742       if (placeholder_should_exist) {
743         DCHECK_NE(std::string::npos, pos) << " Didn't find a " << placeholder
744                                           << " placeholder in "
745                                           << format_string;
746       } else {
747         DCHECK_EQ(std::string::npos, pos) << " Unexpectedly found a "
748                                           << placeholder << " placeholder in "
749                                           << format_string;
750       }
751     }
752   }
753 #endif
754 
755   base::string16 formatted = base::ReplaceStringPlaceholders(
756       format_string, replacements, offsets);
757   AdjustParagraphDirectionality(&formatted);
758 
759   return formatted;
760 }
761 
GetStringFUTF8(int message_id,const base::string16 & a)762 std::string GetStringFUTF8(int message_id,
763                            const base::string16& a) {
764   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
765 }
766 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b)767 std::string GetStringFUTF8(int message_id,
768                            const base::string16& a,
769                            const base::string16& b) {
770   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
771 }
772 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c)773 std::string GetStringFUTF8(int message_id,
774                            const base::string16& a,
775                            const base::string16& b,
776                            const base::string16& c) {
777   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
778 }
779 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c,const base::string16 & d)780 std::string GetStringFUTF8(int message_id,
781                            const base::string16& a,
782                            const base::string16& b,
783                            const base::string16& c,
784                            const base::string16& d) {
785   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
786 }
787 
// Single-replacement convenience overload. Wraps |a| in a one-element vector
// and forwards to the vector-based overload without requesting offsets.
base::string16 GetStringFUTF16(int message_id, const base::string16& a) {
  const std::vector<base::string16> substitutions(1, a);
  return GetStringFUTF16(message_id, substitutions, nullptr);
}
793 
// Two-replacement convenience overload for callers that do not need the
// replacement offsets. Delegates to the (a, b, offsets) overload with a null
// offsets vector.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b) {
  std::vector<size_t>* const no_offsets = nullptr;
  return GetStringFUTF16(message_id, a, b, no_offsets);
}
799 
// Three-replacement convenience overload. Packs |a|, |b| and |c| (in that
// order) into a vector and forwards to the vector-based overload without
// requesting offsets.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c) {
  const std::vector<base::string16> substitutions{a, b, c};
  return GetStringFUTF16(message_id, substitutions, nullptr);
}
807 
// Four-replacement convenience overload. Packs |a| through |d| (in that
// order) into a vector and forwards to the vector-based overload without
// requesting offsets.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d) {
  const std::vector<base::string16> substitutions{a, b, c, d};
  return GetStringFUTF16(message_id, substitutions, nullptr);
}
816 
// Five-replacement convenience overload. Packs |a| through |e| (in that
// order) into a vector and forwards to the vector-based overload without
// requesting offsets.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d,
                               const base::string16& e) {
  const std::vector<base::string16> substitutions{a, b, c, d, e};
  return GetStringFUTF16(message_id, substitutions, nullptr);
}
826 
// Single-replacement overload that also reports an offset through |offset|.
// |offset| must be non-null (DCHECKed). The vector-based overload is asked to
// collect offsets; exactly one is expected (DCHECKed) and it is written to
// |*offset|.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               size_t* offset) {
  DCHECK(offset);
  const std::vector<base::string16> substitutions(1, a);
  std::vector<size_t> found_offsets;
  base::string16 text =
      GetStringFUTF16(message_id, substitutions, &found_offsets);
  DCHECK_EQ(1u, found_offsets.size());
  *offset = found_offsets.front();
  return text;
}
838 
// Two-replacement overload with an |offsets| out-parameter. Packs |a| and |b|
// (in that order) into a vector and forwards |offsets| unchanged to the
// vector-based overload.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               std::vector<size_t>* offsets) {
  const std::vector<base::string16> substitutions{a, b};
  return GetStringFUTF16(message_id, substitutions, offsets);
}
846 
GetStringFUTF16Int(int message_id,int a)847 base::string16 GetStringFUTF16Int(int message_id, int a) {
848   return GetStringFUTF16(message_id, base::FormatNumber(a));
849 }
850 
GetStringFUTF16Int(int message_id,int64_t a)851 base::string16 GetStringFUTF16Int(int message_id, int64_t a) {
852   return GetStringFUTF16(message_id, base::FormatNumber(a));
853 }
854 
GetPluralStringFUTF16(int message_id,int number)855 base::string16 GetPluralStringFUTF16(int message_id, int number) {
856   return base::i18n::MessageFormatter::FormatWithNumberedArgs(
857       GetStringUTF16(message_id), number);
858 }
859 
GetPluralStringFUTF8(int message_id,int number)860 std::string GetPluralStringFUTF8(int message_id, int number) {
861   return base::UTF16ToUTF8(GetPluralStringFUTF16(message_id, number));
862 }
863 
GetSingleOrMultipleStringUTF16(int message_id,bool is_multiple)864 base::string16 GetSingleOrMultipleStringUTF16(int message_id,
865                                                bool is_multiple) {
866   return base::i18n::MessageFormatter::FormatWithNumberedArgs(
867       GetStringUTF16(message_id), is_multiple ? "multiple" : "single");
868 }
869 
SortStrings16(const std::string & locale,std::vector<base::string16> * strings)870 void SortStrings16(const std::string& locale,
871                    std::vector<base::string16>* strings) {
872   SortVectorWithStringKey(locale, strings, false);
873 }
874 
GetAvailableLocales()875 const std::vector<std::string>& GetAvailableLocales() {
876   return g_available_locales.Get();
877 }
878 
GetAcceptLanguagesForLocale(const std::string & display_locale,std::vector<std::string> * locale_codes)879 void GetAcceptLanguagesForLocale(const std::string& display_locale,
880                                  std::vector<std::string>* locale_codes) {
881   for (const char* accept_language : kAcceptLanguageList) {
882     if (!l10n_util::IsLocaleNameTranslated(accept_language, display_locale)) {
883       // TODO(jungshik) : Put them at the end of the list with language codes
884       // enclosed by brackets instead of skipping.
885       continue;
886     }
887     locale_codes->push_back(accept_language);
888   }
889 }
890 
IsLanguageAccepted(const std::string & display_locale,const std::string & locale)891 bool IsLanguageAccepted(const std::string& display_locale,
892                         const std::string& locale) {
893   for (const char* accept_language : kAcceptLanguageList) {
894     if (accept_language == locale &&
895         l10n_util::IsLocaleNameTranslated(locale.c_str(), display_locale)) {
896       return true;
897     }
898   }
899   return false;
900 }
901 
GetLocalizedContentsWidthInPixels(int pixel_resource_id)902 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
903   int width = 0;
904   base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
905   DCHECK_GT(width, 0);
906   return width;
907 }
908 
GetAcceptLanguageListForTesting()909 const char* const* GetAcceptLanguageListForTesting() {
910   return kAcceptLanguageList;
911 }
912 
GetAcceptLanguageListSizeForTesting()913 size_t GetAcceptLanguageListSizeForTesting() {
914   return base::size(kAcceptLanguageList);
915 }
916 
917 }  // namespace l10n_util
918