1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/spellcheck/common/spellcheck_common.h"
6 
7 #include "base/check.h"
8 #include "base/command_line.h"
9 #include "base/files/file_path.h"
10 #include "base/metrics/field_trial.h"
11 #include "base/stl_util.h"
12 #include "base/strings/string_util.h"
13 #include "third_party/icu/source/common/unicode/uloc.h"
14 #include "third_party/icu/source/common/unicode/urename.h"
15 #include "third_party/icu/source/common/unicode/utypes.h"
16 
17 namespace spellcheck {
18 
// One row of the supported-language table: pairs a language with the
// language-region name used by dictionaries.
struct LanguageRegion {
  // The language.
  const char* language;

  // Language & region, used by dictionaries.
  const char* language_region;
};
23 
// Pairs a language with the version string that corresponds to it.
struct LanguageVersion {
  // The language input.
  const char* language;

  // The corresponding version.
  const char* version;
};
28 
29 static constexpr LanguageRegion kSupportedSpellCheckerLanguages[] = {
30     // Several languages are not to be included in the spellchecker list:
31     // th-TH, vi-VI.
32     // clang-format off
33     {"af", "af-ZA"},
34     {"bg", "bg-BG"},
35     {"ca", "ca-ES"},
36     {"cs", "cs-CZ"},
37     {"cy", "cy-GB"},
38     {"da", "da-DK"},
39     {"de", "de-DE"},
40     {"el", "el-GR"},
41     {"en-AU", "en-AU"},
42     {"en-CA", "en-CA"},
43     {"en-GB", "en-GB"},
44     {"en-GB-oxendict", "en-GB-oxendict"},
45     {"en-US", "en-US"},
46     {"es", "es-ES"},
47     {"es-419", "es-ES"},
48     {"es-AR", "es-ES"},
49     {"es-ES", "es-ES"},
50     {"es-MX", "es-ES"},
51     {"es-US", "es-ES"},
52     {"et", "et-EE"},
53     {"fa", "fa-IR"},
54     {"fo", "fo-FO"},
55     {"fr", "fr-FR"},
56     {"he", "he-IL"},
57     {"hi", "hi-IN"},
58     {"hr", "hr-HR"},
59     {"hu", "hu-HU"},
60     {"hy", "hy"},
61     {"id", "id-ID"},
62     {"it", "it-IT"},
63     {"ko", "ko"},
64     {"lt", "lt-LT"},
65     {"lv", "lv-LV"},
66     {"nb", "nb-NO"},
67     {"nl", "nl-NL"},
68     {"pl", "pl-PL"},
69     {"pt-BR", "pt-BR"},
70     {"pt-PT", "pt-PT"},
71     {"ro", "ro-RO"},
72     {"ru", "ru-RU"},
73     {"sh", "sh"},
74     {"sk", "sk-SK"},
75     {"sl", "sl-SI"},
76     {"sq", "sq"},
77     {"sr", "sr"},
78     {"sv", "sv-SE"},
79     {"ta", "ta-IN"},
80     {"tg", "tg-TG"},
81     {"tr", "tr-TR"},
82     {"uk", "uk-UA"},
83     {"vi", "vi-VN"},
84     // clang-format on
85 };
86 
IsValidRegion(const std::string & region)87 bool IsValidRegion(const std::string& region) {
88   for (const auto& lang_region : kSupportedSpellCheckerLanguages) {
89     if (lang_region.language_region == region)
90       return true;
91   }
92   return false;
93 }
94 
95 // This function returns the language-region version of language name.
96 // e.g. returns hi-IN for hi.
GetSpellCheckLanguageRegion(base::StringPiece input_language)97 std::string GetSpellCheckLanguageRegion(base::StringPiece input_language) {
98   for (const auto& lang_region : kSupportedSpellCheckerLanguages) {
99     if (lang_region.language == input_language)
100       return lang_region.language_region;
101   }
102 
103   return input_language.as_string();
104 }
105 
GetVersionedFileName(base::StringPiece input_language,const base::FilePath & dict_dir)106 base::FilePath GetVersionedFileName(base::StringPiece input_language,
107                                     const base::FilePath& dict_dir) {
108   // The default dictionary version is 3-0. This version indicates that the bdic
109   // file contains a checksum.
110   static const char kDefaultVersionString[] = "-3-0";
111 
112   // Add non-default version strings here. Use the same version for all the
113   // dictionaries that you add at the same time. Increment the major version
114   // number if you're updating either dic or aff files. Increment the minor
115   // version number if you're updating only dic_delta files.
116   static constexpr LanguageVersion kSpecialVersionString[] = {
117       // Jan 9, 2013: Add "FLAG num" to aff to avoid heapcheck crash.
118       {"tr-TR", "-4-0"},
119 
120       // Mar 4, 2014: Add Tajik dictionary.
121       {"tg-TG", "-5-0"},
122 
123       // Feb 2019: Initial check-in of Welsh.
124       {"cy-GB", "-1-0"},
125 
126       // April 2019: Initial check-in of Armenian.
127       {"hy", "-1-0"},
128 
129       // November 2019: Update Serbian-Latin and Serbian-Cyrillic
130       {"sh", "-4-0"},
131       {"sr", "-4-0"},
132 
133       // January 2020: Update en-* and fa-IR dictionaries from upstream.
134       {"en-AU", "-9-0"},
135       {"en-CA", "-9-0"},
136       {"en-GB", "-9-0"},
137       {"en-US", "-9-0"},
138       {"fa-IR", "-9-0"},
139 
140       // March 2020: Update uk-UA dictionary from upstream.
141       {"uk-UA", "-4-0"},
142 
143       // June 2020: Add the en-GB-oxendict dictionary.
144       {"en-GB-oxendict", "-9-0"},
145   };
146 
147   // Generate the bdict file name using default version string or special
148   // version string, depending on the language.
149   std::string language = GetSpellCheckLanguageRegion(input_language);
150   std::string version = kDefaultVersionString;
151   for (const auto& lang_ver : kSpecialVersionString) {
152     if (language == lang_ver.language) {
153       version = lang_ver.version;
154       break;
155     }
156   }
157   std::string versioned_bdict_file_name(language + version + ".bdic");
158   return dict_dir.AppendASCII(versioned_bdict_file_name);
159 }
160 
GetCorrespondingSpellCheckLanguage(base::StringPiece language)161 std::string GetCorrespondingSpellCheckLanguage(base::StringPiece language) {
162   std::string best_match;
163   // Look for exact match in the Spell Check language list.
164   for (const auto& lang_region : kSupportedSpellCheckerLanguages) {
165     // First look for exact match in the language region of the list.
166     if (lang_region.language == language)
167       return language.as_string();
168 
169     // Next, look for exact match in the language_region part of the list.
170     if (lang_region.language_region == language) {
171       if (best_match.empty())
172         best_match = lang_region.language;
173     }
174   }
175 
176   // No match found - return best match, if any.
177   return best_match;
178 }
179 
SpellCheckLanguages()180 std::vector<std::string> SpellCheckLanguages() {
181   std::vector<std::string> languages;
182   for (const auto& lang_region : kSupportedSpellCheckerLanguages)
183     languages.push_back(lang_region.language);
184   return languages;
185 }
186 
GetISOLanguageCountryCodeFromLocale(const std::string & locale,std::string * language_code,std::string * country_code)187 void GetISOLanguageCountryCodeFromLocale(const std::string& locale,
188                                          std::string* language_code,
189                                          std::string* country_code) {
190   DCHECK(language_code);
191   DCHECK(country_code);
192   char language[ULOC_LANG_CAPACITY] = ULOC_ENGLISH;
193   const char* country = "USA";
194   if (!locale.empty()) {
195     UErrorCode error = U_ZERO_ERROR;
196     char id[ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY];
197     uloc_addLikelySubtags(locale.c_str(), id, base::size(id), &error);
198     error = U_ZERO_ERROR;
199     uloc_getLanguage(id, language, base::size(language), &error);
200     country = uloc_getISO3Country(id);
201   }
202   *language_code = std::string(language);
203   *country_code = std::string(country);
204 }
205 
FillSuggestions(const std::vector<std::vector<base::string16>> & suggestions_list,std::vector<base::string16> * optional_suggestions)206 void FillSuggestions(
207     const std::vector<std::vector<base::string16>>& suggestions_list,
208     std::vector<base::string16>* optional_suggestions) {
209   DCHECK(optional_suggestions);
210   size_t num_languages = suggestions_list.size();
211 
212   // Compute maximum number of suggestions in a single language.
213   size_t max_suggestions = 0;
214   for (const auto& suggestions : suggestions_list)
215     max_suggestions = std::max(max_suggestions, suggestions.size());
216 
217   for (size_t count = 0; count < (max_suggestions * num_languages); ++count) {
218     size_t language = count % num_languages;
219     size_t index = count / num_languages;
220 
221     if (suggestions_list[language].size() <= index)
222       continue;
223 
224     const base::string16& suggestion = suggestions_list[language][index];
225     // Only add the suggestion if it's unique.
226     if (!base::Contains(*optional_suggestions, suggestion)) {
227       optional_suggestions->push_back(suggestion);
228     }
229     if (optional_suggestions->size() >= kMaxSuggestions) {
230       break;
231     }
232   }
233 }
234 
235 }  // namespace spellcheck
236