1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <cstddef>
8 #include <set>
9 #include <utility>
10
11 #include "base/logging.h"
12 #include "base/macros.h"
13 #include "third_party/libaddressinput/chromium/trie.h"
14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
16 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
17 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
18
19 namespace autofill {
20
21 using ::i18n::addressinput::AddressData;
22 using ::i18n::addressinput::AddressField;
23 using ::i18n::addressinput::BuildCallback;
24 using ::i18n::addressinput::FieldProblemMap;
25 using ::i18n::addressinput::PreloadSupplier;
26 using ::i18n::addressinput::RegionData;
27 using ::i18n::addressinput::RegionDataBuilder;
28
29 using ::i18n::addressinput::ADMIN_AREA;
30 using ::i18n::addressinput::COUNTRY;
31 using ::i18n::addressinput::DEPENDENT_LOCALITY;
32 using ::i18n::addressinput::LOCALITY;
33 using ::i18n::addressinput::POSTAL_CODE;
34
35 using ::i18n::addressinput::INVALID_FORMAT;
36 using ::i18n::addressinput::MISMATCHING_VALUE;
37
38 namespace {
39
40 // Initial size for the buffer used in the canonicalizer.
41 static const size_t kInitialBufferSize = 32;
42
43 // A region and its metadata useful for constructing a suggestion.
44 struct Suggestion {
45 public:
46 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
47 // |region_to_suggest|, which should not be NULL.
Suggestionautofill::__anon3bf76f440111::Suggestion48 Suggestion(const RegionData* region_to_suggest,
49 AddressField matching_address_field,
50 bool region_key_matches)
51 : region_to_suggest(region_to_suggest),
52 matching_address_field(matching_address_field),
53 region_key_matches(region_key_matches) {
54 DCHECK(region_to_suggest);
55 }
56
~Suggestionautofill::__anon3bf76f440111::Suggestion57 ~Suggestion() {}
58
59 // The region that should be suggested. For example, if the region is ("CA",
60 // "California"), then either "CA" or "California" should be suggested.
61 const RegionData* region_to_suggest;
62
63 // The field in the address for which the suggestion should be made. For
64 // example, ADMIN_AREA in US means the suggestion should be made for the field
65 // labeled "State".
66 AddressField matching_address_field;
67
68 // True if the key of the region matches user input (the name may or may not
69 // match). "CA" should be suggested for a ("CA", "California") region.
70 //
71 // False if only the name of the region matches user input (the key does not
72 // match). "California" should be suggested for a ("CA", "California") region.
73 bool region_key_matches;
74 };
75
76 // Suggestions for an address. Contains lists of suggestions for administrative
77 // area, locality, and dependent locality fields of an address.
78 class AddressSuggestions {
79 public:
AddressSuggestions()80 AddressSuggestions() {}
~AddressSuggestions()81 ~AddressSuggestions() {}
82
83 // Marks all regions at |address_field| level as matching user input.
AllRegionsMatchForField(AddressField address_field)84 void AllRegionsMatchForField(AddressField address_field) {
85 all_regions_match_input_.insert(address_field);
86 }
87
88 // Marks given regions at |address_field| level as matching user input. The
89 // |regions_match_key| parameter contains the regions that match user input by
90 // their keys. The |regions_match_name| parameter contains the regions that
91 // match user input by their names.
92 //
93 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
94 // DEPENDENT_LOCALITY.
AddRegions(AddressField address_field,const std::set<const RegionData * > & regions_match_key,const std::set<const RegionData * > & regions_match_name)95 bool AddRegions(AddressField address_field,
96 const std::set<const RegionData*>& regions_match_key,
97 const std::set<const RegionData*>& regions_match_name) {
98 DCHECK(address_field >= ADMIN_AREA);
99 DCHECK(address_field <= DEPENDENT_LOCALITY);
100
101 AddressField parent_address_field =
102 static_cast<AddressField>(address_field - 1);
103
104 bool all_parents_match =
105 parent_address_field == COUNTRY ||
106 all_regions_match_input_.find(parent_address_field) !=
107 all_regions_match_input_.end();
108
109 // Cannot build |address_field| level suggestions if there are no matches in
110 // |parent_address_field| level regions.
111 const RegionsMatchInput* parents = NULL;
112 if (address_field > ADMIN_AREA && !all_parents_match) {
113 parents = ®ions_match_input_[parent_address_field];
114 if (parents->keys.empty() && parents->names.empty())
115 return false;
116 }
117
118 RegionsMatchInput* regions = NULL;
119 if (address_field < DEPENDENT_LOCALITY)
120 regions = ®ions_match_input_[address_field];
121
122 std::vector<Suggestion>* suggestions = &suggestions_[address_field];
123 bool added_suggestions = false;
124
125 // Iterate over both |regions_match_key| and |regions_match_name| and build
126 // Suggestion objects based on the given RegionData objects. Advance either
127 // one iterator at a time (if they point to different data) or both
128 // iterators at once (if they point to the same data).
129 for (std::set<const RegionData*>::const_iterator
130 key_it = regions_match_key.begin(),
131 name_it = regions_match_name.begin();
132 key_it != regions_match_key.end() ||
133 name_it != regions_match_name.end();) {
134 const RegionData* key_region =
135 key_it != regions_match_key.end() ? *key_it : NULL;
136 const RegionData* name_region =
137 name_it != regions_match_name.end() ? *name_it : NULL;
138
139 // Regions that do not have a parent that also matches input will not
140 // become suggestions.
141 bool key_region_has_parent =
142 all_parents_match ||
143 (parents && !parents->keys.empty() && key_region &&
144 parents->keys.find(&key_region->parent()) != parents->keys.end());
145 bool name_region_has_parent =
146 all_parents_match ||
147 (parents && !parents->names.empty() && name_region &&
148 parents->names.find(&name_region->parent()) != parents->names.end());
149
150 if (name_region && (!key_region || name_region < key_region)) {
151 if (name_region_has_parent) {
152 suggestions->push_back(Suggestion(name_region, address_field, false));
153 added_suggestions = true;
154 if (regions)
155 regions->names.insert(name_region);
156 }
157
158 ++name_it;
159 } else if (key_region && (!name_region || key_region < name_region)) {
160 if (key_region_has_parent) {
161 suggestions->push_back(Suggestion(key_region, address_field, true));
162 added_suggestions = true;
163 if (regions)
164 regions->keys.insert(key_region);
165 }
166
167 ++key_it;
168 } else {
169 if (key_region_has_parent) {
170 suggestions->push_back(Suggestion(key_region, address_field, true));
171 added_suggestions = true;
172 if (regions) {
173 regions->keys.insert(key_region);
174 regions->names.insert(name_region);
175 }
176 }
177
178 ++key_it;
179 ++name_it;
180 }
181 }
182
183 return added_suggestions;
184 }
185
186 // Swaps the suggestions for the smallest sub-region into |suggestions|.
187 // |this| is not usable after this call due to using the swap() operation.
188 //
189 // The |suggestions| parameter should not be NULL.
SwapSmallestSubRegionSuggestions(std::vector<Suggestion> * suggestions)190 void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
191 DCHECK(suggestions);
192 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
193 std::vector<Suggestion>* result =
194 &suggestions_[static_cast<AddressField>(i)];
195 if (!result->empty()) {
196 suggestions->swap(*result);
197 return;
198 }
199 }
200 }
201
202 private:
203 // The sets of non-owned regions used for looking up regions that match user
204 // input by keys and names.
205 struct RegionsMatchInput {
206 std::set<const RegionData*> keys;
207 std::set<const RegionData*> names;
208 };
209
210 // The regions that match user input at ADMIN_AREA and LOCALITY levels.
211 std::map<AddressField, RegionsMatchInput> regions_match_input_;
212
213 // The set of fields for which all regions match user input. Used to avoid
214 // storing a long list in |regions_match_input_| and later looking it up
215 // there.
216 std::set<AddressField> all_regions_match_input_;
217
218 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
219 std::map<AddressField, std::vector<Suggestion> > suggestions_;
220
221 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
222 };
223
224 } // namespace
225
StringCanonicalizer()226 InputSuggester::StringCanonicalizer::StringCanonicalizer()
227 : buffer_(kInitialBufferSize, 0) {
228 UErrorCode error_code = U_ZERO_ERROR;
229 collator_.reset(
230 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
231 if (!collator_ || !U_SUCCESS(error_code)) {
232 // On some systems, the default locale is invalid to the eyes of the ICU
233 // library. This could be due to a device-specific issue (has been seen in
234 // the wild on Android and iOS devices). In the failure case, |collator_|
235 // will be null. See http://crbug.com/558625.
236
237 // Attempt to load the English locale.
238 error_code = U_ZERO_ERROR;
239 collator_.reset(
240 icu::Collator::createInstance(icu::Locale::getEnglish(), error_code));
241 if (!collator_) {
242 LOG(ERROR) << "Failed to initialize the ICU Collator with the English "
243 << "locale.";
244 }
245 }
246
247 DCHECK(U_SUCCESS(error_code));
248 if (collator_ && U_SUCCESS(error_code))
249 collator_->setStrength(icu::Collator::PRIMARY);
250 }
251
~StringCanonicalizer()252 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
253
Canonicalize(const std::string & original) const254 const std::vector<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize(
255 const std::string& original) const {
256 DCHECK(!original.empty());
257
258 icu::UnicodeString icu_str(original.c_str(),
259 static_cast<int32_t>(original.length()));
260 int32_t sort_key_size = 0;
261 if (collator_)
262 sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
263 DCHECK_LT(0, sort_key_size);
264
265 if (sort_key_size > buffer_size()) {
266 buffer_.resize(sort_key_size * 2, 0);
267 sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
268 DCHECK_LT(0, sort_key_size);
269 DCHECK_GT(buffer_size(), sort_key_size);
270 }
271
272 return buffer_;
273 }
274
buffer_size() const275 int32_t InputSuggester::StringCanonicalizer::buffer_size() const {
276 return static_cast<int32_t>(buffer_.size());
277 }
278
279 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
280 // region name or key.
281 class InputSuggester::SubRegionData {
282 public:
SubRegionData()283 SubRegionData()
284 : initialized_(false),
285 smallest_region_size_(COUNTRY),
286 canonicalizer_(NULL) {}
287
~SubRegionData()288 ~SubRegionData() {}
289
is_initialized() const290 bool is_initialized() const { return initialized_; }
291
292 // Adds the sub-regions of |country_region| into tries. Uses
293 // |shared_canonicalizer| for case and diacritic insensitive lookup of the
294 // sub-regions. Should be called at most once.
Initialize(const RegionData & country_region,const StringCanonicalizer & shared_canonicalizer)295 void Initialize(const RegionData& country_region,
296 const StringCanonicalizer& shared_canonicalizer) {
297 DCHECK(!initialized_);
298 DCHECK(!country_region.has_parent());
299
300 initialized_ = true;
301 canonicalizer_ = &shared_canonicalizer;
302
303 if (!country_region.sub_regions().empty())
304 AddSubRegionsOf(country_region, COUNTRY);
305 }
306
307 // Adds the suggestions for |user_input| into |suggestions| when user is
308 // typing in |focused_field|.
BuildSuggestions(const AddressData & user_input,AddressField focused_field,std::vector<Suggestion> * suggestions)309 void BuildSuggestions(const AddressData& user_input,
310 AddressField focused_field,
311 std::vector<Suggestion>* suggestions) {
312 DCHECK(initialized_);
313
314 // Do not suggest anything if there's no suggestion data for the focused
315 // field.
316 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
317 return;
318
319 // Non-owned regions that match a field value by region key.
320 std::set<const RegionData*> regions_match_key;
321
322 // Non-owned regions that match a field value by region name.
323 std::set<const RegionData*> regions_match_name;
324
325 AddressSuggestions address_suggestions;
326 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
327 ++i) {
328 AddressField address_field = static_cast<AddressField>(i);
329 AddressField parent_address_field = static_cast<AddressField>(i - 1);
330
331 const std::string& field_value = user_input.GetFieldValue(address_field);
332 const std::string& parent_field_value =
333 user_input.GetFieldValue(parent_address_field);
334
335 if (field_value.empty() &&
336 (address_field == ADMIN_AREA || parent_field_value.empty())) {
337 address_suggestions.AllRegionsMatchForField(address_field);
338 continue;
339 }
340
341 if (field_value.empty()) {
342 DCHECK_NE(address_field, focused_field);
343 continue;
344 }
345
346 regions_match_key.clear();
347 regions_match_name.clear();
348
349 const FieldTries& field_tries = field_tries_[address_field];
350
351 const std::vector<uint8_t>& canonicalized_value =
352 canonicalizer_->Canonicalize(field_value);
353
354 field_tries.keys.FindDataForKeyPrefix(canonicalized_value,
355 ®ions_match_key);
356 field_tries.names.FindDataForKeyPrefix(canonicalized_value,
357 ®ions_match_name);
358
359 bool added_suggestions = address_suggestions.AddRegions(
360 address_field, regions_match_key, regions_match_name);
361
362 // Do not suggest anything if the focused field does not have suggestions.
363 if (address_field == focused_field && !added_suggestions)
364 return;
365 }
366
367 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
368 }
369
370 private:
371 // The tries to lookup regions for a specific field by keys and names. For
372 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
373 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
374 // struct is uncopyable due to Trie objects being uncopyable.
375 struct FieldTries {
376 Trie<const RegionData*> keys;
377 Trie<const RegionData*> names;
378 };
379
380 // Adds the sub-regions of |parent_region| into tries.
AddSubRegionsOf(const RegionData & parent_region,AddressField parent_field)381 void AddSubRegionsOf(const RegionData& parent_region,
382 AddressField parent_field) {
383 DCHECK(!parent_region.sub_regions().empty());
384
385 AddressField address_field = static_cast<AddressField>(parent_field + 1);
386 DCHECK(address_field >= ADMIN_AREA);
387 DCHECK(address_field <= DEPENDENT_LOCALITY);
388
389 FieldTries* field_tries = &field_tries_[address_field];
390 for (std::vector<const RegionData*>::const_iterator it =
391 parent_region.sub_regions().begin();
392 it != parent_region.sub_regions().end();
393 ++it) {
394 const RegionData* region = *it;
395 DCHECK(region);
396 DCHECK(!region->key().empty());
397 DCHECK(!region->name().empty());
398
399 field_tries->keys.AddDataForKey(
400 canonicalizer_->Canonicalize(region->key()), region);
401
402 field_tries->names.AddDataForKey(
403 canonicalizer_->Canonicalize(region->name()), region);
404
405 if (smallest_region_size_ < address_field)
406 smallest_region_size_ = address_field;
407
408 if (!region->sub_regions().empty())
409 AddSubRegionsOf(*region, address_field);
410 }
411 }
412
413 // True after Initialize() has been called.
414 bool initialized_;
415
416 // The tries to lookup regions for ADMIN_AREA, LOCALITY, and
417 // DEPENDENT_LOCALITY.
418 std::map<AddressField, FieldTries> field_tries_;
419
420 // The smallest size of a sub-region that has data. For example, this is
421 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
422 AddressField smallest_region_size_;
423
424 // A shared instance of string canonicalizer for case and diacritic comparison
425 // of region keys and names.
426 const StringCanonicalizer* canonicalizer_;
427 };
428
InputSuggester(PreloadSupplier * supplier)429 InputSuggester::InputSuggester(PreloadSupplier* supplier)
430 : region_data_builder_(supplier),
431 input_helper_(supplier),
432 validator_(supplier),
433 validated_(BuildCallback(this, &InputSuggester::Validated)) {}
434
~InputSuggester()435 InputSuggester::~InputSuggester() {}
436
GetSuggestions(const AddressData & user_input,AddressField focused_field,size_t suggestions_limit,std::vector<AddressData> * suggestions)437 void InputSuggester::GetSuggestions(const AddressData& user_input,
438 AddressField focused_field,
439 size_t suggestions_limit,
440 std::vector<AddressData>* suggestions) {
441 DCHECK(suggestions);
442 DCHECK(focused_field == POSTAL_CODE ||
443 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
444
445 AddressData address_copy = user_input;
446
447 // Do not suggest anything if the user input is empty.
448 if (address_copy.IsFieldEmpty(focused_field))
449 return;
450
451 if (focused_field == POSTAL_CODE) {
452 // Do not suggest anything if the user is typing an invalid postal code.
453 FieldProblemMap problems;
454 FieldProblemMap filter;
455 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
456 validator_.Validate(address_copy,
457 true, // Allow postal office boxes.
458 false, // Do not require recipient name.
459 &filter,
460 &problems,
461 *validated_);
462 if (!problems.empty())
463 return;
464
465 // Fill in the sub-regions based on the postal code.
466 input_helper_.FillAddress(&address_copy);
467 }
468
469 // Lazily initialize the mapping from COUNTRY level regions to all of their
470 // sub-regions with metadata for generating suggestions.
471 std::string unused_best_language;
472 const RegionData& region_data =
473 region_data_builder_.Build(address_copy.region_code,
474 address_copy.language_code,
475 &unused_best_language);
476 SubRegionData* sub_region_data = &sub_regions_[®ion_data];
477 if (!sub_region_data->is_initialized())
478 sub_region_data->Initialize(region_data, canonicalizer_);
479
480 // Build the list of regions that match |address_copy| when the user is typing
481 // in the |focused_field|.
482 std::vector<Suggestion> suggested_regions;
483 sub_region_data->BuildSuggestions(
484 address_copy, focused_field, &suggested_regions);
485
486 FieldProblemMap problems;
487 FieldProblemMap filter;
488 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
489
490 // Generate suggestions based on the regions.
491 for (std::vector<Suggestion>::const_iterator suggested_region_it =
492 suggested_regions.begin();
493 suggested_region_it != suggested_regions.end();
494 ++suggested_region_it) {
495 AddressData address;
496 address.region_code = address_copy.region_code;
497 address.postal_code = address_copy.postal_code;
498
499 // Traverse the tree of regions from the smallest |region_to_suggest| to the
500 // country-wide "root" of the tree. Use the region names or keys found at
501 // each of the levels of the tree to build the |address| to suggest.
502 AddressField address_field = suggested_region_it->matching_address_field;
503 for (const RegionData* region = suggested_region_it->region_to_suggest;
504 region->has_parent();
505 region = ®ion->parent()) {
506 address.SetFieldValue(address_field,
507 suggested_region_it->region_key_matches
508 ? region->key()
509 : region->name());
510 address_field = static_cast<AddressField>(address_field - 1);
511 }
512
513 // Do not suggest an address with a mismatching postal code.
514 problems.clear();
515 validator_.Validate(address,
516 true, // Allow postal office boxes.
517 false, // Do not require recipient name.
518 &filter,
519 &problems,
520 *validated_);
521 if (!problems.empty())
522 continue;
523
524 // Do not add more suggestions than |suggestions_limit|.
525 if (suggestions->size() >= suggestions_limit) {
526 suggestions->clear();
527 return;
528 }
529
530 suggestions->push_back(address);
531 }
532 }
533
Validated(bool success,const AddressData &,const FieldProblemMap &)534 void InputSuggester::Validated(bool success,
535 const AddressData&,
536 const FieldProblemMap&) {
537 DCHECK(success);
538 }
539
540 } // namespace autofill
541