1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6 
7 #include <cstddef>
8 #include <set>
9 #include <utility>
10 
11 #include "base/logging.h"
12 #include "base/macros.h"
13 #include "third_party/libaddressinput/chromium/trie.h"
14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
16 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
17 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
18 
19 namespace autofill {
20 
21 using ::i18n::addressinput::AddressData;
22 using ::i18n::addressinput::AddressField;
23 using ::i18n::addressinput::BuildCallback;
24 using ::i18n::addressinput::FieldProblemMap;
25 using ::i18n::addressinput::PreloadSupplier;
26 using ::i18n::addressinput::RegionData;
27 using ::i18n::addressinput::RegionDataBuilder;
28 
29 using ::i18n::addressinput::ADMIN_AREA;
30 using ::i18n::addressinput::COUNTRY;
31 using ::i18n::addressinput::DEPENDENT_LOCALITY;
32 using ::i18n::addressinput::LOCALITY;
33 using ::i18n::addressinput::POSTAL_CODE;
34 
35 using ::i18n::addressinput::INVALID_FORMAT;
36 using ::i18n::addressinput::MISMATCHING_VALUE;
37 
38 namespace {
39 
40 // Initial size for the buffer used in the canonicalizer.
41 static const size_t kInitialBufferSize = 32;
42 
43 // A region and its metadata useful for constructing a suggestion.
44 struct Suggestion {
45  public:
46   // Builds a suggestion of |region_to_suggest|. Does not take ownership of
47   // |region_to_suggest|, which should not be NULL.
Suggestionautofill::__anon3bf76f440111::Suggestion48   Suggestion(const RegionData* region_to_suggest,
49              AddressField matching_address_field,
50              bool region_key_matches)
51       : region_to_suggest(region_to_suggest),
52         matching_address_field(matching_address_field),
53         region_key_matches(region_key_matches) {
54     DCHECK(region_to_suggest);
55   }
56 
~Suggestionautofill::__anon3bf76f440111::Suggestion57   ~Suggestion() {}
58 
59   // The region that should be suggested. For example, if the region is ("CA",
60   // "California"), then either "CA" or "California" should be suggested.
61   const RegionData* region_to_suggest;
62 
63   // The field in the address for which the suggestion should be made. For
64   // example, ADMIN_AREA in US means the suggestion should be made for the field
65   // labeled "State".
66   AddressField matching_address_field;
67 
68   // True if the key of the region matches user input (the name may or may not
69   // match). "CA" should be suggested for a ("CA", "California") region.
70   //
71   // False if only the name of the region matches user input (the key does not
72   // match). "California" should be suggested for a ("CA", "California") region.
73   bool region_key_matches;
74 };
75 
76 // Suggestions for an address. Contains lists of suggestions for administrative
77 // area, locality, and dependent locality fields of an address.
78 class AddressSuggestions {
79  public:
AddressSuggestions()80   AddressSuggestions() {}
~AddressSuggestions()81   ~AddressSuggestions() {}
82 
83   // Marks all regions at |address_field| level as matching user input.
AllRegionsMatchForField(AddressField address_field)84   void AllRegionsMatchForField(AddressField address_field) {
85     all_regions_match_input_.insert(address_field);
86   }
87 
88   // Marks given regions at |address_field| level as matching user input. The
89   // |regions_match_key| parameter contains the regions that match user input by
90   // their keys. The |regions_match_name| parameter contains the regions that
91   // match user input by their names.
92   //
93   // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
94   // DEPENDENT_LOCALITY.
AddRegions(AddressField address_field,const std::set<const RegionData * > & regions_match_key,const std::set<const RegionData * > & regions_match_name)95   bool AddRegions(AddressField address_field,
96                   const std::set<const RegionData*>& regions_match_key,
97                   const std::set<const RegionData*>& regions_match_name) {
98     DCHECK(address_field >= ADMIN_AREA);
99     DCHECK(address_field <= DEPENDENT_LOCALITY);
100 
101     AddressField parent_address_field =
102         static_cast<AddressField>(address_field - 1);
103 
104     bool all_parents_match =
105         parent_address_field == COUNTRY ||
106         all_regions_match_input_.find(parent_address_field) !=
107             all_regions_match_input_.end();
108 
109     // Cannot build |address_field| level suggestions if there are no matches in
110     // |parent_address_field| level regions.
111     const RegionsMatchInput* parents = NULL;
112     if (address_field > ADMIN_AREA && !all_parents_match) {
113       parents = &regions_match_input_[parent_address_field];
114       if (parents->keys.empty() && parents->names.empty())
115         return false;
116     }
117 
118     RegionsMatchInput* regions = NULL;
119     if (address_field < DEPENDENT_LOCALITY)
120       regions = &regions_match_input_[address_field];
121 
122     std::vector<Suggestion>* suggestions = &suggestions_[address_field];
123     bool added_suggestions = false;
124 
125     // Iterate over both |regions_match_key| and |regions_match_name| and build
126     // Suggestion objects based on the given RegionData objects. Advance either
127     // one iterator at a time (if they point to different data) or both
128     // iterators at once (if they point to the same data).
129     for (std::set<const RegionData*>::const_iterator
130              key_it = regions_match_key.begin(),
131              name_it = regions_match_name.begin();
132          key_it != regions_match_key.end() ||
133              name_it != regions_match_name.end();) {
134       const RegionData* key_region =
135           key_it != regions_match_key.end() ? *key_it : NULL;
136       const RegionData* name_region =
137           name_it != regions_match_name.end() ? *name_it : NULL;
138 
139       // Regions that do not have a parent that also matches input will not
140       // become suggestions.
141       bool key_region_has_parent =
142           all_parents_match ||
143           (parents && !parents->keys.empty() && key_region &&
144            parents->keys.find(&key_region->parent()) != parents->keys.end());
145       bool name_region_has_parent =
146           all_parents_match ||
147           (parents && !parents->names.empty() && name_region &&
148            parents->names.find(&name_region->parent()) != parents->names.end());
149 
150       if (name_region && (!key_region || name_region < key_region)) {
151         if (name_region_has_parent) {
152           suggestions->push_back(Suggestion(name_region, address_field, false));
153           added_suggestions = true;
154           if (regions)
155             regions->names.insert(name_region);
156         }
157 
158         ++name_it;
159       } else if (key_region && (!name_region || key_region < name_region)) {
160         if (key_region_has_parent) {
161           suggestions->push_back(Suggestion(key_region, address_field, true));
162           added_suggestions = true;
163           if (regions)
164             regions->keys.insert(key_region);
165         }
166 
167         ++key_it;
168       } else {
169         if (key_region_has_parent) {
170           suggestions->push_back(Suggestion(key_region, address_field, true));
171           added_suggestions = true;
172           if (regions) {
173             regions->keys.insert(key_region);
174             regions->names.insert(name_region);
175           }
176         }
177 
178         ++key_it;
179         ++name_it;
180       }
181     }
182 
183     return added_suggestions;
184   }
185 
186   // Swaps the suggestions for the smallest sub-region into |suggestions|.
187   // |this| is not usable after this call due to using the swap() operation.
188   //
189   // The |suggestions| parameter should not be NULL.
SwapSmallestSubRegionSuggestions(std::vector<Suggestion> * suggestions)190   void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
191     DCHECK(suggestions);
192     for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
193       std::vector<Suggestion>* result =
194           &suggestions_[static_cast<AddressField>(i)];
195       if (!result->empty()) {
196         suggestions->swap(*result);
197         return;
198       }
199     }
200   }
201 
202  private:
203   // The sets of non-owned regions used for looking up regions that match user
204   // input by keys and names.
205   struct RegionsMatchInput {
206     std::set<const RegionData*> keys;
207     std::set<const RegionData*> names;
208   };
209 
210   // The regions that match user input at ADMIN_AREA and LOCALITY levels.
211   std::map<AddressField, RegionsMatchInput> regions_match_input_;
212 
213   // The set of fields for which all regions match user input. Used to avoid
214   // storing a long list in |regions_match_input_| and later looking it up
215   // there.
216   std::set<AddressField> all_regions_match_input_;
217 
218   // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
219   std::map<AddressField, std::vector<Suggestion> > suggestions_;
220 
221   DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
222 };
223 
224 }  // namespace
225 
StringCanonicalizer()226 InputSuggester::StringCanonicalizer::StringCanonicalizer()
227     : buffer_(kInitialBufferSize, 0) {
228   UErrorCode error_code = U_ZERO_ERROR;
229   collator_.reset(
230       icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
231   if (!collator_ || !U_SUCCESS(error_code)) {
232     // On some systems, the default locale is invalid to the eyes of the ICU
233     // library. This could be due to a device-specific issue (has been seen in
234     // the wild on Android and iOS devices). In the failure case, |collator_|
235     // will be null. See http://crbug.com/558625.
236 
237     // Attempt to load the English locale.
238     error_code = U_ZERO_ERROR;
239     collator_.reset(
240         icu::Collator::createInstance(icu::Locale::getEnglish(), error_code));
241     if (!collator_) {
242       LOG(ERROR) << "Failed to initialize the ICU Collator with the English "
243                  << "locale.";
244     }
245   }
246 
247   DCHECK(U_SUCCESS(error_code));
248   if (collator_ && U_SUCCESS(error_code))
249     collator_->setStrength(icu::Collator::PRIMARY);
250 }
251 
~StringCanonicalizer()252 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
253 
Canonicalize(const std::string & original) const254 const std::vector<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize(
255     const std::string& original) const {
256   DCHECK(!original.empty());
257 
258   icu::UnicodeString icu_str(original.c_str(),
259                              static_cast<int32_t>(original.length()));
260   int32_t sort_key_size = 0;
261   if (collator_)
262     sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
263   DCHECK_LT(0, sort_key_size);
264 
265   if (sort_key_size > buffer_size()) {
266     buffer_.resize(sort_key_size * 2, 0);
267     sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
268     DCHECK_LT(0, sort_key_size);
269     DCHECK_GT(buffer_size(), sort_key_size);
270   }
271 
272   return buffer_;
273 }
274 
buffer_size() const275 int32_t InputSuggester::StringCanonicalizer::buffer_size() const {
276   return static_cast<int32_t>(buffer_.size());
277 }
278 
279 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
280 // region name or key.
281 class InputSuggester::SubRegionData {
282  public:
SubRegionData()283   SubRegionData()
284       : initialized_(false),
285         smallest_region_size_(COUNTRY),
286         canonicalizer_(NULL) {}
287 
~SubRegionData()288   ~SubRegionData() {}
289 
is_initialized() const290   bool is_initialized() const { return initialized_; }
291 
292   // Adds the sub-regions of |country_region| into tries. Uses
293   // |shared_canonicalizer| for case and diacritic insensitive lookup of the
294   // sub-regions. Should be called at most once.
Initialize(const RegionData & country_region,const StringCanonicalizer & shared_canonicalizer)295   void Initialize(const RegionData& country_region,
296                   const StringCanonicalizer& shared_canonicalizer) {
297     DCHECK(!initialized_);
298     DCHECK(!country_region.has_parent());
299 
300     initialized_ = true;
301     canonicalizer_ = &shared_canonicalizer;
302 
303     if (!country_region.sub_regions().empty())
304       AddSubRegionsOf(country_region, COUNTRY);
305   }
306 
307   // Adds the suggestions for |user_input| into |suggestions| when user is
308   // typing in |focused_field|.
BuildSuggestions(const AddressData & user_input,AddressField focused_field,std::vector<Suggestion> * suggestions)309   void BuildSuggestions(const AddressData& user_input,
310                         AddressField focused_field,
311                         std::vector<Suggestion>* suggestions) {
312     DCHECK(initialized_);
313 
314     // Do not suggest anything if there's no suggestion data for the focused
315     // field.
316     if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
317       return;
318 
319     // Non-owned regions that match a field value by region key.
320     std::set<const RegionData*> regions_match_key;
321 
322     // Non-owned regions that match a field value by region name.
323     std::set<const RegionData*> regions_match_name;
324 
325     AddressSuggestions address_suggestions;
326     for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
327          ++i) {
328       AddressField address_field = static_cast<AddressField>(i);
329       AddressField parent_address_field = static_cast<AddressField>(i - 1);
330 
331       const std::string& field_value = user_input.GetFieldValue(address_field);
332       const std::string& parent_field_value =
333           user_input.GetFieldValue(parent_address_field);
334 
335       if (field_value.empty() &&
336           (address_field == ADMIN_AREA || parent_field_value.empty())) {
337         address_suggestions.AllRegionsMatchForField(address_field);
338         continue;
339       }
340 
341       if (field_value.empty()) {
342         DCHECK_NE(address_field, focused_field);
343         continue;
344       }
345 
346       regions_match_key.clear();
347       regions_match_name.clear();
348 
349       const FieldTries& field_tries = field_tries_[address_field];
350 
351       const std::vector<uint8_t>& canonicalized_value =
352           canonicalizer_->Canonicalize(field_value);
353 
354       field_tries.keys.FindDataForKeyPrefix(canonicalized_value,
355                                             &regions_match_key);
356       field_tries.names.FindDataForKeyPrefix(canonicalized_value,
357                                              &regions_match_name);
358 
359       bool added_suggestions = address_suggestions.AddRegions(
360           address_field, regions_match_key, regions_match_name);
361 
362       // Do not suggest anything if the focused field does not have suggestions.
363       if (address_field == focused_field && !added_suggestions)
364         return;
365     }
366 
367     address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
368   }
369 
370  private:
371   // The tries to lookup regions for a specific field by keys and names. For
372   // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
373   // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
374   // struct is uncopyable due to Trie objects being uncopyable.
375   struct FieldTries {
376     Trie<const RegionData*> keys;
377     Trie<const RegionData*> names;
378   };
379 
380   // Adds the sub-regions of |parent_region| into tries.
AddSubRegionsOf(const RegionData & parent_region,AddressField parent_field)381   void AddSubRegionsOf(const RegionData& parent_region,
382                        AddressField parent_field) {
383     DCHECK(!parent_region.sub_regions().empty());
384 
385     AddressField address_field = static_cast<AddressField>(parent_field + 1);
386     DCHECK(address_field >= ADMIN_AREA);
387     DCHECK(address_field <= DEPENDENT_LOCALITY);
388 
389     FieldTries* field_tries = &field_tries_[address_field];
390     for (std::vector<const RegionData*>::const_iterator it =
391              parent_region.sub_regions().begin();
392          it != parent_region.sub_regions().end();
393          ++it) {
394       const RegionData* region = *it;
395       DCHECK(region);
396       DCHECK(!region->key().empty());
397       DCHECK(!region->name().empty());
398 
399       field_tries->keys.AddDataForKey(
400           canonicalizer_->Canonicalize(region->key()), region);
401 
402       field_tries->names.AddDataForKey(
403           canonicalizer_->Canonicalize(region->name()), region);
404 
405       if (smallest_region_size_ < address_field)
406         smallest_region_size_ = address_field;
407 
408       if (!region->sub_regions().empty())
409         AddSubRegionsOf(*region, address_field);
410     }
411   }
412 
413   // True after Initialize() has been called.
414   bool initialized_;
415 
416   // The tries to lookup regions for ADMIN_AREA, LOCALITY, and
417   // DEPENDENT_LOCALITY.
418   std::map<AddressField, FieldTries> field_tries_;
419 
420   // The smallest size of a sub-region that has data. For example, this is
421   // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
422   AddressField smallest_region_size_;
423 
424   // A shared instance of string canonicalizer for case and diacritic comparison
425   // of region keys and names.
426   const StringCanonicalizer* canonicalizer_;
427 };
428 
InputSuggester(PreloadSupplier * supplier)429 InputSuggester::InputSuggester(PreloadSupplier* supplier)
430     : region_data_builder_(supplier),
431       input_helper_(supplier),
432       validator_(supplier),
433       validated_(BuildCallback(this, &InputSuggester::Validated)) {}
434 
~InputSuggester()435 InputSuggester::~InputSuggester() {}
436 
GetSuggestions(const AddressData & user_input,AddressField focused_field,size_t suggestions_limit,std::vector<AddressData> * suggestions)437 void InputSuggester::GetSuggestions(const AddressData& user_input,
438                                     AddressField focused_field,
439                                     size_t suggestions_limit,
440                                     std::vector<AddressData>* suggestions) {
441   DCHECK(suggestions);
442   DCHECK(focused_field == POSTAL_CODE ||
443          (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
444 
445   AddressData address_copy = user_input;
446 
447   // Do not suggest anything if the user input is empty.
448   if (address_copy.IsFieldEmpty(focused_field))
449     return;
450 
451   if (focused_field == POSTAL_CODE) {
452     // Do not suggest anything if the user is typing an invalid postal code.
453     FieldProblemMap problems;
454     FieldProblemMap filter;
455     filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
456     validator_.Validate(address_copy,
457                         true,   // Allow postal office boxes.
458                         false,  // Do not require recipient name.
459                         &filter,
460                         &problems,
461                         *validated_);
462     if (!problems.empty())
463       return;
464 
465     // Fill in the sub-regions based on the postal code.
466     input_helper_.FillAddress(&address_copy);
467   }
468 
469   // Lazily initialize the mapping from COUNTRY level regions to all of their
470   // sub-regions with metadata for generating suggestions.
471   std::string unused_best_language;
472   const RegionData& region_data =
473       region_data_builder_.Build(address_copy.region_code,
474                                  address_copy.language_code,
475                                  &unused_best_language);
476   SubRegionData* sub_region_data = &sub_regions_[&region_data];
477   if (!sub_region_data->is_initialized())
478     sub_region_data->Initialize(region_data, canonicalizer_);
479 
480   // Build the list of regions that match |address_copy| when the user is typing
481   // in the |focused_field|.
482   std::vector<Suggestion> suggested_regions;
483   sub_region_data->BuildSuggestions(
484       address_copy, focused_field, &suggested_regions);
485 
486   FieldProblemMap problems;
487   FieldProblemMap filter;
488   filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
489 
490   // Generate suggestions based on the regions.
491   for (std::vector<Suggestion>::const_iterator suggested_region_it =
492            suggested_regions.begin();
493        suggested_region_it != suggested_regions.end();
494        ++suggested_region_it) {
495     AddressData address;
496     address.region_code = address_copy.region_code;
497     address.postal_code = address_copy.postal_code;
498 
499     // Traverse the tree of regions from the smallest |region_to_suggest| to the
500     // country-wide "root" of the tree. Use the region names or keys found at
501     // each of the levels of the tree to build the |address| to suggest.
502     AddressField address_field = suggested_region_it->matching_address_field;
503     for (const RegionData* region = suggested_region_it->region_to_suggest;
504          region->has_parent();
505          region = &region->parent()) {
506       address.SetFieldValue(address_field,
507                             suggested_region_it->region_key_matches
508                                 ? region->key()
509                                 : region->name());
510       address_field = static_cast<AddressField>(address_field - 1);
511     }
512 
513     // Do not suggest an address with a mismatching postal code.
514     problems.clear();
515     validator_.Validate(address,
516                         true,   // Allow postal office boxes.
517                         false,  // Do not require recipient name.
518                         &filter,
519                         &problems,
520                         *validated_);
521     if (!problems.empty())
522       continue;
523 
524     // Do not add more suggestions than |suggestions_limit|.
525     if (suggestions->size() >= suggestions_limit) {
526       suggestions->clear();
527       return;
528     }
529 
530     suggestions->push_back(address);
531   }
532 }
533 
Validated(bool success,const AddressData &,const FieldProblemMap &)534 void InputSuggester::Validated(bool success,
535                                const AddressData&,
536                                const FieldProblemMap&) {
537   DCHECK(success);
538 }
539 
540 }  // namespace autofill
541