1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/core/browser/data_model/autofill_structured_address_component.h"
6
7 #include <algorithm>
8 #include <map>
9 #include <string>
10 #include <utility>
11
12 #include "base/feature_list.h"
13 #include "base/notreached.h"
14 #include "base/strings/strcat.h"
15 #include "base/strings/string16.h"
16 #include "base/strings/string_piece.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "components/autofill/core/browser/autofill_type.h"
21 #include "components/autofill/core/browser/data_model/autofill_structured_address_constants.h"
22 #include "components/autofill/core/browser/data_model/autofill_structured_address_utils.h"
23 #include "components/autofill/core/browser/field_types.h"
24 #include "components/autofill/core/common/autofill_features.h"
25
26 namespace autofill {
27
28 namespace structured_address {
29
IsLessSignificantVerificationStatus(VerificationStatus left,VerificationStatus right)30 bool IsLessSignificantVerificationStatus(VerificationStatus left,
31 VerificationStatus right) {
32 // Both the KUserVerified and kObserved are larger then kServerParsed although
33 // the underlying integer suggests differently.
34 if (left == VerificationStatus::kServerParsed &&
35 (right == VerificationStatus::kObserved ||
36 right == VerificationStatus::kUserVerified)) {
37 return true;
38 }
39
40 if (right == VerificationStatus::kServerParsed &&
41 (left == VerificationStatus::kObserved ||
42 left == VerificationStatus::kUserVerified)) {
43 return false;
44 }
45
46 // In all other cases, it is sufficient to compare the underlying integer
47 // values.
48 return static_cast<std::underlying_type_t<VerificationStatus>>(left) <
49 static_cast<std::underlying_type_t<VerificationStatus>>(right);
50 }
51
AddressComponent(ServerFieldType storage_type,AddressComponent * parent,std::vector<AddressComponent * > subcomponents,unsigned int merge_mode)52 AddressComponent::AddressComponent(ServerFieldType storage_type,
53 AddressComponent* parent,
54 std::vector<AddressComponent*> subcomponents,
55 unsigned int merge_mode)
56 : value_verification_status_(VerificationStatus::kNoStatus),
57 storage_type_(storage_type),
58 subcomponents_(subcomponents),
59 parent_(parent),
60 merge_mode_(merge_mode) {}
61
62 AddressComponent::~AddressComponent() = default;
63
GetStorageType() const64 ServerFieldType AddressComponent::GetStorageType() const {
65 return storage_type_;
66 }
67
GetStorageTypeName() const68 std::string AddressComponent::GetStorageTypeName() const {
69 return AutofillType(storage_type_).ToString();
70 }
71
operator =(const AddressComponent & right)72 AddressComponent& AddressComponent::operator=(const AddressComponent& right) {
73 DCHECK(GetStorageType() == right.GetStorageType());
74 if (this == &right)
75 return *this;
76
77 if (right.IsValueAssigned()) {
78 value_ = right.value_;
79 value_verification_status_ = right.value_verification_status_;
80 sorted_normalized_tokens_ = right.sorted_normalized_tokens_;
81 } else {
82 UnsetValue();
83 }
84
85 DCHECK(right.subcomponents_.size() == subcomponents_.size());
86
87 for (size_t i = 0; i < right.subcomponents_.size(); i++)
88 *subcomponents_[i] = *right.subcomponents_[i];
89
90 PostAssignSanitization();
91
92 return *this;
93 }
94
operator ==(const AddressComponent & right) const95 bool AddressComponent::operator==(const AddressComponent& right) const {
96 if (this == &right)
97 return true;
98
99 if (GetStorageType() != right.GetStorageType())
100 return false;
101
102 if (GetValue() != right.GetValue() ||
103 value_verification_status_ != right.value_verification_status_) {
104 return false;
105 }
106
107 DCHECK(right.subcomponents_.size() == subcomponents_.size());
108 for (size_t i = 0; i < right.subcomponents_.size(); i++)
109 if (!(*subcomponents_[i] == *right.subcomponents_[i]))
110 return false;
111 return true;
112 }
113
operator !=(const AddressComponent & right) const114 bool AddressComponent::operator!=(const AddressComponent& right) const {
115 return !(*this == right);
116 }
117
IsAtomic() const118 bool AddressComponent::IsAtomic() const {
119 return subcomponents_.empty();
120 }
121
IsValueValid() const122 bool AddressComponent::IsValueValid() const {
123 return true;
124 }
125
IsValueForTypeValid(const std::string & field_type_name,bool wipe_if_not)126 bool AddressComponent::IsValueForTypeValid(const std::string& field_type_name,
127 bool wipe_if_not) {
128 bool validity_status;
129 if (GetIsValueForTypeValidIfPossible(field_type_name, &validity_status,
130 wipe_if_not))
131 return validity_status;
132 return false;
133 }
134
IsValueForTypeValid(ServerFieldType field_type,bool wipe_if_not)135 bool AddressComponent::IsValueForTypeValid(ServerFieldType field_type,
136 bool wipe_if_not) {
137 return IsValueForTypeValid(AutofillType(field_type).ToString(), wipe_if_not);
138 }
139
GetIsValueForTypeValidIfPossible(const std::string & field_type_name,bool * validity_status,bool wipe_if_not)140 bool AddressComponent::GetIsValueForTypeValidIfPossible(
141 const std::string& field_type_name,
142 bool* validity_status,
143 bool wipe_if_not) {
144 if (field_type_name == GetStorageTypeName()) {
145 *validity_status = IsValueValid();
146 if (!(*validity_status) && wipe_if_not)
147 UnsetValue();
148 return true;
149 }
150
151 for (auto* subcomponent : subcomponents_) {
152 if (subcomponent->GetIsValueForTypeValidIfPossible(
153 field_type_name, validity_status, wipe_if_not))
154 return true;
155 }
156 return false;
157 }
158
GetVerificationStatus() const159 VerificationStatus AddressComponent::GetVerificationStatus() const {
160 return value_verification_status_;
161 }
162
GetValue() const163 const base::string16& AddressComponent::GetValue() const {
164 if (value_.has_value())
165 return value_.value();
166 return base::EmptyString16();
167 }
168
IsValueAssigned() const169 bool AddressComponent::IsValueAssigned() const {
170 return value_.has_value();
171 }
172
SetValue(base::string16 value,VerificationStatus status)173 void AddressComponent::SetValue(base::string16 value,
174 VerificationStatus status) {
175 value_ = std::move(value);
176 value_verification_status_ = status;
177 }
178
UnsetValue()179 void AddressComponent::UnsetValue() {
180 value_.reset();
181 value_verification_status_ = VerificationStatus::kNoStatus;
182 sorted_normalized_tokens_.reset();
183 }
184
GetSupportedTypes(ServerFieldTypeSet * supported_types) const185 void AddressComponent::GetSupportedTypes(
186 ServerFieldTypeSet* supported_types) const {
187 // A proper AddressComponent tree contains every type only once.
188 DCHECK(supported_types->find(storage_type_) == supported_types->end())
189 << "The AddressComponent already contains a node that supports this "
190 "type: "
191 << storage_type_;
192 supported_types->insert(storage_type_);
193 GetAdditionalSupportedFieldTypes(supported_types);
194 for (auto* subcomponent : subcomponents_)
195 subcomponent->GetSupportedTypes(supported_types);
196 }
197
ConvertAndSetValueForAdditionalFieldTypeName(const std::string & field_type_name,const base::string16 & value,const VerificationStatus & status)198 bool AddressComponent::ConvertAndSetValueForAdditionalFieldTypeName(
199 const std::string& field_type_name,
200 const base::string16& value,
201 const VerificationStatus& status) {
202 return false;
203 }
204
ConvertAndGetTheValueForAdditionalFieldTypeName(const std::string & field_type_name,base::string16 * value) const205 bool AddressComponent::ConvertAndGetTheValueForAdditionalFieldTypeName(
206 const std::string& field_type_name,
207 base::string16* value) const {
208 return false;
209 }
210
GetBestFormatString() const211 base::string16 AddressComponent::GetBestFormatString() const {
212 // If the component is atomic, the format string is just the value.
213 if (IsAtomic())
214 return base::ASCIIToUTF16(GetPlaceholderToken(GetStorageTypeName()));
215
216 // Otherwise, the canonical format string is the concatenation of all
217 // subcomponents by their natural order.
218 std::vector<std::string> format_pieces;
219 for (const auto* subcomponent : subcomponents_) {
220 std::string format_piece = GetPlaceholderToken(
221 AutofillType(subcomponent->GetStorageType()).ToString());
222 format_pieces.emplace_back(std::move(format_piece));
223 }
224 return base::ASCIIToUTF16(base::JoinString(format_pieces, " "));
225 }
226
GetSubcomponentTypes() const227 std::vector<ServerFieldType> AddressComponent::GetSubcomponentTypes() const {
228 std::vector<ServerFieldType> subcomponent_types;
229 subcomponent_types.reserve(subcomponents_.size());
230 for (const auto* subcomponent : subcomponents_) {
231 subcomponent_types.emplace_back(subcomponent->GetStorageType());
232 }
233 return subcomponent_types;
234 }
235
SetValueForTypeIfPossible(const ServerFieldType & type,const base::string16 & value,const VerificationStatus & verification_status,bool invalidate_child_nodes,bool invalidate_parent_nodes)236 bool AddressComponent::SetValueForTypeIfPossible(
237 const ServerFieldType& type,
238 const base::string16& value,
239 const VerificationStatus& verification_status,
240 bool invalidate_child_nodes,
241 bool invalidate_parent_nodes) {
242 return SetValueForTypeIfPossible(AutofillType(type).ToString(), value,
243 verification_status, invalidate_child_nodes,
244 invalidate_parent_nodes);
245 }
246
SetValueForTypeIfPossible(const ServerFieldType & type,const std::string & value,const VerificationStatus & verification_status,bool invalidate_child_nodes,bool invalidate_parent_nodes)247 bool AddressComponent::SetValueForTypeIfPossible(
248 const ServerFieldType& type,
249 const std::string& value,
250 const VerificationStatus& verification_status,
251 bool invalidate_child_nodes,
252 bool invalidate_parent_nodes) {
253 return SetValueForTypeIfPossible(type, base::UTF8ToUTF16(value),
254 verification_status, invalidate_child_nodes,
255 invalidate_parent_nodes);
256 }
257
SetValueForTypeIfPossible(const std::string & type_name,const base::string16 & value,const VerificationStatus & verification_status,bool invalidate_child_nodes,bool invalidate_parent_nodes)258 bool AddressComponent::SetValueForTypeIfPossible(
259 const std::string& type_name,
260 const base::string16& value,
261 const VerificationStatus& verification_status,
262 bool invalidate_child_nodes,
263 bool invalidate_parent_nodes) {
264 bool value_set = false;
265 // If the type is the storage type of the component, it can directly be
266 // returned.
267 if (type_name == GetStorageTypeName()) {
268 SetValue(value, verification_status);
269 value_set = true;
270 } else if (ConvertAndSetValueForAdditionalFieldTypeName(
271 type_name, value, verification_status)) {
272 // The conversion using a field type was successful.
273 value_set = true;
274 }
275
276 if (value_set) {
277 if (invalidate_child_nodes)
278 UnsetSubcomponents();
279 return true;
280 }
281
282 // Finally, probe if the type is supported by one of the subcomponents.
283 for (auto* subcomponent : subcomponents_) {
284 if (subcomponent->SetValueForTypeIfPossible(
285 type_name, value, verification_status, invalidate_child_nodes,
286 invalidate_parent_nodes)) {
287 if (invalidate_parent_nodes)
288 UnsetValue();
289 return true;
290 }
291 }
292
293 return false;
294 }
295
SetValueForTypeIfPossible(const std::string & type_name,const std::string & value,const VerificationStatus & verification_status,bool invalidate_child_nodes,bool invalidate_parent_nodes)296 bool AddressComponent::SetValueForTypeIfPossible(
297 const std::string& type_name,
298 const std::string& value,
299 const VerificationStatus& verification_status,
300 bool invalidate_child_nodes,
301 bool invalidate_parent_nodes) {
302 return SetValueForTypeIfPossible(type_name, base::UTF8ToUTF16(value),
303 verification_status, invalidate_child_nodes,
304 invalidate_parent_nodes);
305 }
306
UnsetAddressComponentAndItsSubcomponents()307 void AddressComponent::UnsetAddressComponentAndItsSubcomponents() {
308 UnsetValue();
309 UnsetSubcomponents();
310 }
311
UnsetSubcomponents()312 void AddressComponent::UnsetSubcomponents() {
313 for (auto* component : subcomponents_)
314 component->UnsetAddressComponentAndItsSubcomponents();
315 }
316
GetValueAndStatusForTypeIfPossible(const ServerFieldType & type,base::string16 * value,VerificationStatus * status) const317 bool AddressComponent::GetValueAndStatusForTypeIfPossible(
318 const ServerFieldType& type,
319 base::string16* value,
320 VerificationStatus* status) const {
321 return GetValueAndStatusForTypeIfPossible(AutofillType(type).ToString(),
322 value, status);
323 }
324
GetValueAndStatusForTypeIfPossible(const std::string & type_name,base::string16 * value,VerificationStatus * status) const325 bool AddressComponent::GetValueAndStatusForTypeIfPossible(
326 const std::string& type_name,
327 base::string16* value,
328 VerificationStatus* status) const {
329 // If the value is the storage type, it can be simply returned.
330 if (type_name == GetStorageTypeName()) {
331 if (value)
332 *value = value_.value_or(base::string16());
333 if (status)
334 *status = GetVerificationStatus();
335 return true;
336 }
337
338 // Otherwise, probe if it is a supported field type that can be converted.
339 if (this->ConvertAndGetTheValueForAdditionalFieldTypeName(type_name, value)) {
340 if (status)
341 *status = GetVerificationStatus();
342 return true;
343 }
344
345 // Finally, try to retrieve the value from one of the subcomponents.
346 for (const auto* subcomponent : subcomponents_) {
347 if (subcomponent->GetValueAndStatusForTypeIfPossible(type_name, value,
348 status))
349 return true;
350 }
351 return false;
352 }
353
GetValueForType(const ServerFieldType & type) const354 base::string16 AddressComponent::GetValueForType(
355 const ServerFieldType& type) const {
356 return GetValueForType(AutofillType(type).ToString());
357 }
358
GetValueForType(const std::string & type_name) const359 base::string16 AddressComponent::GetValueForType(
360 const std::string& type_name) const {
361 base::string16 value;
362 bool success = GetValueAndStatusForTypeIfPossible(type_name, &value, nullptr);
363 // TODO(crbug.com/1113617): Honorifics are temporally disabled.
364 DCHECK(success || type_name == AutofillType(NAME_HONORIFIC_PREFIX).ToString())
365 << type_name;
366 return value;
367 }
368
GetVerificationStatusForType(const ServerFieldType & type) const369 VerificationStatus AddressComponent::GetVerificationStatusForType(
370 const ServerFieldType& type) const {
371 return GetVerificationStatusForType(AutofillType(type).ToString());
372 }
373
GetVerificationStatusForType(const std::string & type_name) const374 VerificationStatus AddressComponent::GetVerificationStatusForType(
375 const std::string& type_name) const {
376 VerificationStatus status = VerificationStatus::kNoStatus;
377 bool success =
378 GetValueAndStatusForTypeIfPossible(type_name, nullptr, &status);
379 // TODO(crbug.com/1113617): Honorifics are temporally disabled.
380 DCHECK(success ||
381 type_name == AutofillType(NAME_HONORIFIC_PREFIX).ToString());
382 return status;
383 }
384
UnsetValueForTypeIfSupported(const ServerFieldType & type)385 bool AddressComponent::UnsetValueForTypeIfSupported(
386 const ServerFieldType& type) {
387 if (type == storage_type_) {
388 UnsetAddressComponentAndItsSubcomponents();
389 return true;
390 }
391
392 for (auto* subcomponent : subcomponents_) {
393 if (subcomponent->UnsetValueForTypeIfSupported(type))
394 return true;
395 }
396
397 return false;
398 }
399
ParseValueAndAssignSubcomponentsByMethod()400 bool AddressComponent::ParseValueAndAssignSubcomponentsByMethod() {
401 return false;
402 }
403
404 std::vector<const re2::RE2*>
GetParseRegularExpressionsByRelevance() const405 AddressComponent::GetParseRegularExpressionsByRelevance() const {
406 return {};
407 }
408
ParseValueAndAssignSubcomponents()409 void AddressComponent::ParseValueAndAssignSubcomponents() {
410 // Set the values of all subcomponents to the empty string and set the
411 // verification status to kParsed.
412 for (auto* subcomponent : subcomponents_)
413 subcomponent->SetValue(base::string16(), VerificationStatus::kParsed);
414
415 // First attempt, try to parse by method.
416 if (ParseValueAndAssignSubcomponentsByMethod())
417 return;
418
419 // Second attempt, try to parse by expressions.
420 if (ParseValueAndAssignSubcomponentsByRegularExpressions())
421 return;
422
423 // As a final fallback, parse using the fallback method.
424 ParseValueAndAssignSubcomponentsByFallbackMethod();
425 }
426
ParseValueAndAssignSubcomponentsByRegularExpressions()427 bool AddressComponent::ParseValueAndAssignSubcomponentsByRegularExpressions() {
428 for (const auto* parse_expression : GetParseRegularExpressionsByRelevance()) {
429 if (!parse_expression)
430 continue;
431 if (ParseValueAndAssignSubcomponentsByRegularExpression(GetValue(),
432 parse_expression))
433 return true;
434 }
435 return false;
436 }
437
ParseValueAndAssignSubcomponentsByRegularExpression(const base::string16 & value,const RE2 * parse_expression)438 bool AddressComponent::ParseValueAndAssignSubcomponentsByRegularExpression(
439 const base::string16& value,
440 const RE2* parse_expression) {
441 std::map<std::string, std::string> result_map;
442 if (ParseValueByRegularExpression(base::UTF16ToUTF8(value), parse_expression,
443 &result_map)) {
444 // Parsing was successful and results from the result map can be written
445 // to the structure.
446 for (const auto& result_entry : result_map) {
447 const std::string& field_type = result_entry.first;
448 base::string16 field_value = base::UTF8ToUTF16(result_entry.second);
449 // Do not reassign the value of this node.
450 if (field_type == GetStorageTypeName())
451 continue;
452 // crbug.com(1113617): Honorifics are temporarily disabled.
453 if (field_type == AutofillType(NAME_HONORIFIC_PREFIX).ToString())
454 continue;
455 bool success = SetValueForTypeIfPossible(field_type, field_value,
456 VerificationStatus::kParsed);
457 // Setting the value should always work unless the regular expression is
458 // invalid.
459 DCHECK(success);
460 }
461 return true;
462 }
463 return false;
464 }
465
ParseValueAndAssignSubcomponentsByFallbackMethod()466 void AddressComponent::ParseValueAndAssignSubcomponentsByFallbackMethod() {
467 // There is nothing to do for an atomic component.
468 if (IsAtomic())
469 return;
470
471 // An empty string is trivially parsable.
472 if (GetValue().empty())
473 return;
474
475 // Split the string by spaces.
476 std::vector<base::string16> space_separated_tokens =
477 base::SplitString(GetValue(), base::UTF8ToUTF16(" "),
478 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
479
480 auto token_iterator = space_separated_tokens.begin();
481 auto subcomponent_types = GetSubcomponentTypes();
482
483 // Assign one space-separated token each to all but the last subcomponent.
484 for (size_t i = 0; (i + 1) < subcomponent_types.size(); i++) {
485 // If there are no tokens left, parsing is done.
486 if (token_iterator == space_separated_tokens.end())
487 return;
488 // Set the current token to the type and advance the token iterator.
489 bool success = SetValueForTypeIfPossible(
490 subcomponent_types[i], *token_iterator, VerificationStatus::kParsed);
491 // By design, setting the value should never fail.
492 DCHECK(success);
493 token_iterator++;
494 }
495
496 // Collect all remaining tokens in the last subcomponent.
497 base::string16 remaining_tokens = base::JoinString(
498 std::vector<base::string16>(token_iterator, space_separated_tokens.end()),
499 base::ASCIIToUTF16(" "));
500 // By design, it should be possible to assign the value unless the regular
501 // expression is wrong.
502 bool success = SetValueForTypeIfPossible(
503 subcomponent_types.back(), remaining_tokens, VerificationStatus::kParsed);
504 DCHECK(success);
505 }
506
WipeInvalidStructure()507 bool AddressComponent::WipeInvalidStructure() {
508 if (IsAtomic()) {
509 return false;
510 }
511
512 // Test that each structured token is part of the subcomponent.
513 // This is not perfect, because different components can match with an
514 // overlapping portion of the unstructured string, but it guarantees that all
515 // information in the components is contained in the unstructured
516 // representation.
517 for (const auto* component : Subcomponents()) {
518 if (GetValue().find(component->GetValue()) == base::string16::npos) {
519 // If the value of one component could not have been found, wipe the full
520 // structure.
521 RecursivelyUnsetSubcomponents();
522 return true;
523 }
524 }
525 return false;
526 }
527
FormatValueFromSubcomponents()528 void AddressComponent::FormatValueFromSubcomponents() {
529 // Get the most suited format string.
530 base::string16 format_string = GetBestFormatString();
531
532 // Perform the following steps on a copy of the format string.
533 // * Replace all the placeholders of the form ${TYPE_NAME} with the
534 // corresponding value.
535 // * Strip away double spaces as they may occur after replacing a placeholder
536 // with an empty value.
537
538 base::string16 result = ReplacePlaceholderTypesWithValues(format_string);
539 result = base::CollapseWhitespace(result, /*trim_line_breaks=*/false);
540 SetValue(result, VerificationStatus::kFormatted);
541 }
542
ReplacePlaceholderTypesWithValues(const base::string16 & format) const543 base::string16 AddressComponent::ReplacePlaceholderTypesWithValues(
544 const base::string16& format) const {
545 // Replaces placeholders using the following rules.
546 // Assumptions: Placeholder values are not nested.
547 //
548 // * Search for a substring of the form "{$[^}]*}".
549 // The substring can contain semicolon-separated tokens. The first token is
550 // always the type name. If present, the second token is a prefix that is only
551 // inserted if the corresponding value is not empty. Accordingly, the third
552 // token is a suffix.
553 //
554 // * Check if this substring is a supported type of this component.
555 //
556 // * If yes, replace the substring with the corresponding value.
557 //
558 // * If the corresponding value is empty, return false.
559
560 auto control_parmater = base::ASCIIToUTF16("$").at(0);
561 auto control_parmater_open_delimitor = base::ASCIIToUTF16("{").at(0);
562 auto control_parmater_close_delimitor = base::ASCIIToUTF16("}").at(0);
563
564 // Create a result vector for the tokens that are joined in the end.
565 std::vector<base::StringPiece16> result_pieces;
566
567 // Store the token pieces that are joined in the end.
568 std::vector<base::string16> inserted_values;
569 inserted_values.reserve(20);
570
571 // Use a StringPiece rather than the string since this allows for getting
572 // cheap views onto substrings.
573 const base::StringPiece16 format_piece = format;
574
575 bool started_control_sequence = false;
576 // Track until which index the format string was fully processed.
577 size_t processed_until_index = 0;
578
579 for (size_t i = 0; i < format_piece.size(); ++i) {
580 // Check if a control sequence is started by '${'
581 if (format_piece[i] == control_parmater && i < format_piece.size() - 1 &&
582 format_piece[i + 1] == control_parmater_open_delimitor) {
583 // A control sequence is started.
584 started_control_sequence = true;
585 // Append the preceding string since it can't be a valid placeholder.
586 if (i > 0) {
587 inserted_values.emplace_back(format_piece.substr(
588 processed_until_index, i - processed_until_index));
589 }
590 processed_until_index = i;
591 ++i;
592 } else if (started_control_sequence &&
593 format_piece[i] == control_parmater_close_delimitor) {
594 // The control sequence came to an end.
595 started_control_sequence = false;
596 size_t placeholder_start = processed_until_index + 2;
597 base::string16 placeholder(
598 format_piece.substr(placeholder_start, i - placeholder_start));
599
600 std::vector<base::string16> placeholder_tokens =
601 base::SplitString(placeholder, base::ASCIIToUTF16(";"),
602 base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
603 DCHECK(placeholder_tokens.size() > 0);
604
605 // By convention, the first token is the type of the placeholder.
606 base::string16 type_name = placeholder_tokens.at(0);
607 // If present, the second token is the prefix.
608 base::string16 prefix = placeholder_tokens.size() > 1
609 ? placeholder_tokens.at(1)
610 : base::string16();
611 // And the third token the suffix.
612 base::string16 suffix = placeholder_tokens.size() > 2
613 ? placeholder_tokens.at(2)
614 : base::string16();
615
616 base::string16 value;
617 if (GetValueAndStatusForTypeIfPossible(base::UTF16ToASCII(type_name),
618 &value, nullptr)) {
619 // The type is valid and should be substituted.
620 if (!value.empty()) {
621 // Add the prefix if present.
622 if (!prefix.empty())
623 inserted_values.emplace_back(std::move(prefix));
624
625 // Add the substituted value.
626 inserted_values.emplace_back(std::move(value));
627 // Add the suffix if present.
628 if (!suffix.empty())
629 inserted_values.emplace_back(std::move(suffix));
630 }
631 } else {
632 // Append the control sequence as it is, because the type is not
633 // supported by the component tree.
634 inserted_values.emplace_back(format_piece.substr(
635 processed_until_index, i - processed_until_index + 1));
636 }
637 processed_until_index = i + 1;
638 }
639 }
640 // Append the rest of the string.
641 inserted_values.emplace_back(
642 format_piece.substr(processed_until_index, base::string16::npos));
643
644 // Build the final result.
645 return base::JoinString(inserted_values, base::ASCIIToUTF16(""));
646 }
647
CompleteFullTree()648 bool AddressComponent::CompleteFullTree() {
649 int max_nodes_on_root_to_leaf_path =
650 GetRootNode().MaximumNumberOfAssignedAddressComponentsOnNodeToLeafPaths();
651 // With more than one node the tree cannot be completed.
652 switch (max_nodes_on_root_to_leaf_path) {
653 // An empty tree is already complete.
654 case 0:
655 return true;
656 // With a single node, the tree is completable.
657 case 1:
658 GetRootNode().RecursivelyCompleteTree();
659 return true;
660 // In any other case, the tree is not completable.
661 default:
662 return false;
663 }
664 }
665
RecursivelyCompleteTree()666 void AddressComponent::RecursivelyCompleteTree() {
667 if (IsAtomic())
668 return;
669
670 // If the value is assigned, parse the subcomponents from the value.
671 if (!GetValue().empty() &&
672 MaximumNumberOfAssignedAddressComponentsOnNodeToLeafPaths() == 1)
673 ParseValueAndAssignSubcomponents();
674
675 // First call completion on all subcomponents.
676 for (auto* subcomponent : subcomponents_)
677 subcomponent->RecursivelyCompleteTree();
678
679 // Finally format the value from the sucomponents if it is not already
680 // assigned.
681 if (GetValue().empty())
682 FormatValueFromSubcomponents();
683 }
684
685 int AddressComponent::
MaximumNumberOfAssignedAddressComponentsOnNodeToLeafPaths() const686 MaximumNumberOfAssignedAddressComponentsOnNodeToLeafPaths() const {
687 int result = 0;
688
689 for (auto* subcomponent : subcomponents_) {
690 result = std::max(
691 result,
692 subcomponent
693 ->MaximumNumberOfAssignedAddressComponentsOnNodeToLeafPaths());
694 }
695
696 // Only count non-empty nodes.
697 if (!GetValue().empty())
698 ++result;
699
700 return result;
701 }
702
IsTreeCompletable()703 bool AddressComponent::IsTreeCompletable() {
704 // An empty tree is also a completable tree.
705 return MaximumNumberOfAssignedAddressComponentsOnNodeToLeafPaths() <= 1;
706 }
707
GetRootNode() const708 const AddressComponent& AddressComponent::GetRootNode() const {
709 if (!parent_)
710 return *this;
711 return parent_->GetRootNode();
712 }
713
GetRootNode()714 AddressComponent& AddressComponent::GetRootNode() {
715 return const_cast<AddressComponent&>(
716 const_cast<const AddressComponent*>(this)->GetRootNode());
717 }
718
RecursivelyUnsetParsedAndFormattedValues()719 void AddressComponent::RecursivelyUnsetParsedAndFormattedValues() {
720 if (IsValueAssigned() &&
721 (GetVerificationStatus() == VerificationStatus::kFormatted ||
722 GetVerificationStatus() == VerificationStatus::kParsed))
723 UnsetValue();
724
725 for (auto* component : subcomponents_)
726 component->RecursivelyUnsetParsedAndFormattedValues();
727 }
728
RecursivelyUnsetSubcomponents()729 void AddressComponent::RecursivelyUnsetSubcomponents() {
730 for (auto* subcomponent : subcomponents_) {
731 subcomponent->UnsetValue();
732 subcomponent->RecursivelyUnsetSubcomponents();
733 }
734 }
735
UnsetParsedAndFormattedValuesInEntireTree()736 void AddressComponent::UnsetParsedAndFormattedValuesInEntireTree() {
737 GetRootNode().RecursivelyUnsetParsedAndFormattedValues();
738 }
739
MergeVerificationStatuses(const AddressComponent & newer_component)740 void AddressComponent::MergeVerificationStatuses(
741 const AddressComponent& newer_component) {
742 if (IsValueAssigned() && (GetValue() == newer_component.GetValue()) &&
743 HasNewerValuePrecendenceInMerging(newer_component)) {
744 value_verification_status_ = newer_component.GetVerificationStatus();
745 }
746
747 DCHECK(newer_component.subcomponents_.size() == subcomponents_.size());
748 for (size_t i = 0; i < newer_component.subcomponents_.size(); i++) {
749 subcomponents_[i]->MergeVerificationStatuses(
750 *newer_component.subcomponents_.at(i));
751 }
752 }
753
GetSortedTokens() const754 const std::vector<AddressToken> AddressComponent::GetSortedTokens() const {
755 return TokenizeValue(GetValue());
756 }
757
IsMergeableWithComponent(const AddressComponent & newer_component) const758 bool AddressComponent::IsMergeableWithComponent(
759 const AddressComponent& newer_component) const {
760 const base::string16 value = ValueForComparison();
761 const base::string16 value_newer = newer_component.ValueForComparison();
762
763 // If both components are the same, there is nothing to do.
764 if (*this == newer_component)
765 return true;
766
767 if ((merge_mode_ & kReplaceEmpty) && (value.empty() || value_newer.empty())) {
768 return true;
769 }
770
771 if (merge_mode_ & kUseBetterOrNewerForSameValue) {
772 if (base::ToUpperASCII(value) == base::ToUpperASCII(value_newer)) {
773 return true;
774 }
775 }
776
777 SortedTokenComparisonResult token_comparison_result =
778 CompareSortedTokens(value, value_newer);
779
780 if ((merge_mode_ & (kRecursivelyMergeTokenEquivalentValues |
781 kRecursivelyMergeSingleTokenSubset)) &&
782 token_comparison_result.status == MATCH) {
783 return true;
784 }
785
786 if ((merge_mode_ & (kReplaceSubset | kReplaceSuperset)) &&
787 (token_comparison_result.OneIsSubset() ||
788 token_comparison_result.status == MATCH)) {
789 return true;
790 }
791
792 if ((merge_mode_ & kRecursivelyMergeSingleTokenSubset) &&
793 token_comparison_result.IsSingleTokenSuperset()) {
794 return true;
795 }
796
797 if (merge_mode_ == kUseNewerIfDifferent)
798 return true;
799
800 // If the one value is a substring of the other, use the substring of the
801 // corresponding mode is active.
802 if ((merge_mode_ & kUseMostRecentSubstring) &&
803 (value.find(value_newer) != base::string16::npos ||
804 value_newer.find(value) != base::string16::npos)) {
805 return true;
806 }
807
808 if ((merge_mode_ & kPickShorterIfOneContainsTheOther) &&
809 token_comparison_result.ContainEachOther()) {
810 return true;
811 }
812
813 if (merge_mode_ == kMergeChildrenAndReformat) {
814 bool is_mergeable = true;
815 DCHECK(newer_component.subcomponents_.size() == subcomponents_.size());
816 for (size_t i = 0; i < newer_component.subcomponents_.size(); i++) {
817 if (!subcomponents_[i]->IsMergeableWithComponent(
818 *newer_component.subcomponents_[i])) {
819 is_mergeable = false;
820 break;
821 }
822 }
823 if (is_mergeable)
824 return true;
825 }
826 return false;
827 }
828
MergeWithComponent(const AddressComponent & newer_component,bool newer_was_more_recently_used)829 bool AddressComponent::MergeWithComponent(
830 const AddressComponent& newer_component,
831 bool newer_was_more_recently_used) {
832 // If both components are the same, there is nothing to do.
833
834 const base::string16 value = ValueForComparison();
835 const base::string16 value_newer = newer_component.ValueForComparison();
836
837 if (*this == newer_component)
838 return true;
839
840 // Now, it is guaranteed that both values are not identical.
841 // Use the non empty one if the corresponding mode is active.
842 if (merge_mode_ & kReplaceEmpty) {
843 if (value.empty()) {
844 *this = newer_component;
845 return true;
846 }
847 if (value_newer.empty())
848 return true;
849 }
850
851 // If the normalized values are the same, optimize the verification status.
852 if ((merge_mode_ & kUseBetterOrNewerForSameValue) && (value == value_newer)) {
853 if (HasNewerValuePrecendenceInMerging(newer_component)) {
854 *this = newer_component;
855 }
856 return true;
857 }
858
859 // Compare the tokens of both values.
860 SortedTokenComparisonResult token_comparison_result =
861 CompareSortedTokens(value, value_newer);
862
863 // Use the recursive merge strategy for token equivalent values if the
864 // corresponding mode is active.
865 if ((merge_mode_ & kRecursivelyMergeTokenEquivalentValues) &&
866 (token_comparison_result.status == MATCH)) {
867 return MergeTokenEquivalentComponent(newer_component);
868 }
869
870 // Replace the subset with the superset if the corresponding mode is active.
871 if ((merge_mode_ & kReplaceSubset) && token_comparison_result.OneIsSubset()) {
872 if (token_comparison_result.status == SUBSET)
873 *this = newer_component;
874 return true;
875 }
876
877 // Replace the superset with the subset if the corresponding mode is active.
878 if ((merge_mode_ & kReplaceSuperset) &&
879 token_comparison_result.OneIsSubset()) {
880 if (token_comparison_result.status == SUPERSET)
881 *this = newer_component;
882 return true;
883 }
884
885 // If the tokens are already equivalent, use the more recently used one.
886 if ((merge_mode_ & (kReplaceSuperset | kReplaceSubset)) &&
887 token_comparison_result.status == MATCH) {
888 if (newer_was_more_recently_used)
889 *this = newer_component;
890 return true;
891 }
892
893 // Recursively merge a single-token subset if the corresponding mode is
894 // active.
895 if ((merge_mode_ & kRecursivelyMergeSingleTokenSubset) &&
896 token_comparison_result.IsSingleTokenSuperset()) {
897 // For the merging of subset token, the tokenization must be done without
898 // prior normalization of the values.
899 SortedTokenComparisonResult token_comparison_result =
900 CompareSortedTokens(GetValue(), newer_component.GetValue());
901 return MergeSubsetComponent(newer_component, token_comparison_result);
902 }
903
904 // Replace the older value with the newer one if the corresponding mode is
905 // active.
906 if (merge_mode_ & kUseNewerIfDifferent) {
907 *this = newer_component;
908 return true;
909 }
910
911 // If the one value is a substring of the other, use the substring of the
912 // corresponding mode is active.
913 if ((merge_mode_ & kUseMostRecentSubstring) &&
914 (value.find(value_newer) != base::string16::npos ||
915 value_newer.find(value) != base::string16::npos)) {
916 if (newer_was_more_recently_used)
917 *this = newer_component;
918 return true;
919 }
920
921 if ((merge_mode_ & kPickShorterIfOneContainsTheOther) &&
922 token_comparison_result.ContainEachOther()) {
923 if (newer_component.GetValue().size() <= GetValue().size())
924 *this = newer_component;
925 return true;
926 }
927
928 // If the corresponding mode is active, ignore this mode and pair-wise merge
929 // the child tokens. Reformat this nodes from its children after the merge.
930 if (merge_mode_ & kMergeChildrenAndReformat) {
931 DCHECK(newer_component.subcomponents_.size() == subcomponents_.size());
932 for (size_t i = 0; i < newer_component.subcomponents_.size(); i++) {
933 bool success = subcomponents_[i]->MergeWithComponent(
934 *newer_component.subcomponents_[i], newer_was_more_recently_used);
935 if (!success)
936 return false;
937 }
938 FormatValueFromSubcomponents();
939 return true;
940 }
941 return false;
942 }
943
HasNewerValuePrecendenceInMerging(const AddressComponent & newer_component) const944 bool AddressComponent::HasNewerValuePrecendenceInMerging(
945 const AddressComponent& newer_component) const {
946 return !IsLessSignificantVerificationStatus(
947 newer_component.GetVerificationStatus(), GetVerificationStatus());
948 }
949
MergeTokenEquivalentComponent(const AddressComponent & newer_component)950 bool AddressComponent::MergeTokenEquivalentComponent(
951 const AddressComponent& newer_component) {
952 if (!AreSortedTokensEqual(
953 TokenizeValue(ValueForComparison()),
954 TokenizeValue(newer_component.ValueForComparison()))) {
955 return false;
956 }
957
958 // Assumption:
959 // The values of both components are a permutation of the same tokens.
960 // The componentization of the components can be different in terms of
961 // how the tokens are divided between the subomponents. The valdiation
962 // status of the component and its subcomponent can be different.
963 //
964 // Merge Strategy:
965 // * Adopt the exact value (and validation status) of the node with the higher
966 // validation status.
967 //
968 // * For all subcomponents that have the same value, make a recursive call and
969 // use the result.
970 //
971 // * For the set of all non-matching subcomponents. Either use the ones from
972 // this component or the other depending on which substructure is better in
973 // terms of the number of validated tokens.
974
975 if (HasNewerValuePrecendenceInMerging(newer_component)) {
976 SetValue(newer_component.GetValue(),
977 newer_component.GetVerificationStatus());
978 }
979
980 // Now, the substructure of the node must be merged. There are three cases:
981 //
982 // * All nodes of the substructure are pairwise mergeable. In this case it
983 // is sufficient to apply a recursive merging strategy.
984 //
985 // * None of the nodes of the substructure are pairwise mergeable. In this
986 // case, either the complete substructure of |this| or |newer_component|
987 // must be used. Which one to use can be decided by the higher validation
988 // score.
989 //
990 // * In a mixed scenario, there is at least one pair of mergeable nodes
991 // in the substructure and at least on pair of non-mergeable nodes. Here,
992 // the mergeable nodes are merged while all other nodes are taken either
993 // from |this| or the |newer_component| decided by the higher validation
994 // score of the unmerged nodes.
995 //
996 // The following algorithm combines the three cases by first trying to merge
997 // all components pair-wise. For all components that couldn't be merged, the
998 // verification score is summed for this and the other component. If the other
999 // component has an equal or larger score, finalize the merge by using its
1000 // components. It is assumed that the other component is the newer of the two
1001 // components. By favoring the other component in a tie, the most recently
1002 // used structure wins.
1003
1004 const std::vector<AddressComponent*> other_subcomponents =
1005 newer_component.Subcomponents();
1006
1007 DCHECK(subcomponents_.size() == other_subcomponents.size());
1008
1009 int this_component_verification_score = 0;
1010 int newer_component_verification_score = 0;
1011
1012 std::vector<int> unmerged_indices;
1013 unmerged_indices.reserve(subcomponents_.size());
1014
1015 for (size_t i = 0; i < subcomponents_.size(); i++) {
1016 DCHECK(subcomponents_[i]->GetStorageType() ==
1017 other_subcomponents.at(i)->GetStorageType());
1018
1019 // If the components can't be merged directly, store the ungermed index and
1020 // sum the verification scores to decide which component's substructure to
1021 // use.
1022 if (!subcomponents_[i]->MergeTokenEquivalentComponent(
1023 *other_subcomponents.at(i))) {
1024 this_component_verification_score +=
1025 subcomponents_[i]->GetStructureVerificationScore();
1026 newer_component_verification_score +=
1027 other_subcomponents.at(i)->GetStructureVerificationScore();
1028 unmerged_indices.emplace_back(i);
1029 }
1030 }
1031
1032 // If the total verification score of all unmerged components of the other
1033 // component is equal or larger than the score of this component, use its
1034 // subcomponents including their substructure for all unmerged components.
1035 if (newer_component_verification_score >= this_component_verification_score) {
1036 for (size_t i : unmerged_indices)
1037 *subcomponents_[i] = *other_subcomponents[i];
1038 }
1039
1040 return true;
1041 }
1042
ConsumeAdditionalToken(const base::string16 & token_value)1043 void AddressComponent::ConsumeAdditionalToken(
1044 const base::string16& token_value) {
1045 if (IsAtomic()) {
1046 if (GetValue().empty()) {
1047 SetValue(token_value, VerificationStatus::kParsed);
1048 } else {
1049 SetValue(base::StrCat({GetValue(), base::ASCIIToUTF16(" "), token_value}),
1050 VerificationStatus::kParsed);
1051 }
1052 return;
1053 }
1054
1055 // Try the first free subcomponent.
1056 for (auto* subcomponent : subcomponents_) {
1057 if (subcomponent->GetValue().empty()) {
1058 subcomponent->SetValue(token_value, VerificationStatus::kParsed);
1059 return;
1060 }
1061 }
1062
1063 // Otherwise append the value to the first component.
1064 subcomponents_[0]->SetValue(
1065 base::StrCat({GetValue(), base::ASCIIToUTF16(" "), token_value}),
1066 VerificationStatus::kParsed);
1067 }
1068
MergeSubsetComponent(const AddressComponent & subset_component,const SortedTokenComparisonResult & token_comparison_result)1069 bool AddressComponent::MergeSubsetComponent(
1070 const AddressComponent& subset_component,
1071 const SortedTokenComparisonResult& token_comparison_result) {
1072 DCHECK(token_comparison_result.IsSingleTokenSuperset());
1073 DCHECK(token_comparison_result.additional_tokens.size() == 1);
1074
1075 base::string16 token_to_consume =
1076 token_comparison_result.additional_tokens.back().value;
1077
1078 int this_component_verification_score = 0;
1079 int newer_component_verification_score = 0;
1080 bool found_subset_component = false;
1081
1082 std::vector<int> unmerged_indices;
1083 unmerged_indices.reserve(subcomponents_.size());
1084
1085 const std::vector<AddressComponent*>& subset_subcomponents =
1086 subset_component.Subcomponents();
1087
1088 unmerged_indices.reserve(subcomponents_.size());
1089
1090 for (size_t i = 0; i < subcomponents_.size(); i++) {
1091 DCHECK(subcomponents_[i]->GetStorageType() ==
1092 subset_subcomponents.at(i)->GetStorageType());
1093
1094 AddressComponent* subcomponent = subcomponents_[i];
1095 const AddressComponent* subset_subcomponent = subset_subcomponents.at(i);
1096
1097 base::string16 additional_token;
1098
1099 // If the additional token is the value of this token. Just leave it in.
1100 if (!found_subset_component &&
1101 subcomponent->GetValue() == token_to_consume &&
1102 subset_subcomponent->GetValue().empty()) {
1103 found_subset_component = true;
1104 continue;
1105 }
1106
1107 SortedTokenComparisonResult subtoken_comparison_result =
1108 CompareSortedTokens(subcomponent->GetSortedTokens(),
1109 subset_subcomponent->GetSortedTokens());
1110
1111 // Recursive case.
1112 if (!found_subset_component &&
1113 subtoken_comparison_result.IsSingleTokenSuperset()) {
1114 found_subset_component = true;
1115 subcomponent->MergeSubsetComponent(*subset_subcomponent,
1116 subtoken_comparison_result);
1117 continue;
1118 }
1119
1120 // If the tokens are the equivalent, they can directly be merged.
1121 if (subtoken_comparison_result.status == MATCH) {
1122 subcomponent->MergeTokenEquivalentComponent(*subset_subcomponent);
1123 continue;
1124 }
1125
1126 // Otherwise calculate the verification score.
1127 this_component_verification_score +=
1128 subcomponent->GetStructureVerificationScore();
1129 newer_component_verification_score +=
1130 subset_subcomponent->GetStructureVerificationScore();
1131 unmerged_indices.emplace_back(i);
1132 }
1133
1134 // If the total verification score of all unmerged components of the other
1135 // component is equal or larger than the score of this component, use its
1136 // subcomponents including their substructure for all unmerged components.
1137 if (newer_component_verification_score >= this_component_verification_score) {
1138 for (size_t i : unmerged_indices)
1139 *subcomponents_[i] = *subset_subcomponents[i];
1140
1141 if (!found_subset_component)
1142 this->ConsumeAdditionalToken(token_to_consume);
1143 }
1144
1145 // In the current implementation it is always possible to merge.
1146 // Once more tokens are supported this may change.
1147 return true;
1148 }
1149
GetStructureVerificationScore() const1150 int AddressComponent::GetStructureVerificationScore() const {
1151 int result = 0;
1152 switch (GetVerificationStatus()) {
1153 case VerificationStatus::kNoStatus:
1154 case VerificationStatus::kParsed:
1155 case VerificationStatus::kFormatted:
1156 case VerificationStatus::kServerParsed:
1157 break;
1158 case VerificationStatus::kObserved:
1159 result += 1;
1160 break;
1161 case VerificationStatus::kUserVerified:
1162 // In the current implementation, only the root not can be verified by
1163 // the user.
1164 NOTREACHED();
1165 break;
1166 }
1167 for (const AddressComponent* component : subcomponents_)
1168 result += component->GetStructureVerificationScore();
1169
1170 return result;
1171 }
1172
NormalizedValue() const1173 base::string16 AddressComponent::NormalizedValue() const {
1174 return NormalizeValue(GetValue());
1175 }
1176
ValueForComparison() const1177 base::string16 AddressComponent::ValueForComparison() const {
1178 return NormalizedValue();
1179 }
1180
1181 } // namespace structured_address
1182
1183 } // namespace autofill
1184