1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/core/browser/form_structure.h"
6
7 #include <stdint.h>
8
9 #include <algorithm>
10 #include <map>
11 #include <memory>
12 #include <unordered_map>
13 #include <unordered_set>
14 #include <utility>
15 #include <vector>
16
17 #include "base/base64.h"
18 #include "base/command_line.h"
19 #include "base/feature_list.h"
20 #include "base/i18n/case_conversion.h"
21 #include "base/logging.h"
22 #include "base/metrics/field_trial.h"
23 #include "base/metrics/histogram_macros.h"
24 #include "base/no_destructor.h"
25 #include "base/strings/strcat.h"
26 #include "base/strings/string_number_conversions.h"
27 #include "base/strings/string_piece.h"
28 #include "base/strings/string_split.h"
29 #include "base/strings/string_util.h"
30 #include "base/strings/stringprintf.h"
31 #include "base/strings/utf_string_conversions.h"
32 #include "base/time/time.h"
33 #include "components/autofill/core/browser/autofill_data_util.h"
34 #include "components/autofill/core/browser/autofill_metrics.h"
35 #include "components/autofill/core/browser/autofill_regex_constants.h"
36 #include "components/autofill/core/browser/autofill_regexes.h"
37 #include "components/autofill/core/browser/autofill_type.h"
38 #include "components/autofill/core/browser/field_types.h"
39 #include "components/autofill/core/browser/form_parsing/field_candidates.h"
40 #include "components/autofill/core/browser/form_parsing/form_field.h"
41 #include "components/autofill/core/browser/logging/log_manager.h"
42 #include "components/autofill/core/browser/randomized_encoder.h"
43 #include "components/autofill/core/browser/rationalization_util.h"
44 #include "components/autofill/core/browser/validation.h"
45 #include "components/autofill/core/common/autofill_constants.h"
46 #include "components/autofill/core/common/autofill_features.h"
47 #include "components/autofill/core/common/autofill_internals/log_message.h"
48 #include "components/autofill/core/common/autofill_internals/logging_scope.h"
49 #include "components/autofill/core/common/autofill_payments_features.h"
50 #include "components/autofill/core/common/autofill_tick_clock.h"
51 #include "components/autofill/core/common/autofill_util.h"
52 #include "components/autofill/core/common/form_data.h"
53 #include "components/autofill/core/common/form_data_predictions.h"
54 #include "components/autofill/core/common/form_field_data.h"
55 #include "components/autofill/core/common/form_field_data_predictions.h"
56 #include "components/autofill/core/common/logging/log_buffer.h"
57 #include "components/autofill/core/common/signatures.h"
58 #include "components/security_state/core/security_state.h"
59 #include "components/version_info/version_info.h"
60 #include "url/origin.h"
61
62 namespace autofill {
63
64 using mojom::SubmissionIndicatorEvent;
65
66 namespace {
67
// Tokens naming the billing and shipping modes.
// NOTE(review): presumably compared against the "billing"/"shipping" hints of
// an autocomplete attribute; the consumer is outside this part of the file, so
// confirm before relying on that.
constexpr char kBillingMode[] = "billing";
constexpr char kShippingMode[] = "shipping";

// Default section name for the fields.
constexpr char kDefaultSection[] = "-default";

// Only removing common name prefixes if we have a minimum number of fields and
// a minimum prefix length. These values are chosen to avoid cases such as two
// fields with "address1" and "address2" and be effective against web frameworks
// which prepend prefixes such as "ctl01$ctl00$MainContentRegion$" on all
// fields.
constexpr int kCommonNamePrefixRemovalFieldThreshold = 3;
constexpr int kMinCommonNamePrefixLength = 16;

// Affix removal configuration. Only remove short affixes if they are common
// to all field names and there is at least the minimum number of fields.
// If no affix common to all field names is found, search for a long
// prefix common to a subset of the fields. This case helps include cases of
// prefixes prepended by web frameworks.
//
// Minimum required number of available fields for trying to remove affixes.
constexpr int kCommonNameAffixRemovalFieldNumberThreshold = 3;
// Minimum required length for affixes common to all field names.
constexpr int kMinCommonNameAffixLength = 3;
// Minimum required length for prefixes common to a subset of the field names.
constexpr int kMinCommonNameLongPrefixLength = 16;
// Regex for checking if |parseable_name| is valid after stripping affixes.
// The pattern "\D" matches a single non-digit character.
// NOTE(review): presumably a stripped name counts as valid when it still
// contains a non-digit; confirm against the matching call site.
constexpr char kParseableNameValidationRe[] = "\\D";
96
97 // Returns true if the scheme given by |url| is one for which autofill is
98 // allowed to activate. By default this only returns true for HTTP and HTTPS.
HasAllowedScheme(const GURL & url)99 bool HasAllowedScheme(const GURL& url) {
100 return url.SchemeIsHTTPOrHTTPS() ||
101 base::FeatureList::IsEnabled(
102 features::kAutofillAllowNonHttpActivation);
103 }
104
105 // Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
106 // |available_field_types| and returns the hex representation as a string.
EncodeFieldTypes(const ServerFieldTypeSet & available_field_types)107 std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
108 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
109 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
110 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
111
112 // Pack the types in |available_field_types| into |bit_field|.
113 std::vector<uint8_t> bit_field(kNumBytes, 0);
114 for (const auto& field_type : available_field_types) {
115 // Set the appropriate bit in the field. The bit we set is the one
116 // |field_type| % 8 from the left of the byte.
117 const size_t byte = field_type / 8;
118 const size_t bit = 0x80 >> (field_type % 8);
119 DCHECK(byte < bit_field.size());
120 bit_field[byte] |= bit;
121 }
122
123 // Discard any trailing zeroes.
124 // If there are no available types, we return the empty string.
125 size_t data_end = bit_field.size();
126 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
127 }
128
129 // Print all meaningfull bytes into a string.
130 std::string data_presence;
131 data_presence.reserve(data_end * 2 + 1);
132 for (size_t i = 0; i < data_end; ++i) {
133 base::StringAppendF(&data_presence, "%02x", bit_field[i]);
134 }
135
136 return data_presence;
137 }
138
// Returns |true| iff |token| is one of the contact type hints ("home", "work"
// or "mobile") from the implementation section of
// http://is.gd/whatwg_autocomplete
// "fax" and "pager" are deliberately not recognized, as Chrome does not
// support filling either type of information.
bool IsContactTypeHint(const std::string& token) {
  static constexpr const char* kContactTypeHints[] = {"home", "work",
                                                      "mobile"};
  for (const char* hint : kContactTypeHints) {
    if (token == hint)
      return true;
  }
  return false;
}
146
147 // Returns |true| iff the |token| is a type hint appropriate for a field of the
148 // given |field_type|, as specified in the implementation section of
149 // http://is.gd/whatwg_autocomplete
ContactTypeHintMatchesFieldType(const std::string & token,HtmlFieldType field_type)150 bool ContactTypeHintMatchesFieldType(const std::string& token,
151 HtmlFieldType field_type) {
152 // The "home" and "work" type hints are only appropriate for email and phone
153 // number field types.
154 if (token == "home" || token == "work") {
155 return field_type == HTML_TYPE_EMAIL ||
156 (field_type >= HTML_TYPE_TEL &&
157 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
158 }
159
160 // The "mobile" type hint is only appropriate for phone number field types.
161 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
162 // support filling either type of information.
163 if (token == "mobile") {
164 return field_type >= HTML_TYPE_TEL &&
165 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
166 }
167
168 return false;
169 }
170
171 // Returns the Chrome Autofill-supported field type corresponding to the given
172 // |autocomplete_attribute_value|, if there is one, in the context of the given
173 // |field|. Chrome Autofill supports a subset of the field types listed at
174 // http://is.gd/whatwg_autocomplete
FieldTypeFromAutocompleteAttributeValue(const std::string & autocomplete_attribute_value,const AutofillField & field)175 HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
176 const std::string& autocomplete_attribute_value,
177 const AutofillField& field) {
178 if (autocomplete_attribute_value == "")
179 return HTML_TYPE_UNSPECIFIED;
180
181 if (autocomplete_attribute_value == "name")
182 return HTML_TYPE_NAME;
183
184 if (autocomplete_attribute_value == "honorific-prefix")
185 return HTML_TYPE_HONORIFIC_PREFIX;
186
187 if (autocomplete_attribute_value == "given-name" ||
188 autocomplete_attribute_value == "given_name" ||
189 autocomplete_attribute_value == "first-name" ||
190 autocomplete_attribute_value == "first_name")
191 return HTML_TYPE_GIVEN_NAME;
192
193 if (autocomplete_attribute_value == "additional-name" ||
194 autocomplete_attribute_value == "additional_name") {
195 if (field.max_length == 1)
196 return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
197 return HTML_TYPE_ADDITIONAL_NAME;
198 }
199
200 if (autocomplete_attribute_value == "family-name" ||
201 autocomplete_attribute_value == "family_name")
202 return HTML_TYPE_FAMILY_NAME;
203
204 if (autocomplete_attribute_value == "organization" ||
205 autocomplete_attribute_value == "company")
206 return HTML_TYPE_ORGANIZATION;
207
208 if (autocomplete_attribute_value == "street-address" ||
209 autocomplete_attribute_value == "street_address" ||
210 autocomplete_attribute_value == "address")
211 return HTML_TYPE_STREET_ADDRESS;
212
213 if (autocomplete_attribute_value == "address-line1" ||
214 autocomplete_attribute_value == "address_line1")
215 return HTML_TYPE_ADDRESS_LINE1;
216
217 if (autocomplete_attribute_value == "address-line2" ||
218 autocomplete_attribute_value == "address_line2")
219 return HTML_TYPE_ADDRESS_LINE2;
220
221 if (autocomplete_attribute_value == "address-line3" ||
222 autocomplete_attribute_value == "address_line3")
223 return HTML_TYPE_ADDRESS_LINE3;
224
225 // TODO(estade): remove support for "locality" and "region".
226 if (autocomplete_attribute_value == "locality")
227 return HTML_TYPE_ADDRESS_LEVEL2;
228
229 if (autocomplete_attribute_value == "region")
230 return HTML_TYPE_ADDRESS_LEVEL1;
231
232 if (autocomplete_attribute_value == "address-level1" ||
233 autocomplete_attribute_value == "address_level1")
234 return HTML_TYPE_ADDRESS_LEVEL1;
235
236 if (autocomplete_attribute_value == "address-level2" ||
237 autocomplete_attribute_value == "address_level2")
238 return HTML_TYPE_ADDRESS_LEVEL2;
239
240 if (autocomplete_attribute_value == "address-level3" ||
241 autocomplete_attribute_value == "address_level3")
242 return HTML_TYPE_ADDRESS_LEVEL3;
243
244 if (autocomplete_attribute_value == "country")
245 return HTML_TYPE_COUNTRY_CODE;
246
247 if (autocomplete_attribute_value == "country-name" ||
248 autocomplete_attribute_value == "country_name")
249 return HTML_TYPE_COUNTRY_NAME;
250
251 if (autocomplete_attribute_value == "postal-code" ||
252 autocomplete_attribute_value == "postal_code")
253 return HTML_TYPE_POSTAL_CODE;
254
255 // content_switches.h isn't accessible from here, hence we have
256 // to copy the string literal. This should be removed soon anyway.
257 if (autocomplete_attribute_value == "address" &&
258 base::CommandLine::ForCurrentProcess()->HasSwitch(
259 "enable-experimental-web-platform-features")) {
260 return HTML_TYPE_FULL_ADDRESS;
261 }
262
263 if (autocomplete_attribute_value == "cc-name" ||
264 autocomplete_attribute_value == "cc_name")
265 return HTML_TYPE_CREDIT_CARD_NAME_FULL;
266
267 if (autocomplete_attribute_value == "cc-given-name" ||
268 autocomplete_attribute_value == "cc_given_name")
269 return HTML_TYPE_CREDIT_CARD_NAME_FIRST;
270
271 if (autocomplete_attribute_value == "cc-family-name" ||
272 autocomplete_attribute_value == "cc_family_name")
273 return HTML_TYPE_CREDIT_CARD_NAME_LAST;
274
275 if (autocomplete_attribute_value == "cc-number" ||
276 autocomplete_attribute_value == "cc_number")
277 return HTML_TYPE_CREDIT_CARD_NUMBER;
278
279 if (autocomplete_attribute_value == "cc-exp" ||
280 autocomplete_attribute_value == "cc_exp") {
281 if (field.max_length == 5)
282 return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
283 if (field.max_length == 7)
284 return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
285 return HTML_TYPE_CREDIT_CARD_EXP;
286 }
287
288 if (autocomplete_attribute_value == "cc-exp-month" ||
289 autocomplete_attribute_value == "cc_exp_month")
290 return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
291
292 if (autocomplete_attribute_value == "cc-exp-year" ||
293 autocomplete_attribute_value == "cc_exp_year") {
294 if (field.max_length == 2)
295 return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
296 if (field.max_length == 4)
297 return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
298 return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
299 }
300
301 if (autocomplete_attribute_value == "cc-csc" ||
302 autocomplete_attribute_value == "cc_csc")
303 return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
304
305 if (autocomplete_attribute_value == "cc-type" ||
306 autocomplete_attribute_value == "cc_type")
307 return HTML_TYPE_CREDIT_CARD_TYPE;
308
309 if (autocomplete_attribute_value == "transaction-amount" ||
310 autocomplete_attribute_value == "transaction_amount")
311 return HTML_TYPE_TRANSACTION_AMOUNT;
312
313 if (autocomplete_attribute_value == "transaction-currency" ||
314 autocomplete_attribute_value == "transaction_currency")
315 return HTML_TYPE_TRANSACTION_CURRENCY;
316
317 if (autocomplete_attribute_value == "tel" ||
318 autocomplete_attribute_value == "phone")
319 return HTML_TYPE_TEL;
320
321 if (autocomplete_attribute_value == "tel-country-code" ||
322 autocomplete_attribute_value == "phone-country-code" ||
323 autocomplete_attribute_value == "tel_country_code" ||
324 autocomplete_attribute_value == "phone_country_code")
325 return HTML_TYPE_TEL_COUNTRY_CODE;
326
327 if (autocomplete_attribute_value == "tel-national" ||
328 autocomplete_attribute_value == "phone-national" ||
329 autocomplete_attribute_value == "tel_national" ||
330 autocomplete_attribute_value == "phone_national")
331 return HTML_TYPE_TEL_NATIONAL;
332
333 if (autocomplete_attribute_value == "tel-area-code" ||
334 autocomplete_attribute_value == "phone-area-code" ||
335 autocomplete_attribute_value == "tel_area_code" ||
336 autocomplete_attribute_value == "phone_area_code")
337 return HTML_TYPE_TEL_AREA_CODE;
338
339 if (autocomplete_attribute_value == "tel-local" ||
340 autocomplete_attribute_value == "phone-local" ||
341 autocomplete_attribute_value == "tel_local" ||
342 autocomplete_attribute_value == "phone_local")
343 return HTML_TYPE_TEL_LOCAL;
344
345 if (autocomplete_attribute_value == "tel-local-prefix" ||
346 autocomplete_attribute_value == "phone-local-prefix" ||
347 autocomplete_attribute_value == "tel_local_prefix" ||
348 autocomplete_attribute_value == "phone_local_prefix")
349 return HTML_TYPE_TEL_LOCAL_PREFIX;
350
351 if (autocomplete_attribute_value == "tel-local-suffix" ||
352 autocomplete_attribute_value == "phone-local-suffix" ||
353 autocomplete_attribute_value == "tel_local_suffix" ||
354 autocomplete_attribute_value == "phone_local_suffix")
355 return HTML_TYPE_TEL_LOCAL_SUFFIX;
356
357 if (autocomplete_attribute_value == "tel-extension" ||
358 autocomplete_attribute_value == "phone-extension" ||
359 autocomplete_attribute_value == "phone-ext" ||
360 autocomplete_attribute_value == "tel_extension" ||
361 autocomplete_attribute_value == "phone_extension" ||
362 autocomplete_attribute_value == "phone_ext")
363 return HTML_TYPE_TEL_EXTENSION;
364
365 if (autocomplete_attribute_value == "email" ||
366 autocomplete_attribute_value == "username")
367 return HTML_TYPE_EMAIL;
368
369 if (autocomplete_attribute_value == "upi-vpa" ||
370 autocomplete_attribute_value == "upi_vpa" ||
371 autocomplete_attribute_value == "upi")
372 return HTML_TYPE_UPI_VPA;
373
374 if (autocomplete_attribute_value == "one-time-code")
375 return HTML_TYPE_ONE_TIME_CODE;
376
377 return HTML_TYPE_UNRECOGNIZED;
378 }
379
operator <<(std::ostream & out,const autofill::AutofillQueryResponse & response)380 std::ostream& operator<<(std::ostream& out,
381 const autofill::AutofillQueryResponse& response) {
382 for (const auto& form : response.form_suggestions()) {
383 out << "\nForm";
384 for (const auto& field : form.field_suggestions()) {
385 out << "\n Field\n signature: " << field.field_signature();
386 if (field.has_primary_type_prediction())
387 out << "\n primary_type_prediction: "
388 << field.primary_type_prediction();
389 for (const auto& prediction : field.predictions())
390 out << "\n prediction: " << prediction.type();
391 }
392 }
393 return out;
394 }
395
396 // Returns true iff all form fields autofill types are in |contained_types|.
AllTypesCaptured(const FormStructure & form,const ServerFieldTypeSet & contained_types)397 bool AllTypesCaptured(const FormStructure& form,
398 const ServerFieldTypeSet& contained_types) {
399 for (const auto& field : form) {
400 for (const auto& type : field->possible_types()) {
401 if (type != UNKNOWN_TYPE && type != EMPTY_TYPE &&
402 !contained_types.count(type))
403 return false;
404 }
405 }
406 return true;
407 }
408
409 // Encode password attributes and length into |upload|.
EncodePasswordAttributesVote(const std::pair<PasswordAttribute,bool> & password_attributes_vote,const size_t password_length_vote,const int password_symbol_vote,AutofillUploadContents * upload)410 void EncodePasswordAttributesVote(
411 const std::pair<PasswordAttribute, bool>& password_attributes_vote,
412 const size_t password_length_vote,
413 const int password_symbol_vote,
414 AutofillUploadContents* upload) {
415 switch (password_attributes_vote.first) {
416 case PasswordAttribute::kHasLowercaseLetter:
417 upload->set_password_has_lowercase_letter(
418 password_attributes_vote.second);
419 break;
420 case PasswordAttribute::kHasSpecialSymbol:
421 upload->set_password_has_special_symbol(password_attributes_vote.second);
422 if (password_attributes_vote.second)
423 upload->set_password_special_symbol(password_symbol_vote);
424 break;
425 case PasswordAttribute::kPasswordAttributesCount:
426 NOTREACHED();
427 }
428 upload->set_password_length(password_length_vote);
429 }
430
EncodeRandomizedValue(const RandomizedEncoder & encoder,FormSignature form_signature,FieldSignature field_signature,base::StringPiece data_type,base::StringPiece data_value,bool include_checksum,AutofillRandomizedValue * output)431 void EncodeRandomizedValue(const RandomizedEncoder& encoder,
432 FormSignature form_signature,
433 FieldSignature field_signature,
434 base::StringPiece data_type,
435 base::StringPiece data_value,
436 bool include_checksum,
437 AutofillRandomizedValue* output) {
438 DCHECK(output);
439 output->set_encoding_type(encoder.encoding_type());
440 output->set_encoded_bits(
441 encoder.Encode(form_signature, field_signature, data_type, data_value));
442 if (include_checksum) {
443 DCHECK(data_type == RandomizedEncoder::FORM_URL);
444 output->set_checksum(StrToHash32Bit(data_value.data()));
445 }
446 }
447
EncodeRandomizedValue(const RandomizedEncoder & encoder,FormSignature form_signature,FieldSignature field_signature,base::StringPiece data_type,base::StringPiece16 data_value,bool include_checksum,AutofillRandomizedValue * output)448 void EncodeRandomizedValue(const RandomizedEncoder& encoder,
449 FormSignature form_signature,
450 FieldSignature field_signature,
451 base::StringPiece data_type,
452 base::StringPiece16 data_value,
453 bool include_checksum,
454 AutofillRandomizedValue* output) {
455 EncodeRandomizedValue(encoder, form_signature, field_signature, data_type,
456 base::UTF16ToUTF8(data_value), include_checksum,
457 output);
458 }
459
PopulateRandomizedFormMetadata(const RandomizedEncoder & encoder,const FormStructure & form,AutofillRandomizedFormMetadata * metadata)460 void PopulateRandomizedFormMetadata(const RandomizedEncoder& encoder,
461 const FormStructure& form,
462 AutofillRandomizedFormMetadata* metadata) {
463 const FormSignature form_signature = form.form_signature();
464 constexpr FieldSignature
465 kNullFieldSignature; // Not relevant for form level metadata.
466 if (!form.id_attribute().empty()) {
467 EncodeRandomizedValue(encoder, form_signature, kNullFieldSignature,
468 RandomizedEncoder::FORM_ID, form.id_attribute(),
469 /*include_checksum=*/false, metadata->mutable_id());
470 }
471 if (!form.name_attribute().empty()) {
472 EncodeRandomizedValue(encoder, form_signature, kNullFieldSignature,
473 RandomizedEncoder::FORM_NAME, form.name_attribute(),
474 /*include_checksum=*/false, metadata->mutable_name());
475 }
476
477 for (const ButtonTitleInfo& e : form.button_titles()) {
478 auto* button_title = metadata->add_button_title();
479 DCHECK(!e.first.empty());
480 EncodeRandomizedValue(encoder, form_signature, kNullFieldSignature,
481 RandomizedEncoder::FORM_BUTTON_TITLES, e.first,
482 /*include_checksum=*/false,
483 button_title->mutable_title());
484 button_title->set_type(static_cast<ButtonTitleType>(e.second));
485 }
486 auto full_source_url = form.full_source_url().spec();
487 if (encoder.AnonymousUrlCollectionIsEnabled() && !full_source_url.empty()) {
488 EncodeRandomizedValue(encoder, form_signature, kNullFieldSignature,
489 RandomizedEncoder::FORM_URL, full_source_url,
490 /*include_checksum=*/true, metadata->mutable_url());
491 }
492 }
493
PopulateRandomizedFieldMetadata(const RandomizedEncoder & encoder,const FormStructure & form,const AutofillField & field,AutofillRandomizedFieldMetadata * metadata)494 void PopulateRandomizedFieldMetadata(
495 const RandomizedEncoder& encoder,
496 const FormStructure& form,
497 const AutofillField& field,
498 AutofillRandomizedFieldMetadata* metadata) {
499 const FormSignature form_signature = form.form_signature();
500 const FieldSignature field_signature = field.GetFieldSignature();
501 if (!field.id_attribute.empty()) {
502 EncodeRandomizedValue(encoder, form_signature, field_signature,
503 RandomizedEncoder::FIELD_ID, field.id_attribute,
504 /*include_checksum=*/false, metadata->mutable_id());
505 }
506 if (!field.name_attribute.empty()) {
507 EncodeRandomizedValue(encoder, form_signature, field_signature,
508 RandomizedEncoder::FIELD_NAME, field.name_attribute,
509 /*include_checksum=*/false, metadata->mutable_name());
510 }
511 if (!field.form_control_type.empty()) {
512 EncodeRandomizedValue(encoder, form_signature, field_signature,
513 RandomizedEncoder::FIELD_CONTROL_TYPE,
514 field.form_control_type, /*include_checksum=*/false,
515 metadata->mutable_type());
516 }
517 if (!field.label.empty()) {
518 EncodeRandomizedValue(encoder, form_signature, field_signature,
519 RandomizedEncoder::FIELD_LABEL, field.label,
520 /*include_checksum=*/false,
521 metadata->mutable_label());
522 }
523 if (!field.aria_label.empty()) {
524 EncodeRandomizedValue(encoder, form_signature, field_signature,
525 RandomizedEncoder::FIELD_ARIA_LABEL, field.aria_label,
526 /*include_checksum=*/false,
527 metadata->mutable_aria_label());
528 }
529 if (!field.aria_description.empty()) {
530 EncodeRandomizedValue(encoder, form_signature, field_signature,
531 RandomizedEncoder::FIELD_ARIA_DESCRIPTION,
532 field.aria_description, /*include_checksum=*/false,
533 metadata->mutable_aria_description());
534 }
535 if (!field.css_classes.empty()) {
536 EncodeRandomizedValue(encoder, form_signature, field_signature,
537 RandomizedEncoder::FIELD_CSS_CLASS, field.css_classes,
538 /*include_checksum=*/false,
539 metadata->mutable_css_class());
540 }
541 if (!field.placeholder.empty()) {
542 EncodeRandomizedValue(encoder, form_signature, field_signature,
543 RandomizedEncoder::FIELD_PLACEHOLDER,
544 field.placeholder, /*include_checksum=*/false,
545 metadata->mutable_placeholder());
546 }
547 }
548
EncodeFormMetadataForQuery(const FormStructure & form,AutofillRandomizedFormMetadata * metadata)549 void EncodeFormMetadataForQuery(const FormStructure& form,
550 AutofillRandomizedFormMetadata* metadata) {
551 DCHECK(metadata);
552 metadata->mutable_id()->set_encoded_bits(
553 base::UTF16ToUTF8(form.id_attribute()));
554 metadata->mutable_name()->set_encoded_bits(
555 base::UTF16ToUTF8(form.name_attribute()));
556 }
557
EncodeFieldMetadataForQuery(const FormFieldData & field,AutofillRandomizedFieldMetadata * metadata)558 void EncodeFieldMetadataForQuery(const FormFieldData& field,
559 AutofillRandomizedFieldMetadata* metadata) {
560 DCHECK(metadata);
561 metadata->mutable_id()->set_encoded_bits(
562 base::UTF16ToUTF8(field.id_attribute));
563 metadata->mutable_name()->set_encoded_bits(
564 base::UTF16ToUTF8(field.name_attribute));
565 metadata->mutable_type()->set_encoded_bits(field.form_control_type);
566 metadata->mutable_label()->set_encoded_bits(base::UTF16ToUTF8(field.label));
567 metadata->mutable_aria_label()->set_encoded_bits(
568 base::UTF16ToUTF8(field.aria_label));
569 metadata->mutable_aria_description()->set_encoded_bits(
570 base::UTF16ToUTF8(field.aria_description));
571 metadata->mutable_css_class()->set_encoded_bits(
572 base::UTF16ToUTF8(field.css_classes));
573 metadata->mutable_placeholder()->set_encoded_bits(
574 base::UTF16ToUTF8(field.placeholder));
575 }
576
577 // Creates the type relationship rules map. The keys represent the type that has
578 // rules, and the value represents the list of required types for the given
579 // key. In order to respect the rule, only one of the required types is needed.
580 // For example, for Autofill to support fields of type
581 // "PHONE_HOME_COUNTRY_CODE", there would need to be at least one other field
582 // of type "PHONE_HOME_NUMBER" or "PHONE_HOME_CITY_AND_NUMBER".
583 const std::unordered_map<ServerFieldType, ServerFieldTypeSet>&
GetTypeRelationshipMap()584 GetTypeRelationshipMap() {
585 // Initialized and cached on first use.
586 static const auto* const rules =
587 new std::unordered_map<ServerFieldType, ServerFieldTypeSet>(
588 {{PHONE_HOME_COUNTRY_CODE,
589 {PHONE_HOME_NUMBER, PHONE_HOME_CITY_AND_NUMBER}}});
590 return *rules;
591 }
592
593 } // namespace
594
FormStructure(const FormData & form)595 FormStructure::FormStructure(const FormData& form)
596 : id_attribute_(form.id_attribute),
597 name_attribute_(form.name_attribute),
598 form_name_(form.name),
599 button_titles_(form.button_titles),
600 source_url_(form.url),
601 full_source_url_(form.full_url),
602 target_url_(form.action),
603 main_frame_origin_(form.main_frame_origin),
604 is_form_tag_(form.is_form_tag),
605 is_formless_checkout_(form.is_formless_checkout),
606 all_fields_are_passwords_(!form.fields.empty()),
607 form_parsed_timestamp_(AutofillTickClock::NowTicks()),
608 passwords_were_revealed_(false),
609 password_symbol_vote_(0),
610 developer_engagement_metrics_(0),
611 unique_renderer_id_(form.unique_renderer_id) {
612 // Copy the form fields.
613 std::map<base::string16, size_t> unique_names;
614 for (const FormFieldData& field : form.fields) {
615 if (!ShouldSkipField(field))
616 ++active_field_count_;
617
618 if (field.form_control_type == "password")
619 has_password_field_ = true;
620 else
621 all_fields_are_passwords_ = false;
622
623 // Generate a unique name for this field by appending a counter to the name.
624 // Make sure to prepend the counter with a non-numeric digit so that we are
625 // guaranteed to avoid collisions.
626 base::string16 unique_name =
627 field.name + base::ASCIIToUTF16("_") +
628 base::NumberToString16(++unique_names[field.name]);
629 fields_.push_back(std::make_unique<AutofillField>(field, unique_name));
630 }
631
632 form_signature_ = autofill::CalculateFormSignature(form);
633 // Do further processing on the fields, as needed.
634 ProcessExtractedFields();
635 }
636
FormStructure(FormSignature form_signature,const std::vector<FieldSignature> & field_signatures)637 FormStructure::FormStructure(
638 FormSignature form_signature,
639 const std::vector<FieldSignature>& field_signatures)
640 : form_signature_(form_signature) {
641 for (const auto& signature : field_signatures)
642 fields_.push_back(AutofillField::CreateForPasswordManagerUpload(signature));
643 }
644
// Defaulted out of line: no teardown beyond destroying the members is needed.
FormStructure::~FormStructure() = default;
646
DetermineHeuristicTypes(LogManager * log_manager)647 void FormStructure::DetermineHeuristicTypes(LogManager* log_manager) {
648 const auto determine_heuristic_types_start_time =
649 AutofillTickClock::NowTicks();
650
651 // First, try to detect field types based on each field's |autocomplete|
652 // attribute value.
653 if (!was_parsed_for_autocomplete_attributes_)
654 ParseFieldTypesFromAutocompleteAttributes();
655
656 // Then if there are enough active fields, and if we are dealing with either a
657 // proper <form> or a <form>-less checkout, run the heuristics and server
658 // prediction routines.
659 if (ShouldRunHeuristics()) {
660 const FieldCandidatesMap field_type_map = FormField::ParseFormFields(
661 fields_, page_language_, is_form_tag_, log_manager);
662 for (const auto& field : fields_) {
663 const auto iter = field_type_map.find(field->unique_name());
664 if (iter != field_type_map.end()) {
665 field->set_heuristic_type(iter->second.BestHeuristicType());
666 }
667 }
668 }
669
670 UpdateAutofillCount();
671 IdentifySections(has_author_specified_sections_);
672
673 developer_engagement_metrics_ = 0;
674 if (IsAutofillable()) {
675 AutofillMetrics::DeveloperEngagementMetric metric =
676 has_author_specified_types_
677 ? AutofillMetrics::FILLABLE_FORM_PARSED_WITH_TYPE_HINTS
678 : AutofillMetrics::FILLABLE_FORM_PARSED_WITHOUT_TYPE_HINTS;
679 developer_engagement_metrics_ |= 1 << metric;
680 AutofillMetrics::LogDeveloperEngagementMetric(metric);
681 }
682
683 if (has_author_specified_upi_vpa_hint_) {
684 AutofillMetrics::LogDeveloperEngagementMetric(
685 AutofillMetrics::FORM_CONTAINS_UPI_VPA_HINT);
686 developer_engagement_metrics_ |=
687 1 << AutofillMetrics::FORM_CONTAINS_UPI_VPA_HINT;
688 }
689
690 RationalizeFieldTypePredictions();
691
692 AutofillMetrics::LogDetermineHeuristicTypesTiming(
693 AutofillTickClock::NowTicks() - determine_heuristic_types_start_time);
694 }
695
EncodeUploadRequest(const ServerFieldTypeSet & available_field_types,bool form_was_autofilled,const std::string & login_form_signature,bool observed_submission,AutofillUploadContents * upload,std::vector<FormSignature> * encoded_signatures) const696 bool FormStructure::EncodeUploadRequest(
697 const ServerFieldTypeSet& available_field_types,
698 bool form_was_autofilled,
699 const std::string& login_form_signature,
700 bool observed_submission,
701 AutofillUploadContents* upload,
702 std::vector<FormSignature>* encoded_signatures) const {
703 DCHECK(AllTypesCaptured(*this, available_field_types));
704 encoded_signatures->clear();
705
706 upload->set_submission(observed_submission);
707 upload->set_client_version(
708 version_info::GetProductNameAndVersionForUserAgent());
709 upload->set_form_signature(form_signature().value());
710 upload->set_autofill_used(form_was_autofilled);
711 upload->set_data_present(EncodeFieldTypes(available_field_types));
712 upload->set_passwords_revealed(passwords_were_revealed_);
713 upload->set_has_form_tag(is_form_tag_);
714 if (!page_language_.empty() && randomized_encoder_ != nullptr) {
715 upload->set_language(page_language_);
716 }
717
718 auto triggering_event = (submission_event_ != SubmissionIndicatorEvent::NONE)
719 ? submission_event_
720 : ToSubmissionIndicatorEvent(submission_source_);
721
722 DCHECK(autofill::mojom::IsKnownEnumValue(triggering_event));
723 upload->set_submission_event(
724 static_cast<AutofillUploadContents_SubmissionIndicatorEvent>(
725 triggering_event));
726
727 if (password_attributes_vote_) {
728 EncodePasswordAttributesVote(*password_attributes_vote_,
729 password_length_vote_, password_symbol_vote_,
730 upload);
731 }
732
733 if (IsAutofillFieldMetadataEnabled()) {
734 upload->set_action_signature(StrToHash64Bit(target_url_.host()));
735 if (!form_name().empty())
736 upload->set_form_name(base::UTF16ToUTF8(form_name()));
737 for (const ButtonTitleInfo& e : button_titles_) {
738 auto* button_title = upload->add_button_title();
739 button_title->set_title(base::UTF16ToUTF8(e.first));
740 button_title->set_type(static_cast<ButtonTitleType>(e.second));
741 }
742 }
743
744 if (!login_form_signature.empty()) {
745 uint64_t login_sig;
746 if (base::StringToUint64(login_form_signature, &login_sig))
747 upload->set_login_form_signature(login_sig);
748 }
749
750 if (IsMalformed())
751 return false; // Malformed form, skip it.
752
753 EncodeFormForUpload(upload, encoded_signatures);
754 return true;
755 }
756
757 // static
EncodeQueryRequest(const std::vector<FormStructure * > & forms,AutofillPageQueryRequest * query,std::vector<FormSignature> * queried_form_signatures)758 bool FormStructure::EncodeQueryRequest(
759 const std::vector<FormStructure*>& forms,
760 AutofillPageQueryRequest* query,
761 std::vector<FormSignature>* queried_form_signatures) {
762 DCHECK(queried_form_signatures);
763 queried_form_signatures->clear();
764 queried_form_signatures->reserve(forms.size());
765
766 query->set_client_version(
767 version_info::GetProductNameAndVersionForUserAgent());
768
769 // If a page contains repeated forms, detect that and encode only one form as
770 // the returned data would be the same for all the repeated forms.
771 // TODO(crbug/1064709#c11): the statement is not entirely correct because
772 // (1) distinct forms can have identical form signatures because we truncate
773 // (large) numbers in the form signature calculation while these are
774 // considered for field signatures; (2) for dynamic forms we will hold on to
775 // the original form signature.
776 std::set<FormSignature> processed_forms;
777 for (const auto* form : forms) {
778 if (processed_forms.find(form->form_signature()) != processed_forms.end())
779 continue;
780 processed_forms.insert(form->form_signature());
781 UMA_HISTOGRAM_COUNTS_1000("Autofill.FieldCount", form->field_count());
782 if (form->IsMalformed())
783 continue;
784
785 form->EncodeFormForQuery(query->add_forms(), queried_form_signatures);
786 }
787
788 return !queried_form_signatures->empty();
789 }
790
791 // static
ParseApiQueryResponse(base::StringPiece payload,const std::vector<FormStructure * > & forms,const std::vector<FormSignature> & queried_form_signatures,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)792 void FormStructure::ParseApiQueryResponse(
793 base::StringPiece payload,
794 const std::vector<FormStructure*>& forms,
795 const std::vector<FormSignature>& queried_form_signatures,
796 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
797 AutofillMetrics::LogServerQueryMetric(
798 AutofillMetrics::QUERY_RESPONSE_RECEIVED);
799
800 std::string decoded_payload;
801 if (!base::Base64Decode(payload, &decoded_payload)) {
802 VLOG(1) << "Could not decode payload from base64 to bytes";
803 return;
804 }
805
806 // Parse the response.
807 AutofillQueryResponse response;
808 if (!response.ParseFromString(decoded_payload))
809 return;
810
811 VLOG(1) << "Autofill query response from API was successfully parsed: "
812 << response;
813
814 ProcessQueryResponse(response, forms, queried_form_signatures,
815 form_interactions_ukm_logger);
816 }
817
818 // static
ProcessQueryResponse(const AutofillQueryResponse & response,const std::vector<FormStructure * > & forms,const std::vector<FormSignature> & queried_form_signatures,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)819 void FormStructure::ProcessQueryResponse(
820 const AutofillQueryResponse& response,
821 const std::vector<FormStructure*>& forms,
822 const std::vector<FormSignature>& queried_form_signatures,
823 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
824 AutofillMetrics::LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
825
826 bool heuristics_detected_fillable_field = false;
827 bool query_response_overrode_heuristics = false;
828
829 std::map<std::pair<FormSignature, FieldSignature>,
830 AutofillQueryResponse::FormSuggestion::FieldSuggestion>
831 field_types;
832 for (int form_idx = 0;
833 form_idx < std::min(response.form_suggestions_size(),
834 static_cast<int>(queried_form_signatures.size()));
835 ++form_idx) {
836 FormSignature form_sig = queried_form_signatures.at(form_idx);
837 for (const auto& field :
838 response.form_suggestions(form_idx).field_suggestions()) {
839 FieldSignature field_sig(field.field_signature());
840 field_types[std::make_pair(form_sig, field_sig)] = field;
841 }
842 }
843
844 // Copy the field types into the actual form.
845 for (FormStructure* form : forms) {
846 bool query_response_has_no_server_data = true;
847 for (auto& field : form->fields_) {
848 auto it = field_types.find(
849 std::make_pair(form->form_signature(), field->GetFieldSignature()));
850 if (it == field_types.end())
851 continue;
852
853 const auto& current_field = it->second;
854
855 ServerFieldType field_type =
856 static_cast<ServerFieldType>(current_field.primary_type_prediction());
857 query_response_has_no_server_data &= field_type == NO_SERVER_DATA;
858
859 ServerFieldType heuristic_type = field->heuristic_type();
860 if (heuristic_type != UNKNOWN_TYPE)
861 heuristics_detected_fillable_field = true;
862
863 field->set_server_type(field_type);
864 std::vector<AutofillQueryResponse::FormSuggestion::FieldSuggestion::
865 FieldPrediction>
866 server_predictions;
867 if (current_field.predictions_size() == 0) {
868 AutofillQueryResponse::FormSuggestion::FieldSuggestion::FieldPrediction
869 field_prediction;
870 field_prediction.set_type(field_type);
871 server_predictions.push_back(field_prediction);
872 } else {
873 server_predictions.assign(current_field.predictions().begin(),
874 current_field.predictions().end());
875 }
876 field->set_server_predictions(std::move(server_predictions));
877 field->set_may_use_prefilled_placeholder(
878 current_field.may_use_prefilled_placeholder());
879
880 if (heuristic_type != field->Type().GetStorableType())
881 query_response_overrode_heuristics = true;
882
883 if (current_field.has_password_requirements())
884 field->SetPasswordRequirements(current_field.password_requirements());
885 }
886
887 AutofillMetrics::LogServerResponseHasDataForForm(
888 !query_response_has_no_server_data);
889
890 form->UpdateAutofillCount();
891 form->RationalizeRepeatedFields(form_interactions_ukm_logger);
892 form->RationalizeFieldTypePredictions();
893 form->IdentifySections(false);
894 }
895
896 AutofillMetrics::ServerQueryMetric metric;
897 if (query_response_overrode_heuristics) {
898 if (heuristics_detected_fillable_field) {
899 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
900 } else {
901 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
902 }
903 } else {
904 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
905 }
906 AutofillMetrics::LogServerQueryMetric(metric);
907 }
908
909 // static
GetFieldTypePredictions(const std::vector<FormStructure * > & form_structures)910 std::vector<FormDataPredictions> FormStructure::GetFieldTypePredictions(
911 const std::vector<FormStructure*>& form_structures) {
912 std::vector<FormDataPredictions> forms;
913 forms.reserve(form_structures.size());
914 for (const FormStructure* form_structure : form_structures) {
915 FormDataPredictions form;
916 form.data = form_structure->ToFormData();
917 form.signature = form_structure->FormSignatureAsStr();
918
919 for (const auto& field : form_structure->fields_) {
920 FormFieldDataPredictions annotated_field;
921 annotated_field.signature = field->FieldSignatureAsStr();
922 annotated_field.heuristic_type =
923 AutofillType(field->heuristic_type()).ToString();
924 annotated_field.server_type =
925 AutofillType(field->server_type()).ToString();
926 annotated_field.overall_type = field->Type().ToString();
927 annotated_field.parseable_name =
928 base::UTF16ToUTF8(field->parseable_name());
929 annotated_field.section = field->section;
930 form.fields.push_back(annotated_field);
931 }
932
933 forms.push_back(form);
934 }
935 return forms;
936 }
937
938 // static
IsAutofillFieldMetadataEnabled()939 bool FormStructure::IsAutofillFieldMetadataEnabled() {
940 const std::string group_name =
941 base::FieldTrialList::FindFullName("AutofillFieldMetadata");
942 return base::StartsWith(group_name, "Enabled", base::CompareCase::SENSITIVE);
943 }
944
CreateForPasswordManagerUpload(FormSignature form_signature,const std::vector<FieldSignature> & field_signatures)945 std::unique_ptr<FormStructure> FormStructure::CreateForPasswordManagerUpload(
946 FormSignature form_signature,
947 const std::vector<FieldSignature>& field_signatures) {
948 std::unique_ptr<FormStructure> form;
949 form.reset(new FormStructure(form_signature, field_signatures));
950 return form;
951 }
952
FormSignatureAsStr() const953 std::string FormStructure::FormSignatureAsStr() const {
954 return base::NumberToString(form_signature().value());
955 }
956
IsAutofillable() const957 bool FormStructure::IsAutofillable() const {
958 size_t min_required_fields =
959 std::min({kMinRequiredFieldsForHeuristics, kMinRequiredFieldsForQuery,
960 kMinRequiredFieldsForUpload});
961 if (autofill_count() < min_required_fields)
962 return false;
963
964 return ShouldBeParsed();
965 }
966
IsCompleteCreditCardForm() const967 bool FormStructure::IsCompleteCreditCardForm() const {
968 bool found_cc_number = false;
969 bool found_cc_expiration = false;
970 for (const auto& field : fields_) {
971 ServerFieldType type = field->Type().GetStorableType();
972 if (!found_cc_expiration && data_util::IsCreditCardExpirationType(type)) {
973 found_cc_expiration = true;
974 } else if (!found_cc_number && type == CREDIT_CARD_NUMBER) {
975 found_cc_number = true;
976 }
977 if (found_cc_expiration && found_cc_number)
978 return true;
979 }
980 return false;
981 }
982
UpdateAutofillCount()983 void FormStructure::UpdateAutofillCount() {
984 autofill_count_ = 0;
985 for (const auto& field : *this) {
986 if (field && field->IsFieldFillable())
987 ++autofill_count_;
988 }
989 }
990
ShouldBeParsed(LogManager * log_manager) const991 bool FormStructure::ShouldBeParsed(LogManager* log_manager) const {
992 // Exclude URLs not on the web via HTTP(S).
993 if (!HasAllowedScheme(source_url_)) {
994 if (log_manager) {
995 log_manager->Log() << LoggingScope::kAbortParsing
996 << LogMessage::kAbortParsingNotAllowedScheme << *this;
997 }
998 return false;
999 }
1000
1001 size_t min_required_fields =
1002 std::min({kMinRequiredFieldsForHeuristics, kMinRequiredFieldsForQuery,
1003 kMinRequiredFieldsForUpload});
1004 if (active_field_count() < min_required_fields &&
1005 (!all_fields_are_passwords() ||
1006 active_field_count() < kRequiredFieldsForFormsWithOnlyPasswordFields) &&
1007 !has_author_specified_types_) {
1008 if (log_manager) {
1009 log_manager->Log() << LoggingScope::kAbortParsing
1010 << LogMessage::kAbortParsingNotEnoughFields
1011 << active_field_count() << *this;
1012 }
1013 return false;
1014 }
1015
1016 // Rule out search forms.
1017 static const base::string16 kUrlSearchActionPattern =
1018 base::UTF8ToUTF16(kUrlSearchActionRe);
1019 if (MatchesPattern(base::UTF8ToUTF16(target_url_.path_piece()),
1020 kUrlSearchActionPattern)) {
1021 if (log_manager) {
1022 log_manager->Log() << LoggingScope::kAbortParsing
1023 << LogMessage::kAbortParsingUrlMatchesSearchRegex
1024 << *this;
1025 }
1026 return false;
1027 }
1028
1029 bool has_text_field = false;
1030 for (const auto& it : *this) {
1031 has_text_field |= it->form_control_type != "select-one";
1032 }
1033
1034 if (!has_text_field && log_manager) {
1035 log_manager->Log() << LoggingScope::kAbortParsing
1036 << LogMessage::kAbortParsingFormHasNoTextfield << *this;
1037 }
1038
1039 return has_text_field;
1040 }
1041
ShouldRunHeuristics() const1042 bool FormStructure::ShouldRunHeuristics() const {
1043 return active_field_count() >= kMinRequiredFieldsForHeuristics &&
1044 HasAllowedScheme(source_url_) &&
1045 (is_form_tag_ || is_formless_checkout_ ||
1046 !base::FeatureList::IsEnabled(
1047 features::kAutofillRestrictUnownedFieldsToFormlessCheckout));
1048 }
1049
ShouldBeQueried() const1050 bool FormStructure::ShouldBeQueried() const {
1051 return (has_password_field_ ||
1052 active_field_count() >= kMinRequiredFieldsForQuery) &&
1053 ShouldBeParsed();
1054 }
1055
ShouldBeUploaded() const1056 bool FormStructure::ShouldBeUploaded() const {
1057 return active_field_count() >= kMinRequiredFieldsForUpload &&
1058 ShouldBeParsed();
1059 }
1060
RetrieveFromCache(const FormStructure & cached_form,const bool should_keep_cached_value,const bool only_server_and_autofill_state)1061 void FormStructure::RetrieveFromCache(
1062 const FormStructure& cached_form,
1063 const bool should_keep_cached_value,
1064 const bool only_server_and_autofill_state) {
1065 std::map<FieldRendererId, const AutofillField*> cached_fields_by_id;
1066 for (size_t i = 0; i < cached_form.field_count(); ++i) {
1067 auto* const field = cached_form.field(i);
1068 cached_fields_by_id[field->unique_renderer_id] = field;
1069 }
1070 for (auto& field : *this) {
1071 const AutofillField* cached_field = nullptr;
1072 const auto& it = cached_fields_by_id.find(field->unique_renderer_id);
1073 if (it != cached_fields_by_id.end())
1074 cached_field = it->second;
1075
1076 // If the unique renderer id (or the name) is not stable due to some Java
1077 // Script magic in the website, use the field signature as a fallback
1078 // solution to find the field in the cached form.
1079 if (!cached_field) {
1080 // Iterates over the fields to find the field with the same form
1081 // signature.
1082 for (size_t i = 0; i < cached_form.field_count(); ++i) {
1083 auto* const cfield = cached_form.field(i);
1084 if (field->GetFieldSignature() == cfield->GetFieldSignature()) {
1085 // If there are multiple matches, do not retrieve the field and stop
1086 // the process.
1087 if (cached_field) {
1088 cached_field = nullptr;
1089 break;
1090 } else {
1091 cached_field = cfield;
1092 }
1093 }
1094 }
1095 }
1096
1097 if (cached_field) {
1098 if (!only_server_and_autofill_state) {
1099 // Transfer attributes of the cached AutofillField to the newly created
1100 // AutofillField.
1101 field->set_heuristic_type(cached_field->heuristic_type());
1102 field->SetHtmlType(cached_field->html_type(),
1103 cached_field->html_mode());
1104 field->section = cached_field->section;
1105 field->set_only_fill_when_focused(
1106 cached_field->only_fill_when_focused());
1107 }
1108 if (should_keep_cached_value) {
1109 field->is_autofilled = cached_field->is_autofilled;
1110 }
1111 if (field->form_control_type != "select-one") {
1112 if (should_keep_cached_value) {
1113 field->value = cached_field->value;
1114 value_from_dynamic_change_form_ = true;
1115 } else if (field->value == cached_field->value &&
1116 (field->server_type() != ADDRESS_HOME_COUNTRY &&
1117 field->server_type() != ADDRESS_HOME_STATE)) {
1118 // From the perspective of learning user data, text fields containing
1119 // default values are equivalent to empty fields.
1120 // Since a website can prefill country and state values basedw on
1121 // GeoIp, the mechanism is deactivated for state and country fields.
1122 field->value = base::string16();
1123 }
1124 }
1125 field->set_server_type(cached_field->server_type());
1126 field->set_previously_autofilled(cached_field->previously_autofilled());
1127 }
1128 }
1129
1130 UpdateAutofillCount();
1131
1132 // Update form parsed timestamp
1133 form_parsed_timestamp_ =
1134 std::min(form_parsed_timestamp_, cached_form.form_parsed_timestamp_);
1135
1136 // The form signature should match between query and upload requests to the
1137 // server. On many websites, form elements are dynamically added, removed, or
1138 // rearranged via JavaScript between page load and form submission, so we
1139 // copy over the |form_signature_field_names_| corresponding to the query
1140 // request.
1141 form_signature_ = cached_form.form_signature_;
1142 }
1143
LogQualityMetrics(const base::TimeTicks & load_time,const base::TimeTicks & interaction_time,const base::TimeTicks & submission_time,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger,bool did_show_suggestions,bool observed_submission) const1144 void FormStructure::LogQualityMetrics(
1145 const base::TimeTicks& load_time,
1146 const base::TimeTicks& interaction_time,
1147 const base::TimeTicks& submission_time,
1148 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger,
1149 bool did_show_suggestions,
1150 bool observed_submission) const {
1151 // Use the same timestamp on UKM Metrics generated within this method's scope.
1152 AutofillMetrics::UkmTimestampPin timestamp_pin(form_interactions_ukm_logger);
1153
1154 size_t num_detected_field_types = 0;
1155 size_t num_edited_autofilled_fields = 0;
1156 bool did_autofill_all_possible_fields = true;
1157 bool did_autofill_some_possible_fields = false;
1158 bool is_for_credit_card = IsCompleteCreditCardForm();
1159 bool has_upi_vpa_field = false;
1160
1161 // Determine the correct suffix for the metric, depending on whether or
1162 // not a submission was observed.
1163 const AutofillMetrics::QualityMetricType metric_type =
1164 observed_submission ? AutofillMetrics::TYPE_SUBMISSION
1165 : AutofillMetrics::TYPE_NO_SUBMISSION;
1166
1167 for (size_t i = 0; i < field_count(); ++i) {
1168 auto* const field = this->field(i);
1169 if (IsUPIVirtualPaymentAddress(field->value)) {
1170 has_upi_vpa_field = true;
1171 AutofillMetrics::LogUserHappinessMetric(
1172 AutofillMetrics::USER_DID_ENTER_UPI_VPA, field->Type().group(),
1173 security_state::SecurityLevel::SECURITY_LEVEL_COUNT,
1174 data_util::DetermineGroups(*this));
1175 }
1176
1177 form_interactions_ukm_logger->LogFieldFillStatus(*this, *field,
1178 metric_type);
1179
1180 AutofillMetrics::LogHeuristicPredictionQualityMetrics(
1181 form_interactions_ukm_logger, *this, *field, metric_type);
1182 AutofillMetrics::LogServerPredictionQualityMetrics(
1183 form_interactions_ukm_logger, *this, *field, metric_type);
1184 AutofillMetrics::LogOverallPredictionQualityMetrics(
1185 form_interactions_ukm_logger, *this, *field, metric_type);
1186 // We count fields that were autofilled but later modified, regardless of
1187 // whether the data now in the field is recognized.
1188 if (field->previously_autofilled())
1189 num_edited_autofilled_fields++;
1190
1191 const ServerFieldTypeSet& field_types = field->possible_types();
1192 DCHECK(!field_types.empty());
1193 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE)) {
1194 DCHECK_EQ(field_types.size(), 1u);
1195 continue;
1196 }
1197
1198 ++num_detected_field_types;
1199 if (field->is_autofilled)
1200 did_autofill_some_possible_fields = true;
1201 else if (!field->only_fill_when_focused())
1202 did_autofill_all_possible_fields = false;
1203
1204 // If the form was submitted, record if field types have been filled and
1205 // subsequently edited by the user.
1206 if (observed_submission) {
1207 if (field->is_autofilled || field->previously_autofilled()) {
1208 AutofillMetrics::LogEditedAutofilledFieldAtSubmission(
1209 form_interactions_ukm_logger, *this, *field);
1210 }
1211 }
1212 }
1213
1214 AutofillMetrics::LogNumberOfEditedAutofilledFields(
1215 num_edited_autofilled_fields, observed_submission);
1216
1217 // We log "submission" and duration metrics if we are here after observing a
1218 // submission event.
1219 if (observed_submission) {
1220 AutofillMetrics::AutofillFormSubmittedState state;
1221 if (num_detected_field_types < kMinRequiredFieldsForHeuristics &&
1222 num_detected_field_types < kMinRequiredFieldsForQuery) {
1223 state = AutofillMetrics::NON_FILLABLE_FORM_OR_NEW_DATA;
1224 } else {
1225 if (did_autofill_all_possible_fields) {
1226 state = AutofillMetrics::FILLABLE_FORM_AUTOFILLED_ALL;
1227 } else if (did_autofill_some_possible_fields) {
1228 state = AutofillMetrics::FILLABLE_FORM_AUTOFILLED_SOME;
1229 } else if (!did_show_suggestions) {
1230 state = AutofillMetrics::
1231 FILLABLE_FORM_AUTOFILLED_NONE_DID_NOT_SHOW_SUGGESTIONS;
1232 } else {
1233 state =
1234 AutofillMetrics::FILLABLE_FORM_AUTOFILLED_NONE_DID_SHOW_SUGGESTIONS;
1235 }
1236
1237 // Unlike the other times, the |submission_time| should always be
1238 // available.
1239 DCHECK(!submission_time.is_null());
1240
1241 // The |load_time| might be unset, in the case that the form was
1242 // dynamically added to the DOM.
1243 if (!load_time.is_null()) {
1244 // Submission should always chronologically follow form load.
1245 DCHECK_GE(submission_time, load_time);
1246 base::TimeDelta elapsed = submission_time - load_time;
1247 if (did_autofill_some_possible_fields)
1248 AutofillMetrics::LogFormFillDurationFromLoadWithAutofill(elapsed);
1249 else
1250 AutofillMetrics::LogFormFillDurationFromLoadWithoutAutofill(elapsed);
1251 }
1252
1253 // The |interaction_time| might be unset, in the case that the user
1254 // submitted a blank form.
1255 if (!interaction_time.is_null()) {
1256 // Submission should always chronologically follow interaction.
1257 DCHECK(submission_time > interaction_time);
1258 base::TimeDelta elapsed = submission_time - interaction_time;
1259 AutofillMetrics::LogFormFillDurationFromInteraction(
1260 GetFormTypes(), did_autofill_some_possible_fields, elapsed);
1261 }
1262 }
1263
1264 AutofillMetrics::LogAutofillFormSubmittedState(
1265 state, is_for_credit_card, has_upi_vpa_field, GetFormTypes(),
1266 form_parsed_timestamp_, form_signature(), form_interactions_ukm_logger);
1267 }
1268 }
1269
LogQualityMetricsBasedOnAutocomplete(AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger) const1270 void FormStructure::LogQualityMetricsBasedOnAutocomplete(
1271 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger)
1272 const {
1273 const AutofillMetrics::QualityMetricType metric_type =
1274 AutofillMetrics::TYPE_AUTOCOMPLETE_BASED;
1275 for (const auto& field : fields_) {
1276 if (field->html_type() != HTML_TYPE_UNSPECIFIED &&
1277 field->html_type() != HTML_TYPE_UNRECOGNIZED) {
1278 AutofillMetrics::LogHeuristicPredictionQualityMetrics(
1279 form_interactions_ukm_logger, *this, *field, metric_type);
1280 AutofillMetrics::LogServerPredictionQualityMetrics(
1281 form_interactions_ukm_logger, *this, *field, metric_type);
1282 }
1283 }
1284 }
1285
ParseFieldTypesFromAutocompleteAttributes()1286 void FormStructure::ParseFieldTypesFromAutocompleteAttributes() {
1287 has_author_specified_types_ = false;
1288 has_author_specified_sections_ = false;
1289 has_author_specified_upi_vpa_hint_ = false;
1290 for (const std::unique_ptr<AutofillField>& field : fields_) {
1291 // To prevent potential section name collisions, add a default suffix for
1292 // other fields. Without this, 'autocomplete' attribute values
1293 // "section--shipping street-address" and "shipping street-address" would be
1294 // parsed identically, given the section handling code below. We do this
1295 // before any validation so that fields with invalid attributes still end up
1296 // in the default section. These default section names will be overridden
1297 // by subsequent heuristic parsing steps if there are no author-specified
1298 // section names.
1299 field->section = kDefaultSection;
1300
1301 std::vector<std::string> tokens =
1302 LowercaseAndTokenizeAttributeString(field->autocomplete_attribute);
1303
1304 // The autocomplete attribute is overloaded: it can specify either a field
1305 // type hint or whether autocomplete should be enabled at all. Ignore the
1306 // latter type of attribute value.
1307 if (tokens.empty() ||
1308 (tokens.size() == 1 &&
1309 (tokens[0] == "on" || tokens[0] == "off" || tokens[0] == "false"))) {
1310 continue;
1311 }
1312
1313 // Any other value, even it is invalid, is considered to be a type hint.
1314 // This allows a website's author to specify an attribute like
1315 // autocomplete="other" on a field to disable all Autofill heuristics for
1316 // the form.
1317 has_author_specified_types_ = true;
1318
1319 // Per the spec, the tokens are parsed in reverse order. The expected
1320 // pattern is:
1321 // [section-*] [shipping|billing] [type_hint] field_type
1322
1323 // (1) The final token must be the field type. If it is not one of the known
1324 // types, abort.
1325 std::string field_type_token = tokens.back();
1326 tokens.pop_back();
1327 HtmlFieldType field_type =
1328 FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1329 if (field_type == HTML_TYPE_UPI_VPA) {
1330 has_author_specified_upi_vpa_hint_ = true;
1331 // TODO(crbug.com/702223): Flesh out support for UPI-VPA.
1332 field_type = HTML_TYPE_UNRECOGNIZED;
1333 }
1334 if (field_type == HTML_TYPE_UNSPECIFIED)
1335 continue;
1336
1337 // (2) The preceding token, if any, may be a type hint.
1338 if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1339 // If it is, it must match the field type; otherwise, abort.
1340 // Note that an invalid token invalidates the entire attribute value, even
1341 // if the other tokens are valid.
1342 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1343 continue;
1344
1345 // Chrome Autofill ignores these type hints.
1346 tokens.pop_back();
1347 }
1348
1349 DCHECK_EQ(kDefaultSection, field->section);
1350 std::string section = field->section;
1351 HtmlFieldMode mode = HTML_MODE_NONE;
1352
1353 // (3) The preceding token, if any, may be a fixed string that is either
1354 // "shipping" or "billing". Chrome Autofill treats these as implicit
1355 // section name suffixes.
1356 if (!tokens.empty()) {
1357 if (tokens.back() == kShippingMode)
1358 mode = HTML_MODE_SHIPPING;
1359 else if (tokens.back() == kBillingMode)
1360 mode = HTML_MODE_BILLING;
1361
1362 if (mode != HTML_MODE_NONE) {
1363 section = "-" + tokens.back();
1364 tokens.pop_back();
1365 }
1366 }
1367
1368 // (4) The preceding token, if any, may be a named section.
1369 const base::StringPiece kSectionPrefix = "section-";
1370 if (!tokens.empty() && base::StartsWith(tokens.back(), kSectionPrefix,
1371 base::CompareCase::SENSITIVE)) {
1372 // Prepend this section name to the suffix set in the preceding block.
1373 section = tokens.back().substr(kSectionPrefix.size()) + section;
1374 tokens.pop_back();
1375 }
1376
1377 // (5) No other tokens are allowed. If there are any remaining, abort.
1378 if (!tokens.empty())
1379 continue;
1380
1381 if (section != kDefaultSection) {
1382 has_author_specified_sections_ = true;
1383 field->section = section;
1384 }
1385
1386 // No errors encountered while parsing!
1387 // Update the |field|'s type based on what was parsed from the attribute.
1388 field->SetHtmlType(field_type, mode);
1389 }
1390
1391 was_parsed_for_autocomplete_attributes_ = true;
1392 }
1393
PossibleValues(ServerFieldType type)1394 std::set<base::string16> FormStructure::PossibleValues(ServerFieldType type) {
1395 std::set<base::string16> values;
1396 AutofillType target_type(type);
1397 for (const auto& field : fields_) {
1398 if (field->Type().GetStorableType() != target_type.GetStorableType() ||
1399 field->Type().group() != target_type.group()) {
1400 continue;
1401 }
1402
1403 // No option values; anything goes.
1404 if (field->option_values.empty()) {
1405 values.clear();
1406 break;
1407 }
1408
1409 for (const base::string16& val : field->option_values) {
1410 if (!val.empty())
1411 values.insert(base::i18n::ToUpper(val));
1412 }
1413
1414 for (const base::string16& content : field->option_contents) {
1415 if (!content.empty())
1416 values.insert(base::i18n::ToUpper(content));
1417 }
1418 }
1419
1420 return values;
1421 }
1422
field(size_t index) const1423 const AutofillField* FormStructure::field(size_t index) const {
1424 if (index >= fields_.size()) {
1425 NOTREACHED();
1426 return nullptr;
1427 }
1428
1429 return fields_[index].get();
1430 }
1431
field(size_t index)1432 AutofillField* FormStructure::field(size_t index) {
1433 return const_cast<AutofillField*>(
1434 static_cast<const FormStructure*>(this)->field(index));
1435 }
1436
field_count() const1437 size_t FormStructure::field_count() const {
1438 return fields_.size();
1439 }
1440
active_field_count() const1441 size_t FormStructure::active_field_count() const {
1442 return active_field_count_;
1443 }
1444
ToFormData() const1445 FormData FormStructure::ToFormData() const {
1446 FormData data;
1447 data.id_attribute = id_attribute_;
1448 data.name_attribute = name_attribute_;
1449 data.name = form_name_;
1450 data.button_titles = button_titles_;
1451 data.url = source_url_;
1452 data.full_url = full_source_url_;
1453 data.action = target_url_;
1454 data.main_frame_origin = main_frame_origin_;
1455 data.is_form_tag = is_form_tag_;
1456 data.is_formless_checkout = is_formless_checkout_;
1457 data.unique_renderer_id = unique_renderer_id_;
1458
1459 for (size_t i = 0; i < fields_.size(); ++i) {
1460 data.fields.push_back(FormFieldData(*fields_[i]));
1461 }
1462
1463 return data;
1464 }
1465
SectionedFieldsIndexes()1466 FormStructure::SectionedFieldsIndexes::SectionedFieldsIndexes() {}
1467
~SectionedFieldsIndexes()1468 FormStructure::SectionedFieldsIndexes::~SectionedFieldsIndexes() {}
1469
RationalizeCreditCardFieldPredictions()1470 void FormStructure::RationalizeCreditCardFieldPredictions() {
1471 bool cc_first_name_found = false;
1472 bool cc_last_name_found = false;
1473 bool cc_num_found = false;
1474 bool cc_month_found = false;
1475 bool cc_year_found = false;
1476 bool cc_type_found = false;
1477 bool cc_cvc_found = false;
1478 size_t num_months_found = 0;
1479 size_t num_other_fields_found = 0;
1480 for (const auto& field : fields_) {
1481 ServerFieldType current_field_type =
1482 field->ComputedType().GetStorableType();
1483 switch (current_field_type) {
1484 case CREDIT_CARD_NAME_FIRST:
1485 cc_first_name_found = true;
1486 break;
1487 case CREDIT_CARD_NAME_LAST:
1488 cc_last_name_found = true;
1489 break;
1490 case CREDIT_CARD_NAME_FULL:
1491 cc_first_name_found = true;
1492 cc_last_name_found = true;
1493 break;
1494 case CREDIT_CARD_NUMBER:
1495 cc_num_found = true;
1496 break;
1497 case CREDIT_CARD_EXP_MONTH:
1498 cc_month_found = true;
1499 ++num_months_found;
1500 break;
1501 case CREDIT_CARD_EXP_2_DIGIT_YEAR:
1502 case CREDIT_CARD_EXP_4_DIGIT_YEAR:
1503 cc_year_found = true;
1504 break;
1505 case CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR:
1506 case CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR:
1507 cc_month_found = true;
1508 cc_year_found = true;
1509 ++num_months_found;
1510 break;
1511 case CREDIT_CARD_TYPE:
1512 cc_type_found = true;
1513 break;
1514 case CREDIT_CARD_VERIFICATION_CODE:
1515 cc_cvc_found = true;
1516 break;
1517 case ADDRESS_HOME_ZIP:
1518 case ADDRESS_BILLING_ZIP:
1519 // Zip/Postal code often appears as part of a Credit Card form. Do
1520 // not count it as a non-cc-related field.
1521 break;
1522 default:
1523 ++num_other_fields_found;
1524 }
1525 }
1526
1527 // A partial CC name is unlikely. Prefer to consider these profile names
1528 // when partial.
1529 bool cc_name_found = cc_first_name_found && cc_last_name_found;
1530
1531 // A partial CC expiry date should not be filled. These are often confused
1532 // with quantity/height fields and/or generic year fields.
1533 bool cc_date_found = cc_month_found && cc_year_found;
1534
1535 // Count the credit card related fields in the form.
1536 size_t num_cc_fields_found =
1537 static_cast<int>(cc_name_found) + static_cast<int>(cc_num_found) +
1538 static_cast<int>(cc_date_found) + static_cast<int>(cc_type_found) +
1539 static_cast<int>(cc_cvc_found);
1540
1541 // Retain credit card related fields if the form has multiple fields or has
1542 // no unrelated fields (useful for single cc-field forms). Credit card number
1543 // is permitted to be alone in an otherwise unrelated form because some
1544 // dynamic forms reveal the remainder of the fields only after the credit
1545 // card number is entered and identified as a credit card by the site.
1546 bool keep_cc_fields =
1547 cc_num_found || num_cc_fields_found >= 3 || num_other_fields_found == 0;
1548
1549 // Do an update pass over the fields to rewrite the types if credit card
1550 // fields are not to be retained. Some special handling is given to expiry
1551 // dates if the full date is not found or multiple expiry date fields are
1552 // found. See comments inline below.
1553 for (auto it = fields_.begin(); it != fields_.end(); ++it) {
1554 auto& field = *it;
1555 ServerFieldType current_field_type = field->Type().GetStorableType();
1556 switch (current_field_type) {
1557 case CREDIT_CARD_NAME_FIRST:
1558 if (!keep_cc_fields)
1559 field->SetTypeTo(AutofillType(NAME_FIRST));
1560 break;
1561 case CREDIT_CARD_NAME_LAST:
1562 if (!keep_cc_fields)
1563 field->SetTypeTo(AutofillType(NAME_LAST));
1564 break;
1565 case CREDIT_CARD_NAME_FULL:
1566 if (!keep_cc_fields)
1567 field->SetTypeTo(AutofillType(NAME_FULL));
1568 break;
1569 case CREDIT_CARD_NUMBER:
1570 case CREDIT_CARD_TYPE:
1571 case CREDIT_CARD_VERIFICATION_CODE:
1572 case CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR:
1573 case CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR:
1574 if (!keep_cc_fields)
1575 field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1576 break;
1577 case CREDIT_CARD_EXP_MONTH:
1578 // Do not preserve an expiry month prediction if any of the following
1579 // are true:
        // (1) the form is determined to be non-cc related, so all cc
1581 // field predictions are to be discarded
1582 // (2) the expiry month was found without a corresponding year
1583 // (3) multiple month fields were found in a form having a full
1584 // expiry date. This usually means the form is a checkout form
1585 // that also has one or more quantity fields. Suppress the expiry
1586 // month field(s) not immediately preceding an expiry year field.
1587 if (!keep_cc_fields || !cc_date_found) {
1588 field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1589 } else if (num_months_found > 1) {
1590 auto it2 = it + 1;
1591 if (it2 == fields_.end()) {
1592 field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1593 } else {
1594 ServerFieldType next_field_type = (*it2)->Type().GetStorableType();
1595 if (next_field_type != CREDIT_CARD_EXP_2_DIGIT_YEAR &&
1596 next_field_type != CREDIT_CARD_EXP_4_DIGIT_YEAR) {
1597 field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1598 }
1599 }
1600 }
1601 break;
1602 case CREDIT_CARD_EXP_2_DIGIT_YEAR:
1603 case CREDIT_CARD_EXP_4_DIGIT_YEAR:
1604 if (!keep_cc_fields || !cc_date_found)
1605 field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1606 break;
1607 default:
1608 break;
1609 }
1610 }
1611 }
1612
RationalizePhoneNumbersInSection(std::string section)1613 void FormStructure::RationalizePhoneNumbersInSection(std::string section) {
1614 if (phone_rationalized_[section])
1615 return;
1616 std::vector<AutofillField*> fields;
1617 for (size_t i = 0; i < field_count(); ++i) {
1618 if (field(i)->section != section)
1619 continue;
1620 fields.push_back(field(i));
1621 }
1622 rationalization_util::RationalizePhoneNumberFields(fields);
1623 phone_rationalized_[section] = true;
1624 }
1625
ApplyRationalizationsToFieldAndLog(size_t field_index,ServerFieldType new_type,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1626 void FormStructure::ApplyRationalizationsToFieldAndLog(
1627 size_t field_index,
1628 ServerFieldType new_type,
1629 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1630 if (field_index >= fields_.size())
1631 return;
1632 auto old_type = fields_[field_index]->Type().GetStorableType();
1633 fields_[field_index]->SetTypeTo(AutofillType(new_type));
1634 if (form_interactions_ukm_logger) {
1635 form_interactions_ukm_logger->LogRepeatedServerTypePredictionRationalized(
1636 form_signature_, *fields_[field_index], old_type);
1637 }
1638 }
1639
RationalizeAddressLineFields(SectionedFieldsIndexes * sections_of_address_indexes,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1640 void FormStructure::RationalizeAddressLineFields(
1641 SectionedFieldsIndexes* sections_of_address_indexes,
1642 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1643 // The rationalization happens within sections.
1644 for (sections_of_address_indexes->Reset();
1645 !sections_of_address_indexes->IsFinished();
1646 sections_of_address_indexes->WalkForwardToTheNextSection()) {
1647 auto current_section = sections_of_address_indexes->CurrentSection();
1648
1649 // The rationalization only applies to sections that have 2 or 3 visible
1650 // street address predictions.
1651 if (current_section.size() != 2 && current_section.size() != 3) {
1652 continue;
1653 }
1654
1655 int nb_address_rationalized = 0;
1656 for (auto field_index : current_section) {
1657 switch (nb_address_rationalized) {
1658 case 0:
1659 ApplyRationalizationsToFieldAndLog(field_index, ADDRESS_HOME_LINE1,
1660 form_interactions_ukm_logger);
1661 break;
1662 case 1:
1663 ApplyRationalizationsToFieldAndLog(field_index, ADDRESS_HOME_LINE2,
1664 form_interactions_ukm_logger);
1665 break;
1666 case 2:
1667 ApplyRationalizationsToFieldAndLog(field_index, ADDRESS_HOME_LINE3,
1668 form_interactions_ukm_logger);
1669 break;
1670 default:
1671 NOTREACHED();
1672 break;
1673 }
1674 ++nb_address_rationalized;
1675 }
1676 }
1677 }
1678
ApplyRationalizationsToHiddenSelects(size_t field_index,ServerFieldType new_type,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1679 void FormStructure::ApplyRationalizationsToHiddenSelects(
1680 size_t field_index,
1681 ServerFieldType new_type,
1682 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1683 ServerFieldType old_type = fields_[field_index]->Type().GetStorableType();
1684
1685 // Walk on the hidden select fields right after the field_index which share
1686 // the same type with the field_index, and apply the rationalization to them
1687 // as well. These fields, if any, function as one field with the field_index.
1688 for (auto current_index = field_index + 1; current_index < fields_.size();
1689 current_index++) {
1690 if (fields_[current_index]->IsVisible() ||
1691 fields_[current_index]->form_control_type != "select-one" ||
1692 fields_[current_index]->Type().GetStorableType() != old_type)
1693 break;
1694 ApplyRationalizationsToFieldAndLog(current_index, new_type,
1695 form_interactions_ukm_logger);
1696 }
1697
1698 // Same for the fields coming right before the field_index. (No need to check
1699 // for the fields appearing before the first field!)
1700 if (field_index == 0)
1701 return;
1702 for (auto current_index = field_index - 1;; current_index--) {
1703 if (fields_[current_index]->IsVisible() ||
1704 fields_[current_index]->form_control_type != "select-one" ||
1705 fields_[current_index]->Type().GetStorableType() != old_type)
1706 break;
1707 ApplyRationalizationsToFieldAndLog(current_index, new_type,
1708 form_interactions_ukm_logger);
1709 if (current_index == 0)
1710 break;
1711 }
1712 }
1713
HeuristicsPredictionsAreApplicable(size_t upper_index,size_t lower_index,ServerFieldType first_type,ServerFieldType second_type)1714 bool FormStructure::HeuristicsPredictionsAreApplicable(
1715 size_t upper_index,
1716 size_t lower_index,
1717 ServerFieldType first_type,
1718 ServerFieldType second_type) {
1719 // The predictions are applicable if one field has one of the two types, and
1720 // the other has the other type.
1721 if (fields_[upper_index]->heuristic_type() ==
1722 fields_[lower_index]->heuristic_type())
1723 return false;
1724 if ((fields_[upper_index]->heuristic_type() == first_type ||
1725 fields_[upper_index]->heuristic_type() == second_type) &&
1726 (fields_[lower_index]->heuristic_type() == first_type ||
1727 fields_[lower_index]->heuristic_type() == second_type))
1728 return true;
1729 return false;
1730 }
1731
ApplyRationalizationsToFields(size_t upper_index,size_t lower_index,ServerFieldType upper_type,ServerFieldType lower_type,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1732 void FormStructure::ApplyRationalizationsToFields(
1733 size_t upper_index,
1734 size_t lower_index,
1735 ServerFieldType upper_type,
1736 ServerFieldType lower_type,
1737 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1738 // Hidden fields are ignored during the rationalization, but 'select' hidden
1739 // fields also get autofilled to support their corresponding visible
1740 // 'synthetic fields'. So, if a field's type is rationalized, we should make
1741 // sure that the rationalization is also applied to its corresponding hidden
1742 // fields, if any.
1743 ApplyRationalizationsToHiddenSelects(upper_index, upper_type,
1744 form_interactions_ukm_logger);
1745 ApplyRationalizationsToFieldAndLog(upper_index, upper_type,
1746 form_interactions_ukm_logger);
1747
1748 ApplyRationalizationsToHiddenSelects(lower_index, lower_type,
1749 form_interactions_ukm_logger);
1750 ApplyRationalizationsToFieldAndLog(lower_index, lower_type,
1751 form_interactions_ukm_logger);
1752 }
1753
FieldShouldBeRationalizedToCountry(size_t upper_index)1754 bool FormStructure::FieldShouldBeRationalizedToCountry(size_t upper_index) {
1755 // Upper field is country if and only if it's the first visible address field
1756 // in its section. Otherwise, the upper field is a state, and the lower one
1757 // is a country.
1758 for (int field_index = upper_index - 1; field_index >= 0; --field_index) {
1759 if (fields_[field_index]->IsVisible() &&
1760 AutofillType(fields_[field_index]->Type().GetStorableType()).group() ==
1761 ADDRESS_HOME &&
1762 fields_[field_index]->section == fields_[upper_index]->section) {
1763 return false;
1764 }
1765 }
1766 return true;
1767 }
1768
// Rationalizes sections that contain exactly two visible fields predicted as
// state but none predicted as country, or the other way around, so that one of
// the two fields becomes a state and the other one a country.
//
// |sections_of_state_indexes| and |sections_of_country_indexes| hold the field
// indexes of the state and country predictions, grouped by section. Both are
// reset and then fully walked by this function. Every type change is reported
// to |form_interactions_ukm_logger| by the helpers that are called.
//
// For a section that needs rationalization, the heuristic predictions of the
// two fields are used if they are one state and one country. Otherwise the
// decision is positional, see FieldShouldBeRationalizedToCountry().
void FormStructure::RationalizeAddressStateCountry(
    SectionedFieldsIndexes* sections_of_state_indexes,
    SectionedFieldsIndexes* sections_of_country_indexes,
    AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
  // Walk on the sections of state and country indexes simultaneously. If they
  // both point to the same section, it means that that section includes both
  // the country and the state type. This means that no rationalization is
  // needed. So, walk both pointers forward. Otherwise, look at the section that
  // appears earlier on the form. That section doesn't have any field of the
  // other type. Rationalize the fields on the earlier section if needed. Walk
  // the pointer that points to the earlier section forward. Stop when both
  // sections of indexes are processed. (This resembles the merge in the merge
  // sort.)
  sections_of_state_indexes->Reset();
  sections_of_country_indexes->Reset();

  while (!sections_of_state_indexes->IsFinished() ||
         !sections_of_country_indexes->IsFinished()) {
    // NOTE(review): CurrentSection() is also called on a list that may already
    // be finished; the comparison further below presumably relies on the value
    // it returns in that state -- confirm against SectionedFieldsIndexes.
    auto current_section_of_state_indexes =
        sections_of_state_indexes->CurrentSection();
    auto current_section_of_country_indexes =
        sections_of_country_indexes->CurrentSection();
    // If there are still sections left with both country and state type, and
    // state and country current sections are equal, then that section has both
    // state and country. No rationalization needed.
    if (!sections_of_state_indexes->IsFinished() &&
        !sections_of_country_indexes->IsFinished() &&
        fields_[sections_of_state_indexes->CurrentIndex()]->section ==
            fields_[sections_of_country_indexes->CurrentIndex()]->section) {
      sections_of_state_indexes->WalkForwardToTheNextSection();
      sections_of_country_indexes->WalkForwardToTheNextSection();
      continue;
    }

    // Both stay 0 unless the section picked below holds exactly two indexes.
    size_t upper_index = 0, lower_index = 0;

    // If country section is before the state ones, it means that that section
    // misses states, and the other way around.
    if (current_section_of_state_indexes < current_section_of_country_indexes) {
      // We only rationalize when we have exactly two visible fields of a kind.
      if (current_section_of_state_indexes.size() == 2) {
        upper_index = current_section_of_state_indexes[0];
        lower_index = current_section_of_state_indexes[1];
      }
      sections_of_state_indexes->WalkForwardToTheNextSection();
    } else {
      // We only rationalize when we have exactly two visible fields of a kind.
      if (current_section_of_country_indexes.size() == 2) {
        upper_index = current_section_of_country_indexes[0];
        lower_index = current_section_of_country_indexes[1];
      }
      sections_of_country_indexes->WalkForwardToTheNextSection();
    }

    // This is when upper and lower indexes are not changed, meaning that there
    // is no need for rationalization.
    if (upper_index == lower_index) {
      continue;
    }

    // Prefer the heuristic predictions when they already describe one state
    // and one country.
    if (HeuristicsPredictionsAreApplicable(upper_index, lower_index,
                                           ADDRESS_HOME_STATE,
                                           ADDRESS_HOME_COUNTRY)) {
      ApplyRationalizationsToFields(
          upper_index, lower_index, fields_[upper_index]->heuristic_type(),
          fields_[lower_index]->heuristic_type(), form_interactions_ukm_logger);
      continue;
    }

    // Otherwise decide by position: the upper field becomes the country only
    // if FieldShouldBeRationalizedToCountry() says so.
    if (FieldShouldBeRationalizedToCountry(upper_index)) {
      ApplyRationalizationsToFields(upper_index, lower_index,
                                    ADDRESS_HOME_COUNTRY, ADDRESS_HOME_STATE,
                                    form_interactions_ukm_logger);
    } else {
      ApplyRationalizationsToFields(upper_index, lower_index,
                                    ADDRESS_HOME_STATE, ADDRESS_HOME_COUNTRY,
                                    form_interactions_ukm_logger);
    }
  }
}
1849
RationalizeRepeatedFields(AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1850 void FormStructure::RationalizeRepeatedFields(
1851 AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1852 // The type of every field whose index is in
1853 // sectioned_field_indexes_by_type[|type|] is predicted by server as |type|.
1854 // Example: sectioned_field_indexes_by_type[FULL_NAME] is a sectioned fields
1855 // indexes of fields whose types are predicted as FULL_NAME by the server.
1856 SectionedFieldsIndexes sectioned_field_indexes_by_type[MAX_VALID_FIELD_TYPE];
1857
1858 for (const auto& field : fields_) {
1859 // The hidden fields are not considered when rationalizing.
1860 if (!field->IsVisible())
1861 continue;
1862 // The billing and non-billing types are aggregated.
1863 auto current_type = field->Type().GetStorableType();
1864
1865 if (current_type != UNKNOWN_TYPE && current_type < MAX_VALID_FIELD_TYPE) {
1866 // Look at the sectioned field indexes for the current type, if the
1867 // current field belongs to that section, then the field index should be
1868 // added to that same section, otherwise, start a new section.
1869 sectioned_field_indexes_by_type[current_type].AddFieldIndex(
1870 &field - &fields_[0],
1871 /*is_new_section*/ sectioned_field_indexes_by_type[current_type]
1872 .Empty() ||
1873 fields_[sectioned_field_indexes_by_type[current_type]
1874 .LastFieldIndex()]
1875 ->section != field->section);
1876 }
1877 }
1878
1879 RationalizeAddressLineFields(
1880 &(sectioned_field_indexes_by_type[ADDRESS_HOME_STREET_ADDRESS]),
1881 form_interactions_ukm_logger);
1882 // Since the billing types are mapped to the non-billing ones, no need to
1883 // take care of ADDRESS_BILLING_STATE and .. .
1884 RationalizeAddressStateCountry(
1885 &(sectioned_field_indexes_by_type[ADDRESS_HOME_STATE]),
1886 &(sectioned_field_indexes_by_type[ADDRESS_HOME_COUNTRY]),
1887 form_interactions_ukm_logger);
1888 }
1889
RationalizeFieldTypePredictions()1890 void FormStructure::RationalizeFieldTypePredictions() {
1891 RationalizeCreditCardFieldPredictions();
1892 for (const auto& field : fields_) {
1893 if (base::FeatureList::IsEnabled(features::kAutofillOffNoServerData) &&
1894 !field->should_autocomplete && field->server_type() == NO_SERVER_DATA &&
1895 field->heuristic_type() != CREDIT_CARD_VERIFICATION_CODE) {
1896 // When the field has autocomplete off, and the server returned no
1897 // prediction, then assume Autofill is not useful for the current field.
1898 // Special case for CVC (crbug.com/968036). We never send votes for CVC
1899 // fields, but we still fill them when the user inputs them via the CVC
1900 // prompt. Since Autofill doesn't trigger from a CVC field, we can keep
1901 // the client-side predictions for this type.
1902 field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1903 } else {
1904 field->SetTypeTo(field->Type());
1905 }
1906 }
1907 RationalizeTypeRelationships();
1908 }
1909
EncodeFormForQuery(AutofillPageQueryRequest::Form * query_form,std::vector<FormSignature> * queried_form_signatures) const1910 void FormStructure::EncodeFormForQuery(
1911 AutofillPageQueryRequest::Form* query_form,
1912 std::vector<FormSignature>* queried_form_signatures) const {
1913 DCHECK(!IsMalformed());
1914
1915 query_form->set_signature(form_signature().value());
1916 queried_form_signatures->push_back(form_signature());
1917
1918 if (is_rich_query_enabled_) {
1919 EncodeFormMetadataForQuery(*this, query_form->mutable_metadata());
1920 }
1921
1922 for (const auto& field : fields_) {
1923 if (ShouldSkipField(*field))
1924 continue;
1925
1926 AutofillPageQueryRequest::Form::Field* added_field =
1927 query_form->add_fields();
1928 added_field->set_signature(field->GetFieldSignature().value());
1929
1930 if (is_rich_query_enabled_) {
1931 EncodeFieldMetadataForQuery(*field, added_field->mutable_metadata());
1932 }
1933
1934 if (IsAutofillFieldMetadataEnabled()) {
1935 added_field->set_control_type(field->form_control_type);
1936
1937 if (!field->name.empty())
1938 added_field->set_name(base::UTF16ToUTF8(field->name));
1939 }
1940 }
1941 }
1942
EncodeFormForUpload(AutofillUploadContents * upload,std::vector<FormSignature> * encoded_signatures) const1943 void FormStructure::EncodeFormForUpload(
1944 AutofillUploadContents* upload,
1945 std::vector<FormSignature>* encoded_signatures) const {
1946 DCHECK(!IsMalformed());
1947
1948 encoded_signatures->push_back(form_signature());
1949
1950 if (randomized_encoder_) {
1951 PopulateRandomizedFormMetadata(*randomized_encoder_, *this,
1952 upload->mutable_randomized_form_metadata());
1953 }
1954
1955 for (const auto& field : fields_) {
1956 // Don't upload checkable fields.
1957 if (IsCheckable(field->check_status))
1958 continue;
1959
1960 // Add the same field elements as the query and a few more below.
1961 if (ShouldSkipField(*field))
1962 continue;
1963
1964 auto* added_field = upload->add_field();
1965
1966 for (const auto& field_type : field->possible_types()) {
1967 added_field->add_autofill_type(field_type);
1968 }
1969
1970 field->NormalizePossibleTypesValidities();
1971
1972 for (const auto& field_type_validities :
1973 field->possible_types_validities()) {
1974 auto* type_validities = added_field->add_autofill_type_validities();
1975 type_validities->set_type(field_type_validities.first);
1976 for (const auto& validity : field_type_validities.second) {
1977 type_validities->add_validity(validity);
1978 }
1979 }
1980
1981 if (field->generation_type()) {
1982 added_field->set_generation_type(field->generation_type());
1983 added_field->set_generated_password_changed(
1984 field->generated_password_changed());
1985 }
1986
1987 if (field->vote_type()) {
1988 added_field->set_vote_type(field->vote_type());
1989 }
1990
1991 if (field->initial_value_hash()) {
1992 added_field->set_initial_value_hash(field->initial_value_hash().value());
1993 }
1994
1995 added_field->set_signature(field->GetFieldSignature().value());
1996
1997 if (field->properties_mask)
1998 added_field->set_properties_mask(field->properties_mask);
1999
2000 if (randomized_encoder_) {
2001 PopulateRandomizedFieldMetadata(
2002 *randomized_encoder_, *this, *field,
2003 added_field->mutable_randomized_field_metadata());
2004 }
2005
2006 if (IsAutofillFieldMetadataEnabled()) {
2007 added_field->set_type(field->form_control_type);
2008
2009 if (!field->name.empty())
2010 added_field->set_name(base::UTF16ToUTF8(field->name));
2011
2012 if (!field->id_attribute.empty())
2013 added_field->set_id(base::UTF16ToUTF8(field->id_attribute));
2014
2015 if (!field->autocomplete_attribute.empty())
2016 added_field->set_autocomplete(field->autocomplete_attribute);
2017
2018 if (!field->css_classes.empty())
2019 added_field->set_css_classes(base::UTF16ToUTF8(field->css_classes));
2020 }
2021 }
2022 }
2023
IsMalformed() const2024 bool FormStructure::IsMalformed() const {
2025 if (!field_count()) // Nothing to add.
2026 return true;
2027
2028 // Some badly formatted web sites repeat fields - limit number of fields to
2029 // 250, which is far larger than any valid form and proto still fits into 10K.
2030 // Do not send requests for forms with more than this many fields, as they are
2031 // near certainly not valid/auto-fillable.
2032 const size_t kMaxFieldsOnTheForm = 250;
2033 if (field_count() > kMaxFieldsOnTheForm)
2034 return true;
2035 return false;
2036 }
2037
// Assigns a section name to every field of the form. Does nothing for a form
// without fields.
//
// The heuristic pass runs when the author specified no sections
// (|has_author_specified_sections| is false) or when the new sectioning method
// is enabled. It walks the fields in order: credit card fields are put into
// the "credit-card" section, and for the other fields a new section is started
// when the field's type was already seen in the current section (subject to
// the exceptions documented inline) or, with the new sectioning method, when
// the field carries an autocomplete section that differs from the one of the
// previous field.
//
// Afterwards, in all cases, "-cc" is appended to the section of credit card
// fields and "-default" to the section of all other fields.
void FormStructure::IdentifySections(bool has_author_specified_sections) {
  if (fields_.empty())
    return;

  const bool is_enabled_autofill_new_sectioning =
      base::FeatureList::IsEnabled(features::kAutofillUseNewSectioningMethod);
  const bool is_enabled_autofill_redundant_name_sectioning =
      base::FeatureList::IsEnabled(
          features::kAutofillSectionUponRedundantNameInfo);

  // Creates a unique name for the section that starts with |field|.
  // TODO(crbug/896689): Cleanup once experiment is launched.
  auto get_section_name = [](const AutofillField& field) {
    if (base::FeatureList::IsEnabled(
            features::kAutofillNameSectionsWithRendererIds)) {
      return base::StrCat(
          {field.name, base::ASCIIToUTF16("_"),
           base::NumberToString16(field.unique_renderer_id.value())});
    } else {
      return field.unique_name();
    }
  };

  if (!has_author_specified_sections || is_enabled_autofill_new_sectioning) {
    base::string16 current_section = get_section_name(*fields_.front());

    // Keep track of the types we've seen in this section.
    std::set<ServerFieldType> seen_types;
    ServerFieldType previous_type = UNKNOWN_TYPE;

    // Boolean flag that is set to true when a field in the current section
    // has the autocomplete-section attribute defined.
    bool previous_autocomplete_section_present = false;

    // True while walking a section that was started by a hidden field.
    // |last_visible_section| is the section that was current when that hidden
    // section started.
    bool is_hidden_section = false;
    base::string16 last_visible_section;
    for (const auto& field : fields_) {
      const ServerFieldType current_type = field->Type().GetStorableType();
      // All credit card fields belong to the same section that's different
      // from address sections.
      if (AutofillType(current_type).group() == CREDIT_CARD) {
        field->section = "credit-card";
        continue;
      }

      bool already_saw_current_type = seen_types.count(current_type) > 0;

      // Forms often ask for multiple phone numbers -- e.g. both a daytime and
      // evening phone number. Our phone number detection is also generally a
      // little off. Hence, ignore this field type as a signal here.
      if (AutofillType(current_type).group() == PHONE_HOME)
        already_saw_current_type = false;

      if (is_enabled_autofill_redundant_name_sectioning) {
        // Forms sometimes have a different format of inputting names in
        // different sections. If we believe a new name is being entered, assume
        // it is a new section (unless there are two identical inputs in a row).
        if (current_type == NAME_FULL)
          already_saw_current_type |= (seen_types.count(NAME_LAST) > 0);
      }

      bool ignored_field = !field->IsVisible();

      // This is the first visible field after a hidden section. Consider it as
      // the continuation of the last visible section.
      if (!ignored_field && is_hidden_section) {
        current_section = last_visible_section;
      }

      // Start a new section by an ignored field, only if the next field is also
      // already seen.
      size_t field_index = &field - &fields_[0];
      if (ignored_field &&
          (is_hidden_section ||
           !((field_index + 1) < fields_.size() &&
             seen_types.count(
                 fields_[field_index + 1]->Type().GetStorableType()) > 0))) {
        already_saw_current_type = false;
      }

      // Some forms have adjacent fields of the same type. Two common examples:
      //  * Forms with two email fields, where the second is meant to "confirm"
      //    the first.
      //  * Forms with a <select> menu for states in some countries, and a
      //    freeform <input> field for states in other countries. (Usually,
      //    only one of these two will be visible for any given choice of
      //    country.)
      // Generally, adjacent fields of the same type belong in the same logical
      // section.
      if (current_type == previous_type)
        already_saw_current_type = false;

      // Boolean flag that is set to true when the |field| has
      // autocomplete-section attribute defined.
      bool autocomplete_section_attribute_present = false;
      if (is_enabled_autofill_new_sectioning)
        autocomplete_section_attribute_present =
            (field->section != kDefaultSection);

      // Boolean flag that is set to true when the |field| has
      // autocomplete-section attribute defined and is different than the
      // previous field.
      bool different_autocomplete_section_than_previous = false;
      if (is_enabled_autofill_new_sectioning) {
        different_autocomplete_section_than_previous =
            (autocomplete_section_attribute_present &&
             (!field_index ||
              fields_[field_index - 1]->section != field->section));
      }

      // Start a new section if the |current_type| was already seen or the
      // autocomplete-section attribute is defined for the |field| which is
      // different than the previous field.
      if (current_type != UNKNOWN_TYPE &&
          (already_saw_current_type ||
           (is_enabled_autofill_new_sectioning &&
            different_autocomplete_section_than_previous))) {
        // Keep track of seen_types if the new section is hidden. The next
        // visible section might be the continuation of the previous visible
        // section.
        if (ignored_field) {
          is_hidden_section = true;
          last_visible_section = current_section;
        }

        if (!is_hidden_section &&
            (!is_enabled_autofill_new_sectioning ||
             !autocomplete_section_attribute_present ||
             different_autocomplete_section_than_previous))
          seen_types.clear();

        if (is_enabled_autofill_new_sectioning &&
            autocomplete_section_attribute_present &&
            !previous_autocomplete_section_present) {
          // If this field is the first field within the section with a defined
          // autocomplete section, then change the section attribute of all the
          // parsed fields in the current section to |field->section|.
          // |i| is signed so that the backward walk can step below index 0 and
          // terminate.
          int i = static_cast<int>(field_index - 1);
          while (i >= 0 &&
                 base::UTF8ToUTF16(fields_[i]->section) == current_section) {
            fields_[i]->section = field->section;
            i--;
          }
        }

        // The end of a section, so start a new section.
        current_section = get_section_name(*field);

        if (is_enabled_autofill_new_sectioning) {
          // The section described in the autocomplete section attribute
          // overrides the value determined by the heuristic.
          if (autocomplete_section_attribute_present)
            current_section = base::UTF8ToUTF16(field->section);

          previous_autocomplete_section_present =
              autocomplete_section_attribute_present;
        }
      }

      // Only consider a type "seen" if it was not ignored. Some forms have
      // sections for different locales, only one of which is enabled at a
      // time. Each section may duplicate some information (e.g. postal code)
      // and we don't want that to cause section splits.
      // Also only set |previous_type| when the field was not ignored. This
      // prevents ignored fields from breaking up fields that are otherwise
      // adjacent.
      if (!ignored_field) {
        seen_types.insert(current_type);
        previous_type = current_type;
        is_hidden_section = false;
      }

      field->section = base::UTF16ToUTF8(current_section);
    }
  }

  // Ensure that credit card and address fields are in separate sections.
  // This simplifies the section-aware logic in autofill_manager.cc.
  for (const auto& field : fields_) {
    FieldTypeGroup field_type_group = field->Type().group();
    if (field_type_group == CREDIT_CARD)
      field->section = field->section + "-cc";
    else
      field->section = field->section + "-default";
  }
}
2224
ShouldSkipField(const FormFieldData & field) const2225 bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
2226 return IsCheckable(field.check_status);
2227 }
2228
ProcessExtractedFields()2229 void FormStructure::ProcessExtractedFields() {
2230 if (base::FeatureList::IsEnabled(
2231 autofill::features::kAutofillLabelAffixRemoval)) {
2232 // Updates the field name parsed by heuristics if several criteria are met.
2233 // Several fields must be present in the form.
2234 if (field_count() < kCommonNameAffixRemovalFieldNumberThreshold)
2235 return;
2236
2237 std::vector<base::StringPiece16> names;
2238 names.reserve(field_count());
2239 for (const auto& field : *this)
2240 names.push_back(field->name);
2241
2242 int longest_prefix_length = FindLongestCommonAffixLength(names, false);
2243 int longest_suffix_length = FindLongestCommonAffixLength(names, true);
2244
2245 // Don't remove the common affix if it's not long enough.
2246 if (longest_prefix_length < kMinCommonNameAffixLength)
2247 longest_prefix_length = 0;
2248
2249 if (longest_suffix_length < kMinCommonNameAffixLength)
2250 longest_suffix_length = 0;
2251
2252 bool success =
2253 SetStrippedParseableNames(longest_prefix_length, longest_suffix_length);
2254
2255 // Don't search for inconsistent prefix if valid affixes are found.
2256 if (success && longest_prefix_length + longest_suffix_length > 0)
2257 return;
2258
2259 // Functionality for stripping a prefix only common to a subset
2260 // of field names.
2261 // This is needed because an exceptional field may be missing a prefix
2262 // which is otherwise consistently applied--for instance, a framework
2263 // may only apply a prefix to those fields which are bound when POSTing.
2264 names.clear();
2265 for (const auto& field : *this)
2266 if (field->name.size() > kMinCommonNameLongPrefixLength)
2267 names.push_back(field->name);
2268
2269 if (names.size() < kCommonNamePrefixRemovalFieldThreshold)
2270 return;
2271
2272 const int longest_long_prefix_length =
2273 FindLongestCommonAffixLength(names, false);
2274
2275 if (longest_long_prefix_length >= kMinCommonNameLongPrefixLength)
2276 SetStrippedParseableNames(longest_long_prefix_length, 0);
2277
2278 return;
2279 }
2280
2281 // Update the field name parsed by heuristics if several criteria are met.
2282 // Several fields must be present in the form.
2283 if (field_count() < kCommonNamePrefixRemovalFieldThreshold)
2284 return;
2285
2286 // Find the longest common prefix within all the field names.
2287 std::vector<base::string16> names;
2288 names.reserve(field_count());
2289 for (const auto& field : *this)
2290 names.push_back(field->name);
2291
2292 const base::string16 longest_prefix = FindLongestCommonPrefix(names);
2293 if (longest_prefix.size() < kMinCommonNamePrefixLength)
2294 return;
2295
2296 // The name without the prefix will be used for heuristics parsing.
2297 for (auto& field : *this) {
2298 if (field->name.size() > longest_prefix.size()) {
2299 field->set_parseable_name(
2300 field->name.substr(longest_prefix.size(), field->name.size()));
2301 }
2302 }
2303 }
2304
SetStrippedParseableNames(size_t offset_left,size_t offset_right)2305 bool FormStructure::SetStrippedParseableNames(size_t offset_left,
2306 size_t offset_right) {
2307 // Keeps track if all stripped strings are valid according to
2308 // |IsValidParseableName()|. If at least one string is invalid,
2309 // all |parseable_name| are reset to |name|.
2310 bool should_keep = true;
2311 for (auto& field : *this) {
2312 // This check allows to only strip affixes from long enough strings.
2313 if (field->name.size() > offset_right + offset_left) {
2314 field->set_parseable_name(field->name.substr(
2315 offset_left, field->name.size() - offset_right - offset_left));
2316 } else {
2317 field->set_parseable_name(field->name);
2318 }
2319
2320 should_keep &= IsValidParseableName(field->parseable_name());
2321 if (!should_keep)
2322 break;
2323 }
2324
2325 // Reset if some stripped string was invalid.
2326 if (!should_keep) {
2327 for (auto& field : *this)
2328 field->set_parseable_name(field->name);
2329 }
2330
2331 return should_keep;
2332 }
2333
IsValidParseableName(base::string16 candidateParseableName)2334 bool FormStructure::IsValidParseableName(
2335 base::string16 candidateParseableName) {
2336 static const base::string16 kParseableNameValidationPattern =
2337 base::UTF8ToUTF16(kParseableNameValidationRe);
2338 if (MatchesPattern(candidateParseableName, kParseableNameValidationPattern))
2339 return true;
2340
2341 return false;
2342 }
2343
2344 // static
FindLongestCommonAffixLength(const std::vector<base::StringPiece16> & strings,bool findCommonSuffix)2345 size_t FormStructure::FindLongestCommonAffixLength(
2346 const std::vector<base::StringPiece16>& strings,
2347 bool findCommonSuffix) {
2348 if (strings.empty())
2349 return 0;
2350
2351 // Go through each character of the first string until there is a mismatch at
2352 // the same position in any other string. Adapted from http://goo.gl/YGukMM.
2353 for (size_t affix_len = 0; affix_len < strings[0].size(); affix_len++) {
2354 size_t base_string_index =
2355 findCommonSuffix ? strings[0].size() - affix_len - 1 : affix_len;
2356 for (size_t i = 1; i < strings.size(); i++) {
2357 size_t compared_string_index =
2358 findCommonSuffix ? strings[i].size() - affix_len - 1 : affix_len;
2359 if (affix_len >= strings[i].size() ||
2360 strings[i][compared_string_index] != strings[0][base_string_index]) {
2361 // Mismatch found.
2362 return affix_len;
2363 }
2364 }
2365 }
2366 return strings[0].size();
2367 }
2368
2369 // static
FindLongestCommonPrefix(const std::vector<base::string16> & strings)2370 base::string16 FormStructure::FindLongestCommonPrefix(
2371 const std::vector<base::string16>& strings) {
2372 if (strings.empty())
2373 return base::string16();
2374
2375 std::vector<base::string16> filtered_strings;
2376
2377 // Any strings less than kMinCommonNamePrefixLength are neither modified
2378 // nor considered when processing for a common prefix.
2379 std::copy_if(
2380 strings.begin(), strings.end(), std::back_inserter(filtered_strings),
2381 [](base::string16 s) { return s.size() >= kMinCommonNamePrefixLength; });
2382
2383 if (filtered_strings.empty())
2384 return base::string16();
2385
2386 // Go through each character of the first string until there is a mismatch at
2387 // the same position in any other string. Adapted from http://goo.gl/YGukMM.
2388 for (size_t prefix_len = 0; prefix_len < filtered_strings[0].size();
2389 prefix_len++) {
2390 for (size_t i = 1; i < filtered_strings.size(); i++) {
2391 if (prefix_len >= filtered_strings[i].size() ||
2392 filtered_strings[i].at(prefix_len) !=
2393 filtered_strings[0].at(prefix_len)) {
2394 // Mismatch found.
2395 return filtered_strings[i].substr(0, prefix_len);
2396 }
2397 }
2398 }
2399 return filtered_strings[0];
2400 }
2401
GetFormTypes() const2402 std::set<FormType> FormStructure::GetFormTypes() const {
2403 std::set<FormType> form_types;
2404 for (const auto& field : fields_) {
2405 form_types.insert(
2406 FormTypes::FieldTypeGroupToFormType(field->Type().group()));
2407 }
2408 return form_types;
2409 }
2410
GetIdentifierForRefill() const2411 base::string16 FormStructure::GetIdentifierForRefill() const {
2412 if (!form_name().empty())
2413 return form_name();
2414
2415 if (field_count() && !field(0)->unique_name().empty())
2416 return field(0)->unique_name();
2417
2418 return base::string16();
2419 }
2420
set_randomized_encoder(std::unique_ptr<RandomizedEncoder> encoder)2421 void FormStructure::set_randomized_encoder(
2422 std::unique_ptr<RandomizedEncoder> encoder) {
2423 randomized_encoder_ = std::move(encoder);
2424 }
2425
RationalizeTypeRelationships()2426 void FormStructure::RationalizeTypeRelationships() {
2427 // Create a local set of all the types for faster lookup.
2428 std::unordered_set<ServerFieldType> types;
2429 for (const auto& field : fields_) {
2430 types.insert(field->Type().GetStorableType());
2431 }
2432
2433 const auto& type_relationship_rules = GetTypeRelationshipMap();
2434
2435 for (const auto& field : fields_) {
2436 ServerFieldType field_type = field->Type().GetStorableType();
2437 const auto& ruleset_iterator = type_relationship_rules.find(field_type);
2438 if (ruleset_iterator != type_relationship_rules.end()) {
2439 // We have relationship rules for this type. Verify that at least one of
2440 // the required related type is present.
2441 bool found = false;
2442 for (ServerFieldType required_type : ruleset_iterator->second) {
2443 if (types.find(required_type) != types.end()) {
2444 // Found a required type, we can break as we only need one required
2445 // type to respect the rule.
2446 found = true;
2447 break;
2448 }
2449 }
2450
2451 if (!found) {
2452 // No required type was found, the current field failed the relationship
2453 // requirements for its type. Disabling Autofill for this field.
2454 field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
2455 }
2456 }
2457 }
2458 }
2459
operator <<(std::ostream & buffer,const FormStructure & form)2460 std::ostream& operator<<(std::ostream& buffer, const FormStructure& form) {
2461 buffer << "\nForm signature: "
2462 << base::StrCat({base::NumberToString(form.form_signature().value()),
2463 " - ",
2464 base::NumberToString(
2465 HashFormSignature(form.form_signature()))});
2466 buffer << "\n Form name: " << form.form_name();
2467 buffer << "\n Unique renderer Id: " << form.unique_renderer_id().value();
2468 buffer << "\n Target URL:" << form.target_url();
2469 for (size_t i = 0; i < form.field_count(); ++i) {
2470 buffer << "\n Field " << i << ": ";
2471 const AutofillField* field = form.field(i);
2472 buffer << "\n Signature: "
2473 << base::StrCat(
2474 {base::NumberToString(field->GetFieldSignature().value()),
2475 " - ",
2476 base::NumberToString(
2477 HashFieldSignature(field->GetFieldSignature())),
2478 ", unique renderer id: ",
2479 base::NumberToString(field->unique_renderer_id.value())});
2480 buffer << "\n Name: " << field->parseable_name();
2481
2482 auto type = field->Type().ToString();
2483 auto heuristic_type = AutofillType(field->heuristic_type()).ToString();
2484 auto server_type = AutofillType(field->server_type()).ToString();
2485 auto html_type_description =
2486 field->html_type() != HTML_TYPE_UNSPECIFIED
2487 ? base::StrCat(
2488 {", html: ", FieldTypeToStringPiece(field->html_type())})
2489 : "";
2490
2491 buffer << "\n Type: "
2492 << base::StrCat({type, " (heuristic: ", heuristic_type, ", server: ",
2493 server_type, html_type_description, ")"});
2494 buffer << "\n Section: " << field->section;
2495
2496 constexpr size_t kMaxLabelSize = 100;
2497 const base::string16 truncated_label =
2498 field->label.substr(0, std::min(field->label.length(), kMaxLabelSize));
2499 buffer << "\n Label: " << truncated_label;
2500
2501 buffer << "\n Is empty: " << (field->IsEmpty() ? "Yes" : "No");
2502 }
2503 return buffer;
2504 }
2505
operator <<(LogBuffer & buffer,const FormStructure & form)2506 LogBuffer& operator<<(LogBuffer& buffer, const FormStructure& form) {
2507 buffer << Tag{"div"} << Attrib{"class", "form"};
2508 buffer << Tag{"table"};
2509 buffer << Tr{} << "Form signature:"
2510 << base::StrCat({base::NumberToString(form.form_signature().value()),
2511 " - ",
2512 base::NumberToString(
2513 HashFormSignature(form.form_signature()))});
2514 buffer << Tr{} << "Form name:" << form.form_name();
2515 buffer << Tr{} << "Unique renderer id:" << form.unique_renderer_id().value();
2516 buffer << Tr{} << "Target URL:" << form.target_url();
2517 for (size_t i = 0; i < form.field_count(); ++i) {
2518 buffer << Tag{"tr"};
2519 buffer << Tag{"td"} << "Field " << i << ": " << CTag{};
2520 const AutofillField* field = form.field(i);
2521 buffer << Tag{"td"};
2522 buffer << Tag{"table"};
2523 buffer << Tr{} << "Signature:"
2524 << base::StrCat(
2525 {base::NumberToString(field->GetFieldSignature().value()),
2526 " - ",
2527 base::NumberToString(
2528 HashFieldSignature(field->GetFieldSignature())),
2529 ", unique renderer id: ",
2530 base::NumberToString(field->unique_renderer_id.value())});
2531 buffer << Tr{} << "Name:" << field->parseable_name();
2532
2533 auto type = field->Type().ToString();
2534 auto heuristic_type = AutofillType(field->heuristic_type()).ToString();
2535 auto server_type = AutofillType(field->server_type()).ToString();
2536 auto html_type_description =
2537 field->html_type() != HTML_TYPE_UNSPECIFIED
2538 ? base::StrCat(
2539 {", html: ", FieldTypeToStringPiece(field->html_type())})
2540 : "";
2541
2542 buffer << Tr{} << "Type:"
2543 << base::StrCat({type, " (heuristic: ", heuristic_type, ", server: ",
2544 server_type, html_type_description, ")"});
2545 buffer << Tr{} << "Section:" << field->section;
2546
2547 constexpr size_t kMaxLabelSize = 100;
2548 const base::string16 truncated_label =
2549 field->label.substr(0, std::min(field->label.length(), kMaxLabelSize));
2550 buffer << Tr{} << "Label:" << truncated_label;
2551
2552 buffer << Tr{} << "Is empty:" << (field->IsEmpty() ? "Yes" : "No");
2553 buffer << CTag{"table"};
2554 buffer << CTag{"td"};
2555 buffer << CTag{"tr"};
2556 }
2557 buffer << CTag{"table"};
2558 buffer << CTag{"div"};
2559 return buffer;
2560 }
2561
2562 } // namespace autofill
2563