1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/autofill/core/browser/form_structure.h"
6 
7 #include <stdint.h>
8 
9 #include <algorithm>
10 #include <map>
11 #include <memory>
12 #include <unordered_map>
13 #include <unordered_set>
14 #include <utility>
15 #include <vector>
16 
17 #include "base/base64.h"
18 #include "base/command_line.h"
19 #include "base/feature_list.h"
20 #include "base/i18n/case_conversion.h"
21 #include "base/logging.h"
22 #include "base/metrics/field_trial.h"
23 #include "base/metrics/histogram_macros.h"
24 #include "base/no_destructor.h"
25 #include "base/strings/strcat.h"
26 #include "base/strings/string_number_conversions.h"
27 #include "base/strings/string_piece.h"
28 #include "base/strings/string_split.h"
29 #include "base/strings/string_util.h"
30 #include "base/strings/stringprintf.h"
31 #include "base/strings/utf_string_conversions.h"
32 #include "base/time/time.h"
33 #include "components/autofill/core/browser/autofill_data_util.h"
34 #include "components/autofill/core/browser/autofill_metrics.h"
35 #include "components/autofill/core/browser/autofill_regex_constants.h"
36 #include "components/autofill/core/browser/autofill_regexes.h"
37 #include "components/autofill/core/browser/autofill_type.h"
38 #include "components/autofill/core/browser/field_types.h"
39 #include "components/autofill/core/browser/form_parsing/field_candidates.h"
40 #include "components/autofill/core/browser/form_parsing/form_field.h"
41 #include "components/autofill/core/browser/logging/log_manager.h"
42 #include "components/autofill/core/browser/randomized_encoder.h"
43 #include "components/autofill/core/browser/rationalization_util.h"
44 #include "components/autofill/core/browser/validation.h"
45 #include "components/autofill/core/common/autofill_constants.h"
46 #include "components/autofill/core/common/autofill_features.h"
47 #include "components/autofill/core/common/autofill_internals/log_message.h"
48 #include "components/autofill/core/common/autofill_internals/logging_scope.h"
49 #include "components/autofill/core/common/autofill_payments_features.h"
50 #include "components/autofill/core/common/autofill_tick_clock.h"
51 #include "components/autofill/core/common/autofill_util.h"
52 #include "components/autofill/core/common/form_data.h"
53 #include "components/autofill/core/common/form_data_predictions.h"
54 #include "components/autofill/core/common/form_field_data.h"
55 #include "components/autofill/core/common/form_field_data_predictions.h"
56 #include "components/autofill/core/common/logging/log_buffer.h"
57 #include "components/autofill/core/common/signatures.h"
58 #include "components/security_state/core/security_state.h"
59 #include "components/version_info/version_info.h"
60 #include "url/origin.h"
61 
62 namespace autofill {
63 
64 using mojom::SubmissionIndicatorEvent;
65 
66 namespace {
67 
// Mode tokens.
// NOTE(review): presumably the "billing"/"shipping" address-mode tokens of the
// autocomplete attribute; their use lies outside this part of the file, so
// confirm against the section-identification code.
constexpr char kBillingMode[] = "billing";
constexpr char kShippingMode[] = "shipping";

// Default section name for the fields.
constexpr char kDefaultSection[] = "-default";

// Common name prefixes are only removed if there is a minimum number of
// fields and the prefix has a minimum length. These values are chosen to
// avoid cases such as two fields named "address1" and "address2", while still
// being effective against web frameworks which prepend prefixes such as
// "ctl01$ctl00$MainContentRegion$" to all fields.
constexpr int kCommonNamePrefixRemovalFieldThreshold = 3;
constexpr int kMinCommonNamePrefixLength = 16;

// Affix removal configuration. Short affixes are only removed if they are
// common to all field names and there is at least the minimum number of
// fields. If no affix common to all field names is found, a long prefix common
// to a subset of the fields is searched for instead. This covers prefixes
// prepended by web frameworks.
//
// Minimum required number of available fields for trying to remove affixes.
constexpr int kCommonNameAffixRemovalFieldNumberThreshold = 3;
// Minimum required length for affixes common to all field names.
constexpr int kMinCommonNameAffixLength = 3;
// Minimum required length for prefixes common to a subset of the field names.
constexpr int kMinCommonNameLongPrefixLength = 16;
// Regex for checking if |parseable_name| is valid after stripping affixes.
// The pattern "\D" matches a single non-digit character.
// NOTE(review): presumably a name is considered valid when it contains at
// least one non-digit character; the matching code is outside this view.
constexpr char kParseableNameValidationRe[] = "\\D";
96 
97 // Returns true if the scheme given by |url| is one for which autofill is
98 // allowed to activate. By default this only returns true for HTTP and HTTPS.
HasAllowedScheme(const GURL & url)99 bool HasAllowedScheme(const GURL& url) {
100   return url.SchemeIsHTTPOrHTTPS() ||
101          base::FeatureList::IsEnabled(
102              features::kAutofillAllowNonHttpActivation);
103 }
104 
105 // Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
106 // |available_field_types| and returns the hex representation as a string.
EncodeFieldTypes(const ServerFieldTypeSet & available_field_types)107 std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
108   // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
109   // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
110   const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
111 
112   // Pack the types in |available_field_types| into |bit_field|.
113   std::vector<uint8_t> bit_field(kNumBytes, 0);
114   for (const auto& field_type : available_field_types) {
115     // Set the appropriate bit in the field.  The bit we set is the one
116     // |field_type| % 8 from the left of the byte.
117     const size_t byte = field_type / 8;
118     const size_t bit = 0x80 >> (field_type % 8);
119     DCHECK(byte < bit_field.size());
120     bit_field[byte] |= bit;
121   }
122 
123   // Discard any trailing zeroes.
124   // If there are no available types, we return the empty string.
125   size_t data_end = bit_field.size();
126   for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
127   }
128 
129   // Print all meaningfull bytes into a string.
130   std::string data_presence;
131   data_presence.reserve(data_end * 2 + 1);
132   for (size_t i = 0; i < data_end; ++i) {
133     base::StringAppendF(&data_presence, "%02x", bit_field[i]);
134   }
135 
136   return data_presence;
137 }
138 
// Returns |true| iff |token| is a type hint for a contact field, as specified
// in the implementation section of http://is.gd/whatwg_autocomplete
// The "fax" and "pager" hints are deliberately not accepted, as Chrome does
// not support filling either type of information.
bool IsContactTypeHint(const std::string& token) {
  static constexpr const char* kContactTypeHints[] = {"home", "work",
                                                      "mobile"};
  for (const char* hint : kContactTypeHints) {
    if (token == hint)
      return true;
  }
  return false;
}
146 
147 // Returns |true| iff the |token| is a type hint appropriate for a field of the
148 // given |field_type|, as specified in the implementation section of
149 // http://is.gd/whatwg_autocomplete
ContactTypeHintMatchesFieldType(const std::string & token,HtmlFieldType field_type)150 bool ContactTypeHintMatchesFieldType(const std::string& token,
151                                      HtmlFieldType field_type) {
152   // The "home" and "work" type hints are only appropriate for email and phone
153   // number field types.
154   if (token == "home" || token == "work") {
155     return field_type == HTML_TYPE_EMAIL ||
156            (field_type >= HTML_TYPE_TEL &&
157             field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
158   }
159 
160   // The "mobile" type hint is only appropriate for phone number field types.
161   // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
162   // support filling either type of information.
163   if (token == "mobile") {
164     return field_type >= HTML_TYPE_TEL &&
165            field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
166   }
167 
168   return false;
169 }
170 
171 // Returns the Chrome Autofill-supported field type corresponding to the given
172 // |autocomplete_attribute_value|, if there is one, in the context of the given
173 // |field|.  Chrome Autofill supports a subset of the field types listed at
174 // http://is.gd/whatwg_autocomplete
FieldTypeFromAutocompleteAttributeValue(const std::string & autocomplete_attribute_value,const AutofillField & field)175 HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
176     const std::string& autocomplete_attribute_value,
177     const AutofillField& field) {
178   if (autocomplete_attribute_value == "")
179     return HTML_TYPE_UNSPECIFIED;
180 
181   if (autocomplete_attribute_value == "name")
182     return HTML_TYPE_NAME;
183 
184   if (autocomplete_attribute_value == "honorific-prefix")
185     return HTML_TYPE_HONORIFIC_PREFIX;
186 
187   if (autocomplete_attribute_value == "given-name" ||
188       autocomplete_attribute_value == "given_name" ||
189       autocomplete_attribute_value == "first-name" ||
190       autocomplete_attribute_value == "first_name")
191     return HTML_TYPE_GIVEN_NAME;
192 
193   if (autocomplete_attribute_value == "additional-name" ||
194       autocomplete_attribute_value == "additional_name") {
195     if (field.max_length == 1)
196       return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
197     return HTML_TYPE_ADDITIONAL_NAME;
198   }
199 
200   if (autocomplete_attribute_value == "family-name" ||
201       autocomplete_attribute_value == "family_name")
202     return HTML_TYPE_FAMILY_NAME;
203 
204   if (autocomplete_attribute_value == "organization" ||
205       autocomplete_attribute_value == "company")
206     return HTML_TYPE_ORGANIZATION;
207 
208   if (autocomplete_attribute_value == "street-address" ||
209       autocomplete_attribute_value == "street_address" ||
210       autocomplete_attribute_value == "address")
211     return HTML_TYPE_STREET_ADDRESS;
212 
213   if (autocomplete_attribute_value == "address-line1" ||
214       autocomplete_attribute_value == "address_line1")
215     return HTML_TYPE_ADDRESS_LINE1;
216 
217   if (autocomplete_attribute_value == "address-line2" ||
218       autocomplete_attribute_value == "address_line2")
219     return HTML_TYPE_ADDRESS_LINE2;
220 
221   if (autocomplete_attribute_value == "address-line3" ||
222       autocomplete_attribute_value == "address_line3")
223     return HTML_TYPE_ADDRESS_LINE3;
224 
225   // TODO(estade): remove support for "locality" and "region".
226   if (autocomplete_attribute_value == "locality")
227     return HTML_TYPE_ADDRESS_LEVEL2;
228 
229   if (autocomplete_attribute_value == "region")
230     return HTML_TYPE_ADDRESS_LEVEL1;
231 
232   if (autocomplete_attribute_value == "address-level1" ||
233       autocomplete_attribute_value == "address_level1")
234     return HTML_TYPE_ADDRESS_LEVEL1;
235 
236   if (autocomplete_attribute_value == "address-level2" ||
237       autocomplete_attribute_value == "address_level2")
238     return HTML_TYPE_ADDRESS_LEVEL2;
239 
240   if (autocomplete_attribute_value == "address-level3" ||
241       autocomplete_attribute_value == "address_level3")
242     return HTML_TYPE_ADDRESS_LEVEL3;
243 
244   if (autocomplete_attribute_value == "country")
245     return HTML_TYPE_COUNTRY_CODE;
246 
247   if (autocomplete_attribute_value == "country-name" ||
248       autocomplete_attribute_value == "country_name")
249     return HTML_TYPE_COUNTRY_NAME;
250 
251   if (autocomplete_attribute_value == "postal-code" ||
252       autocomplete_attribute_value == "postal_code")
253     return HTML_TYPE_POSTAL_CODE;
254 
255   // content_switches.h isn't accessible from here, hence we have
256   // to copy the string literal. This should be removed soon anyway.
257   if (autocomplete_attribute_value == "address" &&
258       base::CommandLine::ForCurrentProcess()->HasSwitch(
259           "enable-experimental-web-platform-features")) {
260     return HTML_TYPE_FULL_ADDRESS;
261   }
262 
263   if (autocomplete_attribute_value == "cc-name" ||
264       autocomplete_attribute_value == "cc_name")
265     return HTML_TYPE_CREDIT_CARD_NAME_FULL;
266 
267   if (autocomplete_attribute_value == "cc-given-name" ||
268       autocomplete_attribute_value == "cc_given_name")
269     return HTML_TYPE_CREDIT_CARD_NAME_FIRST;
270 
271   if (autocomplete_attribute_value == "cc-family-name" ||
272       autocomplete_attribute_value == "cc_family_name")
273     return HTML_TYPE_CREDIT_CARD_NAME_LAST;
274 
275   if (autocomplete_attribute_value == "cc-number" ||
276       autocomplete_attribute_value == "cc_number")
277     return HTML_TYPE_CREDIT_CARD_NUMBER;
278 
279   if (autocomplete_attribute_value == "cc-exp" ||
280       autocomplete_attribute_value == "cc_exp") {
281     if (field.max_length == 5)
282       return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
283     if (field.max_length == 7)
284       return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
285     return HTML_TYPE_CREDIT_CARD_EXP;
286   }
287 
288   if (autocomplete_attribute_value == "cc-exp-month" ||
289       autocomplete_attribute_value == "cc_exp_month")
290     return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
291 
292   if (autocomplete_attribute_value == "cc-exp-year" ||
293       autocomplete_attribute_value == "cc_exp_year") {
294     if (field.max_length == 2)
295       return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
296     if (field.max_length == 4)
297       return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
298     return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
299   }
300 
301   if (autocomplete_attribute_value == "cc-csc" ||
302       autocomplete_attribute_value == "cc_csc")
303     return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
304 
305   if (autocomplete_attribute_value == "cc-type" ||
306       autocomplete_attribute_value == "cc_type")
307     return HTML_TYPE_CREDIT_CARD_TYPE;
308 
309   if (autocomplete_attribute_value == "transaction-amount" ||
310       autocomplete_attribute_value == "transaction_amount")
311     return HTML_TYPE_TRANSACTION_AMOUNT;
312 
313   if (autocomplete_attribute_value == "transaction-currency" ||
314       autocomplete_attribute_value == "transaction_currency")
315     return HTML_TYPE_TRANSACTION_CURRENCY;
316 
317   if (autocomplete_attribute_value == "tel" ||
318       autocomplete_attribute_value == "phone")
319     return HTML_TYPE_TEL;
320 
321   if (autocomplete_attribute_value == "tel-country-code" ||
322       autocomplete_attribute_value == "phone-country-code" ||
323       autocomplete_attribute_value == "tel_country_code" ||
324       autocomplete_attribute_value == "phone_country_code")
325     return HTML_TYPE_TEL_COUNTRY_CODE;
326 
327   if (autocomplete_attribute_value == "tel-national" ||
328       autocomplete_attribute_value == "phone-national" ||
329       autocomplete_attribute_value == "tel_national" ||
330       autocomplete_attribute_value == "phone_national")
331     return HTML_TYPE_TEL_NATIONAL;
332 
333   if (autocomplete_attribute_value == "tel-area-code" ||
334       autocomplete_attribute_value == "phone-area-code" ||
335       autocomplete_attribute_value == "tel_area_code" ||
336       autocomplete_attribute_value == "phone_area_code")
337     return HTML_TYPE_TEL_AREA_CODE;
338 
339   if (autocomplete_attribute_value == "tel-local" ||
340       autocomplete_attribute_value == "phone-local" ||
341       autocomplete_attribute_value == "tel_local" ||
342       autocomplete_attribute_value == "phone_local")
343     return HTML_TYPE_TEL_LOCAL;
344 
345   if (autocomplete_attribute_value == "tel-local-prefix" ||
346       autocomplete_attribute_value == "phone-local-prefix" ||
347       autocomplete_attribute_value == "tel_local_prefix" ||
348       autocomplete_attribute_value == "phone_local_prefix")
349     return HTML_TYPE_TEL_LOCAL_PREFIX;
350 
351   if (autocomplete_attribute_value == "tel-local-suffix" ||
352       autocomplete_attribute_value == "phone-local-suffix" ||
353       autocomplete_attribute_value == "tel_local_suffix" ||
354       autocomplete_attribute_value == "phone_local_suffix")
355     return HTML_TYPE_TEL_LOCAL_SUFFIX;
356 
357   if (autocomplete_attribute_value == "tel-extension" ||
358       autocomplete_attribute_value == "phone-extension" ||
359       autocomplete_attribute_value == "phone-ext" ||
360       autocomplete_attribute_value == "tel_extension" ||
361       autocomplete_attribute_value == "phone_extension" ||
362       autocomplete_attribute_value == "phone_ext")
363     return HTML_TYPE_TEL_EXTENSION;
364 
365   if (autocomplete_attribute_value == "email" ||
366       autocomplete_attribute_value == "username")
367     return HTML_TYPE_EMAIL;
368 
369   if (autocomplete_attribute_value == "upi-vpa" ||
370       autocomplete_attribute_value == "upi_vpa" ||
371       autocomplete_attribute_value == "upi")
372     return HTML_TYPE_UPI_VPA;
373 
374   if (autocomplete_attribute_value == "one-time-code")
375     return HTML_TYPE_ONE_TIME_CODE;
376 
377   return HTML_TYPE_UNRECOGNIZED;
378 }
379 
operator <<(std::ostream & out,const autofill::AutofillQueryResponse & response)380 std::ostream& operator<<(std::ostream& out,
381                          const autofill::AutofillQueryResponse& response) {
382   for (const auto& form : response.form_suggestions()) {
383     out << "\nForm";
384     for (const auto& field : form.field_suggestions()) {
385       out << "\n Field\n  signature: " << field.field_signature();
386       if (field.has_primary_type_prediction())
387         out << "\n  primary_type_prediction: "
388             << field.primary_type_prediction();
389       for (const auto& prediction : field.predictions())
390         out << "\n  prediction: " << prediction.type();
391     }
392   }
393   return out;
394 }
395 
396 // Returns true iff all form fields autofill types are in |contained_types|.
AllTypesCaptured(const FormStructure & form,const ServerFieldTypeSet & contained_types)397 bool AllTypesCaptured(const FormStructure& form,
398                       const ServerFieldTypeSet& contained_types) {
399   for (const auto& field : form) {
400     for (const auto& type : field->possible_types()) {
401       if (type != UNKNOWN_TYPE && type != EMPTY_TYPE &&
402           !contained_types.count(type))
403         return false;
404     }
405   }
406   return true;
407 }
408 
409 // Encode password attributes and length into |upload|.
EncodePasswordAttributesVote(const std::pair<PasswordAttribute,bool> & password_attributes_vote,const size_t password_length_vote,const int password_symbol_vote,AutofillUploadContents * upload)410 void EncodePasswordAttributesVote(
411     const std::pair<PasswordAttribute, bool>& password_attributes_vote,
412     const size_t password_length_vote,
413     const int password_symbol_vote,
414     AutofillUploadContents* upload) {
415   switch (password_attributes_vote.first) {
416     case PasswordAttribute::kHasLowercaseLetter:
417       upload->set_password_has_lowercase_letter(
418           password_attributes_vote.second);
419       break;
420     case PasswordAttribute::kHasSpecialSymbol:
421       upload->set_password_has_special_symbol(password_attributes_vote.second);
422       if (password_attributes_vote.second)
423         upload->set_password_special_symbol(password_symbol_vote);
424       break;
425     case PasswordAttribute::kPasswordAttributesCount:
426       NOTREACHED();
427   }
428   upload->set_password_length(password_length_vote);
429 }
430 
EncodeRandomizedValue(const RandomizedEncoder & encoder,FormSignature form_signature,FieldSignature field_signature,base::StringPiece data_type,base::StringPiece data_value,bool include_checksum,AutofillRandomizedValue * output)431 void EncodeRandomizedValue(const RandomizedEncoder& encoder,
432                            FormSignature form_signature,
433                            FieldSignature field_signature,
434                            base::StringPiece data_type,
435                            base::StringPiece data_value,
436                            bool include_checksum,
437                            AutofillRandomizedValue* output) {
438   DCHECK(output);
439   output->set_encoding_type(encoder.encoding_type());
440   output->set_encoded_bits(
441       encoder.Encode(form_signature, field_signature, data_type, data_value));
442   if (include_checksum) {
443     DCHECK(data_type == RandomizedEncoder::FORM_URL);
444     output->set_checksum(StrToHash32Bit(data_value.data()));
445   }
446 }
447 
EncodeRandomizedValue(const RandomizedEncoder & encoder,FormSignature form_signature,FieldSignature field_signature,base::StringPiece data_type,base::StringPiece16 data_value,bool include_checksum,AutofillRandomizedValue * output)448 void EncodeRandomizedValue(const RandomizedEncoder& encoder,
449                            FormSignature form_signature,
450                            FieldSignature field_signature,
451                            base::StringPiece data_type,
452                            base::StringPiece16 data_value,
453                            bool include_checksum,
454                            AutofillRandomizedValue* output) {
455   EncodeRandomizedValue(encoder, form_signature, field_signature, data_type,
456                         base::UTF16ToUTF8(data_value), include_checksum,
457                         output);
458 }
459 
PopulateRandomizedFormMetadata(const RandomizedEncoder & encoder,const FormStructure & form,AutofillRandomizedFormMetadata * metadata)460 void PopulateRandomizedFormMetadata(const RandomizedEncoder& encoder,
461                                     const FormStructure& form,
462                                     AutofillRandomizedFormMetadata* metadata) {
463   const FormSignature form_signature = form.form_signature();
464   constexpr FieldSignature
465       kNullFieldSignature;  // Not relevant for form level metadata.
466   if (!form.id_attribute().empty()) {
467     EncodeRandomizedValue(encoder, form_signature, kNullFieldSignature,
468                           RandomizedEncoder::FORM_ID, form.id_attribute(),
469                           /*include_checksum=*/false, metadata->mutable_id());
470   }
471   if (!form.name_attribute().empty()) {
472     EncodeRandomizedValue(encoder, form_signature, kNullFieldSignature,
473                           RandomizedEncoder::FORM_NAME, form.name_attribute(),
474                           /*include_checksum=*/false, metadata->mutable_name());
475   }
476 
477   for (const ButtonTitleInfo& e : form.button_titles()) {
478     auto* button_title = metadata->add_button_title();
479     DCHECK(!e.first.empty());
480     EncodeRandomizedValue(encoder, form_signature, kNullFieldSignature,
481                           RandomizedEncoder::FORM_BUTTON_TITLES, e.first,
482                           /*include_checksum=*/false,
483                           button_title->mutable_title());
484     button_title->set_type(static_cast<ButtonTitleType>(e.second));
485   }
486   auto full_source_url = form.full_source_url().spec();
487   if (encoder.AnonymousUrlCollectionIsEnabled() && !full_source_url.empty()) {
488     EncodeRandomizedValue(encoder, form_signature, kNullFieldSignature,
489                           RandomizedEncoder::FORM_URL, full_source_url,
490                           /*include_checksum=*/true, metadata->mutable_url());
491   }
492 }
493 
PopulateRandomizedFieldMetadata(const RandomizedEncoder & encoder,const FormStructure & form,const AutofillField & field,AutofillRandomizedFieldMetadata * metadata)494 void PopulateRandomizedFieldMetadata(
495     const RandomizedEncoder& encoder,
496     const FormStructure& form,
497     const AutofillField& field,
498     AutofillRandomizedFieldMetadata* metadata) {
499   const FormSignature form_signature = form.form_signature();
500   const FieldSignature field_signature = field.GetFieldSignature();
501   if (!field.id_attribute.empty()) {
502     EncodeRandomizedValue(encoder, form_signature, field_signature,
503                           RandomizedEncoder::FIELD_ID, field.id_attribute,
504                           /*include_checksum=*/false, metadata->mutable_id());
505   }
506   if (!field.name_attribute.empty()) {
507     EncodeRandomizedValue(encoder, form_signature, field_signature,
508                           RandomizedEncoder::FIELD_NAME, field.name_attribute,
509                           /*include_checksum=*/false, metadata->mutable_name());
510   }
511   if (!field.form_control_type.empty()) {
512     EncodeRandomizedValue(encoder, form_signature, field_signature,
513                           RandomizedEncoder::FIELD_CONTROL_TYPE,
514                           field.form_control_type, /*include_checksum=*/false,
515                           metadata->mutable_type());
516   }
517   if (!field.label.empty()) {
518     EncodeRandomizedValue(encoder, form_signature, field_signature,
519                           RandomizedEncoder::FIELD_LABEL, field.label,
520                           /*include_checksum=*/false,
521                           metadata->mutable_label());
522   }
523   if (!field.aria_label.empty()) {
524     EncodeRandomizedValue(encoder, form_signature, field_signature,
525                           RandomizedEncoder::FIELD_ARIA_LABEL, field.aria_label,
526                           /*include_checksum=*/false,
527                           metadata->mutable_aria_label());
528   }
529   if (!field.aria_description.empty()) {
530     EncodeRandomizedValue(encoder, form_signature, field_signature,
531                           RandomizedEncoder::FIELD_ARIA_DESCRIPTION,
532                           field.aria_description, /*include_checksum=*/false,
533                           metadata->mutable_aria_description());
534   }
535   if (!field.css_classes.empty()) {
536     EncodeRandomizedValue(encoder, form_signature, field_signature,
537                           RandomizedEncoder::FIELD_CSS_CLASS, field.css_classes,
538                           /*include_checksum=*/false,
539                           metadata->mutable_css_class());
540   }
541   if (!field.placeholder.empty()) {
542     EncodeRandomizedValue(encoder, form_signature, field_signature,
543                           RandomizedEncoder::FIELD_PLACEHOLDER,
544                           field.placeholder, /*include_checksum=*/false,
545                           metadata->mutable_placeholder());
546   }
547 }
548 
EncodeFormMetadataForQuery(const FormStructure & form,AutofillRandomizedFormMetadata * metadata)549 void EncodeFormMetadataForQuery(const FormStructure& form,
550                                 AutofillRandomizedFormMetadata* metadata) {
551   DCHECK(metadata);
552   metadata->mutable_id()->set_encoded_bits(
553       base::UTF16ToUTF8(form.id_attribute()));
554   metadata->mutable_name()->set_encoded_bits(
555       base::UTF16ToUTF8(form.name_attribute()));
556 }
557 
EncodeFieldMetadataForQuery(const FormFieldData & field,AutofillRandomizedFieldMetadata * metadata)558 void EncodeFieldMetadataForQuery(const FormFieldData& field,
559                                  AutofillRandomizedFieldMetadata* metadata) {
560   DCHECK(metadata);
561   metadata->mutable_id()->set_encoded_bits(
562       base::UTF16ToUTF8(field.id_attribute));
563   metadata->mutable_name()->set_encoded_bits(
564       base::UTF16ToUTF8(field.name_attribute));
565   metadata->mutable_type()->set_encoded_bits(field.form_control_type);
566   metadata->mutable_label()->set_encoded_bits(base::UTF16ToUTF8(field.label));
567   metadata->mutable_aria_label()->set_encoded_bits(
568       base::UTF16ToUTF8(field.aria_label));
569   metadata->mutable_aria_description()->set_encoded_bits(
570       base::UTF16ToUTF8(field.aria_description));
571   metadata->mutable_css_class()->set_encoded_bits(
572       base::UTF16ToUTF8(field.css_classes));
573   metadata->mutable_placeholder()->set_encoded_bits(
574       base::UTF16ToUTF8(field.placeholder));
575 }
576 
577 // Creates the type relationship rules map. The keys represent the type that has
578 // rules, and the value represents the list of required types for the given
579 // key. In order to respect the rule, only one of the required types is needed.
580 // For example, for Autofill to support fields of type
581 // "PHONE_HOME_COUNTRY_CODE", there would need to be at least one other field
582 // of type "PHONE_HOME_NUMBER" or "PHONE_HOME_CITY_AND_NUMBER".
583 const std::unordered_map<ServerFieldType, ServerFieldTypeSet>&
GetTypeRelationshipMap()584 GetTypeRelationshipMap() {
585   // Initialized and cached on first use.
586   static const auto* const rules =
587       new std::unordered_map<ServerFieldType, ServerFieldTypeSet>(
588           {{PHONE_HOME_COUNTRY_CODE,
589             {PHONE_HOME_NUMBER, PHONE_HOME_CITY_AND_NUMBER}}});
590   return *rules;
591 }
592 
593 }  // namespace
594 
FormStructure(const FormData & form)595 FormStructure::FormStructure(const FormData& form)
596     : id_attribute_(form.id_attribute),
597       name_attribute_(form.name_attribute),
598       form_name_(form.name),
599       button_titles_(form.button_titles),
600       source_url_(form.url),
601       full_source_url_(form.full_url),
602       target_url_(form.action),
603       main_frame_origin_(form.main_frame_origin),
604       is_form_tag_(form.is_form_tag),
605       is_formless_checkout_(form.is_formless_checkout),
606       all_fields_are_passwords_(!form.fields.empty()),
607       form_parsed_timestamp_(AutofillTickClock::NowTicks()),
608       passwords_were_revealed_(false),
609       password_symbol_vote_(0),
610       developer_engagement_metrics_(0),
611       unique_renderer_id_(form.unique_renderer_id) {
612   // Copy the form fields.
613   std::map<base::string16, size_t> unique_names;
614   for (const FormFieldData& field : form.fields) {
615     if (!ShouldSkipField(field))
616       ++active_field_count_;
617 
618     if (field.form_control_type == "password")
619       has_password_field_ = true;
620     else
621       all_fields_are_passwords_ = false;
622 
623     // Generate a unique name for this field by appending a counter to the name.
624     // Make sure to prepend the counter with a non-numeric digit so that we are
625     // guaranteed to avoid collisions.
626     base::string16 unique_name =
627         field.name + base::ASCIIToUTF16("_") +
628         base::NumberToString16(++unique_names[field.name]);
629     fields_.push_back(std::make_unique<AutofillField>(field, unique_name));
630   }
631 
632   form_signature_ = autofill::CalculateFormSignature(form);
633   // Do further processing on the fields, as needed.
634   ProcessExtractedFields();
635 }
636 
FormStructure(FormSignature form_signature,const std::vector<FieldSignature> & field_signatures)637 FormStructure::FormStructure(
638     FormSignature form_signature,
639     const std::vector<FieldSignature>& field_signatures)
640     : form_signature_(form_signature) {
641   for (const auto& signature : field_signatures)
642     fields_.push_back(AutofillField::CreateForPasswordManagerUpload(signature));
643 }
644 
// The destructor is defaulted; it is defined here rather than inline in the
// class declaration.
FormStructure::~FormStructure() = default;
646 
DetermineHeuristicTypes(LogManager * log_manager)647 void FormStructure::DetermineHeuristicTypes(LogManager* log_manager) {
648   const auto determine_heuristic_types_start_time =
649       AutofillTickClock::NowTicks();
650 
651   // First, try to detect field types based on each field's |autocomplete|
652   // attribute value.
653   if (!was_parsed_for_autocomplete_attributes_)
654     ParseFieldTypesFromAutocompleteAttributes();
655 
656   // Then if there are enough active fields, and if we are dealing with either a
657   // proper <form> or a <form>-less checkout, run the heuristics and server
658   // prediction routines.
659   if (ShouldRunHeuristics()) {
660     const FieldCandidatesMap field_type_map = FormField::ParseFormFields(
661         fields_, page_language_, is_form_tag_, log_manager);
662     for (const auto& field : fields_) {
663       const auto iter = field_type_map.find(field->unique_name());
664       if (iter != field_type_map.end()) {
665         field->set_heuristic_type(iter->second.BestHeuristicType());
666       }
667     }
668   }
669 
670   UpdateAutofillCount();
671   IdentifySections(has_author_specified_sections_);
672 
673   developer_engagement_metrics_ = 0;
674   if (IsAutofillable()) {
675     AutofillMetrics::DeveloperEngagementMetric metric =
676         has_author_specified_types_
677             ? AutofillMetrics::FILLABLE_FORM_PARSED_WITH_TYPE_HINTS
678             : AutofillMetrics::FILLABLE_FORM_PARSED_WITHOUT_TYPE_HINTS;
679     developer_engagement_metrics_ |= 1 << metric;
680     AutofillMetrics::LogDeveloperEngagementMetric(metric);
681   }
682 
683   if (has_author_specified_upi_vpa_hint_) {
684     AutofillMetrics::LogDeveloperEngagementMetric(
685         AutofillMetrics::FORM_CONTAINS_UPI_VPA_HINT);
686     developer_engagement_metrics_ |=
687         1 << AutofillMetrics::FORM_CONTAINS_UPI_VPA_HINT;
688   }
689 
690   RationalizeFieldTypePredictions();
691 
692   AutofillMetrics::LogDetermineHeuristicTypesTiming(
693       AutofillTickClock::NowTicks() - determine_heuristic_types_start_time);
694 }
695 
EncodeUploadRequest(const ServerFieldTypeSet & available_field_types,bool form_was_autofilled,const std::string & login_form_signature,bool observed_submission,AutofillUploadContents * upload,std::vector<FormSignature> * encoded_signatures) const696 bool FormStructure::EncodeUploadRequest(
697     const ServerFieldTypeSet& available_field_types,
698     bool form_was_autofilled,
699     const std::string& login_form_signature,
700     bool observed_submission,
701     AutofillUploadContents* upload,
702     std::vector<FormSignature>* encoded_signatures) const {
703   DCHECK(AllTypesCaptured(*this, available_field_types));
704   encoded_signatures->clear();
705 
706   upload->set_submission(observed_submission);
707   upload->set_client_version(
708       version_info::GetProductNameAndVersionForUserAgent());
709   upload->set_form_signature(form_signature().value());
710   upload->set_autofill_used(form_was_autofilled);
711   upload->set_data_present(EncodeFieldTypes(available_field_types));
712   upload->set_passwords_revealed(passwords_were_revealed_);
713   upload->set_has_form_tag(is_form_tag_);
714   if (!page_language_.empty() && randomized_encoder_ != nullptr) {
715     upload->set_language(page_language_);
716   }
717 
718   auto triggering_event = (submission_event_ != SubmissionIndicatorEvent::NONE)
719                               ? submission_event_
720                               : ToSubmissionIndicatorEvent(submission_source_);
721 
722   DCHECK(autofill::mojom::IsKnownEnumValue(triggering_event));
723   upload->set_submission_event(
724       static_cast<AutofillUploadContents_SubmissionIndicatorEvent>(
725           triggering_event));
726 
727   if (password_attributes_vote_) {
728     EncodePasswordAttributesVote(*password_attributes_vote_,
729                                  password_length_vote_, password_symbol_vote_,
730                                  upload);
731   }
732 
733   if (IsAutofillFieldMetadataEnabled()) {
734     upload->set_action_signature(StrToHash64Bit(target_url_.host()));
735     if (!form_name().empty())
736       upload->set_form_name(base::UTF16ToUTF8(form_name()));
737     for (const ButtonTitleInfo& e : button_titles_) {
738       auto* button_title = upload->add_button_title();
739       button_title->set_title(base::UTF16ToUTF8(e.first));
740       button_title->set_type(static_cast<ButtonTitleType>(e.second));
741     }
742   }
743 
744   if (!login_form_signature.empty()) {
745     uint64_t login_sig;
746     if (base::StringToUint64(login_form_signature, &login_sig))
747       upload->set_login_form_signature(login_sig);
748   }
749 
750   if (IsMalformed())
751     return false;  // Malformed form, skip it.
752 
753   EncodeFormForUpload(upload, encoded_signatures);
754   return true;
755 }
756 
757 // static
EncodeQueryRequest(const std::vector<FormStructure * > & forms,AutofillPageQueryRequest * query,std::vector<FormSignature> * queried_form_signatures)758 bool FormStructure::EncodeQueryRequest(
759     const std::vector<FormStructure*>& forms,
760     AutofillPageQueryRequest* query,
761     std::vector<FormSignature>* queried_form_signatures) {
762   DCHECK(queried_form_signatures);
763   queried_form_signatures->clear();
764   queried_form_signatures->reserve(forms.size());
765 
766   query->set_client_version(
767       version_info::GetProductNameAndVersionForUserAgent());
768 
769   // If a page contains repeated forms, detect that and encode only one form as
770   // the returned data would be the same for all the repeated forms.
771   // TODO(crbug/1064709#c11): the statement is not entirely correct because
772   // (1) distinct forms can have identical form signatures because we truncate
773   // (large) numbers in the form signature calculation while these are
774   // considered for field signatures; (2) for dynamic forms we will hold on to
775   // the original form signature.
776   std::set<FormSignature> processed_forms;
777   for (const auto* form : forms) {
778     if (processed_forms.find(form->form_signature()) != processed_forms.end())
779       continue;
780     processed_forms.insert(form->form_signature());
781     UMA_HISTOGRAM_COUNTS_1000("Autofill.FieldCount", form->field_count());
782     if (form->IsMalformed())
783       continue;
784 
785     form->EncodeFormForQuery(query->add_forms(), queried_form_signatures);
786   }
787 
788   return !queried_form_signatures->empty();
789 }
790 
791 // static
ParseApiQueryResponse(base::StringPiece payload,const std::vector<FormStructure * > & forms,const std::vector<FormSignature> & queried_form_signatures,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)792 void FormStructure::ParseApiQueryResponse(
793     base::StringPiece payload,
794     const std::vector<FormStructure*>& forms,
795     const std::vector<FormSignature>& queried_form_signatures,
796     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
797   AutofillMetrics::LogServerQueryMetric(
798       AutofillMetrics::QUERY_RESPONSE_RECEIVED);
799 
800   std::string decoded_payload;
801   if (!base::Base64Decode(payload, &decoded_payload)) {
802     VLOG(1) << "Could not decode payload from base64 to bytes";
803     return;
804   }
805 
806   // Parse the response.
807   AutofillQueryResponse response;
808   if (!response.ParseFromString(decoded_payload))
809     return;
810 
811   VLOG(1) << "Autofill query response from API was successfully parsed: "
812           << response;
813 
814   ProcessQueryResponse(response, forms, queried_form_signatures,
815                        form_interactions_ukm_logger);
816 }
817 
818 // static
ProcessQueryResponse(const AutofillQueryResponse & response,const std::vector<FormStructure * > & forms,const std::vector<FormSignature> & queried_form_signatures,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)819 void FormStructure::ProcessQueryResponse(
820     const AutofillQueryResponse& response,
821     const std::vector<FormStructure*>& forms,
822     const std::vector<FormSignature>& queried_form_signatures,
823     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
824   AutofillMetrics::LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
825 
826   bool heuristics_detected_fillable_field = false;
827   bool query_response_overrode_heuristics = false;
828 
829   std::map<std::pair<FormSignature, FieldSignature>,
830            AutofillQueryResponse::FormSuggestion::FieldSuggestion>
831       field_types;
832   for (int form_idx = 0;
833        form_idx < std::min(response.form_suggestions_size(),
834                            static_cast<int>(queried_form_signatures.size()));
835        ++form_idx) {
836     FormSignature form_sig = queried_form_signatures.at(form_idx);
837     for (const auto& field :
838          response.form_suggestions(form_idx).field_suggestions()) {
839       FieldSignature field_sig(field.field_signature());
840       field_types[std::make_pair(form_sig, field_sig)] = field;
841     }
842   }
843 
844   // Copy the field types into the actual form.
845   for (FormStructure* form : forms) {
846     bool query_response_has_no_server_data = true;
847     for (auto& field : form->fields_) {
848       auto it = field_types.find(
849           std::make_pair(form->form_signature(), field->GetFieldSignature()));
850       if (it == field_types.end())
851         continue;
852 
853       const auto& current_field = it->second;
854 
855       ServerFieldType field_type =
856           static_cast<ServerFieldType>(current_field.primary_type_prediction());
857       query_response_has_no_server_data &= field_type == NO_SERVER_DATA;
858 
859       ServerFieldType heuristic_type = field->heuristic_type();
860       if (heuristic_type != UNKNOWN_TYPE)
861         heuristics_detected_fillable_field = true;
862 
863       field->set_server_type(field_type);
864       std::vector<AutofillQueryResponse::FormSuggestion::FieldSuggestion::
865                       FieldPrediction>
866           server_predictions;
867       if (current_field.predictions_size() == 0) {
868         AutofillQueryResponse::FormSuggestion::FieldSuggestion::FieldPrediction
869             field_prediction;
870         field_prediction.set_type(field_type);
871         server_predictions.push_back(field_prediction);
872       } else {
873         server_predictions.assign(current_field.predictions().begin(),
874                                   current_field.predictions().end());
875       }
876       field->set_server_predictions(std::move(server_predictions));
877       field->set_may_use_prefilled_placeholder(
878           current_field.may_use_prefilled_placeholder());
879 
880       if (heuristic_type != field->Type().GetStorableType())
881         query_response_overrode_heuristics = true;
882 
883       if (current_field.has_password_requirements())
884         field->SetPasswordRequirements(current_field.password_requirements());
885     }
886 
887     AutofillMetrics::LogServerResponseHasDataForForm(
888         !query_response_has_no_server_data);
889 
890     form->UpdateAutofillCount();
891     form->RationalizeRepeatedFields(form_interactions_ukm_logger);
892     form->RationalizeFieldTypePredictions();
893     form->IdentifySections(false);
894   }
895 
896   AutofillMetrics::ServerQueryMetric metric;
897   if (query_response_overrode_heuristics) {
898     if (heuristics_detected_fillable_field) {
899       metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
900     } else {
901       metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
902     }
903   } else {
904     metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
905   }
906   AutofillMetrics::LogServerQueryMetric(metric);
907 }
908 
909 // static
GetFieldTypePredictions(const std::vector<FormStructure * > & form_structures)910 std::vector<FormDataPredictions> FormStructure::GetFieldTypePredictions(
911     const std::vector<FormStructure*>& form_structures) {
912   std::vector<FormDataPredictions> forms;
913   forms.reserve(form_structures.size());
914   for (const FormStructure* form_structure : form_structures) {
915     FormDataPredictions form;
916     form.data = form_structure->ToFormData();
917     form.signature = form_structure->FormSignatureAsStr();
918 
919     for (const auto& field : form_structure->fields_) {
920       FormFieldDataPredictions annotated_field;
921       annotated_field.signature = field->FieldSignatureAsStr();
922       annotated_field.heuristic_type =
923           AutofillType(field->heuristic_type()).ToString();
924       annotated_field.server_type =
925           AutofillType(field->server_type()).ToString();
926       annotated_field.overall_type = field->Type().ToString();
927       annotated_field.parseable_name =
928           base::UTF16ToUTF8(field->parseable_name());
929       annotated_field.section = field->section;
930       form.fields.push_back(annotated_field);
931     }
932 
933     forms.push_back(form);
934   }
935   return forms;
936 }
937 
938 // static
IsAutofillFieldMetadataEnabled()939 bool FormStructure::IsAutofillFieldMetadataEnabled() {
940   const std::string group_name =
941       base::FieldTrialList::FindFullName("AutofillFieldMetadata");
942   return base::StartsWith(group_name, "Enabled", base::CompareCase::SENSITIVE);
943 }
944 
CreateForPasswordManagerUpload(FormSignature form_signature,const std::vector<FieldSignature> & field_signatures)945 std::unique_ptr<FormStructure> FormStructure::CreateForPasswordManagerUpload(
946     FormSignature form_signature,
947     const std::vector<FieldSignature>& field_signatures) {
948   std::unique_ptr<FormStructure> form;
949   form.reset(new FormStructure(form_signature, field_signatures));
950   return form;
951 }
952 
FormSignatureAsStr() const953 std::string FormStructure::FormSignatureAsStr() const {
954   return base::NumberToString(form_signature().value());
955 }
956 
IsAutofillable() const957 bool FormStructure::IsAutofillable() const {
958   size_t min_required_fields =
959       std::min({kMinRequiredFieldsForHeuristics, kMinRequiredFieldsForQuery,
960                 kMinRequiredFieldsForUpload});
961   if (autofill_count() < min_required_fields)
962     return false;
963 
964   return ShouldBeParsed();
965 }
966 
IsCompleteCreditCardForm() const967 bool FormStructure::IsCompleteCreditCardForm() const {
968   bool found_cc_number = false;
969   bool found_cc_expiration = false;
970   for (const auto& field : fields_) {
971     ServerFieldType type = field->Type().GetStorableType();
972     if (!found_cc_expiration && data_util::IsCreditCardExpirationType(type)) {
973       found_cc_expiration = true;
974     } else if (!found_cc_number && type == CREDIT_CARD_NUMBER) {
975       found_cc_number = true;
976     }
977     if (found_cc_expiration && found_cc_number)
978       return true;
979   }
980   return false;
981 }
982 
UpdateAutofillCount()983 void FormStructure::UpdateAutofillCount() {
984   autofill_count_ = 0;
985   for (const auto& field : *this) {
986     if (field && field->IsFieldFillable())
987       ++autofill_count_;
988   }
989 }
990 
ShouldBeParsed(LogManager * log_manager) const991 bool FormStructure::ShouldBeParsed(LogManager* log_manager) const {
992   // Exclude URLs not on the web via HTTP(S).
993   if (!HasAllowedScheme(source_url_)) {
994     if (log_manager) {
995       log_manager->Log() << LoggingScope::kAbortParsing
996                          << LogMessage::kAbortParsingNotAllowedScheme << *this;
997     }
998     return false;
999   }
1000 
1001   size_t min_required_fields =
1002       std::min({kMinRequiredFieldsForHeuristics, kMinRequiredFieldsForQuery,
1003                 kMinRequiredFieldsForUpload});
1004   if (active_field_count() < min_required_fields &&
1005       (!all_fields_are_passwords() ||
1006        active_field_count() < kRequiredFieldsForFormsWithOnlyPasswordFields) &&
1007       !has_author_specified_types_) {
1008     if (log_manager) {
1009       log_manager->Log() << LoggingScope::kAbortParsing
1010                          << LogMessage::kAbortParsingNotEnoughFields
1011                          << active_field_count() << *this;
1012     }
1013     return false;
1014   }
1015 
1016   // Rule out search forms.
1017   static const base::string16 kUrlSearchActionPattern =
1018       base::UTF8ToUTF16(kUrlSearchActionRe);
1019   if (MatchesPattern(base::UTF8ToUTF16(target_url_.path_piece()),
1020                      kUrlSearchActionPattern)) {
1021     if (log_manager) {
1022       log_manager->Log() << LoggingScope::kAbortParsing
1023                          << LogMessage::kAbortParsingUrlMatchesSearchRegex
1024                          << *this;
1025     }
1026     return false;
1027   }
1028 
1029   bool has_text_field = false;
1030   for (const auto& it : *this) {
1031     has_text_field |= it->form_control_type != "select-one";
1032   }
1033 
1034   if (!has_text_field && log_manager) {
1035     log_manager->Log() << LoggingScope::kAbortParsing
1036                        << LogMessage::kAbortParsingFormHasNoTextfield << *this;
1037   }
1038 
1039   return has_text_field;
1040 }
1041 
ShouldRunHeuristics() const1042 bool FormStructure::ShouldRunHeuristics() const {
1043   return active_field_count() >= kMinRequiredFieldsForHeuristics &&
1044          HasAllowedScheme(source_url_) &&
1045          (is_form_tag_ || is_formless_checkout_ ||
1046           !base::FeatureList::IsEnabled(
1047               features::kAutofillRestrictUnownedFieldsToFormlessCheckout));
1048 }
1049 
ShouldBeQueried() const1050 bool FormStructure::ShouldBeQueried() const {
1051   return (has_password_field_ ||
1052           active_field_count() >= kMinRequiredFieldsForQuery) &&
1053          ShouldBeParsed();
1054 }
1055 
ShouldBeUploaded() const1056 bool FormStructure::ShouldBeUploaded() const {
1057   return active_field_count() >= kMinRequiredFieldsForUpload &&
1058          ShouldBeParsed();
1059 }
1060 
RetrieveFromCache(const FormStructure & cached_form,const bool should_keep_cached_value,const bool only_server_and_autofill_state)1061 void FormStructure::RetrieveFromCache(
1062     const FormStructure& cached_form,
1063     const bool should_keep_cached_value,
1064     const bool only_server_and_autofill_state) {
1065   std::map<FieldRendererId, const AutofillField*> cached_fields_by_id;
1066   for (size_t i = 0; i < cached_form.field_count(); ++i) {
1067     auto* const field = cached_form.field(i);
1068     cached_fields_by_id[field->unique_renderer_id] = field;
1069   }
1070   for (auto& field : *this) {
1071     const AutofillField* cached_field = nullptr;
1072     const auto& it = cached_fields_by_id.find(field->unique_renderer_id);
1073     if (it != cached_fields_by_id.end())
1074       cached_field = it->second;
1075 
1076     // If the unique renderer id (or the name) is not stable due to some Java
1077     // Script magic in the website, use the field signature as a fallback
1078     // solution to find the field in the cached form.
1079     if (!cached_field) {
1080       // Iterates over the fields to find the field with the same form
1081       // signature.
1082       for (size_t i = 0; i < cached_form.field_count(); ++i) {
1083         auto* const cfield = cached_form.field(i);
1084         if (field->GetFieldSignature() == cfield->GetFieldSignature()) {
1085           // If there are multiple matches, do not retrieve the field and stop
1086           // the process.
1087           if (cached_field) {
1088             cached_field = nullptr;
1089             break;
1090           } else {
1091             cached_field = cfield;
1092           }
1093         }
1094       }
1095     }
1096 
1097     if (cached_field) {
1098       if (!only_server_and_autofill_state) {
1099         // Transfer attributes of the cached AutofillField to the newly created
1100         // AutofillField.
1101         field->set_heuristic_type(cached_field->heuristic_type());
1102         field->SetHtmlType(cached_field->html_type(),
1103                            cached_field->html_mode());
1104         field->section = cached_field->section;
1105         field->set_only_fill_when_focused(
1106             cached_field->only_fill_when_focused());
1107       }
1108       if (should_keep_cached_value) {
1109         field->is_autofilled = cached_field->is_autofilled;
1110       }
1111       if (field->form_control_type != "select-one") {
1112         if (should_keep_cached_value) {
1113           field->value = cached_field->value;
1114           value_from_dynamic_change_form_ = true;
1115         } else if (field->value == cached_field->value &&
1116                    (field->server_type() != ADDRESS_HOME_COUNTRY &&
1117                     field->server_type() != ADDRESS_HOME_STATE)) {
1118           // From the perspective of learning user data, text fields containing
1119           // default values are equivalent to empty fields.
1120           // Since a website can prefill country and state values basedw on
1121           // GeoIp, the mechanism is deactivated for state and country fields.
1122           field->value = base::string16();
1123         }
1124       }
1125       field->set_server_type(cached_field->server_type());
1126       field->set_previously_autofilled(cached_field->previously_autofilled());
1127     }
1128   }
1129 
1130   UpdateAutofillCount();
1131 
1132   // Update form parsed timestamp
1133   form_parsed_timestamp_ =
1134       std::min(form_parsed_timestamp_, cached_form.form_parsed_timestamp_);
1135 
1136   // The form signature should match between query and upload requests to the
1137   // server. On many websites, form elements are dynamically added, removed, or
1138   // rearranged via JavaScript between page load and form submission, so we
1139   // copy over the |form_signature_field_names_| corresponding to the query
1140   // request.
1141   form_signature_ = cached_form.form_signature_;
1142 }
1143 
LogQualityMetrics(const base::TimeTicks & load_time,const base::TimeTicks & interaction_time,const base::TimeTicks & submission_time,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger,bool did_show_suggestions,bool observed_submission) const1144 void FormStructure::LogQualityMetrics(
1145     const base::TimeTicks& load_time,
1146     const base::TimeTicks& interaction_time,
1147     const base::TimeTicks& submission_time,
1148     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger,
1149     bool did_show_suggestions,
1150     bool observed_submission) const {
1151   // Use the same timestamp on UKM Metrics generated within this method's scope.
1152   AutofillMetrics::UkmTimestampPin timestamp_pin(form_interactions_ukm_logger);
1153 
1154   size_t num_detected_field_types = 0;
1155   size_t num_edited_autofilled_fields = 0;
1156   bool did_autofill_all_possible_fields = true;
1157   bool did_autofill_some_possible_fields = false;
1158   bool is_for_credit_card = IsCompleteCreditCardForm();
1159   bool has_upi_vpa_field = false;
1160 
1161   // Determine the correct suffix for the metric, depending on whether or
1162   // not a submission was observed.
1163   const AutofillMetrics::QualityMetricType metric_type =
1164       observed_submission ? AutofillMetrics::TYPE_SUBMISSION
1165                           : AutofillMetrics::TYPE_NO_SUBMISSION;
1166 
1167   for (size_t i = 0; i < field_count(); ++i) {
1168     auto* const field = this->field(i);
1169     if (IsUPIVirtualPaymentAddress(field->value)) {
1170       has_upi_vpa_field = true;
1171       AutofillMetrics::LogUserHappinessMetric(
1172           AutofillMetrics::USER_DID_ENTER_UPI_VPA, field->Type().group(),
1173           security_state::SecurityLevel::SECURITY_LEVEL_COUNT,
1174           data_util::DetermineGroups(*this));
1175     }
1176 
1177     form_interactions_ukm_logger->LogFieldFillStatus(*this, *field,
1178                                                      metric_type);
1179 
1180     AutofillMetrics::LogHeuristicPredictionQualityMetrics(
1181         form_interactions_ukm_logger, *this, *field, metric_type);
1182     AutofillMetrics::LogServerPredictionQualityMetrics(
1183         form_interactions_ukm_logger, *this, *field, metric_type);
1184     AutofillMetrics::LogOverallPredictionQualityMetrics(
1185         form_interactions_ukm_logger, *this, *field, metric_type);
1186     // We count fields that were autofilled but later modified, regardless of
1187     // whether the data now in the field is recognized.
1188     if (field->previously_autofilled())
1189       num_edited_autofilled_fields++;
1190 
1191     const ServerFieldTypeSet& field_types = field->possible_types();
1192     DCHECK(!field_types.empty());
1193     if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE)) {
1194       DCHECK_EQ(field_types.size(), 1u);
1195       continue;
1196     }
1197 
1198     ++num_detected_field_types;
1199     if (field->is_autofilled)
1200       did_autofill_some_possible_fields = true;
1201     else if (!field->only_fill_when_focused())
1202       did_autofill_all_possible_fields = false;
1203 
1204     // If the form was submitted, record if field types have been filled and
1205     // subsequently edited by the user.
1206     if (observed_submission) {
1207       if (field->is_autofilled || field->previously_autofilled()) {
1208         AutofillMetrics::LogEditedAutofilledFieldAtSubmission(
1209             form_interactions_ukm_logger, *this, *field);
1210       }
1211     }
1212   }
1213 
1214   AutofillMetrics::LogNumberOfEditedAutofilledFields(
1215       num_edited_autofilled_fields, observed_submission);
1216 
1217   // We log "submission" and duration metrics if we are here after observing a
1218   // submission event.
1219   if (observed_submission) {
1220     AutofillMetrics::AutofillFormSubmittedState state;
1221     if (num_detected_field_types < kMinRequiredFieldsForHeuristics &&
1222         num_detected_field_types < kMinRequiredFieldsForQuery) {
1223       state = AutofillMetrics::NON_FILLABLE_FORM_OR_NEW_DATA;
1224     } else {
1225       if (did_autofill_all_possible_fields) {
1226         state = AutofillMetrics::FILLABLE_FORM_AUTOFILLED_ALL;
1227       } else if (did_autofill_some_possible_fields) {
1228         state = AutofillMetrics::FILLABLE_FORM_AUTOFILLED_SOME;
1229       } else if (!did_show_suggestions) {
1230         state = AutofillMetrics::
1231             FILLABLE_FORM_AUTOFILLED_NONE_DID_NOT_SHOW_SUGGESTIONS;
1232       } else {
1233         state =
1234             AutofillMetrics::FILLABLE_FORM_AUTOFILLED_NONE_DID_SHOW_SUGGESTIONS;
1235       }
1236 
1237       // Unlike the other times, the |submission_time| should always be
1238       // available.
1239       DCHECK(!submission_time.is_null());
1240 
1241       // The |load_time| might be unset, in the case that the form was
1242       // dynamically added to the DOM.
1243       if (!load_time.is_null()) {
1244         // Submission should always chronologically follow form load.
1245         DCHECK_GE(submission_time, load_time);
1246         base::TimeDelta elapsed = submission_time - load_time;
1247         if (did_autofill_some_possible_fields)
1248           AutofillMetrics::LogFormFillDurationFromLoadWithAutofill(elapsed);
1249         else
1250           AutofillMetrics::LogFormFillDurationFromLoadWithoutAutofill(elapsed);
1251       }
1252 
1253       // The |interaction_time| might be unset, in the case that the user
1254       // submitted a blank form.
1255       if (!interaction_time.is_null()) {
1256         // Submission should always chronologically follow interaction.
1257         DCHECK(submission_time > interaction_time);
1258         base::TimeDelta elapsed = submission_time - interaction_time;
1259         AutofillMetrics::LogFormFillDurationFromInteraction(
1260             GetFormTypes(), did_autofill_some_possible_fields, elapsed);
1261       }
1262     }
1263 
1264     AutofillMetrics::LogAutofillFormSubmittedState(
1265         state, is_for_credit_card, has_upi_vpa_field, GetFormTypes(),
1266         form_parsed_timestamp_, form_signature(), form_interactions_ukm_logger);
1267   }
1268 }
1269 
LogQualityMetricsBasedOnAutocomplete(AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger) const1270 void FormStructure::LogQualityMetricsBasedOnAutocomplete(
1271     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger)
1272     const {
1273   const AutofillMetrics::QualityMetricType metric_type =
1274       AutofillMetrics::TYPE_AUTOCOMPLETE_BASED;
1275   for (const auto& field : fields_) {
1276     if (field->html_type() != HTML_TYPE_UNSPECIFIED &&
1277         field->html_type() != HTML_TYPE_UNRECOGNIZED) {
1278       AutofillMetrics::LogHeuristicPredictionQualityMetrics(
1279           form_interactions_ukm_logger, *this, *field, metric_type);
1280       AutofillMetrics::LogServerPredictionQualityMetrics(
1281           form_interactions_ukm_logger, *this, *field, metric_type);
1282     }
1283   }
1284 }
1285 
ParseFieldTypesFromAutocompleteAttributes()1286 void FormStructure::ParseFieldTypesFromAutocompleteAttributes() {
1287   has_author_specified_types_ = false;
1288   has_author_specified_sections_ = false;
1289   has_author_specified_upi_vpa_hint_ = false;
1290   for (const std::unique_ptr<AutofillField>& field : fields_) {
1291     // To prevent potential section name collisions, add a default suffix for
1292     // other fields.  Without this, 'autocomplete' attribute values
1293     // "section--shipping street-address" and "shipping street-address" would be
1294     // parsed identically, given the section handling code below.  We do this
1295     // before any validation so that fields with invalid attributes still end up
1296     // in the default section.  These default section names will be overridden
1297     // by subsequent heuristic parsing steps if there are no author-specified
1298     // section names.
1299     field->section = kDefaultSection;
1300 
1301     std::vector<std::string> tokens =
1302         LowercaseAndTokenizeAttributeString(field->autocomplete_attribute);
1303 
1304     // The autocomplete attribute is overloaded: it can specify either a field
1305     // type hint or whether autocomplete should be enabled at all.  Ignore the
1306     // latter type of attribute value.
1307     if (tokens.empty() ||
1308         (tokens.size() == 1 &&
1309          (tokens[0] == "on" || tokens[0] == "off" || tokens[0] == "false"))) {
1310       continue;
1311     }
1312 
1313     // Any other value, even it is invalid, is considered to be a type hint.
1314     // This allows a website's author to specify an attribute like
1315     // autocomplete="other" on a field to disable all Autofill heuristics for
1316     // the form.
1317     has_author_specified_types_ = true;
1318 
1319     // Per the spec, the tokens are parsed in reverse order. The expected
1320     // pattern is:
1321     // [section-*] [shipping|billing] [type_hint] field_type
1322 
1323     // (1) The final token must be the field type. If it is not one of the known
1324     // types, abort.
1325     std::string field_type_token = tokens.back();
1326     tokens.pop_back();
1327     HtmlFieldType field_type =
1328         FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1329     if (field_type == HTML_TYPE_UPI_VPA) {
1330       has_author_specified_upi_vpa_hint_ = true;
1331       // TODO(crbug.com/702223): Flesh out support for UPI-VPA.
1332       field_type = HTML_TYPE_UNRECOGNIZED;
1333     }
1334     if (field_type == HTML_TYPE_UNSPECIFIED)
1335       continue;
1336 
1337     // (2) The preceding token, if any, may be a type hint.
1338     if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1339       // If it is, it must match the field type; otherwise, abort.
1340       // Note that an invalid token invalidates the entire attribute value, even
1341       // if the other tokens are valid.
1342       if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1343         continue;
1344 
1345       // Chrome Autofill ignores these type hints.
1346       tokens.pop_back();
1347     }
1348 
1349     DCHECK_EQ(kDefaultSection, field->section);
1350     std::string section = field->section;
1351     HtmlFieldMode mode = HTML_MODE_NONE;
1352 
1353     // (3) The preceding token, if any, may be a fixed string that is either
1354     // "shipping" or "billing".  Chrome Autofill treats these as implicit
1355     // section name suffixes.
1356     if (!tokens.empty()) {
1357       if (tokens.back() == kShippingMode)
1358         mode = HTML_MODE_SHIPPING;
1359       else if (tokens.back() == kBillingMode)
1360         mode = HTML_MODE_BILLING;
1361 
1362       if (mode != HTML_MODE_NONE) {
1363         section = "-" + tokens.back();
1364         tokens.pop_back();
1365       }
1366     }
1367 
1368     // (4) The preceding token, if any, may be a named section.
1369     const base::StringPiece kSectionPrefix = "section-";
1370     if (!tokens.empty() && base::StartsWith(tokens.back(), kSectionPrefix,
1371                                             base::CompareCase::SENSITIVE)) {
1372       // Prepend this section name to the suffix set in the preceding block.
1373       section = tokens.back().substr(kSectionPrefix.size()) + section;
1374       tokens.pop_back();
1375     }
1376 
1377     // (5) No other tokens are allowed.  If there are any remaining, abort.
1378     if (!tokens.empty())
1379       continue;
1380 
1381     if (section != kDefaultSection) {
1382       has_author_specified_sections_ = true;
1383       field->section = section;
1384     }
1385 
1386     // No errors encountered while parsing!
1387     // Update the |field|'s type based on what was parsed from the attribute.
1388     field->SetHtmlType(field_type, mode);
1389   }
1390 
1391   was_parsed_for_autocomplete_attributes_ = true;
1392 }
1393 
PossibleValues(ServerFieldType type)1394 std::set<base::string16> FormStructure::PossibleValues(ServerFieldType type) {
1395   std::set<base::string16> values;
1396   AutofillType target_type(type);
1397   for (const auto& field : fields_) {
1398     if (field->Type().GetStorableType() != target_type.GetStorableType() ||
1399         field->Type().group() != target_type.group()) {
1400       continue;
1401     }
1402 
1403     // No option values; anything goes.
1404     if (field->option_values.empty()) {
1405       values.clear();
1406       break;
1407     }
1408 
1409     for (const base::string16& val : field->option_values) {
1410       if (!val.empty())
1411         values.insert(base::i18n::ToUpper(val));
1412     }
1413 
1414     for (const base::string16& content : field->option_contents) {
1415       if (!content.empty())
1416         values.insert(base::i18n::ToUpper(content));
1417     }
1418   }
1419 
1420   return values;
1421 }
1422 
field(size_t index) const1423 const AutofillField* FormStructure::field(size_t index) const {
1424   if (index >= fields_.size()) {
1425     NOTREACHED();
1426     return nullptr;
1427   }
1428 
1429   return fields_[index].get();
1430 }
1431 
field(size_t index)1432 AutofillField* FormStructure::field(size_t index) {
1433   return const_cast<AutofillField*>(
1434       static_cast<const FormStructure*>(this)->field(index));
1435 }
1436 
field_count() const1437 size_t FormStructure::field_count() const {
1438   return fields_.size();
1439 }
1440 
active_field_count() const1441 size_t FormStructure::active_field_count() const {
1442   return active_field_count_;
1443 }
1444 
ToFormData() const1445 FormData FormStructure::ToFormData() const {
1446   FormData data;
1447   data.id_attribute = id_attribute_;
1448   data.name_attribute = name_attribute_;
1449   data.name = form_name_;
1450   data.button_titles = button_titles_;
1451   data.url = source_url_;
1452   data.full_url = full_source_url_;
1453   data.action = target_url_;
1454   data.main_frame_origin = main_frame_origin_;
1455   data.is_form_tag = is_form_tag_;
1456   data.is_formless_checkout = is_formless_checkout_;
1457   data.unique_renderer_id = unique_renderer_id_;
1458 
1459   for (size_t i = 0; i < fields_.size(); ++i) {
1460     data.fields.push_back(FormFieldData(*fields_[i]));
1461   }
1462 
1463   return data;
1464 }
1465 
SectionedFieldsIndexes()1466 FormStructure::SectionedFieldsIndexes::SectionedFieldsIndexes() {}
1467 
~SectionedFieldsIndexes()1468 FormStructure::SectionedFieldsIndexes::~SectionedFieldsIndexes() {}
1469 
RationalizeCreditCardFieldPredictions()1470 void FormStructure::RationalizeCreditCardFieldPredictions() {
1471   bool cc_first_name_found = false;
1472   bool cc_last_name_found = false;
1473   bool cc_num_found = false;
1474   bool cc_month_found = false;
1475   bool cc_year_found = false;
1476   bool cc_type_found = false;
1477   bool cc_cvc_found = false;
1478   size_t num_months_found = 0;
1479   size_t num_other_fields_found = 0;
1480   for (const auto& field : fields_) {
1481     ServerFieldType current_field_type =
1482         field->ComputedType().GetStorableType();
1483     switch (current_field_type) {
1484       case CREDIT_CARD_NAME_FIRST:
1485         cc_first_name_found = true;
1486         break;
1487       case CREDIT_CARD_NAME_LAST:
1488         cc_last_name_found = true;
1489         break;
1490       case CREDIT_CARD_NAME_FULL:
1491         cc_first_name_found = true;
1492         cc_last_name_found = true;
1493         break;
1494       case CREDIT_CARD_NUMBER:
1495         cc_num_found = true;
1496         break;
1497       case CREDIT_CARD_EXP_MONTH:
1498         cc_month_found = true;
1499         ++num_months_found;
1500         break;
1501       case CREDIT_CARD_EXP_2_DIGIT_YEAR:
1502       case CREDIT_CARD_EXP_4_DIGIT_YEAR:
1503         cc_year_found = true;
1504         break;
1505       case CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR:
1506       case CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR:
1507         cc_month_found = true;
1508         cc_year_found = true;
1509         ++num_months_found;
1510         break;
1511       case CREDIT_CARD_TYPE:
1512         cc_type_found = true;
1513         break;
1514       case CREDIT_CARD_VERIFICATION_CODE:
1515         cc_cvc_found = true;
1516         break;
1517       case ADDRESS_HOME_ZIP:
1518       case ADDRESS_BILLING_ZIP:
1519         // Zip/Postal code often appears as part of a Credit Card form. Do
1520         // not count it as a non-cc-related field.
1521         break;
1522       default:
1523         ++num_other_fields_found;
1524     }
1525   }
1526 
1527   // A partial CC name is unlikely. Prefer to consider these profile names
1528   // when partial.
1529   bool cc_name_found = cc_first_name_found && cc_last_name_found;
1530 
1531   // A partial CC expiry date should not be filled. These are often confused
1532   // with quantity/height fields and/or generic year fields.
1533   bool cc_date_found = cc_month_found && cc_year_found;
1534 
1535   // Count the credit card related fields in the form.
1536   size_t num_cc_fields_found =
1537       static_cast<int>(cc_name_found) + static_cast<int>(cc_num_found) +
1538       static_cast<int>(cc_date_found) + static_cast<int>(cc_type_found) +
1539       static_cast<int>(cc_cvc_found);
1540 
1541   // Retain credit card related fields if the form has multiple fields or has
1542   // no unrelated fields (useful for single cc-field forms). Credit card number
1543   // is permitted to be alone in an otherwise unrelated form because some
1544   // dynamic forms reveal the remainder of the fields only after the credit
1545   // card number is entered and identified as a credit card by the site.
1546   bool keep_cc_fields =
1547       cc_num_found || num_cc_fields_found >= 3 || num_other_fields_found == 0;
1548 
1549   // Do an update pass over the fields to rewrite the types if credit card
1550   // fields are not to be retained. Some special handling is given to expiry
1551   // dates if the full date is not found or multiple expiry date fields are
1552   // found. See comments inline below.
1553   for (auto it = fields_.begin(); it != fields_.end(); ++it) {
1554     auto& field = *it;
1555     ServerFieldType current_field_type = field->Type().GetStorableType();
1556     switch (current_field_type) {
1557       case CREDIT_CARD_NAME_FIRST:
1558         if (!keep_cc_fields)
1559           field->SetTypeTo(AutofillType(NAME_FIRST));
1560         break;
1561       case CREDIT_CARD_NAME_LAST:
1562         if (!keep_cc_fields)
1563           field->SetTypeTo(AutofillType(NAME_LAST));
1564         break;
1565       case CREDIT_CARD_NAME_FULL:
1566         if (!keep_cc_fields)
1567           field->SetTypeTo(AutofillType(NAME_FULL));
1568         break;
1569       case CREDIT_CARD_NUMBER:
1570       case CREDIT_CARD_TYPE:
1571       case CREDIT_CARD_VERIFICATION_CODE:
1572       case CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR:
1573       case CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR:
1574         if (!keep_cc_fields)
1575           field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1576         break;
1577       case CREDIT_CARD_EXP_MONTH:
1578         // Do not preserve an expiry month prediction if any of the following
1579         // are true:
1580         //   (1) the form is determined to be be non-cc related, so all cc
1581         //       field predictions are to be discarded
1582         //   (2) the expiry month was found without a corresponding year
1583         //   (3) multiple month fields were found in a form having a full
1584         //       expiry date. This usually means the form is a checkout form
1585         //       that also has one or more quantity fields. Suppress the expiry
1586         //       month field(s) not immediately preceding an expiry year field.
1587         if (!keep_cc_fields || !cc_date_found) {
1588           field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1589         } else if (num_months_found > 1) {
1590           auto it2 = it + 1;
1591           if (it2 == fields_.end()) {
1592             field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1593           } else {
1594             ServerFieldType next_field_type = (*it2)->Type().GetStorableType();
1595             if (next_field_type != CREDIT_CARD_EXP_2_DIGIT_YEAR &&
1596                 next_field_type != CREDIT_CARD_EXP_4_DIGIT_YEAR) {
1597               field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1598             }
1599           }
1600         }
1601         break;
1602       case CREDIT_CARD_EXP_2_DIGIT_YEAR:
1603       case CREDIT_CARD_EXP_4_DIGIT_YEAR:
1604         if (!keep_cc_fields || !cc_date_found)
1605           field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1606         break;
1607       default:
1608         break;
1609     }
1610   }
1611 }
1612 
RationalizePhoneNumbersInSection(std::string section)1613 void FormStructure::RationalizePhoneNumbersInSection(std::string section) {
1614   if (phone_rationalized_[section])
1615     return;
1616   std::vector<AutofillField*> fields;
1617   for (size_t i = 0; i < field_count(); ++i) {
1618     if (field(i)->section != section)
1619       continue;
1620     fields.push_back(field(i));
1621   }
1622   rationalization_util::RationalizePhoneNumberFields(fields);
1623   phone_rationalized_[section] = true;
1624 }
1625 
ApplyRationalizationsToFieldAndLog(size_t field_index,ServerFieldType new_type,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1626 void FormStructure::ApplyRationalizationsToFieldAndLog(
1627     size_t field_index,
1628     ServerFieldType new_type,
1629     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1630   if (field_index >= fields_.size())
1631     return;
1632   auto old_type = fields_[field_index]->Type().GetStorableType();
1633   fields_[field_index]->SetTypeTo(AutofillType(new_type));
1634   if (form_interactions_ukm_logger) {
1635     form_interactions_ukm_logger->LogRepeatedServerTypePredictionRationalized(
1636         form_signature_, *fields_[field_index], old_type);
1637   }
1638 }
1639 
RationalizeAddressLineFields(SectionedFieldsIndexes * sections_of_address_indexes,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1640 void FormStructure::RationalizeAddressLineFields(
1641     SectionedFieldsIndexes* sections_of_address_indexes,
1642     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1643   // The rationalization happens within sections.
1644   for (sections_of_address_indexes->Reset();
1645        !sections_of_address_indexes->IsFinished();
1646        sections_of_address_indexes->WalkForwardToTheNextSection()) {
1647     auto current_section = sections_of_address_indexes->CurrentSection();
1648 
1649     // The rationalization only applies to sections that have 2 or 3 visible
1650     // street address predictions.
1651     if (current_section.size() != 2 && current_section.size() != 3) {
1652       continue;
1653     }
1654 
1655     int nb_address_rationalized = 0;
1656     for (auto field_index : current_section) {
1657       switch (nb_address_rationalized) {
1658         case 0:
1659           ApplyRationalizationsToFieldAndLog(field_index, ADDRESS_HOME_LINE1,
1660                                              form_interactions_ukm_logger);
1661           break;
1662         case 1:
1663           ApplyRationalizationsToFieldAndLog(field_index, ADDRESS_HOME_LINE2,
1664                                              form_interactions_ukm_logger);
1665           break;
1666         case 2:
1667           ApplyRationalizationsToFieldAndLog(field_index, ADDRESS_HOME_LINE3,
1668                                              form_interactions_ukm_logger);
1669           break;
1670         default:
1671           NOTREACHED();
1672           break;
1673       }
1674       ++nb_address_rationalized;
1675     }
1676   }
1677 }
1678 
ApplyRationalizationsToHiddenSelects(size_t field_index,ServerFieldType new_type,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1679 void FormStructure::ApplyRationalizationsToHiddenSelects(
1680     size_t field_index,
1681     ServerFieldType new_type,
1682     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1683   ServerFieldType old_type = fields_[field_index]->Type().GetStorableType();
1684 
1685   // Walk on the hidden select fields right after the field_index which share
1686   // the same type with the field_index, and apply the rationalization to them
1687   // as well. These fields, if any, function as one field with the field_index.
1688   for (auto current_index = field_index + 1; current_index < fields_.size();
1689        current_index++) {
1690     if (fields_[current_index]->IsVisible() ||
1691         fields_[current_index]->form_control_type != "select-one" ||
1692         fields_[current_index]->Type().GetStorableType() != old_type)
1693       break;
1694     ApplyRationalizationsToFieldAndLog(current_index, new_type,
1695                                        form_interactions_ukm_logger);
1696   }
1697 
1698   // Same for the fields coming right before the field_index. (No need to check
1699   // for the fields appearing before the first field!)
1700   if (field_index == 0)
1701     return;
1702   for (auto current_index = field_index - 1;; current_index--) {
1703     if (fields_[current_index]->IsVisible() ||
1704         fields_[current_index]->form_control_type != "select-one" ||
1705         fields_[current_index]->Type().GetStorableType() != old_type)
1706       break;
1707     ApplyRationalizationsToFieldAndLog(current_index, new_type,
1708                                        form_interactions_ukm_logger);
1709     if (current_index == 0)
1710       break;
1711   }
1712 }
1713 
HeuristicsPredictionsAreApplicable(size_t upper_index,size_t lower_index,ServerFieldType first_type,ServerFieldType second_type)1714 bool FormStructure::HeuristicsPredictionsAreApplicable(
1715     size_t upper_index,
1716     size_t lower_index,
1717     ServerFieldType first_type,
1718     ServerFieldType second_type) {
1719   // The predictions are applicable if one field has one of the two types, and
1720   // the other has the other type.
1721   if (fields_[upper_index]->heuristic_type() ==
1722       fields_[lower_index]->heuristic_type())
1723     return false;
1724   if ((fields_[upper_index]->heuristic_type() == first_type ||
1725        fields_[upper_index]->heuristic_type() == second_type) &&
1726       (fields_[lower_index]->heuristic_type() == first_type ||
1727        fields_[lower_index]->heuristic_type() == second_type))
1728     return true;
1729   return false;
1730 }
1731 
ApplyRationalizationsToFields(size_t upper_index,size_t lower_index,ServerFieldType upper_type,ServerFieldType lower_type,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1732 void FormStructure::ApplyRationalizationsToFields(
1733     size_t upper_index,
1734     size_t lower_index,
1735     ServerFieldType upper_type,
1736     ServerFieldType lower_type,
1737     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1738   // Hidden fields are ignored during the rationalization, but 'select' hidden
1739   // fields also get autofilled to support their corresponding visible
1740   // 'synthetic fields'. So, if a field's type is rationalized, we should make
1741   // sure that the rationalization is also applied to its corresponding hidden
1742   // fields, if any.
1743   ApplyRationalizationsToHiddenSelects(upper_index, upper_type,
1744                                        form_interactions_ukm_logger);
1745   ApplyRationalizationsToFieldAndLog(upper_index, upper_type,
1746                                      form_interactions_ukm_logger);
1747 
1748   ApplyRationalizationsToHiddenSelects(lower_index, lower_type,
1749                                        form_interactions_ukm_logger);
1750   ApplyRationalizationsToFieldAndLog(lower_index, lower_type,
1751                                      form_interactions_ukm_logger);
1752 }
1753 
FieldShouldBeRationalizedToCountry(size_t upper_index)1754 bool FormStructure::FieldShouldBeRationalizedToCountry(size_t upper_index) {
1755   // Upper field is country if and only if it's the first visible address field
1756   // in its section. Otherwise, the upper field is a state, and the lower one
1757   // is a country.
1758   for (int field_index = upper_index - 1; field_index >= 0; --field_index) {
1759     if (fields_[field_index]->IsVisible() &&
1760         AutofillType(fields_[field_index]->Type().GetStorableType()).group() ==
1761             ADDRESS_HOME &&
1762         fields_[field_index]->section == fields_[upper_index]->section) {
1763       return false;
1764     }
1765   }
1766   return true;
1767 }
1768 
RationalizeAddressStateCountry(SectionedFieldsIndexes * sections_of_state_indexes,SectionedFieldsIndexes * sections_of_country_indexes,AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1769 void FormStructure::RationalizeAddressStateCountry(
1770     SectionedFieldsIndexes* sections_of_state_indexes,
1771     SectionedFieldsIndexes* sections_of_country_indexes,
1772     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1773   // Walk on the sections of state and country indexes simultaneously. If they
1774   // both point to the same section, it means that that section includes both
1775   // the country and the state type. This means that no that rationalization is
1776   // needed. So, walk both pointers forward. Otherwise, look at the section that
1777   // appears earlier on the form. That section doesn't have any field of the
1778   // other type. Rationalize the fields on the earlier section if needed. Walk
1779   // the pointer that points to the earlier section forward. Stop when both
1780   // sections of indexes are processed. (This resembles the merge in the merge
1781   // sort.)
1782   sections_of_state_indexes->Reset();
1783   sections_of_country_indexes->Reset();
1784 
1785   while (!sections_of_state_indexes->IsFinished() ||
1786          !sections_of_country_indexes->IsFinished()) {
1787     auto current_section_of_state_indexes =
1788         sections_of_state_indexes->CurrentSection();
1789     auto current_section_of_country_indexes =
1790         sections_of_country_indexes->CurrentSection();
1791     // If there are still sections left with both country and state type, and
1792     // state and country current sections are equal, then that section has both
1793     // state and country. No rationalization needed.
1794     if (!sections_of_state_indexes->IsFinished() &&
1795         !sections_of_country_indexes->IsFinished() &&
1796         fields_[sections_of_state_indexes->CurrentIndex()]->section ==
1797             fields_[sections_of_country_indexes->CurrentIndex()]->section) {
1798       sections_of_state_indexes->WalkForwardToTheNextSection();
1799       sections_of_country_indexes->WalkForwardToTheNextSection();
1800       continue;
1801     }
1802 
1803     size_t upper_index = 0, lower_index = 0;
1804 
1805     // If country section is before the state ones, it means that that section
1806     // misses states, and the other way around.
1807     if (current_section_of_state_indexes < current_section_of_country_indexes) {
1808       // We only rationalize when we have exactly two visible fields of a kind.
1809       if (current_section_of_state_indexes.size() == 2) {
1810         upper_index = current_section_of_state_indexes[0];
1811         lower_index = current_section_of_state_indexes[1];
1812       }
1813       sections_of_state_indexes->WalkForwardToTheNextSection();
1814     } else {
1815       // We only rationalize when we have exactly two visible fields of a kind.
1816       if (current_section_of_country_indexes.size() == 2) {
1817         upper_index = current_section_of_country_indexes[0];
1818         lower_index = current_section_of_country_indexes[1];
1819       }
1820       sections_of_country_indexes->WalkForwardToTheNextSection();
1821     }
1822 
1823     // This is when upper and lower indexes are not changed, meaning that there
1824     // is no need for rationalization.
1825     if (upper_index == lower_index) {
1826       continue;
1827     }
1828 
1829     if (HeuristicsPredictionsAreApplicable(upper_index, lower_index,
1830                                            ADDRESS_HOME_STATE,
1831                                            ADDRESS_HOME_COUNTRY)) {
1832       ApplyRationalizationsToFields(
1833           upper_index, lower_index, fields_[upper_index]->heuristic_type(),
1834           fields_[lower_index]->heuristic_type(), form_interactions_ukm_logger);
1835       continue;
1836     }
1837 
1838     if (FieldShouldBeRationalizedToCountry(upper_index)) {
1839       ApplyRationalizationsToFields(upper_index, lower_index,
1840                                     ADDRESS_HOME_COUNTRY, ADDRESS_HOME_STATE,
1841                                     form_interactions_ukm_logger);
1842     } else {
1843       ApplyRationalizationsToFields(upper_index, lower_index,
1844                                     ADDRESS_HOME_STATE, ADDRESS_HOME_COUNTRY,
1845                                     form_interactions_ukm_logger);
1846     }
1847   }
1848 }
1849 
RationalizeRepeatedFields(AutofillMetrics::FormInteractionsUkmLogger * form_interactions_ukm_logger)1850 void FormStructure::RationalizeRepeatedFields(
1851     AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger) {
1852   // The type of every field whose index is in
1853   // sectioned_field_indexes_by_type[|type|] is predicted by server as |type|.
1854   // Example: sectioned_field_indexes_by_type[FULL_NAME] is a sectioned fields
1855   // indexes of fields whose types are predicted as FULL_NAME by the server.
1856   SectionedFieldsIndexes sectioned_field_indexes_by_type[MAX_VALID_FIELD_TYPE];
1857 
1858   for (const auto& field : fields_) {
1859     // The hidden fields are not considered when rationalizing.
1860     if (!field->IsVisible())
1861       continue;
1862     // The billing and non-billing types are aggregated.
1863     auto current_type = field->Type().GetStorableType();
1864 
1865     if (current_type != UNKNOWN_TYPE && current_type < MAX_VALID_FIELD_TYPE) {
1866       // Look at the sectioned field indexes for the current type, if the
1867       // current field belongs to that section, then the field index should be
1868       // added to that same section, otherwise, start a new section.
1869       sectioned_field_indexes_by_type[current_type].AddFieldIndex(
1870           &field - &fields_[0],
1871           /*is_new_section*/ sectioned_field_indexes_by_type[current_type]
1872                   .Empty() ||
1873               fields_[sectioned_field_indexes_by_type[current_type]
1874                           .LastFieldIndex()]
1875                       ->section != field->section);
1876     }
1877   }
1878 
1879   RationalizeAddressLineFields(
1880       &(sectioned_field_indexes_by_type[ADDRESS_HOME_STREET_ADDRESS]),
1881       form_interactions_ukm_logger);
1882   // Since the billing types are mapped to the non-billing ones, no need to
1883   // take care of ADDRESS_BILLING_STATE and .. .
1884   RationalizeAddressStateCountry(
1885       &(sectioned_field_indexes_by_type[ADDRESS_HOME_STATE]),
1886       &(sectioned_field_indexes_by_type[ADDRESS_HOME_COUNTRY]),
1887       form_interactions_ukm_logger);
1888 }
1889 
RationalizeFieldTypePredictions()1890 void FormStructure::RationalizeFieldTypePredictions() {
1891   RationalizeCreditCardFieldPredictions();
1892   for (const auto& field : fields_) {
1893     if (base::FeatureList::IsEnabled(features::kAutofillOffNoServerData) &&
1894         !field->should_autocomplete && field->server_type() == NO_SERVER_DATA &&
1895         field->heuristic_type() != CREDIT_CARD_VERIFICATION_CODE) {
1896       // When the field has autocomplete off, and the server returned no
1897       // prediction, then assume Autofill is not useful for the current field.
1898       // Special case for CVC (crbug.com/968036). We never send votes for CVC
1899       // fields, but we still fill them when the user inputs them via the CVC
1900       // prompt. Since Autofill doesn't trigger from a CVC field, we can keep
1901       // the client-side predictions for this type.
1902       field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
1903     } else {
1904       field->SetTypeTo(field->Type());
1905     }
1906   }
1907   RationalizeTypeRelationships();
1908 }
1909 
EncodeFormForQuery(AutofillPageQueryRequest::Form * query_form,std::vector<FormSignature> * queried_form_signatures) const1910 void FormStructure::EncodeFormForQuery(
1911     AutofillPageQueryRequest::Form* query_form,
1912     std::vector<FormSignature>* queried_form_signatures) const {
1913   DCHECK(!IsMalformed());
1914 
1915   query_form->set_signature(form_signature().value());
1916   queried_form_signatures->push_back(form_signature());
1917 
1918   if (is_rich_query_enabled_) {
1919     EncodeFormMetadataForQuery(*this, query_form->mutable_metadata());
1920   }
1921 
1922   for (const auto& field : fields_) {
1923     if (ShouldSkipField(*field))
1924       continue;
1925 
1926     AutofillPageQueryRequest::Form::Field* added_field =
1927         query_form->add_fields();
1928     added_field->set_signature(field->GetFieldSignature().value());
1929 
1930     if (is_rich_query_enabled_) {
1931       EncodeFieldMetadataForQuery(*field, added_field->mutable_metadata());
1932     }
1933 
1934     if (IsAutofillFieldMetadataEnabled()) {
1935       added_field->set_control_type(field->form_control_type);
1936 
1937       if (!field->name.empty())
1938         added_field->set_name(base::UTF16ToUTF8(field->name));
1939     }
1940   }
1941 }
1942 
EncodeFormForUpload(AutofillUploadContents * upload,std::vector<FormSignature> * encoded_signatures) const1943 void FormStructure::EncodeFormForUpload(
1944     AutofillUploadContents* upload,
1945     std::vector<FormSignature>* encoded_signatures) const {
1946   DCHECK(!IsMalformed());
1947 
1948   encoded_signatures->push_back(form_signature());
1949 
1950   if (randomized_encoder_) {
1951     PopulateRandomizedFormMetadata(*randomized_encoder_, *this,
1952                                    upload->mutable_randomized_form_metadata());
1953   }
1954 
1955   for (const auto& field : fields_) {
1956     // Don't upload checkable fields.
1957     if (IsCheckable(field->check_status))
1958       continue;
1959 
1960     // Add the same field elements as the query and a few more below.
1961     if (ShouldSkipField(*field))
1962       continue;
1963 
1964     auto* added_field = upload->add_field();
1965 
1966     for (const auto& field_type : field->possible_types()) {
1967       added_field->add_autofill_type(field_type);
1968     }
1969 
1970     field->NormalizePossibleTypesValidities();
1971 
1972     for (const auto& field_type_validities :
1973          field->possible_types_validities()) {
1974       auto* type_validities = added_field->add_autofill_type_validities();
1975       type_validities->set_type(field_type_validities.first);
1976       for (const auto& validity : field_type_validities.second) {
1977         type_validities->add_validity(validity);
1978       }
1979     }
1980 
1981     if (field->generation_type()) {
1982       added_field->set_generation_type(field->generation_type());
1983       added_field->set_generated_password_changed(
1984           field->generated_password_changed());
1985     }
1986 
1987     if (field->vote_type()) {
1988       added_field->set_vote_type(field->vote_type());
1989     }
1990 
1991     if (field->initial_value_hash()) {
1992       added_field->set_initial_value_hash(field->initial_value_hash().value());
1993     }
1994 
1995     added_field->set_signature(field->GetFieldSignature().value());
1996 
1997     if (field->properties_mask)
1998       added_field->set_properties_mask(field->properties_mask);
1999 
2000     if (randomized_encoder_) {
2001       PopulateRandomizedFieldMetadata(
2002           *randomized_encoder_, *this, *field,
2003           added_field->mutable_randomized_field_metadata());
2004     }
2005 
2006     if (IsAutofillFieldMetadataEnabled()) {
2007       added_field->set_type(field->form_control_type);
2008 
2009       if (!field->name.empty())
2010         added_field->set_name(base::UTF16ToUTF8(field->name));
2011 
2012       if (!field->id_attribute.empty())
2013         added_field->set_id(base::UTF16ToUTF8(field->id_attribute));
2014 
2015       if (!field->autocomplete_attribute.empty())
2016         added_field->set_autocomplete(field->autocomplete_attribute);
2017 
2018       if (!field->css_classes.empty())
2019         added_field->set_css_classes(base::UTF16ToUTF8(field->css_classes));
2020     }
2021   }
2022 }
2023 
IsMalformed() const2024 bool FormStructure::IsMalformed() const {
2025   if (!field_count())  // Nothing to add.
2026     return true;
2027 
2028   // Some badly formatted web sites repeat fields - limit number of fields to
2029   // 250, which is far larger than any valid form and proto still fits into 10K.
2030   // Do not send requests for forms with more than this many fields, as they are
2031   // near certainly not valid/auto-fillable.
2032   const size_t kMaxFieldsOnTheForm = 250;
2033   if (field_count() > kMaxFieldsOnTheForm)
2034     return true;
2035   return false;
2036 }
2037 
// Assigns a section name to every field in |fields_| by writing
// |AutofillField::section|.
//
// Does nothing when the form has no fields. Otherwise it works in two passes:
//  1. A heuristic pass, run when |has_author_specified_sections| is false or
//     when the kAutofillUseNewSectioningMethod feature is enabled. It walks
//     the fields in order and starts a new section whenever a field type is
//     encountered that was already seen in the current section (subject to
//     the exceptions documented inline). Fields whose type group is
//     CREDIT_CARD are all put into the single section "credit-card".
//  2. A suffix pass, always run, that appends "-cc" or "-default" to each
//     field's section so that credit card and address fields never share a
//     section.
void FormStructure::IdentifySections(bool has_author_specified_sections) {
  if (fields_.empty())
    return;

  // Feature states are read once up front and reused for every field.
  const bool is_enabled_autofill_new_sectioning =
      base::FeatureList::IsEnabled(features::kAutofillUseNewSectioningMethod);
  const bool is_enabled_autofill_redundant_name_sectioning =
      base::FeatureList::IsEnabled(
          features::kAutofillSectionUponRedundantNameInfo);

  // Creates a unique name for the section that starts with |field|.
  // With kAutofillNameSectionsWithRendererIds enabled the name is
  // "<field name>_<unique renderer id>"; otherwise it is the field's
  // unique_name().
  // TODO(crbug/896689): Cleanup once experiment is launched.
  auto get_section_name = [](const AutofillField& field) {
    if (base::FeatureList::IsEnabled(
            features::kAutofillNameSectionsWithRendererIds)) {
      return base::StrCat(
          {field.name, base::ASCIIToUTF16("_"),
           base::NumberToString16(field.unique_renderer_id.value())});
    } else {
      return field.unique_name();
    }
  };

  if (!has_author_specified_sections || is_enabled_autofill_new_sectioning) {
    // The first section is named after the first field of the form.
    base::string16 current_section = get_section_name(*fields_.front());

    // Keep track of the types we've seen in this section.
    std::set<ServerFieldType> seen_types;
    ServerFieldType previous_type = UNKNOWN_TYPE;

    // Boolean flag that is set to true when a field in the current section
    // has the autocomplete-section attribute defined.
    bool previous_autocomplete_section_present = false;

    // |is_hidden_section| is true while the current section was started by an
    // invisible field; |last_visible_section| remembers the section that was
    // active before it so that it can be resumed.
    bool is_hidden_section = false;
    base::string16 last_visible_section;
    for (const auto& field : fields_) {
      const ServerFieldType current_type = field->Type().GetStorableType();
      // All credit card fields belong to the same section that's different
      // from address sections. They do not update any of the tracking state
      // below.
      if (AutofillType(current_type).group() == CREDIT_CARD) {
        field->section = "credit-card";
        continue;
      }

      bool already_saw_current_type = seen_types.count(current_type) > 0;

      // Forms often ask for multiple phone numbers -- e.g. both a daytime and
      // evening phone number.  Our phone number detection is also generally a
      // little off.  Hence, ignore this field type as a signal here.
      if (AutofillType(current_type).group() == PHONE_HOME)
        already_saw_current_type = false;

      if (is_enabled_autofill_redundant_name_sectioning) {
        // Forms sometimes have a different format of inputting names in
        // different sections. If we believe a new name is being entered, assume
        // it is a new section (unless there are two identical inputs in a row).
        // Concretely: a NAME_FULL field counts as already seen when a
        // NAME_LAST field was seen in the current section.
        if (current_type == NAME_FULL)
          already_saw_current_type |= (seen_types.count(NAME_LAST) > 0);
      }

      // Invisible fields are "ignored": they are not recorded in |seen_types|
      // and do not update |previous_type| (see the end of the loop body).
      bool ignored_field = !field->IsVisible();

      // This is the first visible field after a hidden section. Consider it as
      // the continuation of the last visible section.
      if (!ignored_field && is_hidden_section) {
        current_section = last_visible_section;
      }

      // An ignored field may only start a new section when we are not already
      // inside a hidden section and the type of the field that follows it has
      // also already been seen. In every other case the ignored field is not
      // treated as a repeated type.
      // |field_index| is derived from the element's address.
      // NOTE(review): this assumes |fields_| stores its elements contiguously
      // (e.g. a std::vector) -- confirm against the member declaration.
      size_t field_index = &field - &fields_[0];
      if (ignored_field &&
          (is_hidden_section ||
           !((field_index + 1) < fields_.size() &&
             seen_types.count(
                 fields_[field_index + 1]->Type().GetStorableType()) > 0))) {
        already_saw_current_type = false;
      }

      // Some forms have adjacent fields of the same type.  Two common examples:
      //  * Forms with two email fields, where the second is meant to "confirm"
      //    the first.
      //  * Forms with a <select> menu for states in some countries, and a
      //    freeform <input> field for states in other countries.  (Usually,
      //    only one of these two will be visible for any given choice of
      //    country.)
      // Generally, adjacent fields of the same type belong in the same logical
      // section.
      if (current_type == previous_type)
        already_saw_current_type = false;

      // Boolean flag that is set to true when the |field| has
      // autocomplete-section attribute defined, i.e. its section differs from
      // |kDefaultSection|. Only evaluated with the new sectioning method.
      bool autocomplete_section_attribute_present = false;
      if (is_enabled_autofill_new_sectioning)
        autocomplete_section_attribute_present =
            (field->section != kDefaultSection);

      // Boolean flag that is set to true when the |field| has
      // autocomplete-section attribute defined and is different than the
      // previous field. The very first field has no previous field and
      // therefore always counts as different.
      bool different_autocomplete_section_than_previous = false;
      if (is_enabled_autofill_new_sectioning) {
        different_autocomplete_section_than_previous =
            (autocomplete_section_attribute_present &&
             (!field_index ||
              fields_[field_index - 1]->section != field->section));
      }

      // Start a new section if the |current_type| was already seen or the
      // autocomplete-section attribute is defined for the |field| which is
      // different than the previous field. Fields of UNKNOWN_TYPE never start
      // a new section.
      if (current_type != UNKNOWN_TYPE &&
          (already_saw_current_type ||
           (is_enabled_autofill_new_sectioning &&
            different_autocomplete_section_than_previous))) {
        // Keep track of seen_types if the new section is hidden. The next
        // visible section might be the continuation of the previous visible
        // section.
        if (ignored_field) {
          is_hidden_section = true;
          last_visible_section = current_section;
        }

        // Forget the seen types for a new visible section, unless the new
        // sectioning method is enabled and the field carries the same
        // autocomplete section as the previous field.
        if (!is_hidden_section &&
            (!is_enabled_autofill_new_sectioning ||
             !autocomplete_section_attribute_present ||
             different_autocomplete_section_than_previous))
          seen_types.clear();

        if (is_enabled_autofill_new_sectioning &&
            autocomplete_section_attribute_present &&
            !previous_autocomplete_section_present) {
          // If this field is the first field within the section with a defined
          // autocomplete section, then change the section attribute of all the
          // parsed fields in the current section to |field->section|.
          // The walk goes backwards from the field preceding |field| and stops
          // at the first field that belongs to a different section.
          // NOTE(review): for |field_index| == 0 the subtraction wraps around
          // and the cast is expected to yield -1, so the loop does not run --
          // confirm this holds on all supported toolchains.
          int i = static_cast<int>(field_index - 1);
          while (i >= 0 &&
                 base::UTF8ToUTF16(fields_[i]->section) == current_section) {
            fields_[i]->section = field->section;
            i--;
          }
        }

        // The end of a section, so start a new section.
        current_section = get_section_name(*field);

        if (is_enabled_autofill_new_sectioning) {
          // The section described in the autocomplete section attribute
          // overrides the value determined by the heuristic.
          if (autocomplete_section_attribute_present)
            current_section = base::UTF8ToUTF16(field->section);

          previous_autocomplete_section_present =
              autocomplete_section_attribute_present;
        }
      }

      // Only consider a type "seen" if it was not ignored. Some forms have
      // sections for different locales, only one of which is enabled at a
      // time. Each section may duplicate some information (e.g. postal code)
      // and we don't want that to cause section splits.
      // Also only set |previous_type| when the field was not ignored. This
      // prevents ignored fields from breaking up fields that are otherwise
      // adjacent.
      if (!ignored_field) {
        seen_types.insert(current_type);
        previous_type = current_type;
        is_hidden_section = false;
      }

      field->section = base::UTF16ToUTF8(current_section);
    }
  }

  // Ensure that credit card and address fields are in separate sections.
  // This simplifies the section-aware logic in autofill_manager.cc.
  // Every field gets a suffix: "-cc" when its type group is CREDIT_CARD and
  // "-default" otherwise.
  for (const auto& field : fields_) {
    FieldTypeGroup field_type_group = field->Type().group();
    if (field_type_group == CREDIT_CARD)
      field->section = field->section + "-cc";
    else
      field->section = field->section + "-default";
  }
}
2224 
ShouldSkipField(const FormFieldData & field) const2225 bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
2226   return IsCheckable(field.check_status);
2227 }
2228 
ProcessExtractedFields()2229 void FormStructure::ProcessExtractedFields() {
2230   if (base::FeatureList::IsEnabled(
2231           autofill::features::kAutofillLabelAffixRemoval)) {
2232     // Updates the field name parsed by heuristics if several criteria are met.
2233     // Several fields must be present in the form.
2234     if (field_count() < kCommonNameAffixRemovalFieldNumberThreshold)
2235       return;
2236 
2237     std::vector<base::StringPiece16> names;
2238     names.reserve(field_count());
2239     for (const auto& field : *this)
2240       names.push_back(field->name);
2241 
2242     int longest_prefix_length = FindLongestCommonAffixLength(names, false);
2243     int longest_suffix_length = FindLongestCommonAffixLength(names, true);
2244 
2245     // Don't remove the common affix if it's not long enough.
2246     if (longest_prefix_length < kMinCommonNameAffixLength)
2247       longest_prefix_length = 0;
2248 
2249     if (longest_suffix_length < kMinCommonNameAffixLength)
2250       longest_suffix_length = 0;
2251 
2252     bool success =
2253         SetStrippedParseableNames(longest_prefix_length, longest_suffix_length);
2254 
2255     // Don't search for inconsistent prefix if valid affixes are found.
2256     if (success && longest_prefix_length + longest_suffix_length > 0)
2257       return;
2258 
2259     // Functionality for stripping a prefix only common to a subset
2260     // of field names.
2261     // This is needed because an exceptional field may be missing a prefix
2262     // which is otherwise consistently applied--for instance, a framework
2263     // may only apply a prefix to those fields which are bound when POSTing.
2264     names.clear();
2265     for (const auto& field : *this)
2266       if (field->name.size() > kMinCommonNameLongPrefixLength)
2267         names.push_back(field->name);
2268 
2269     if (names.size() < kCommonNamePrefixRemovalFieldThreshold)
2270       return;
2271 
2272     const int longest_long_prefix_length =
2273         FindLongestCommonAffixLength(names, false);
2274 
2275     if (longest_long_prefix_length >= kMinCommonNameLongPrefixLength)
2276       SetStrippedParseableNames(longest_long_prefix_length, 0);
2277 
2278     return;
2279   }
2280 
2281   // Update the field name parsed by heuristics if several criteria are met.
2282   // Several fields must be present in the form.
2283   if (field_count() < kCommonNamePrefixRemovalFieldThreshold)
2284     return;
2285 
2286   // Find the longest common prefix within all the field names.
2287   std::vector<base::string16> names;
2288   names.reserve(field_count());
2289   for (const auto& field : *this)
2290     names.push_back(field->name);
2291 
2292   const base::string16 longest_prefix = FindLongestCommonPrefix(names);
2293   if (longest_prefix.size() < kMinCommonNamePrefixLength)
2294     return;
2295 
2296   // The name without the prefix will be used for heuristics parsing.
2297   for (auto& field : *this) {
2298     if (field->name.size() > longest_prefix.size()) {
2299       field->set_parseable_name(
2300           field->name.substr(longest_prefix.size(), field->name.size()));
2301     }
2302   }
2303 }
2304 
SetStrippedParseableNames(size_t offset_left,size_t offset_right)2305 bool FormStructure::SetStrippedParseableNames(size_t offset_left,
2306                                               size_t offset_right) {
2307   // Keeps track if all stripped strings are valid according to
2308   // |IsValidParseableName()|. If at least one string is invalid,
2309   // all |parseable_name| are reset to |name|.
2310   bool should_keep = true;
2311   for (auto& field : *this) {
2312     // This check allows to only strip affixes from long enough strings.
2313     if (field->name.size() > offset_right + offset_left) {
2314       field->set_parseable_name(field->name.substr(
2315           offset_left, field->name.size() - offset_right - offset_left));
2316     } else {
2317       field->set_parseable_name(field->name);
2318     }
2319 
2320     should_keep &= IsValidParseableName(field->parseable_name());
2321     if (!should_keep)
2322       break;
2323   }
2324 
2325   // Reset if some stripped string was invalid.
2326   if (!should_keep) {
2327     for (auto& field : *this)
2328       field->set_parseable_name(field->name);
2329   }
2330 
2331   return should_keep;
2332 }
2333 
IsValidParseableName(base::string16 candidateParseableName)2334 bool FormStructure::IsValidParseableName(
2335     base::string16 candidateParseableName) {
2336   static const base::string16 kParseableNameValidationPattern =
2337       base::UTF8ToUTF16(kParseableNameValidationRe);
2338   if (MatchesPattern(candidateParseableName, kParseableNameValidationPattern))
2339     return true;
2340 
2341   return false;
2342 }
2343 
2344 // static
FindLongestCommonAffixLength(const std::vector<base::StringPiece16> & strings,bool findCommonSuffix)2345 size_t FormStructure::FindLongestCommonAffixLength(
2346     const std::vector<base::StringPiece16>& strings,
2347     bool findCommonSuffix) {
2348   if (strings.empty())
2349     return 0;
2350 
2351   // Go through each character of the first string until there is a mismatch at
2352   // the same position in any other string. Adapted from http://goo.gl/YGukMM.
2353   for (size_t affix_len = 0; affix_len < strings[0].size(); affix_len++) {
2354     size_t base_string_index =
2355         findCommonSuffix ? strings[0].size() - affix_len - 1 : affix_len;
2356     for (size_t i = 1; i < strings.size(); i++) {
2357       size_t compared_string_index =
2358           findCommonSuffix ? strings[i].size() - affix_len - 1 : affix_len;
2359       if (affix_len >= strings[i].size() ||
2360           strings[i][compared_string_index] != strings[0][base_string_index]) {
2361         // Mismatch found.
2362         return affix_len;
2363       }
2364     }
2365   }
2366   return strings[0].size();
2367 }
2368 
2369 // static
FindLongestCommonPrefix(const std::vector<base::string16> & strings)2370 base::string16 FormStructure::FindLongestCommonPrefix(
2371     const std::vector<base::string16>& strings) {
2372   if (strings.empty())
2373     return base::string16();
2374 
2375   std::vector<base::string16> filtered_strings;
2376 
2377   // Any strings less than kMinCommonNamePrefixLength are neither modified
2378   // nor considered when processing for a common prefix.
2379   std::copy_if(
2380       strings.begin(), strings.end(), std::back_inserter(filtered_strings),
2381       [](base::string16 s) { return s.size() >= kMinCommonNamePrefixLength; });
2382 
2383   if (filtered_strings.empty())
2384     return base::string16();
2385 
2386   // Go through each character of the first string until there is a mismatch at
2387   // the same position in any other string. Adapted from http://goo.gl/YGukMM.
2388   for (size_t prefix_len = 0; prefix_len < filtered_strings[0].size();
2389        prefix_len++) {
2390     for (size_t i = 1; i < filtered_strings.size(); i++) {
2391       if (prefix_len >= filtered_strings[i].size() ||
2392           filtered_strings[i].at(prefix_len) !=
2393               filtered_strings[0].at(prefix_len)) {
2394         // Mismatch found.
2395         return filtered_strings[i].substr(0, prefix_len);
2396       }
2397     }
2398   }
2399   return filtered_strings[0];
2400 }
2401 
GetFormTypes() const2402 std::set<FormType> FormStructure::GetFormTypes() const {
2403   std::set<FormType> form_types;
2404   for (const auto& field : fields_) {
2405     form_types.insert(
2406         FormTypes::FieldTypeGroupToFormType(field->Type().group()));
2407   }
2408   return form_types;
2409 }
2410 
GetIdentifierForRefill() const2411 base::string16 FormStructure::GetIdentifierForRefill() const {
2412   if (!form_name().empty())
2413     return form_name();
2414 
2415   if (field_count() && !field(0)->unique_name().empty())
2416     return field(0)->unique_name();
2417 
2418   return base::string16();
2419 }
2420 
// Takes ownership of |encoder| and stores it in |randomized_encoder_|,
// replacing whatever encoder was stored before. |encoder| may be null, in
// which case |randomized_encoder_| becomes null as well.
void FormStructure::set_randomized_encoder(
    std::unique_ptr<RandomizedEncoder> encoder) {
  randomized_encoder_ = std::move(encoder);
}
2425 
RationalizeTypeRelationships()2426 void FormStructure::RationalizeTypeRelationships() {
2427   // Create a local set of all the types for faster lookup.
2428   std::unordered_set<ServerFieldType> types;
2429   for (const auto& field : fields_) {
2430     types.insert(field->Type().GetStorableType());
2431   }
2432 
2433   const auto& type_relationship_rules = GetTypeRelationshipMap();
2434 
2435   for (const auto& field : fields_) {
2436     ServerFieldType field_type = field->Type().GetStorableType();
2437     const auto& ruleset_iterator = type_relationship_rules.find(field_type);
2438     if (ruleset_iterator != type_relationship_rules.end()) {
2439       // We have relationship rules for this type. Verify that at least one of
2440       // the required related type is present.
2441       bool found = false;
2442       for (ServerFieldType required_type : ruleset_iterator->second) {
2443         if (types.find(required_type) != types.end()) {
2444           // Found a required type, we can break as we only need one required
2445           // type to respect the rule.
2446           found = true;
2447           break;
2448         }
2449       }
2450 
2451       if (!found) {
2452         // No required type was found, the current field failed the relationship
2453         // requirements for its type. Disabling Autofill for this field.
2454         field->SetTypeTo(AutofillType(UNKNOWN_TYPE));
2455       }
2456     }
2457   }
2458 }
2459 
operator <<(std::ostream & buffer,const FormStructure & form)2460 std::ostream& operator<<(std::ostream& buffer, const FormStructure& form) {
2461   buffer << "\nForm signature: "
2462          << base::StrCat({base::NumberToString(form.form_signature().value()),
2463                           " - ",
2464                           base::NumberToString(
2465                               HashFormSignature(form.form_signature()))});
2466   buffer << "\n Form name: " << form.form_name();
2467   buffer << "\n Unique renderer Id: " << form.unique_renderer_id().value();
2468   buffer << "\n Target URL:" << form.target_url();
2469   for (size_t i = 0; i < form.field_count(); ++i) {
2470     buffer << "\n Field " << i << ": ";
2471     const AutofillField* field = form.field(i);
2472     buffer << "\n  Signature: "
2473            << base::StrCat(
2474                   {base::NumberToString(field->GetFieldSignature().value()),
2475                    " - ",
2476                    base::NumberToString(
2477                        HashFieldSignature(field->GetFieldSignature())),
2478                    ", unique renderer id: ",
2479                    base::NumberToString(field->unique_renderer_id.value())});
2480     buffer << "\n  Name: " << field->parseable_name();
2481 
2482     auto type = field->Type().ToString();
2483     auto heuristic_type = AutofillType(field->heuristic_type()).ToString();
2484     auto server_type = AutofillType(field->server_type()).ToString();
2485     auto html_type_description =
2486         field->html_type() != HTML_TYPE_UNSPECIFIED
2487             ? base::StrCat(
2488                   {", html: ", FieldTypeToStringPiece(field->html_type())})
2489             : "";
2490 
2491     buffer << "\n  Type: "
2492            << base::StrCat({type, " (heuristic: ", heuristic_type, ", server: ",
2493                             server_type, html_type_description, ")"});
2494     buffer << "\n  Section: " << field->section;
2495 
2496     constexpr size_t kMaxLabelSize = 100;
2497     const base::string16 truncated_label =
2498         field->label.substr(0, std::min(field->label.length(), kMaxLabelSize));
2499     buffer << "\n  Label: " << truncated_label;
2500 
2501     buffer << "\n  Is empty: " << (field->IsEmpty() ? "Yes" : "No");
2502   }
2503   return buffer;
2504 }
2505 
operator <<(LogBuffer & buffer,const FormStructure & form)2506 LogBuffer& operator<<(LogBuffer& buffer, const FormStructure& form) {
2507   buffer << Tag{"div"} << Attrib{"class", "form"};
2508   buffer << Tag{"table"};
2509   buffer << Tr{} << "Form signature:"
2510          << base::StrCat({base::NumberToString(form.form_signature().value()),
2511                           " - ",
2512                           base::NumberToString(
2513                               HashFormSignature(form.form_signature()))});
2514   buffer << Tr{} << "Form name:" << form.form_name();
2515   buffer << Tr{} << "Unique renderer id:" << form.unique_renderer_id().value();
2516   buffer << Tr{} << "Target URL:" << form.target_url();
2517   for (size_t i = 0; i < form.field_count(); ++i) {
2518     buffer << Tag{"tr"};
2519     buffer << Tag{"td"} << "Field " << i << ": " << CTag{};
2520     const AutofillField* field = form.field(i);
2521     buffer << Tag{"td"};
2522     buffer << Tag{"table"};
2523     buffer << Tr{} << "Signature:"
2524            << base::StrCat(
2525                   {base::NumberToString(field->GetFieldSignature().value()),
2526                    " - ",
2527                    base::NumberToString(
2528                        HashFieldSignature(field->GetFieldSignature())),
2529                    ", unique renderer id: ",
2530                    base::NumberToString(field->unique_renderer_id.value())});
2531     buffer << Tr{} << "Name:" << field->parseable_name();
2532 
2533     auto type = field->Type().ToString();
2534     auto heuristic_type = AutofillType(field->heuristic_type()).ToString();
2535     auto server_type = AutofillType(field->server_type()).ToString();
2536     auto html_type_description =
2537         field->html_type() != HTML_TYPE_UNSPECIFIED
2538             ? base::StrCat(
2539                   {", html: ", FieldTypeToStringPiece(field->html_type())})
2540             : "";
2541 
2542     buffer << Tr{} << "Type:"
2543            << base::StrCat({type, " (heuristic: ", heuristic_type, ", server: ",
2544                             server_type, html_type_description, ")"});
2545     buffer << Tr{} << "Section:" << field->section;
2546 
2547     constexpr size_t kMaxLabelSize = 100;
2548     const base::string16 truncated_label =
2549         field->label.substr(0, std::min(field->label.length(), kMaxLabelSize));
2550     buffer << Tr{} << "Label:" << truncated_label;
2551 
2552     buffer << Tr{} << "Is empty:" << (field->IsEmpty() ? "Yes" : "No");
2553     buffer << CTag{"table"};
2554     buffer << CTag{"td"};
2555     buffer << CTag{"tr"};
2556   }
2557   buffer << CTag{"table"};
2558   buffer << CTag{"div"};
2559   return buffer;
2560 }
2561 
2562 }  // namespace autofill
2563