1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/autofill/core/browser/form_parsing/form_field.h"
6 
7 #include <algorithm>
8 #include <cstddef>
9 #include <iterator>
10 #include <memory>
11 #include <string>
12 #include <utility>
13 
14 #include "base/feature_list.h"
15 #include "base/strings/string_piece.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "components/autofill/core/browser/autofill_field.h"
20 #include "components/autofill/core/browser/autofill_regexes.h"
21 #include "components/autofill/core/browser/autofill_type.h"
22 #include "components/autofill/core/browser/form_parsing/address_field.h"
23 #include "components/autofill/core/browser/form_parsing/autofill_scanner.h"
24 #include "components/autofill/core/browser/form_parsing/credit_card_field.h"
25 #include "components/autofill/core/browser/form_parsing/email_field.h"
26 #include "components/autofill/core/browser/form_parsing/name_field.h"
27 #include "components/autofill/core/browser/form_parsing/phone_field.h"
28 #include "components/autofill/core/browser/form_parsing/price_field.h"
29 #include "components/autofill/core/browser/form_parsing/search_field.h"
30 #include "components/autofill/core/browser/form_parsing/travel_field.h"
31 #include "components/autofill/core/browser/form_structure.h"
32 #include "components/autofill/core/browser/logging/log_manager.h"
33 #include "components/autofill/core/common/autofill_constants.h"
34 #include "components/autofill/core/common/autofill_features.h"
35 #include "components/autofill/core/common/autofill_internals/log_message.h"
36 #include "components/autofill/core/common/autofill_internals/logging_scope.h"
37 #include "components/autofill/core/common/autofill_util.h"
38 
39 namespace autofill {
40 
// There's an implicit precedence determined by the values assigned here; the
// constants are listed from the largest value to the smallest. Email is
// currently the most important, followed by Phone, Travel, Address,
// Credit Card, Price, Name, and Search.
const float FormField::kBaseEmailParserScore = 1.4f;
const float FormField::kBasePhoneParserScore = 1.3f;
const float FormField::kBaseTravelParserScore = 1.2f;
const float FormField::kBaseAddressParserScore = 1.1f;
const float FormField::kBaseCreditCardParserScore = 1.0f;
const float FormField::kBasePriceParserScore = 0.95f;
const float FormField::kBaseNameParserScore = 0.9f;
const float FormField::kBaseSearchParserScore = 0.8f;
52 
53 // static
ParseFormFields(const std::vector<std::unique_ptr<AutofillField>> & fields,const std::string & page_language,bool is_form_tag,LogManager * log_manager)54 FieldCandidatesMap FormField::ParseFormFields(
55     const std::vector<std::unique_ptr<AutofillField>>& fields,
56     const std::string& page_language,
57     bool is_form_tag,
58     LogManager* log_manager) {
59   // Set up a working copy of the fields to be processed.
60   std::vector<AutofillField*> processed_fields;
61   for (const auto& field : fields) {
62     // Ignore checkable fields as they interfere with parsers assuming context.
63     // Eg., while parsing address, "Is PO box" checkbox after ADDRESS_LINE1
64     // interferes with correctly understanding ADDRESS_LINE2.
65     // Ignore fields marked as presentational, unless for 'select' fields (for
66     // synthetic fields.)
67     if (IsCheckable(field->check_status) ||
68         (field->role == FormFieldData::RoleAttribute::kPresentation &&
69          field->form_control_type != "select-one")) {
70       continue;
71     }
72     processed_fields.push_back(field.get());
73   }
74 
75   FieldCandidatesMap field_candidates;
76 
77   // Email pass.
78   ParseFormFieldsPass(EmailField::Parse, processed_fields, &field_candidates,
79                       page_language, log_manager);
80   const size_t email_count = field_candidates.size();
81 
82   // Phone pass.
83   ParseFormFieldsPass(PhoneField::Parse, processed_fields, &field_candidates,
84                       page_language, log_manager);
85 
86   // Travel pass.
87   ParseFormFieldsPass(TravelField::Parse, processed_fields, &field_candidates,
88                       page_language, log_manager);
89 
90   // Address pass.
91   ParseFormFieldsPass(autofill::AddressField::Parse, processed_fields,
92                       &field_candidates, page_language, log_manager);
93 
94   // Credit card pass.
95   ParseFormFieldsPass(CreditCardField::Parse, processed_fields,
96                       &field_candidates, page_language, log_manager);
97 
98   // Price pass.
99   ParseFormFieldsPass(PriceField::Parse, processed_fields, &field_candidates,
100                       page_language, log_manager);
101 
102   // Name pass.
103   ParseFormFieldsPass(NameField::Parse, processed_fields, &field_candidates,
104                       page_language, log_manager);
105 
106   // Search pass.
107   ParseFormFieldsPass(SearchField::Parse, processed_fields, &field_candidates,
108                       page_language, log_manager);
109 
110   size_t fillable_fields = 0;
111   if (base::FeatureList::IsEnabled(features::kAutofillFixFillableFieldTypes)) {
112     for (const auto& candidate : field_candidates) {
113       if (IsFillableFieldType(candidate.second.BestHeuristicType()))
114         ++fillable_fields;
115     }
116   } else {
117     fillable_fields = field_candidates.size();
118   }
119 
120   // Do not autofill a form if there aren't enough fields. Otherwise, it is
121   // very easy to have false positives. See http://crbug.com/447332
122   // For <form> tags, make an exception for email fields, which are commonly
123   // the only recognized field on account registration sites.
124   const bool accept_parsing =
125       fillable_fields >= kMinRequiredFieldsForHeuristics ||
126       (is_form_tag && email_count > 0);
127 
128   if (!accept_parsing) {
129     if (log_manager) {
130       LogBuffer table_rows;
131       for (const auto& field : fields) {
132         table_rows << Tr{} << "Field:" << *field;
133       }
134       for (const auto& candidate : field_candidates) {
135         LogBuffer name;
136         name << "Type candidate for: " << candidate.first;
137         LogBuffer description;
138         ServerFieldType field_type = candidate.second.BestHeuristicType();
139         description << "BestHeuristicType: "
140                     << AutofillType::ServerFieldTypeToString(field_type)
141                     << ", is fillable: " << IsFillableFieldType(field_type);
142         table_rows << Tr{} << std::move(name) << std::move(description);
143       }
144       log_manager->Log()
145           << LoggingScope::kParsing
146           << LogMessage::kLocalHeuristicDidNotFindEnoughFillableFields
147           << Tag{"table"} << Attrib{"class", "form"} << std::move(table_rows)
148           << CTag{"table"};
149     }
150     field_candidates.clear();
151   }
152 
153   return field_candidates;
154 }
155 
156 // static
ParseField(AutofillScanner * scanner,const base::string16 & pattern,AutofillField ** match,const RegExLogging & logging)157 bool FormField::ParseField(AutofillScanner* scanner,
158                            const base::string16& pattern,
159                            AutofillField** match,
160                            const RegExLogging& logging) {
161   return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, match, logging);
162 }
163 
ParseField(AutofillScanner * scanner,const std::vector<MatchingPattern> & patterns,AutofillField ** match,const RegExLogging & logging)164 bool FormField::ParseField(AutofillScanner* scanner,
165                            const std::vector<MatchingPattern>& patterns,
166                            AutofillField** match,
167                            const RegExLogging& logging) {
168   return ParseFieldSpecifics(scanner, patterns, match, logging);
169 }
170 
ParseField(AutofillScanner * scanner,const base::string16 & pattern,const std::vector<MatchingPattern> & patterns,AutofillField ** match,const RegExLogging & logging)171 bool FormField::ParseField(AutofillScanner* scanner,
172                            const base::string16& pattern,
173                            const std::vector<MatchingPattern>& patterns,
174                            AutofillField** match,
175                            const RegExLogging& logging) {
176   if (base::FeatureList::IsEnabled(
177           features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
178       base::FeatureList::IsEnabled(
179           features::
180               kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
181     return ParseField(scanner, patterns, match, logging);
182   } else {
183     return ParseField(scanner, pattern, match, logging);
184   }
185 }
186 
ParseFieldSpecifics(AutofillScanner * scanner,const base::string16 & pattern,int match_field_attributes,int match_field_input_types,AutofillField ** match,const RegExLogging & logging)187 bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
188                                     const base::string16& pattern,
189                                     int match_field_attributes,
190                                     int match_field_input_types,
191                                     AutofillField** match,
192                                     const RegExLogging& logging) {
193   if (scanner->IsEnd())
194     return false;
195 
196   const AutofillField* field = scanner->Cursor();
197 
198   if (!MatchesFormControlType(field->form_control_type,
199                               match_field_input_types))
200     return false;
201 
202   return MatchAndAdvance(scanner, pattern, match_field_attributes,
203                          match_field_input_types, match, logging);
204 }
205 
ParseFieldSpecifics(AutofillScanner * scanner,const std::vector<MatchingPattern> & patterns,AutofillField ** match,const RegExLogging & logging)206 bool FormField::ParseFieldSpecifics(
207     AutofillScanner* scanner,
208     const std::vector<MatchingPattern>& patterns,
209     AutofillField** match,
210     const RegExLogging& logging) {
211   if (scanner->IsEnd())
212     return false;
213 
214   const AutofillField* field = scanner->Cursor();
215 
216   for (const auto& pattern : patterns) {
217     if (!MatchesFormControlType(field->form_control_type,
218                                 pattern.match_field_input_types)) {
219       continue;
220     }
221 
222     // TODO(crbug.com/1132831): Remove feature check once launched.
223     if (base::FeatureList::IsEnabled(
224             features::
225                 kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
226       if (pattern.negative_pattern.has_value() &&
227           FormField::Match(field,
228                            base::UTF8ToUTF16(pattern.negative_pattern.value()),
229                            pattern.match_field_attributes,
230                            pattern.match_field_input_types, logging)) {
231         continue;
232       }
233     }
234 
235     if (MatchAndAdvance(scanner, base::UTF8ToUTF16(pattern.positive_pattern),
236                         pattern.match_field_attributes,
237                         pattern.match_field_input_types, match, logging)) {
238       return true;
239     }
240   }
241   return false;
242 }
243 
244 // static
ParseFieldSpecifics(AutofillScanner * scanner,const base::string16 & pattern,int match_type,AutofillField ** match,const RegExLogging & logging)245 bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
246                                     const base::string16& pattern,
247                                     int match_type,
248                                     AutofillField** match,
249                                     const RegExLogging& logging) {
250   int match_field_attributes = match_type & 0b11;
251   int match_field_types = match_type & ~0b11;
252 
253   return ParseFieldSpecifics(scanner, pattern, match_field_attributes,
254                              match_field_types, match, logging);
255 }
256 
ParseFieldSpecifics(AutofillScanner * scanner,const base::string16 & pattern,int match_type,const std::vector<MatchingPattern> & patterns,AutofillField ** match,const RegExLogging & logging,MatchFieldBitmasks match_field_bitmasks)257 bool FormField::ParseFieldSpecifics(
258     AutofillScanner* scanner,
259     const base::string16& pattern,
260     int match_type,
261     const std::vector<MatchingPattern>& patterns,
262     AutofillField** match,
263     const RegExLogging& logging,
264     MatchFieldBitmasks match_field_bitmasks) {
265   if (base::FeatureList::IsEnabled(
266           features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
267       base::FeatureList::IsEnabled(
268           features::
269               kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
270     // TODO(crbug/1142936): This hack is to allow
271     // AddressField::ParseNameAndLabelSeparately().
272     if (match_field_bitmasks.restrict_attributes != ~0 ||
273         match_field_bitmasks.augment_types != 0) {
274       std::vector<MatchingPattern> patterns_with_restricted_match_type =
275           patterns;
276       for (MatchingPattern& mp : patterns_with_restricted_match_type) {
277         mp.match_field_attributes &= match_field_bitmasks.restrict_attributes;
278         mp.match_field_input_types |= match_field_bitmasks.augment_types;
279       }
280       return ParseFieldSpecifics(scanner, patterns_with_restricted_match_type,
281                                  match, logging);
282     }
283     return ParseFieldSpecifics(scanner, patterns, match, logging);
284   } else {
285     return ParseFieldSpecifics(scanner, pattern, match_type, match, logging);
286   }
287 }
288 
289 // static
ParseEmptyLabel(AutofillScanner * scanner,AutofillField ** match)290 bool FormField::ParseEmptyLabel(AutofillScanner* scanner,
291                                 AutofillField** match) {
292   return ParseFieldSpecifics(scanner, base::ASCIIToUTF16("^$"),
293                              MATCH_LABEL | MATCH_ALL_INPUTS, match);
294 }
295 
296 // static
AddClassification(const AutofillField * field,ServerFieldType type,float score,FieldCandidatesMap * field_candidates)297 void FormField::AddClassification(const AutofillField* field,
298                                   ServerFieldType type,
299                                   float score,
300                                   FieldCandidatesMap* field_candidates) {
301   // Several fields are optional.
302   if (field == nullptr)
303     return;
304 
305   FieldCandidates& candidates = (*field_candidates)[field->unique_name()];
306   candidates.AddFieldCandidate(type, score);
307 }
308 
MatchAndAdvance(AutofillScanner * scanner,const base::string16 & pattern,int match_field_attributes,int match_field_input_types,AutofillField ** match,const RegExLogging & logging)309 bool FormField::MatchAndAdvance(AutofillScanner* scanner,
310                                 const base::string16& pattern,
311                                 int match_field_attributes,
312                                 int match_field_input_types,
313                                 AutofillField** match,
314                                 const RegExLogging& logging) {
315   AutofillField* field = scanner->Cursor();
316   if (FormField::Match(field, pattern, match_field_attributes,
317                        match_field_input_types, logging)) {
318     if (match)
319       *match = field;
320     scanner->Advance();
321     return true;
322   }
323 
324   return false;
325 }
326 
327 // static
MatchAndAdvance(AutofillScanner * scanner,const base::string16 & pattern,int match_type,AutofillField ** match,const RegExLogging & logging)328 bool FormField::MatchAndAdvance(AutofillScanner* scanner,
329                                 const base::string16& pattern,
330                                 int match_type,
331                                 AutofillField** match,
332                                 const RegExLogging& logging) {
333   int match_field_attributes = match_type & 0b11;
334   int match_field_types = match_type & ~0b11;
335 
336   return MatchAndAdvance(scanner, pattern, match_field_attributes,
337                          match_field_types, match, logging);
338 }
339 
Match(const AutofillField * field,const base::string16 & pattern,int match_field_attributes,int match_field_input_types,const RegExLogging & logging)340 bool FormField::Match(const AutofillField* field,
341                       const base::string16& pattern,
342                       int match_field_attributes,
343                       int match_field_input_types,
344                       const RegExLogging& logging) {
345   bool found_match = false;
346   base::StringPiece match_type_string;
347   base::StringPiece16 value;
348   base::string16 match;
349 
350   if ((match_field_attributes & MATCH_LABEL) &&
351       MatchesPattern(field->label, pattern, &match)) {
352     found_match = true;
353     match_type_string = "Match in label";
354     value = field->label;
355   } else if ((match_field_attributes & MATCH_NAME) &&
356              MatchesPattern(field->parseable_name(), pattern, &match)) {
357     found_match = true;
358     match_type_string = "Match in name";
359     value = field->parseable_name();
360   }
361 
362   if (found_match && logging.log_manager) {
363     LogBuffer table_rows;
364     table_rows << Tr{} << "Match type:" << match_type_string;
365     table_rows << Tr{} << "RegEx:" << logging.regex_name;
366     table_rows << Tr{} << "Value: " << HighlightValue(value, match);
367     // The matched substring is reported once more as the highlighting is not
368     // particularly copy&paste friendly.
369     table_rows << Tr{} << "Matched substring: " << match;
370     logging.log_manager->Log()
371         << LoggingScope::kParsing << LogMessage::kLocalHeuristicRegExMatched
372         << Tag{"table"} << std::move(table_rows) << CTag{"table"};
373   }
374 
375   return found_match;
376 }
377 
378 // static
Match(const AutofillField * field,const base::string16 & pattern,int match_type,const RegExLogging & logging)379 bool FormField::Match(const AutofillField* field,
380                       const base::string16& pattern,
381                       int match_type,
382                       const RegExLogging& logging) {
383   int match_field_attributes = match_type & 0b11;
384   int match_field_types = match_type & ~0b11;
385 
386   return Match(field, pattern, match_field_attributes, match_field_types,
387                logging);
388 }
389 
390 // static
ParseFormFieldsPass(ParseFunction parse,const std::vector<AutofillField * > & fields,FieldCandidatesMap * field_candidates,const std::string & page_language,LogManager * log_manager)391 void FormField::ParseFormFieldsPass(ParseFunction parse,
392                                     const std::vector<AutofillField*>& fields,
393                                     FieldCandidatesMap* field_candidates,
394                                     const std::string& page_language,
395                                     LogManager* log_manager) {
396   AutofillScanner scanner(fields);
397   while (!scanner.IsEnd()) {
398     std::unique_ptr<FormField> form_field =
399         parse(&scanner, page_language, log_manager);
400     if (form_field == nullptr) {
401       scanner.Advance();
402     } else {
403       // Add entries into |field_candidates| for each field type found in
404       // |fields|.
405       form_field->AddClassifications(field_candidates);
406     }
407   }
408 }
409 
MatchesFormControlType(const std::string & type,int match_type)410 bool FormField::MatchesFormControlType(const std::string& type,
411                                        int match_type) {
412   if ((match_type & MATCH_TEXT) && type == "text")
413     return true;
414 
415   if ((match_type & MATCH_EMAIL) && type == "email")
416     return true;
417 
418   if ((match_type & MATCH_TELEPHONE) && type == "tel")
419     return true;
420 
421   if ((match_type & MATCH_SELECT) && type == "select-one")
422     return true;
423 
424   if ((match_type & MATCH_TEXT_AREA) && type == "textarea")
425     return true;
426 
427   if ((match_type & MATCH_PASSWORD) && type == "password")
428     return true;
429 
430   if ((match_type & MATCH_NUMBER) && type == "number")
431     return true;
432 
433   if ((match_type & MATCH_SEARCH) && type == "search")
434     return true;
435 
436   return false;
437 }
438 
439 }  // namespace autofill
440