1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/core/browser/form_parsing/form_field.h"
6
7 #include <algorithm>
8 #include <cstddef>
9 #include <iterator>
10 #include <memory>
11 #include <string>
12 #include <utility>
13
14 #include "base/feature_list.h"
15 #include "base/strings/string_piece.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "components/autofill/core/browser/autofill_field.h"
20 #include "components/autofill/core/browser/autofill_regexes.h"
21 #include "components/autofill/core/browser/autofill_type.h"
22 #include "components/autofill/core/browser/form_parsing/address_field.h"
23 #include "components/autofill/core/browser/form_parsing/autofill_scanner.h"
24 #include "components/autofill/core/browser/form_parsing/credit_card_field.h"
25 #include "components/autofill/core/browser/form_parsing/email_field.h"
26 #include "components/autofill/core/browser/form_parsing/name_field.h"
27 #include "components/autofill/core/browser/form_parsing/phone_field.h"
28 #include "components/autofill/core/browser/form_parsing/price_field.h"
29 #include "components/autofill/core/browser/form_parsing/search_field.h"
30 #include "components/autofill/core/browser/form_parsing/travel_field.h"
31 #include "components/autofill/core/browser/form_structure.h"
32 #include "components/autofill/core/browser/logging/log_manager.h"
33 #include "components/autofill/core/common/autofill_constants.h"
34 #include "components/autofill/core/common/autofill_features.h"
35 #include "components/autofill/core/common/autofill_internals/log_message.h"
36 #include "components/autofill/core/common/autofill_internals/logging_scope.h"
37 #include "components/autofill/core/common/autofill_util.h"
38
39 namespace autofill {
40
// There's an implicit precedence determined by the values assigned here: a
// larger base score means the parser takes precedence. Email is currently the
// most important, followed by Phone, Travel, Address, Credit Card, Price,
// Name, and Search.
const float FormField::kBaseEmailParserScore = 1.4f;
const float FormField::kBasePhoneParserScore = 1.3f;
const float FormField::kBaseTravelParserScore = 1.2f;
const float FormField::kBaseAddressParserScore = 1.1f;
const float FormField::kBaseCreditCardParserScore = 1.0f;
const float FormField::kBasePriceParserScore = 0.95f;
const float FormField::kBaseNameParserScore = 0.9f;
const float FormField::kBaseSearchParserScore = 0.8f;
52
53 // static
ParseFormFields(const std::vector<std::unique_ptr<AutofillField>> & fields,const std::string & page_language,bool is_form_tag,LogManager * log_manager)54 FieldCandidatesMap FormField::ParseFormFields(
55 const std::vector<std::unique_ptr<AutofillField>>& fields,
56 const std::string& page_language,
57 bool is_form_tag,
58 LogManager* log_manager) {
59 // Set up a working copy of the fields to be processed.
60 std::vector<AutofillField*> processed_fields;
61 for (const auto& field : fields) {
62 // Ignore checkable fields as they interfere with parsers assuming context.
63 // Eg., while parsing address, "Is PO box" checkbox after ADDRESS_LINE1
64 // interferes with correctly understanding ADDRESS_LINE2.
65 // Ignore fields marked as presentational, unless for 'select' fields (for
66 // synthetic fields.)
67 if (IsCheckable(field->check_status) ||
68 (field->role == FormFieldData::RoleAttribute::kPresentation &&
69 field->form_control_type != "select-one")) {
70 continue;
71 }
72 processed_fields.push_back(field.get());
73 }
74
75 FieldCandidatesMap field_candidates;
76
77 // Email pass.
78 ParseFormFieldsPass(EmailField::Parse, processed_fields, &field_candidates,
79 page_language, log_manager);
80 const size_t email_count = field_candidates.size();
81
82 // Phone pass.
83 ParseFormFieldsPass(PhoneField::Parse, processed_fields, &field_candidates,
84 page_language, log_manager);
85
86 // Travel pass.
87 ParseFormFieldsPass(TravelField::Parse, processed_fields, &field_candidates,
88 page_language, log_manager);
89
90 // Address pass.
91 ParseFormFieldsPass(autofill::AddressField::Parse, processed_fields,
92 &field_candidates, page_language, log_manager);
93
94 // Credit card pass.
95 ParseFormFieldsPass(CreditCardField::Parse, processed_fields,
96 &field_candidates, page_language, log_manager);
97
98 // Price pass.
99 ParseFormFieldsPass(PriceField::Parse, processed_fields, &field_candidates,
100 page_language, log_manager);
101
102 // Name pass.
103 ParseFormFieldsPass(NameField::Parse, processed_fields, &field_candidates,
104 page_language, log_manager);
105
106 // Search pass.
107 ParseFormFieldsPass(SearchField::Parse, processed_fields, &field_candidates,
108 page_language, log_manager);
109
110 size_t fillable_fields = 0;
111 if (base::FeatureList::IsEnabled(features::kAutofillFixFillableFieldTypes)) {
112 for (const auto& candidate : field_candidates) {
113 if (IsFillableFieldType(candidate.second.BestHeuristicType()))
114 ++fillable_fields;
115 }
116 } else {
117 fillable_fields = field_candidates.size();
118 }
119
120 // Do not autofill a form if there aren't enough fields. Otherwise, it is
121 // very easy to have false positives. See http://crbug.com/447332
122 // For <form> tags, make an exception for email fields, which are commonly
123 // the only recognized field on account registration sites.
124 const bool accept_parsing =
125 fillable_fields >= kMinRequiredFieldsForHeuristics ||
126 (is_form_tag && email_count > 0);
127
128 if (!accept_parsing) {
129 if (log_manager) {
130 LogBuffer table_rows;
131 for (const auto& field : fields) {
132 table_rows << Tr{} << "Field:" << *field;
133 }
134 for (const auto& candidate : field_candidates) {
135 LogBuffer name;
136 name << "Type candidate for: " << candidate.first;
137 LogBuffer description;
138 ServerFieldType field_type = candidate.second.BestHeuristicType();
139 description << "BestHeuristicType: "
140 << AutofillType::ServerFieldTypeToString(field_type)
141 << ", is fillable: " << IsFillableFieldType(field_type);
142 table_rows << Tr{} << std::move(name) << std::move(description);
143 }
144 log_manager->Log()
145 << LoggingScope::kParsing
146 << LogMessage::kLocalHeuristicDidNotFindEnoughFillableFields
147 << Tag{"table"} << Attrib{"class", "form"} << std::move(table_rows)
148 << CTag{"table"};
149 }
150 field_candidates.clear();
151 }
152
153 return field_candidates;
154 }
155
156 // static
ParseField(AutofillScanner * scanner,const base::string16 & pattern,AutofillField ** match,const RegExLogging & logging)157 bool FormField::ParseField(AutofillScanner* scanner,
158 const base::string16& pattern,
159 AutofillField** match,
160 const RegExLogging& logging) {
161 return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, match, logging);
162 }
163
ParseField(AutofillScanner * scanner,const std::vector<MatchingPattern> & patterns,AutofillField ** match,const RegExLogging & logging)164 bool FormField::ParseField(AutofillScanner* scanner,
165 const std::vector<MatchingPattern>& patterns,
166 AutofillField** match,
167 const RegExLogging& logging) {
168 return ParseFieldSpecifics(scanner, patterns, match, logging);
169 }
170
ParseField(AutofillScanner * scanner,const base::string16 & pattern,const std::vector<MatchingPattern> & patterns,AutofillField ** match,const RegExLogging & logging)171 bool FormField::ParseField(AutofillScanner* scanner,
172 const base::string16& pattern,
173 const std::vector<MatchingPattern>& patterns,
174 AutofillField** match,
175 const RegExLogging& logging) {
176 if (base::FeatureList::IsEnabled(
177 features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
178 base::FeatureList::IsEnabled(
179 features::
180 kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
181 return ParseField(scanner, patterns, match, logging);
182 } else {
183 return ParseField(scanner, pattern, match, logging);
184 }
185 }
186
ParseFieldSpecifics(AutofillScanner * scanner,const base::string16 & pattern,int match_field_attributes,int match_field_input_types,AutofillField ** match,const RegExLogging & logging)187 bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
188 const base::string16& pattern,
189 int match_field_attributes,
190 int match_field_input_types,
191 AutofillField** match,
192 const RegExLogging& logging) {
193 if (scanner->IsEnd())
194 return false;
195
196 const AutofillField* field = scanner->Cursor();
197
198 if (!MatchesFormControlType(field->form_control_type,
199 match_field_input_types))
200 return false;
201
202 return MatchAndAdvance(scanner, pattern, match_field_attributes,
203 match_field_input_types, match, logging);
204 }
205
ParseFieldSpecifics(AutofillScanner * scanner,const std::vector<MatchingPattern> & patterns,AutofillField ** match,const RegExLogging & logging)206 bool FormField::ParseFieldSpecifics(
207 AutofillScanner* scanner,
208 const std::vector<MatchingPattern>& patterns,
209 AutofillField** match,
210 const RegExLogging& logging) {
211 if (scanner->IsEnd())
212 return false;
213
214 const AutofillField* field = scanner->Cursor();
215
216 for (const auto& pattern : patterns) {
217 if (!MatchesFormControlType(field->form_control_type,
218 pattern.match_field_input_types)) {
219 continue;
220 }
221
222 // TODO(crbug.com/1132831): Remove feature check once launched.
223 if (base::FeatureList::IsEnabled(
224 features::
225 kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
226 if (pattern.negative_pattern.has_value() &&
227 FormField::Match(field,
228 base::UTF8ToUTF16(pattern.negative_pattern.value()),
229 pattern.match_field_attributes,
230 pattern.match_field_input_types, logging)) {
231 continue;
232 }
233 }
234
235 if (MatchAndAdvance(scanner, base::UTF8ToUTF16(pattern.positive_pattern),
236 pattern.match_field_attributes,
237 pattern.match_field_input_types, match, logging)) {
238 return true;
239 }
240 }
241 return false;
242 }
243
244 // static
ParseFieldSpecifics(AutofillScanner * scanner,const base::string16 & pattern,int match_type,AutofillField ** match,const RegExLogging & logging)245 bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
246 const base::string16& pattern,
247 int match_type,
248 AutofillField** match,
249 const RegExLogging& logging) {
250 int match_field_attributes = match_type & 0b11;
251 int match_field_types = match_type & ~0b11;
252
253 return ParseFieldSpecifics(scanner, pattern, match_field_attributes,
254 match_field_types, match, logging);
255 }
256
ParseFieldSpecifics(AutofillScanner * scanner,const base::string16 & pattern,int match_type,const std::vector<MatchingPattern> & patterns,AutofillField ** match,const RegExLogging & logging,MatchFieldBitmasks match_field_bitmasks)257 bool FormField::ParseFieldSpecifics(
258 AutofillScanner* scanner,
259 const base::string16& pattern,
260 int match_type,
261 const std::vector<MatchingPattern>& patterns,
262 AutofillField** match,
263 const RegExLogging& logging,
264 MatchFieldBitmasks match_field_bitmasks) {
265 if (base::FeatureList::IsEnabled(
266 features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
267 base::FeatureList::IsEnabled(
268 features::
269 kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
270 // TODO(crbug/1142936): This hack is to allow
271 // AddressField::ParseNameAndLabelSeparately().
272 if (match_field_bitmasks.restrict_attributes != ~0 ||
273 match_field_bitmasks.augment_types != 0) {
274 std::vector<MatchingPattern> patterns_with_restricted_match_type =
275 patterns;
276 for (MatchingPattern& mp : patterns_with_restricted_match_type) {
277 mp.match_field_attributes &= match_field_bitmasks.restrict_attributes;
278 mp.match_field_input_types |= match_field_bitmasks.augment_types;
279 }
280 return ParseFieldSpecifics(scanner, patterns_with_restricted_match_type,
281 match, logging);
282 }
283 return ParseFieldSpecifics(scanner, patterns, match, logging);
284 } else {
285 return ParseFieldSpecifics(scanner, pattern, match_type, match, logging);
286 }
287 }
288
289 // static
ParseEmptyLabel(AutofillScanner * scanner,AutofillField ** match)290 bool FormField::ParseEmptyLabel(AutofillScanner* scanner,
291 AutofillField** match) {
292 return ParseFieldSpecifics(scanner, base::ASCIIToUTF16("^$"),
293 MATCH_LABEL | MATCH_ALL_INPUTS, match);
294 }
295
296 // static
AddClassification(const AutofillField * field,ServerFieldType type,float score,FieldCandidatesMap * field_candidates)297 void FormField::AddClassification(const AutofillField* field,
298 ServerFieldType type,
299 float score,
300 FieldCandidatesMap* field_candidates) {
301 // Several fields are optional.
302 if (field == nullptr)
303 return;
304
305 FieldCandidates& candidates = (*field_candidates)[field->unique_name()];
306 candidates.AddFieldCandidate(type, score);
307 }
308
MatchAndAdvance(AutofillScanner * scanner,const base::string16 & pattern,int match_field_attributes,int match_field_input_types,AutofillField ** match,const RegExLogging & logging)309 bool FormField::MatchAndAdvance(AutofillScanner* scanner,
310 const base::string16& pattern,
311 int match_field_attributes,
312 int match_field_input_types,
313 AutofillField** match,
314 const RegExLogging& logging) {
315 AutofillField* field = scanner->Cursor();
316 if (FormField::Match(field, pattern, match_field_attributes,
317 match_field_input_types, logging)) {
318 if (match)
319 *match = field;
320 scanner->Advance();
321 return true;
322 }
323
324 return false;
325 }
326
327 // static
MatchAndAdvance(AutofillScanner * scanner,const base::string16 & pattern,int match_type,AutofillField ** match,const RegExLogging & logging)328 bool FormField::MatchAndAdvance(AutofillScanner* scanner,
329 const base::string16& pattern,
330 int match_type,
331 AutofillField** match,
332 const RegExLogging& logging) {
333 int match_field_attributes = match_type & 0b11;
334 int match_field_types = match_type & ~0b11;
335
336 return MatchAndAdvance(scanner, pattern, match_field_attributes,
337 match_field_types, match, logging);
338 }
339
Match(const AutofillField * field,const base::string16 & pattern,int match_field_attributes,int match_field_input_types,const RegExLogging & logging)340 bool FormField::Match(const AutofillField* field,
341 const base::string16& pattern,
342 int match_field_attributes,
343 int match_field_input_types,
344 const RegExLogging& logging) {
345 bool found_match = false;
346 base::StringPiece match_type_string;
347 base::StringPiece16 value;
348 base::string16 match;
349
350 if ((match_field_attributes & MATCH_LABEL) &&
351 MatchesPattern(field->label, pattern, &match)) {
352 found_match = true;
353 match_type_string = "Match in label";
354 value = field->label;
355 } else if ((match_field_attributes & MATCH_NAME) &&
356 MatchesPattern(field->parseable_name(), pattern, &match)) {
357 found_match = true;
358 match_type_string = "Match in name";
359 value = field->parseable_name();
360 }
361
362 if (found_match && logging.log_manager) {
363 LogBuffer table_rows;
364 table_rows << Tr{} << "Match type:" << match_type_string;
365 table_rows << Tr{} << "RegEx:" << logging.regex_name;
366 table_rows << Tr{} << "Value: " << HighlightValue(value, match);
367 // The matched substring is reported once more as the highlighting is not
368 // particularly copy&paste friendly.
369 table_rows << Tr{} << "Matched substring: " << match;
370 logging.log_manager->Log()
371 << LoggingScope::kParsing << LogMessage::kLocalHeuristicRegExMatched
372 << Tag{"table"} << std::move(table_rows) << CTag{"table"};
373 }
374
375 return found_match;
376 }
377
378 // static
Match(const AutofillField * field,const base::string16 & pattern,int match_type,const RegExLogging & logging)379 bool FormField::Match(const AutofillField* field,
380 const base::string16& pattern,
381 int match_type,
382 const RegExLogging& logging) {
383 int match_field_attributes = match_type & 0b11;
384 int match_field_types = match_type & ~0b11;
385
386 return Match(field, pattern, match_field_attributes, match_field_types,
387 logging);
388 }
389
390 // static
ParseFormFieldsPass(ParseFunction parse,const std::vector<AutofillField * > & fields,FieldCandidatesMap * field_candidates,const std::string & page_language,LogManager * log_manager)391 void FormField::ParseFormFieldsPass(ParseFunction parse,
392 const std::vector<AutofillField*>& fields,
393 FieldCandidatesMap* field_candidates,
394 const std::string& page_language,
395 LogManager* log_manager) {
396 AutofillScanner scanner(fields);
397 while (!scanner.IsEnd()) {
398 std::unique_ptr<FormField> form_field =
399 parse(&scanner, page_language, log_manager);
400 if (form_field == nullptr) {
401 scanner.Advance();
402 } else {
403 // Add entries into |field_candidates| for each field type found in
404 // |fields|.
405 form_field->AddClassifications(field_candidates);
406 }
407 }
408 }
409
MatchesFormControlType(const std::string & type,int match_type)410 bool FormField::MatchesFormControlType(const std::string& type,
411 int match_type) {
412 if ((match_type & MATCH_TEXT) && type == "text")
413 return true;
414
415 if ((match_type & MATCH_EMAIL) && type == "email")
416 return true;
417
418 if ((match_type & MATCH_TELEPHONE) && type == "tel")
419 return true;
420
421 if ((match_type & MATCH_SELECT) && type == "select-one")
422 return true;
423
424 if ((match_type & MATCH_TEXT_AREA) && type == "textarea")
425 return true;
426
427 if ((match_type & MATCH_PASSWORD) && type == "password")
428 return true;
429
430 if ((match_type & MATCH_NUMBER) && type == "number")
431 return true;
432
433 if ((match_type & MATCH_SEARCH) && type == "search")
434 return true;
435
436 return false;
437 }
438
439 } // namespace autofill
440