1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/password_manager/core/browser/form_parsing/form_parser.h"
6
7 #include <stdint.h>
8
9 #include <algorithm>
10 #include <iterator>
11 #include <set>
12 #include <string>
13 #include <utility>
14 #include <vector>
15
16 #include "base/metrics/histogram_macros.h"
17 #include "base/no_destructor.h"
18 #include "base/stl_util.h"
19 #include "base/strings/string16.h"
20 #include "base/strings/string_piece.h"
21 #include "base/strings/string_split.h"
22 #include "base/strings/utf_string_conversions.h"
23 #include "build/build_config.h"
24 #include "components/autofill/core/browser/field_types.h"
25 #include "components/autofill/core/common/autofill_regex_constants.h"
26 #include "components/autofill/core/common/autofill_regexes.h"
27 #include "components/autofill/core/common/form_data.h"
28 #include "components/autofill/core/common/password_form.h"
29 #include "components/password_manager/core/common/password_manager_features.h"
30
31 using autofill::FieldPropertiesFlags;
32 using autofill::FormData;
33 using autofill::FormFieldData;
34 using autofill::PasswordForm;
35 using base::string16;
36
37 namespace password_manager {
38
39 namespace {
40
// Values of the HTML autocomplete attribute that this parser recognizes. They
// are compared against the last token of the attribute in
// ExtractAutocompleteFlag().
constexpr char kAutocompleteUsername[] = "username";
constexpr char kAutocompleteCurrentPassword[] = "current-password";
constexpr char kAutocompleteNewPassword[] = "new-password";
// Prefix shared by the credit-card related values (the "cc-*" family).
constexpr char kAutocompleteCreditCardPrefix[] = "cc-";
constexpr char kAutocompleteOneTimePassword[] = "one-time-code";
46
// The subset of autocomplete flags related to passwords.
enum class AutocompleteFlag {
  // The attribute carries none of the tokens listed below.
  kNone,
  // The field type token is "username".
  kUsername,
  // The field type token is "current-password".
  kCurrentPassword,
  // The field type token is "new-password".
  kNewPassword,
  // Represents the whole family of cc-* flags + OTP flag.
  kNonPassword
};
56
57 // The autocomplete attribute has one of the following structures:
58 // [section-*] [shipping|billing] [type_hint] field_type
59 // on | off | false
60 // (see
61 // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#autofilling-form-controls%3A-the-autocomplete-attribute).
62 // For password forms, only the field_type is relevant. So parsing the attribute
63 // amounts to just taking the last token. If that token is one of "username",
64 // "current-password" or "new-password", this returns an appropriate enum value.
65 // If the token starts with a "cc-" prefix or is "one-time-code" token, this
66 // returns kNonPassword.
67 // Otherwise, returns kNone.
ExtractAutocompleteFlag(const std::string & attribute)68 AutocompleteFlag ExtractAutocompleteFlag(const std::string& attribute) {
69 std::vector<base::StringPiece> tokens =
70 base::SplitStringPiece(attribute, base::kWhitespaceASCII,
71 base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
72 if (tokens.empty())
73 return AutocompleteFlag::kNone;
74
75 const base::StringPiece& field_type = tokens.back();
76 if (base::LowerCaseEqualsASCII(field_type, kAutocompleteUsername))
77 return AutocompleteFlag::kUsername;
78 if (base::LowerCaseEqualsASCII(field_type, kAutocompleteCurrentPassword))
79 return AutocompleteFlag::kCurrentPassword;
80 if (base::LowerCaseEqualsASCII(field_type, kAutocompleteNewPassword))
81 return AutocompleteFlag::kNewPassword;
82
83 if (base::LowerCaseEqualsASCII(field_type, kAutocompleteOneTimePassword) ||
84 base::StartsWith(field_type, kAutocompleteCreditCardPrefix,
85 base::CompareCase::SENSITIVE)) {
86 return AutocompleteFlag::kNonPassword;
87 }
88 return AutocompleteFlag::kNone;
89 }
90
// How likely is user interaction for a given field?
// Note: higher numeric values should match higher likeliness to allow using the
// standard comparison operators (e.g., operator< or std::max) for comparison
// of likeliness.
enum class Interactability {
  // When the field is invisible.
  kUnlikely = 0,
  // When the field is visible/focusable.
  kPossible = 1,
  // When the user actually typed into the field before.
  kCertain = 2,
};
102
103 // A wrapper around FormFieldData, carrying some additional data used during
104 // parsing.
105 struct ProcessedField {
106 // This points to the wrapped FormFieldData.
107 const FormFieldData* field;
108
109 // The flag derived from field->autocomplete_attribute.
110 AutocompleteFlag autocomplete_flag = AutocompleteFlag::kNone;
111
112 // True if field->form_control_type == "password".
113 bool is_password = false;
114
115 // True if field is predicted to be a password.
116 bool is_predicted_as_password = false;
117
118 // True if the server predicts that this field is not a password field.
119 bool server_hints_not_password = false;
120
121 // True if the server predicts that this field is not a username field.
122 bool server_hints_not_username = false;
123
124 Interactability interactability = Interactability::kUnlikely;
125 };
126
127 // Returns true if the |str| contains words related to CVC fields.
StringMatchesCVC(const base::string16 & str)128 bool StringMatchesCVC(const base::string16& str) {
129 static const base::NoDestructor<base::string16> kCardCvcReCached(
130 base::UTF8ToUTF16(autofill::kCardCvcRe));
131
132 return autofill::MatchesPattern(str, *kCardCvcReCached);
133 }
134
135 // Returns true if the |str| contains words related to SSN fields.
StringMatchesSSN(const base::string16 & str)136 bool StringMatchesSSN(const base::string16& str) {
137 static const base::NoDestructor<base::string16> kSSNReCached(
138 base::UTF8ToUTF16(autofill::kSocialSecurityRe));
139
140 return autofill::MatchesPattern(str, *kSSNReCached);
141 }
142
143 // Returns true if the |str| contains words related to one time password fields.
StringMatchesOTP(const base::string16 & str)144 bool StringMatchesOTP(const base::string16& str) {
145 static const base::NoDestructor<base::string16> kOTPReCached(
146 base::UTF8ToUTF16(autofill::kOneTimePwdRe));
147
148 return autofill::MatchesPattern(str, *kOTPReCached);
149 }
150
151 // TODO(crbug.com/860700): Remove name and attribute checking once server-side
152 // provides hints for CVC.
153 // Returns true if the |field| is suspected to be not the password field.
154 // The suspicion is based on server-side provided hints and on checking the
155 // field's id and name for hinting towards a CVC code, Social Security
156 // Number or one-time password.
IsNotPasswordField(const ProcessedField & field)157 bool IsNotPasswordField(const ProcessedField& field) {
158 return field.server_hints_not_password ||
159 field.autocomplete_flag == AutocompleteFlag::kNonPassword ||
160 StringMatchesCVC(field.field->name_attribute) ||
161 StringMatchesCVC(field.field->id_attribute) ||
162 StringMatchesSSN(field.field->name_attribute) ||
163 StringMatchesSSN(field.field->id_attribute) ||
164 StringMatchesOTP(field.field->name_attribute) ||
165 StringMatchesOTP(field.field->id_attribute);
166 }
167
168 // Returns true if the |field| is suspected to be not the username field.
IsNotUsernameField(const ProcessedField & field)169 bool IsNotUsernameField(const ProcessedField& field) {
170 return field.server_hints_not_username;
171 }
172
173 // Checks if the Finch experiment for offering password generation for
174 // server-predicted clear-text fields is enabled.
IsPasswordGenerationForClearTextFieldsEnabled()175 bool IsPasswordGenerationForClearTextFieldsEnabled() {
176 return base::FeatureList::IsEnabled(
177 password_manager::features::KEnablePasswordGenerationForClearTextFields);
178 }
179
180 // Returns true iff |field_type| is one of password types.
IsPasswordPrediction(const CredentialFieldType field_type)181 bool IsPasswordPrediction(const CredentialFieldType field_type) {
182 switch (field_type) {
183 case CredentialFieldType::kUsername:
184 case CredentialFieldType::kSingleUsername:
185 case CredentialFieldType::kNone:
186 return false;
187 case CredentialFieldType::kCurrentPassword:
188 case CredentialFieldType::kNewPassword:
189 case CredentialFieldType::kConfirmationPassword:
190 return true;
191 }
192 NOTREACHED();
193 return false;
194 }
195
196 // Returns true iff |processed_field| matches the |interactability_bar|. That is
197 // when either:
198 // (1) |processed_field.interactability| is not less than |interactability_bar|,
199 // or
200 // (2) |interactability_bar| is |kCertain|, and |processed_field| was
201 // autofilled. The second clause helps to handle the case when both Chrome and
202 // the user contribute to filling a form:
203 //
204 // <form>
205 // <input type="password" autocomplete="current-password" id="Chrome">
206 // <input type="password" autocomplete="new-password" id="user">
207 // </form>
208 //
209 // In the example above, imagine that Chrome filled the field with id=Chrome,
210 // and the user typed the new password in field with id=user. Then the parser
211 // should identify that id=Chrome is the current password and id=user is the new
212 // password. Without clause (2), Chrome would ignore id=Chrome.
MatchesInteractability(const ProcessedField & processed_field,Interactability interactability_bar)213 bool MatchesInteractability(const ProcessedField& processed_field,
214 Interactability interactability_bar) {
215 return (processed_field.interactability >= interactability_bar) ||
216 (interactability_bar == Interactability::kCertain &&
217 (processed_field.field->properties_mask &
218 FieldPropertiesFlags::AUTOFILLED));
219 }
220
DoesStringContainOnlyDigits(const base::string16 & s)221 bool DoesStringContainOnlyDigits(const base::string16& s) {
222 return std::all_of(s.begin(), s.end(), &base::IsAsciiDigit<base::char16>);
223 }
224
225 // Heuristics to determine that a string is very unlikely to be a username.
IsProbablyNotUsername(const base::string16 & s)226 bool IsProbablyNotUsername(const base::string16& s) {
227 return s.empty() || (s.size() < 3 && DoesStringContainOnlyDigits(s));
228 }
229
230 // Returns |typed_value| if it is not empty, |value| otherwise.
GetFieldValue(const FormFieldData & field)231 base::string16 GetFieldValue(const FormFieldData& field) {
232 return field.typed_value.empty() ? field.value : field.typed_value;
233 }
234
235 // A helper struct that is used to capture significant fields to be used for
236 // the construction of a PasswordForm.
237 struct SignificantFields {
238 const FormFieldData* username = nullptr;
239 const FormFieldData* password = nullptr;
240 const FormFieldData* new_password = nullptr;
241 const FormFieldData* confirmation_password = nullptr;
242 // True if the information about fields could only be derived after relaxing
243 // some constraints. The resulting PasswordForm should only be used for
244 // fallback UI.
245 bool is_fallback = false;
246
247 // True iff the new password field was found with server hints or autocomplete
248 // attributes.
249 bool is_new_password_reliable = false;
250
251 // True if the current form has only username, but no passwords.
252 bool is_single_username = false;
253
254 // Returns true if some password field is present. This is the minimal
255 // requirement for a successful creation of a PasswordForm is present.
HasPasswordspassword_manager::__anon5a32d2e60111::SignificantFields256 bool HasPasswords() const {
257 DCHECK(!confirmation_password || new_password)
258 << "There is no password to confirm if there is no new password field.";
259 return password || new_password;
260 }
261
ClearAllPasswordFieldspassword_manager::__anon5a32d2e60111::SignificantFields262 void ClearAllPasswordFields() {
263 password = nullptr;
264 new_password = nullptr;
265 confirmation_password = nullptr;
266 }
267 };
268
269 // Returns true if |field| is in |significant_fields|.
IsFieldInSignificantFields(const SignificantFields & significant_fields,const FormFieldData * field)270 bool IsFieldInSignificantFields(const SignificantFields& significant_fields,
271 const FormFieldData* field) {
272 return significant_fields.username == field ||
273 significant_fields.password == field ||
274 significant_fields.new_password == field ||
275 significant_fields.confirmation_password == field;
276 }
277
DoesPredictionCorrespondToField(const FormFieldData & field,const PasswordFieldPrediction & prediction)278 bool DoesPredictionCorrespondToField(
279 const FormFieldData& field,
280 const PasswordFieldPrediction& prediction) {
281 #if defined(OS_IOS)
282 return field.unique_id == prediction.unique_id;
283 #else
284 return field.unique_renderer_id == prediction.renderer_id;
285 #endif
286 }
287
288 // Returns the first element of |fields| which corresponds to |prediction|, or
289 // null if there is no such element.
FindField(std::vector<ProcessedField> * processed_fields,const PasswordFieldPrediction & prediction)290 ProcessedField* FindField(std::vector<ProcessedField>* processed_fields,
291 const PasswordFieldPrediction& prediction) {
292 for (ProcessedField& processed_field : *processed_fields) {
293 if (DoesPredictionCorrespondToField(*processed_field.field, prediction))
294 return &processed_field;
295 }
296 return nullptr;
297 }
298
// Tries to parse |processed_fields| based on server |predictions|. Uses |mode|
// to decide which of two username hints are relevant, if present.
//
// |processed_fields| is modified: fields chosen as new or confirmation
// password get |is_predicted_as_password| set, and fields with CVC,
// NOT_PASSWORD or NOT_USERNAME predictions get the corresponding
// |server_hints_not_*| flag set.
// |result| receives the username and password fields which were found. Fields
// already set in |result| on entry are not overwritten, with the exception of
// a single-username prediction (see below).
// In |kSaving| mode all password-type predictions are skipped, so only
// username predictions and the |server_hints_not_*| marking take effect.
void ParseUsingPredictions(std::vector<ProcessedField>* processed_fields,
                           const FormPredictions& predictions,
                           FormDataParser::Mode mode,
                           SignificantFields* result) {
  // Following the design from https://goo.gl/Mc2KRe, this code will attempt to
  // understand the special case when there are two usernames hinted by the
  // server. In that case, they are considered the sign-in and sign-up
  // usernames, in the order in which the (only) current password and the first
  // new-password come. If there is another amount of usernames, 0 or 2+ current
  // password fields or no new password field, then the abort switch below is
  // set and simply the first field of each kind is used.
  bool prevent_handling_two_usernames = false;  // the abort switch
  // Whether the first username is for sign-in.
  bool sign_in_username_first = true;
  // First username is stored in |result->username|.
  const FormFieldData* second_username = nullptr;

  for (const PasswordFieldPrediction& prediction : predictions.fields) {
    ProcessedField* processed_field = nullptr;

    CredentialFieldType field_type = DeriveFromServerFieldType(prediction.type);
    bool is_password_prediction = IsPasswordPrediction(field_type);
    if (mode == FormDataParser::Mode::kSaving && is_password_prediction) {
      // TODO(crbug.com/913965): Consider server predictions for password fields
      // in SAVING mode when the server predictions become complete.
      continue;
    }
    // Note: every |continue| inside the switch below applies to the enclosing
    // for-loop, i.e., it moves on to the next prediction.
    switch (field_type) {
      case CredentialFieldType::kUsername:
        // The first two username predictions are remembered; a third one
        // makes the two-username special case inapplicable.
        if (!result->username) {
          processed_field = FindField(processed_fields, prediction);
          if (processed_field)
            result->username = processed_field->field;
        } else if (!second_username) {
          processed_field = FindField(processed_fields, prediction);
          if (processed_field)
            second_username = processed_field->field;
        } else {
          prevent_handling_two_usernames = true;
        }
        break;
      case CredentialFieldType::kSingleUsername:
        // A single-username prediction with a matching field overrides
        // everything found so far and ends the parsing immediately. This early
        // return also skips the |server_hints_not_*| marking at the end of
        // this function.
        processed_field = FindField(processed_fields, prediction);
        if (processed_field) {
          result->username = processed_field->field;
          result->is_single_username = true;
          result->ClearAllPasswordFields();
          return;
        }
        break;
      case CredentialFieldType::kCurrentPassword:
        if (result->password) {
          // A second current-password prediction: the two-username special
          // case requires exactly one.
          prevent_handling_two_usernames = true;
        } else {
          processed_field = FindField(processed_fields, prediction);
          if (processed_field) {
            // Only fields of password type are accepted as current password.
            if (!processed_field->is_password)
              continue;
            result->password = processed_field->field;
          }
        }
        break;
      case CredentialFieldType::kNewPassword:
        // If any (and thus the first) new password comes before the current
        // password, the first username is understood as sign-up, not sign-in.
        if (!result->password)
          sign_in_username_first = false;

        // If multiple hints for new-password fields are given (e.g., because
        // of more fields having the same signature), the first one should be
        // marked as new-password. That way the generation can be offered
        // before the user has thought of and typed their new password
        // elsewhere. See https://crbug.com/902700 for more details.
        if (!result->new_password) {
          processed_field = FindField(processed_fields, prediction);
          if (processed_field) {
            // Clear-text fields are only accepted when the corresponding
            // experiment is enabled.
            if (!IsPasswordGenerationForClearTextFieldsEnabled() &&
                !processed_field->is_password) {
              continue;
            }
            result->new_password = processed_field->field;
            processed_field->is_predicted_as_password = true;
          }
        }
        break;
      case CredentialFieldType::kConfirmationPassword:
        processed_field = FindField(processed_fields, prediction);
        if (processed_field) {
          // Same clear-text restriction as for the new password above.
          if (!IsPasswordGenerationForClearTextFieldsEnabled() &&
              !processed_field->is_password) {
            continue;
          }
          result->confirmation_password = processed_field->field;
          processed_field->is_predicted_as_password = true;
        }
        break;
      case CredentialFieldType::kNone:
        break;
    }
  }

  // The two-username special case needs both a current and a new password.
  if (!result->new_password || !result->password)
    prevent_handling_two_usernames = true;

  if (!prevent_handling_two_usernames && second_username) {
    // Now that there are two usernames, |sign_in_username_first| determines
    // which is sign-in and which sign-up.
    const FormFieldData* sign_in = result->username;
    const FormFieldData* sign_up = second_username;
    if (!sign_in_username_first)
      std::swap(sign_in, sign_up);
    // For filling, the sign-in username is relevant, because Chrome should not
    // fill where the credentials first need to be created. For saving, the
    // sign-up username is relevant: if both have values, then the sign-up one
    // was not filled and hence was typed by the user.
    result->username =
        mode == FormDataParser::Mode::kSaving ? sign_up : sign_in;
  }

  // If the server suggests there is a confirmation field but no new password,
  // something went wrong. Sanitize the result.
  if (result->confirmation_password && !result->new_password)
    result->confirmation_password = nullptr;

  // For the use of basic heuristics, also mark CVC fields and NOT_PASSWORD
  // fields as such. NOT_USERNAME predictions are recorded in the same pass.
  for (const PasswordFieldPrediction& prediction : predictions.fields) {
    ProcessedField* current_field = FindField(processed_fields, prediction);
    if (!current_field)
      continue;
    if (prediction.type == autofill::CREDIT_CARD_VERIFICATION_CODE ||
        prediction.type == autofill::NOT_PASSWORD) {
      current_field->server_hints_not_password = true;
    } else if (prediction.type == autofill::NOT_USERNAME) {
      current_field->server_hints_not_username = true;
    }
  }
}
439
440 // Looks for autocomplete attributes in |processed_fields| and saves predictions
441 // to |result|. Assumption on the usage autocomplete attributes:
442 // 1. Not more than 1 field with autocomplete=username.
443 // 2. Not more than 1 field with autocomplete=current-password.
444 // 3. Not more than 2 fields with autocomplete=new-password.
445 // 4. Only password fields have "*-password" attribute and only non-password
446 // fields have the "username" attribute.
447 // If any assumption is violated, the autocomplete attribute is ignored.
ParseUsingAutocomplete(const std::vector<ProcessedField> & processed_fields,SignificantFields * result)448 void ParseUsingAutocomplete(const std::vector<ProcessedField>& processed_fields,
449 SignificantFields* result) {
450 bool new_password_found_by_server = result->new_password;
451 const FormFieldData* field_marked_as_username = nullptr;
452 int username_fields_found = 0;
453 for (const ProcessedField& processed_field : processed_fields) {
454 if (IsFieldInSignificantFields(*result, processed_field.field)) {
455 // Skip this field because it was already chosen in previous steps.
456 continue;
457 }
458 switch (processed_field.autocomplete_flag) {
459 case AutocompleteFlag::kUsername:
460 if (processed_field.is_password || result->username ||
461 processed_field.server_hints_not_username)
462 continue;
463 username_fields_found++;
464 field_marked_as_username = processed_field.field;
465 break;
466 case AutocompleteFlag::kCurrentPassword:
467 if (!processed_field.is_password || result->password ||
468 processed_field.server_hints_not_password)
469 continue;
470 result->password = processed_field.field;
471 break;
472 case AutocompleteFlag::kNewPassword:
473 if (!processed_field.is_password || new_password_found_by_server ||
474 processed_field.server_hints_not_password)
475 continue;
476 // The first field with autocomplete=new-password is considered to be
477 // new_password and the second is confirmation_password.
478 if (!result->new_password)
479 result->new_password = processed_field.field;
480 else if (!result->confirmation_password)
481 result->confirmation_password = processed_field.field;
482 break;
483 case AutocompleteFlag::kNonPassword:
484 case AutocompleteFlag::kNone:
485 break;
486 }
487 }
488 if (!result->username && username_fields_found == 1)
489 result->username = field_marked_as_username;
490 }
491
492 // This computes the "likely" condition from the design https://goo.gl/ERvoEN .
493 // The |field| is likely to be a password if it is not a CVC field, not
494 // readonly, etc. |*ignored_readonly| is incremented specifically if this
495 // function returns false because of the |field| being readonly.
IsLikelyPassword(const ProcessedField & field,size_t * ignored_readonly)496 bool IsLikelyPassword(const ProcessedField& field, size_t* ignored_readonly) {
497 // Readonly fields can be an indication that filling is useless (e.g., the
498 // page might use a virtual keyboard). However, if the field was readonly
499 // only temporarily, that makes it still interesting for saving. The fact
500 // that a user typed or Chrome filled into that field in the past is an
501 // indicator that the readonly was only temporary.
502 if (field.field->is_readonly &&
503 !(field.field->properties_mask & (FieldPropertiesFlags::USER_TYPED |
504 FieldPropertiesFlags::AUTOFILLED))) {
505 ++*ignored_readonly;
506 return false;
507 }
508 return !IsNotPasswordField(field);
509 }
510
511 // Filters the available passwords from |processed_fields| using these rules:
512 // (1) Passwords with Interactability below |best_interactability| are removed.
513 // (2) If |mode| == |kSaving|, passwords with empty values are removed.
514 // (3) Passwords for which IsLikelyPassword returns false are removed.
515 // If applying rules (1)-(3) results in a non-empty vector of password fields,
516 // that vector is returned. Otherwise, only rules (1) and (2) are applied and
517 // the result returned (even if it is empty).
518 // Neither of the following output parameters may be null:
519 // |readonly_status| will be updated according to the processing of the parsed
520 // fields.
521 // |is_fallback| is set to true if the filtering rule (3) was not used to
522 // obtain the result.
GetRelevantPasswords(const std::vector<ProcessedField> & processed_fields,FormDataParser::Mode mode,Interactability best_interactability,FormDataParser::ReadonlyPasswordFields * readonly_status,bool * is_fallback)523 std::vector<const FormFieldData*> GetRelevantPasswords(
524 const std::vector<ProcessedField>& processed_fields,
525 FormDataParser::Mode mode,
526 Interactability best_interactability,
527 FormDataParser::ReadonlyPasswordFields* readonly_status,
528 bool* is_fallback) {
529 DCHECK(readonly_status);
530 DCHECK(is_fallback);
531
532 // Step 0: filter out all non-password fields.
533 std::vector<const ProcessedField*> passwords;
534 passwords.reserve(processed_fields.size());
535 for (const ProcessedField& processed_field : processed_fields) {
536 if (processed_field.is_password)
537 passwords.push_back(&processed_field);
538 }
539 if (passwords.empty())
540 return std::vector<const FormFieldData*>();
541
542 // These two counters are used to determine the ReadonlyPasswordFields value
543 // corresponding to this form.
544 const size_t all_passwords_seen = passwords.size();
545 size_t ignored_readonly = 0;
546
547 // Step 1: apply filter criterion (1).
548 base::EraseIf(
549 passwords, [best_interactability](const ProcessedField* processed_field) {
550 return !MatchesInteractability(*processed_field, best_interactability);
551 });
552
553 if (mode == FormDataParser::Mode::kSaving) {
554 // Step 2: apply filter criterion (2).
555 base::EraseIf(passwords, [](const ProcessedField* processed_field) {
556 return processed_field->field->value.empty();
557 });
558 }
559
560 // Step 3: apply filter criterion (3). Keep the current content of
561 // |passwords| though, in case it is needed for fallback.
562 std::vector<const ProcessedField*> filtered;
563 filtered.reserve(passwords.size());
564 std::copy_if(passwords.begin(), passwords.end(), std::back_inserter(filtered),
565 [&ignored_readonly](const ProcessedField* processed_field) {
566 return IsLikelyPassword(*processed_field, &ignored_readonly);
567 });
568 // Compute the readonly statistic for metrics.
569 DCHECK_LE(ignored_readonly, all_passwords_seen);
570 if (ignored_readonly == 0)
571 *readonly_status = FormDataParser::ReadonlyPasswordFields::kNoneIgnored;
572 else if (ignored_readonly < all_passwords_seen)
573 *readonly_status = FormDataParser::ReadonlyPasswordFields::kSomeIgnored;
574 else
575 *readonly_status = FormDataParser::ReadonlyPasswordFields::kAllIgnored;
576
577 // Ensure that |filtered| contains what needs to be returned...
578 if (filtered.empty()) {
579 filtered = std::move(passwords);
580 *is_fallback = true;
581 }
582
583 // ...and strip ProcessedFields down to FormFieldData.
584 std::vector<const FormFieldData*> result;
585 result.reserve(filtered.size());
586 for (const ProcessedField* processed_field : filtered)
587 result.push_back(processed_field->field);
588
589 return result;
590 }
591
592 // Detects different password fields from |passwords|.
LocateSpecificPasswords(const std::vector<const FormFieldData * > & passwords,const FormFieldData ** current_password,const FormFieldData ** new_password,const FormFieldData ** confirmation_password)593 void LocateSpecificPasswords(const std::vector<const FormFieldData*>& passwords,
594 const FormFieldData** current_password,
595 const FormFieldData** new_password,
596 const FormFieldData** confirmation_password) {
597 DCHECK(current_password);
598 DCHECK(!*current_password);
599 DCHECK(new_password);
600 DCHECK(!*new_password);
601 DCHECK(confirmation_password);
602 DCHECK(!*confirmation_password);
603
604 switch (passwords.size()) {
605 case 1:
606 *current_password = passwords[0];
607 break;
608 case 2:
609 if (!passwords[0]->value.empty() &&
610 passwords[0]->value == passwords[1]->value) {
611 // Two identical non-empty passwords: assume we are seeing a new
612 // password with a confirmation. This can be either a sign-up form or a
613 // password change form that does not ask for the old password.
614 *new_password = passwords[0];
615 *confirmation_password = passwords[1];
616 } else {
617 // Assume first is old password, second is new (no choice but to guess).
618 // If the passwords are both empty, it is impossible to tell if they
619 // are the old and the new one, or the new one and its confirmation. In
620 // that case Chrome errs on the side of filling and classifies them as
621 // old & new to allow filling of change password forms.
622 *current_password = passwords[0];
623 *new_password = passwords[1];
624 }
625 break;
626 default:
627 // If there are more than 3 passwords it is not very clear what this form
628 // it is. Consider only the first 3 passwords in such case as a
629 // best-effort solution.
630 if (!passwords[0]->value.empty() &&
631 passwords[0]->value == passwords[1]->value &&
632 passwords[0]->value == passwords[2]->value) {
633 // All passwords are the same. Assume that the first field is the
634 // current password.
635 *current_password = passwords[0];
636 } else if (passwords[1]->value == passwords[2]->value) {
637 // New password is the duplicated one, and comes second; or empty form
638 // with at least 3 password fields.
639 *current_password = passwords[0];
640 *new_password = passwords[1];
641 *confirmation_password = passwords[2];
642 } else if (passwords[0]->value == passwords[1]->value) {
643 // It is strange that the new password comes first, but trust more which
644 // fields are duplicated than the ordering of fields. Assume that
645 // any password fields after the new password contain sensitive
646 // information that isn't actually a password (security hint, SSN, etc.)
647 *new_password = passwords[0];
648 *confirmation_password = passwords[1];
649 } else {
650 // Three different passwords, or first and last match with middle
651 // different. No idea which is which. Let's save the first password.
652 // Password selection in a prompt will allow to correct the choice.
653 *current_password = passwords[0];
654 }
655 }
656 }
657
658 // Tries to find username field among text fields from |processed_fields|
659 // occurring before |first_relevant_password|. Returns nullptr if the username
660 // is not found. If |mode| is SAVING, ignores all fields with empty values.
661 // Ignores all fields with interactability less than |best_interactability|.
FindUsernameFieldBaseHeuristics(const std::vector<ProcessedField> & processed_fields,const std::vector<ProcessedField>::const_iterator & first_relevant_password,FormDataParser::Mode mode,Interactability best_interactability,bool is_fallback)662 const FormFieldData* FindUsernameFieldBaseHeuristics(
663 const std::vector<ProcessedField>& processed_fields,
664 const std::vector<ProcessedField>::const_iterator& first_relevant_password,
665 FormDataParser::Mode mode,
666 Interactability best_interactability,
667 bool is_fallback) {
668 DCHECK(first_relevant_password != processed_fields.end());
669
670 // For saving filter out empty fields and fields with values which are not
671 // username.
672 const bool is_saving = mode == FormDataParser::Mode::kSaving;
673
674 // Search through the text input fields preceding |first_relevant_password|
675 // and find the closest one focusable and the closest one in general.
676
677 const FormFieldData* focusable_username = nullptr;
678 const FormFieldData* username = nullptr;
679
680 // Do reverse search to find the closest candidates preceding the password.
681 for (auto it = std::make_reverse_iterator(first_relevant_password);
682 it != processed_fields.rend(); ++it) {
683 if (it->is_password || it->is_predicted_as_password)
684 continue;
685 if (!MatchesInteractability(*it, best_interactability))
686 continue;
687 if (is_saving && IsProbablyNotUsername(it->field->value))
688 continue;
689 if (!is_fallback && IsNotPasswordField(*it))
690 continue;
691 if (!is_fallback && IsNotUsernameField(*it)) {
692 continue;
693 }
694 if (!username)
695 username = it->field;
696 if (it->field->is_focusable) {
697 focusable_username = it->field;
698 break;
699 }
700 }
701
702 return focusable_username ? focusable_username : username;
703 }
704
705 // A helper to return a |field|'s unique_renderer_id or
706 // kNotSetRendererId if |field| is null.
ExtractUniqueId(const FormFieldData * field)707 uint32_t ExtractUniqueId(const FormFieldData* field) {
708 return field ? field->unique_renderer_id : FormData::kNotSetRendererId;
709 }
710
711 // Tries to find the username and password fields in |processed_fields| based
712 // on the structure (how the fields are ordered). If |mode| is SAVING, only
713 // considers non-empty fields. The |found_fields| is both an input and output
714 // argument: if some password field and the username are already present, the
715 // the function exits early. If something is missing, the function tries to
716 // complete it. The result is stored back in |found_fields|. The best
717 // interactability for usernames, which depends on position of the found
718 // passwords as well, is returned through |username_max| to be used in other
719 // kinds of analysis. If password fields end up being parsed, |readonly_status|
720 // will be updated according to that processing.
ParseUsingBaseHeuristics(const std::vector<ProcessedField> & processed_fields,FormDataParser::Mode mode,SignificantFields * found_fields,Interactability * username_max,FormDataParser::ReadonlyPasswordFields * readonly_status)721 void ParseUsingBaseHeuristics(
722 const std::vector<ProcessedField>& processed_fields,
723 FormDataParser::Mode mode,
724 SignificantFields* found_fields,
725 Interactability* username_max,
726 FormDataParser::ReadonlyPasswordFields* readonly_status) {
727 // If there is both the username and the minimal set of fields to build a
728 // PasswordForm, return early -- no more work to do.
729 if (found_fields->HasPasswords() && found_fields->username)
730 return;
731
732 // Will point to the password included in |found_field| which is first in the
733 // order of fields in |processed_fields|.
734 auto first_relevant_password = processed_fields.end();
735
736 if (!found_fields->HasPasswords()) {
737 // What is the best interactability among passwords?
738 Interactability password_max = Interactability::kUnlikely;
739 for (const ProcessedField& processed_field : processed_fields) {
740 if (processed_field.is_password && !IsNotPasswordField(processed_field))
741 password_max = std::max(password_max, processed_field.interactability);
742 }
743
744 // Try to find password elements (current, new, confirmation) among those
745 // with best interactability.
746 std::vector<const FormFieldData*> passwords =
747 GetRelevantPasswords(processed_fields, mode, password_max,
748 readonly_status, &found_fields->is_fallback);
749 if (passwords.empty())
750 return;
751 LocateSpecificPasswords(passwords, &found_fields->password,
752 &found_fields->new_password,
753 &found_fields->confirmation_password);
754 if (!found_fields->HasPasswords())
755 return;
756 for (auto it = processed_fields.begin(); it != processed_fields.end();
757 ++it) {
758 if (it->field == passwords[0]) {
759 first_relevant_password = it;
760 break;
761 }
762 }
763 } else {
764 const uint32_t password_ids[] = {
765 ExtractUniqueId(found_fields->password),
766 ExtractUniqueId(found_fields->new_password),
767 ExtractUniqueId(found_fields->confirmation_password)};
768 for (auto it = processed_fields.begin(); it != processed_fields.end();
769 ++it) {
770 if ((it->is_password || it->is_predicted_as_password) &&
771 base::Contains(password_ids, it->field->unique_renderer_id)) {
772 first_relevant_password = it;
773 break;
774 }
775 }
776 }
777 DCHECK(first_relevant_password != processed_fields.end());
778
779 if (found_fields->username)
780 return;
781
782 // What is the best interactability among text fields preceding the passwords?
783 *username_max = Interactability::kUnlikely;
784 for (auto it = processed_fields.begin(); it != first_relevant_password;
785 ++it) {
786 if (!it->is_password && !IsNotPasswordField(*it))
787 *username_max = std::max(*username_max, it->interactability);
788 }
789
790 found_fields->username = FindUsernameFieldBaseHeuristics(
791 processed_fields, first_relevant_password, mode, *username_max,
792 found_fields->is_fallback);
793 return;
794 }
795
796 // Helper to get the platform specific identifier by which autofill and password
797 // manager refer to a field. The fuzzing infrastructure doed not run on iOS, so
798 // the iOS specific parts of PasswordForm are also built on fuzzer enabled
799 // platforms. See http://crbug.com/896594
GetPlatformSpecificIdentifier(const FormFieldData & field)800 string16 GetPlatformSpecificIdentifier(const FormFieldData& field) {
801 #if defined(OS_IOS)
802 return field.unique_id;
803 #else
804 return field.name;
805 #endif
806 }
807
808 // Set username and password fields in |password_form| based on
809 // |significant_fields| .
SetFields(const SignificantFields & significant_fields,PasswordForm * password_form)810 void SetFields(const SignificantFields& significant_fields,
811 PasswordForm* password_form) {
812 #if !defined(OS_IOS)
813 password_form->has_renderer_ids = true;
814 #endif
815 if (significant_fields.username) {
816 password_form->username_element =
817 GetPlatformSpecificIdentifier(*significant_fields.username);
818 password_form->username_value = GetFieldValue(*significant_fields.username);
819 password_form->username_element_renderer_id =
820 significant_fields.username->unique_renderer_id;
821 }
822
823 if (significant_fields.password) {
824 password_form->password_element =
825 GetPlatformSpecificIdentifier(*significant_fields.password);
826 password_form->password_value = GetFieldValue(*significant_fields.password);
827 password_form->password_element_renderer_id =
828 significant_fields.password->unique_renderer_id;
829 }
830
831 if (significant_fields.new_password) {
832 password_form->new_password_element =
833 GetPlatformSpecificIdentifier(*significant_fields.new_password);
834 password_form->new_password_value =
835 GetFieldValue(*significant_fields.new_password);
836 password_form->new_password_element_renderer_id =
837 significant_fields.new_password->unique_renderer_id;
838 }
839
840 if (significant_fields.confirmation_password) {
841 password_form->confirmation_password_element =
842 GetPlatformSpecificIdentifier(
843 *significant_fields.confirmation_password);
844 password_form->confirmation_password_element_renderer_id =
845 significant_fields.confirmation_password->unique_renderer_id;
846 }
847 }
848
849 // For each relevant field of |fields| computes additional data useful for
850 // parsing and wraps that in a ProcessedField. Returns the vector of all those
851 // ProcessedField instances, or an empty vector if there was not a single
852 // password field. Also, computes the vector of all password values and
853 // associated element names in |all_possible_passwords|, and similarly for
854 // usernames and |all_possible_usernames|. If |mode| is |kSaving|, fields with
855 // empty values are ignored.
ProcessFields(const std::vector<FormFieldData> & fields,autofill::ValueElementVector * all_possible_passwords,autofill::ValueElementVector * all_possible_usernames,FormDataParser::Mode mode)856 std::vector<ProcessedField> ProcessFields(
857 const std::vector<FormFieldData>& fields,
858 autofill::ValueElementVector* all_possible_passwords,
859 autofill::ValueElementVector* all_possible_usernames,
860 FormDataParser::Mode mode) {
861 DCHECK(all_possible_passwords);
862 DCHECK(all_possible_passwords->empty());
863
864 std::vector<ProcessedField> result;
865 result.reserve(fields.size());
866
867 // |all_possible_passwords| should only contain each value once.
868 // |seen_password_values| ensures that duplicates are ignored.
869 std::set<base::StringPiece16> seen_password_values;
870 // Similarly for usernames.
871 std::set<base::StringPiece16> seen_username_values;
872
873 const bool consider_only_non_empty = mode == FormDataParser::Mode::kSaving;
874 for (const FormFieldData& field : fields) {
875 if (!field.IsTextInputElement())
876 continue;
877 if (consider_only_non_empty && field.value.empty())
878 continue;
879
880 const bool is_password = field.form_control_type == "password";
881
882 if (!field.value.empty()) {
883 std::set<base::StringPiece16>& seen_values =
884 is_password ? seen_password_values : seen_username_values;
885 autofill::ValueElementVector* all_possible_fields =
886 is_password ? all_possible_passwords : all_possible_usernames;
887 // Only the field name of the first occurrence is added.
888 auto insertion = seen_values.insert(base::StringPiece16(field.value));
889 if (insertion.second) {
890 // There was no such element in |seen_values|.
891 all_possible_fields->push_back({field.value, field.name});
892 }
893 }
894
895 const AutocompleteFlag flag =
896 ExtractAutocompleteFlag(field.autocomplete_attribute);
897
898 ProcessedField processed_field = {
899 .field = &field, .autocomplete_flag = flag, .is_password = is_password};
900
901 if (field.properties_mask & FieldPropertiesFlags::USER_TYPED)
902 processed_field.interactability = Interactability::kCertain;
903 else if (field.is_focusable)
904 processed_field.interactability = Interactability::kPossible;
905
906 result.push_back(processed_field);
907 }
908
909 return result;
910 }
911
912 // Find the first element in |username_predictions| (i.e. the most reliable
913 // prediction) that occurs in |processed_fields| and has interactability level
914 // at least |username_max|.
FindUsernameInPredictions(const std::vector<uint32_t> & username_predictions,const std::vector<ProcessedField> & processed_fields,Interactability username_max)915 const FormFieldData* FindUsernameInPredictions(
916 const std::vector<uint32_t>& username_predictions,
917 const std::vector<ProcessedField>& processed_fields,
918 Interactability username_max) {
919 for (uint32_t predicted_id : username_predictions) {
920 auto iter = std::find_if(
921 processed_fields.begin(), processed_fields.end(),
922 [predicted_id, username_max](const ProcessedField& processed_field) {
923 return processed_field.field->unique_renderer_id == predicted_id &&
924 MatchesInteractability(processed_field, username_max);
925 });
926 if (iter != processed_fields.end()) {
927 return iter->field;
928 }
929 }
930 return nullptr;
931 }
932
933 // Return true if |significant_fields| has an username field and
934 // |form_predictions| has |may_use_prefilled_placeholder| == true for the
935 // username field.
GetMayUsePrefilledPlaceholder(const base::Optional<FormPredictions> & form_predictions,const SignificantFields & significant_fields)936 bool GetMayUsePrefilledPlaceholder(
937 const base::Optional<FormPredictions>& form_predictions,
938 const SignificantFields& significant_fields) {
939 if (!base::FeatureList::IsEnabled(
940 password_manager::features::kEnableOverwritingPlaceholderUsernames))
941 return false;
942
943 if (!form_predictions || !significant_fields.username)
944 return false;
945
946 uint32_t username_id = significant_fields.username->unique_renderer_id;
947 for (const PasswordFieldPrediction& prediction : form_predictions->fields) {
948 if (prediction.renderer_id == username_id)
949 return prediction.may_use_prefilled_placeholder;
950 }
951 return false;
952 }
953
954 // Puts together a PasswordForm, the result of the parsing, based on the
955 // |form_data| description of the form metadata (e.g., action), the already
956 // parsed information about what are the |significant_fields|, the list
957 // |all_possible_passwords| of all non-empty password values which occurred in
958 // the form and their associated element names, and the list
959 // |all_possible_usernames| of all non-empty username values which
960 // occurred in the form and their associated elements. |form_predictions| is
961 // used to find fields that may have preffilled placeholders.
AssemblePasswordForm(const FormData & form_data,const SignificantFields & significant_fields,autofill::ValueElementVector all_possible_passwords,autofill::ValueElementVector all_possible_usernames,const base::Optional<FormPredictions> & form_predictions)962 std::unique_ptr<PasswordForm> AssemblePasswordForm(
963 const FormData& form_data,
964 const SignificantFields& significant_fields,
965 autofill::ValueElementVector all_possible_passwords,
966 autofill::ValueElementVector all_possible_usernames,
967 const base::Optional<FormPredictions>& form_predictions) {
968 if (!significant_fields.HasPasswords() &&
969 !significant_fields.is_single_username) {
970 return nullptr;
971 }
972
973 // Create the PasswordForm and set data not related to specific fields.
974 auto result = std::make_unique<PasswordForm>();
975 result->origin = form_data.url;
976 result->signon_realm = GetSignonRealm(form_data.url);
977 result->action = form_data.action;
978 result->form_data = form_data;
979 result->all_possible_passwords = std::move(all_possible_passwords);
980 result->all_possible_usernames = std::move(all_possible_usernames);
981 result->scheme = PasswordForm::Scheme::kHtml;
982 result->blacklisted_by_user = false;
983 result->type = PasswordForm::Type::kManual;
984 result->username_may_use_prefilled_placeholder =
985 GetMayUsePrefilledPlaceholder(form_predictions, significant_fields);
986 result->is_new_password_reliable =
987 significant_fields.is_new_password_reliable;
988 result->only_for_fallback = significant_fields.is_fallback;
989 result->submission_event = form_data.submission_event;
990
991 for (const FormFieldData& field : form_data.fields) {
992 if (field.form_control_type == "password" &&
993 (field.properties_mask & FieldPropertiesFlags::AUTOFILLED)) {
994 result->form_has_autofilled_value = true;
995 }
996 }
997
998 // Set data related to specific fields.
999 SetFields(significant_fields, result.get());
1000 return result;
1001 }
1002
1003 } // namespace
1004
// Out-of-line definition of the compiler-generated default constructor.
FormDataParser::FormDataParser() = default;
1006
// Out-of-line definition of the compiler-generated destructor.
FormDataParser::~FormDataParser() = default;
1008
Parse(const FormData & form_data,Mode mode)1009 std::unique_ptr<PasswordForm> FormDataParser::Parse(const FormData& form_data,
1010 Mode mode) {
1011 if (form_data.fields.size() > kMaxParseableFields)
1012 return nullptr;
1013
1014 readonly_status_ = ReadonlyPasswordFields::kNoHeuristics;
1015 autofill::ValueElementVector all_possible_passwords;
1016 autofill::ValueElementVector all_possible_usernames;
1017 std::vector<ProcessedField> processed_fields = ProcessFields(
1018 form_data.fields, &all_possible_passwords, &all_possible_usernames, mode);
1019
1020 if (processed_fields.empty())
1021 return nullptr;
1022
1023 SignificantFields significant_fields;
1024 UsernameDetectionMethod username_detection_method =
1025 UsernameDetectionMethod::kNoUsernameDetected;
1026
1027 // (1) First, try to parse with server predictions.
1028 if (predictions_) {
1029 ParseUsingPredictions(&processed_fields, *predictions_, mode,
1030 &significant_fields);
1031 if (significant_fields.username) {
1032 username_detection_method =
1033 UsernameDetectionMethod::kServerSidePrediction;
1034 }
1035 }
1036
1037 // (2) If that failed, try to parse with autocomplete attributes.
1038 if (!significant_fields.is_single_username) {
1039 ParseUsingAutocomplete(processed_fields, &significant_fields);
1040 if (username_detection_method ==
1041 UsernameDetectionMethod::kNoUsernameDetected &&
1042 significant_fields.username) {
1043 username_detection_method =
1044 UsernameDetectionMethod::kAutocompleteAttribute;
1045 }
1046 }
1047
1048 // Pass the "reliability" information to mark the new-password fields as
1049 // eligible for automatic password generation. This only makes sense when
1050 // forms are analysed for filling, because no passwords are generated when the
1051 // user saves the already entered one.
1052 if (mode == Mode::kFilling && significant_fields.new_password) {
1053 significant_fields.is_new_password_reliable = true;
1054 }
1055
1056 // (3) Now try to fill the gaps.
1057 const bool username_found_before_heuristic = significant_fields.username;
1058
1059 // Try to parse with base heuristic.
1060 if (!significant_fields.is_single_username) {
1061 Interactability username_max = Interactability::kUnlikely;
1062 ParseUsingBaseHeuristics(processed_fields, mode, &significant_fields,
1063 &username_max, &readonly_status_);
1064 if (username_detection_method ==
1065 UsernameDetectionMethod::kNoUsernameDetected &&
1066 significant_fields.username) {
1067 username_detection_method = UsernameDetectionMethod::kBaseHeuristic;
1068 }
1069
1070 // Additionally, and based on the best interactability computed by base
1071 // heuristics, try to improve the username based on the context of the
1072 // fields, unless the username already came from more reliable types of
1073 // analysis.
1074 if (!username_found_before_heuristic) {
1075 const FormFieldData* username_field_by_context =
1076 FindUsernameInPredictions(form_data.username_predictions,
1077 processed_fields, username_max);
1078 if (username_field_by_context &&
1079 !(mode == FormDataParser::Mode::kSaving &&
1080 username_field_by_context->value.empty())) {
1081 significant_fields.username = username_field_by_context;
1082 if (username_detection_method ==
1083 UsernameDetectionMethod::kNoUsernameDetected ||
1084 username_detection_method ==
1085 UsernameDetectionMethod::kBaseHeuristic) {
1086 username_detection_method =
1087 UsernameDetectionMethod::kHtmlBasedClassifier;
1088 }
1089 }
1090 }
1091 }
1092
1093 UMA_HISTOGRAM_ENUMERATION("PasswordManager.UsernameDetectionMethod",
1094 username_detection_method,
1095 UsernameDetectionMethod::kCount);
1096
1097 return AssemblePasswordForm(form_data, significant_fields,
1098 std::move(all_possible_passwords),
1099 std::move(all_possible_usernames), predictions_);
1100 }
1101
GetSignonRealm(const GURL & url)1102 std::string GetSignonRealm(const GURL& url) {
1103 GURL::Replacements rep;
1104 rep.ClearUsername();
1105 rep.ClearPassword();
1106 rep.ClearQuery();
1107 rep.ClearRef();
1108 rep.SetPathStr(std::string());
1109 return url.ReplaceComponents(rep).spec();
1110 }
1111
1112 } // namespace password_manager
1113