1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/password_manager/core/browser/form_parsing/form_parser.h"
6 
7 #include <stdint.h>
8 
9 #include <algorithm>
10 #include <iterator>
11 #include <set>
12 #include <string>
13 #include <utility>
14 #include <vector>
15 
16 #include "base/metrics/histogram_macros.h"
17 #include "base/no_destructor.h"
18 #include "base/stl_util.h"
19 #include "base/strings/string16.h"
20 #include "base/strings/string_piece.h"
21 #include "base/strings/string_split.h"
22 #include "base/strings/utf_string_conversions.h"
23 #include "build/build_config.h"
24 #include "components/autofill/core/browser/field_types.h"
25 #include "components/autofill/core/common/autofill_regex_constants.h"
26 #include "components/autofill/core/common/autofill_regexes.h"
27 #include "components/autofill/core/common/form_data.h"
28 #include "components/autofill/core/common/password_form.h"
29 #include "components/password_manager/core/common/password_manager_features.h"
30 
31 using autofill::FieldPropertiesFlags;
32 using autofill::FormData;
33 using autofill::FormFieldData;
34 using autofill::PasswordForm;
35 using base::string16;
36 
37 namespace password_manager {
38 
39 namespace {
40 
// Values of the field_type token of the HTML autocomplete attribute which are
// recognised by ExtractAutocompleteFlag().

// Tokens describing credential fields.
constexpr char kAutocompleteUsername[] = "username";
constexpr char kAutocompleteCurrentPassword[] = "current-password";
constexpr char kAutocompleteNewPassword[] = "new-password";

// Tokens describing fields which are not passwords: the one-time code token
// and the prefix shared by all credit card tokens.
constexpr char kAutocompleteOneTimePassword[] = "one-time-code";
constexpr char kAutocompleteCreditCardPrefix[] = "cc-";
46 
// The subset of autocomplete flags related to passwords.
enum class AutocompleteFlag {
  // No password-related autocomplete information is available.
  kNone,
  // autocomplete="username".
  kUsername,
  // autocomplete="current-password".
  kCurrentPassword,
  // autocomplete="new-password".
  kNewPassword,
  // Represents the whole family of cc-* flags + OTP flag.
  kNonPassword,
};
56 
57 // The autocomplete attribute has one of the following structures:
58 //   [section-*] [shipping|billing] [type_hint] field_type
59 //   on | off | false
60 // (see
61 // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#autofilling-form-controls%3A-the-autocomplete-attribute).
62 // For password forms, only the field_type is relevant. So parsing the attribute
63 // amounts to just taking the last token.  If that token is one of "username",
64 // "current-password" or "new-password", this returns an appropriate enum value.
65 // If the token starts with a "cc-" prefix or is "one-time-code" token, this
66 // returns kNonPassword.
67 // Otherwise, returns kNone.
ExtractAutocompleteFlag(const std::string & attribute)68 AutocompleteFlag ExtractAutocompleteFlag(const std::string& attribute) {
69   std::vector<base::StringPiece> tokens =
70       base::SplitStringPiece(attribute, base::kWhitespaceASCII,
71                              base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
72   if (tokens.empty())
73     return AutocompleteFlag::kNone;
74 
75   const base::StringPiece& field_type = tokens.back();
76   if (base::LowerCaseEqualsASCII(field_type, kAutocompleteUsername))
77     return AutocompleteFlag::kUsername;
78   if (base::LowerCaseEqualsASCII(field_type, kAutocompleteCurrentPassword))
79     return AutocompleteFlag::kCurrentPassword;
80   if (base::LowerCaseEqualsASCII(field_type, kAutocompleteNewPassword))
81     return AutocompleteFlag::kNewPassword;
82 
83   if (base::LowerCaseEqualsASCII(field_type, kAutocompleteOneTimePassword) ||
84       base::StartsWith(field_type, kAutocompleteCreditCardPrefix,
85                        base::CompareCase::SENSITIVE)) {
86     return AutocompleteFlag::kNonPassword;
87   }
88   return AutocompleteFlag::kNone;
89 }
90 
// Describes how likely it is that the user interacts with a given field.
// Note: the numeric values grow with the likeliness, so that the built-in
// comparison operators can be used to compare two levels.
enum class Interactability {
  // The field is invisible.
  kUnlikely = 0,
  // The field is visible/focusable.
  kPossible = 1,
  // The user actually typed into the field before.
  kCertain = 2,
};
102 
103 // A wrapper around FormFieldData, carrying some additional data used during
104 // parsing.
105 struct ProcessedField {
106   // This points to the wrapped FormFieldData.
107   const FormFieldData* field;
108 
109   // The flag derived from field->autocomplete_attribute.
110   AutocompleteFlag autocomplete_flag = AutocompleteFlag::kNone;
111 
112   // True if field->form_control_type == "password".
113   bool is_password = false;
114 
115   // True if field is predicted to be a password.
116   bool is_predicted_as_password = false;
117 
118   // True if the server predicts that this field is not a password field.
119   bool server_hints_not_password = false;
120 
121   // True if the server predicts that this field is not a username field.
122   bool server_hints_not_username = false;
123 
124   Interactability interactability = Interactability::kUnlikely;
125 };
126 
127 // Returns true if the |str| contains words related to CVC fields.
StringMatchesCVC(const base::string16 & str)128 bool StringMatchesCVC(const base::string16& str) {
129   static const base::NoDestructor<base::string16> kCardCvcReCached(
130       base::UTF8ToUTF16(autofill::kCardCvcRe));
131 
132   return autofill::MatchesPattern(str, *kCardCvcReCached);
133 }
134 
135 // Returns true if the |str| contains words related to SSN fields.
StringMatchesSSN(const base::string16 & str)136 bool StringMatchesSSN(const base::string16& str) {
137   static const base::NoDestructor<base::string16> kSSNReCached(
138       base::UTF8ToUTF16(autofill::kSocialSecurityRe));
139 
140   return autofill::MatchesPattern(str, *kSSNReCached);
141 }
142 
143 // Returns true if the |str| contains words related to one time password fields.
StringMatchesOTP(const base::string16 & str)144 bool StringMatchesOTP(const base::string16& str) {
145   static const base::NoDestructor<base::string16> kOTPReCached(
146       base::UTF8ToUTF16(autofill::kOneTimePwdRe));
147 
148   return autofill::MatchesPattern(str, *kOTPReCached);
149 }
150 
151 // TODO(crbug.com/860700): Remove name and attribute checking once server-side
152 // provides hints for CVC.
153 // Returns true if the |field| is suspected to be not the password field.
154 // The suspicion is based on server-side provided hints and on checking the
155 // field's id and name for hinting towards a CVC code, Social Security
156 // Number or one-time password.
IsNotPasswordField(const ProcessedField & field)157 bool IsNotPasswordField(const ProcessedField& field) {
158   return field.server_hints_not_password ||
159          field.autocomplete_flag == AutocompleteFlag::kNonPassword ||
160          StringMatchesCVC(field.field->name_attribute) ||
161          StringMatchesCVC(field.field->id_attribute) ||
162          StringMatchesSSN(field.field->name_attribute) ||
163          StringMatchesSSN(field.field->id_attribute) ||
164          StringMatchesOTP(field.field->name_attribute) ||
165          StringMatchesOTP(field.field->id_attribute);
166 }
167 
168 // Returns true if the |field| is suspected to be not the username field.
IsNotUsernameField(const ProcessedField & field)169 bool IsNotUsernameField(const ProcessedField& field) {
170   return field.server_hints_not_username;
171 }
172 
173 // Checks if the Finch experiment for offering password generation for
174 // server-predicted clear-text fields is enabled.
IsPasswordGenerationForClearTextFieldsEnabled()175 bool IsPasswordGenerationForClearTextFieldsEnabled() {
176   return base::FeatureList::IsEnabled(
177       password_manager::features::KEnablePasswordGenerationForClearTextFields);
178 }
179 
180 // Returns true iff |field_type| is one of password types.
IsPasswordPrediction(const CredentialFieldType field_type)181 bool IsPasswordPrediction(const CredentialFieldType field_type) {
182   switch (field_type) {
183     case CredentialFieldType::kUsername:
184     case CredentialFieldType::kSingleUsername:
185     case CredentialFieldType::kNone:
186       return false;
187     case CredentialFieldType::kCurrentPassword:
188     case CredentialFieldType::kNewPassword:
189     case CredentialFieldType::kConfirmationPassword:
190       return true;
191   }
192   NOTREACHED();
193   return false;
194 }
195 
196 // Returns true iff |processed_field| matches the |interactability_bar|. That is
197 // when either:
198 // (1) |processed_field.interactability| is not less than |interactability_bar|,
199 //     or
200 // (2) |interactability_bar| is |kCertain|, and |processed_field| was
201 // autofilled. The second clause helps to handle the case when both Chrome and
202 // the user contribute to filling a form:
203 //
204 // <form>
205 //   <input type="password" autocomplete="current-password" id="Chrome">
206 //   <input type="password" autocomplete="new-password" id="user">
207 // </form>
208 //
209 // In the example above, imagine that Chrome filled the field with id=Chrome,
210 // and the user typed the new password in field with id=user. Then the parser
211 // should identify that id=Chrome is the current password and id=user is the new
212 // password. Without clause (2), Chrome would ignore id=Chrome.
MatchesInteractability(const ProcessedField & processed_field,Interactability interactability_bar)213 bool MatchesInteractability(const ProcessedField& processed_field,
214                             Interactability interactability_bar) {
215   return (processed_field.interactability >= interactability_bar) ||
216          (interactability_bar == Interactability::kCertain &&
217           (processed_field.field->properties_mask &
218            FieldPropertiesFlags::AUTOFILLED));
219 }
220 
DoesStringContainOnlyDigits(const base::string16 & s)221 bool DoesStringContainOnlyDigits(const base::string16& s) {
222   return std::all_of(s.begin(), s.end(), &base::IsAsciiDigit<base::char16>);
223 }
224 
225 // Heuristics to determine that a string is very unlikely to be a username.
IsProbablyNotUsername(const base::string16 & s)226 bool IsProbablyNotUsername(const base::string16& s) {
227   return s.empty() || (s.size() < 3 && DoesStringContainOnlyDigits(s));
228 }
229 
230 // Returns |typed_value| if it is not empty, |value| otherwise.
GetFieldValue(const FormFieldData & field)231 base::string16 GetFieldValue(const FormFieldData& field) {
232   return field.typed_value.empty() ? field.value : field.typed_value;
233 }
234 
235 // A helper struct that is used to capture significant fields to be used for
236 // the construction of a PasswordForm.
237 struct SignificantFields {
238   const FormFieldData* username = nullptr;
239   const FormFieldData* password = nullptr;
240   const FormFieldData* new_password = nullptr;
241   const FormFieldData* confirmation_password = nullptr;
242   // True if the information about fields could only be derived after relaxing
243   // some constraints. The resulting PasswordForm should only be used for
244   // fallback UI.
245   bool is_fallback = false;
246 
247   // True iff the new password field was found with server hints or autocomplete
248   // attributes.
249   bool is_new_password_reliable = false;
250 
251   // True if the current form has only username, but no passwords.
252   bool is_single_username = false;
253 
254   // Returns true if some password field is present. This is the minimal
255   // requirement for a successful creation of a PasswordForm is present.
HasPasswordspassword_manager::__anon5a32d2e60111::SignificantFields256   bool HasPasswords() const {
257     DCHECK(!confirmation_password || new_password)
258         << "There is no password to confirm if there is no new password field.";
259     return password || new_password;
260   }
261 
ClearAllPasswordFieldspassword_manager::__anon5a32d2e60111::SignificantFields262   void ClearAllPasswordFields() {
263     password = nullptr;
264     new_password = nullptr;
265     confirmation_password = nullptr;
266   }
267 };
268 
269 // Returns true if |field| is in |significant_fields|.
IsFieldInSignificantFields(const SignificantFields & significant_fields,const FormFieldData * field)270 bool IsFieldInSignificantFields(const SignificantFields& significant_fields,
271                                 const FormFieldData* field) {
272   return significant_fields.username == field ||
273          significant_fields.password == field ||
274          significant_fields.new_password == field ||
275          significant_fields.confirmation_password == field;
276 }
277 
DoesPredictionCorrespondToField(const FormFieldData & field,const PasswordFieldPrediction & prediction)278 bool DoesPredictionCorrespondToField(
279     const FormFieldData& field,
280     const PasswordFieldPrediction& prediction) {
281 #if defined(OS_IOS)
282   return field.unique_id == prediction.unique_id;
283 #else
284   return field.unique_renderer_id == prediction.renderer_id;
285 #endif
286 }
287 
288 // Returns the first element of |fields| which corresponds to |prediction|, or
289 // null if there is no such element.
FindField(std::vector<ProcessedField> * processed_fields,const PasswordFieldPrediction & prediction)290 ProcessedField* FindField(std::vector<ProcessedField>* processed_fields,
291                           const PasswordFieldPrediction& prediction) {
292   for (ProcessedField& processed_field : *processed_fields) {
293     if (DoesPredictionCorrespondToField(*processed_field.field, prediction))
294       return &processed_field;
295   }
296   return nullptr;
297 }
298 
299 // Tries to parse |processed_fields| based on server |predictions|. Uses |mode|
300 // to decide which of two username hints are relevant, if present.
ParseUsingPredictions(std::vector<ProcessedField> * processed_fields,const FormPredictions & predictions,FormDataParser::Mode mode,SignificantFields * result)301 void ParseUsingPredictions(std::vector<ProcessedField>* processed_fields,
302                            const FormPredictions& predictions,
303                            FormDataParser::Mode mode,
304                            SignificantFields* result) {
305   // Following the design from https://goo.gl/Mc2KRe, this code will attempt to
306   // understand the special case when there are two usernames hinted by the
307   // server. In that case, they are considered the sign-in and sign-up
308   // usernames, in the order in which the (only) current password and the first
309   // new-password come. If there is another amount of usernames, 0 or 2+ current
310   // password fields or no new password field, then the abort switch below is
311   // set and simply the first field of each kind is used.
312   bool prevent_handling_two_usernames = false;  // the abort switch
313   // Whether the first username is for sign-in.
314   bool sign_in_username_first = true;
315   // First username is stored in |result->username|.
316   const FormFieldData* second_username = nullptr;
317 
318   for (const PasswordFieldPrediction& prediction : predictions.fields) {
319     ProcessedField* processed_field = nullptr;
320 
321     CredentialFieldType field_type = DeriveFromServerFieldType(prediction.type);
322     bool is_password_prediction = IsPasswordPrediction(field_type);
323     if (mode == FormDataParser::Mode::kSaving && is_password_prediction) {
324       // TODO(crbug.com/913965): Consider server predictions for password fields
325       // in SAVING mode when the server predictions become complete.
326       continue;
327     }
328     switch (field_type) {
329       case CredentialFieldType::kUsername:
330         if (!result->username) {
331           processed_field = FindField(processed_fields, prediction);
332           if (processed_field)
333             result->username = processed_field->field;
334         } else if (!second_username) {
335           processed_field = FindField(processed_fields, prediction);
336           if (processed_field)
337             second_username = processed_field->field;
338         } else {
339           prevent_handling_two_usernames = true;
340         }
341         break;
342       case CredentialFieldType::kSingleUsername:
343         processed_field = FindField(processed_fields, prediction);
344         if (processed_field) {
345           result->username = processed_field->field;
346           result->is_single_username = true;
347           result->ClearAllPasswordFields();
348           return;
349         }
350         break;
351       case CredentialFieldType::kCurrentPassword:
352         if (result->password) {
353           prevent_handling_two_usernames = true;
354         } else {
355           processed_field = FindField(processed_fields, prediction);
356           if (processed_field) {
357             if (!processed_field->is_password)
358               continue;
359             result->password = processed_field->field;
360           }
361         }
362         break;
363       case CredentialFieldType::kNewPassword:
364         // If any (and thus the first) new password comes before the current
365         // password, the first username is understood as sign-up, not sign-in.
366         if (!result->password)
367           sign_in_username_first = false;
368 
369         // If multiple hints for new-password fields are given (e.g., because
370         // of more fields having the same signature), the first one should be
371         // marked as new-password. That way the generation can be offered
372         // before the user has thought of and typed their new password
373         // elsewhere. See https://crbug.com/902700 for more details.
374         if (!result->new_password) {
375           processed_field = FindField(processed_fields, prediction);
376           if (processed_field) {
377             if (!IsPasswordGenerationForClearTextFieldsEnabled() &&
378                 !processed_field->is_password) {
379               continue;
380             }
381             result->new_password = processed_field->field;
382             processed_field->is_predicted_as_password = true;
383           }
384         }
385         break;
386       case CredentialFieldType::kConfirmationPassword:
387         processed_field = FindField(processed_fields, prediction);
388         if (processed_field) {
389           if (!IsPasswordGenerationForClearTextFieldsEnabled() &&
390               !processed_field->is_password) {
391             continue;
392           }
393           result->confirmation_password = processed_field->field;
394           processed_field->is_predicted_as_password = true;
395         }
396         break;
397       case CredentialFieldType::kNone:
398         break;
399     }
400   }
401 
402   if (!result->new_password || !result->password)
403     prevent_handling_two_usernames = true;
404 
405   if (!prevent_handling_two_usernames && second_username) {
406     // Now that there are two usernames, |sign_in_username_first| determines
407     // which is sign-in and which sign-up.
408     const FormFieldData* sign_in = result->username;
409     const FormFieldData* sign_up = second_username;
410     if (!sign_in_username_first)
411       std::swap(sign_in, sign_up);
412     // For filling, the sign-in username is relevant, because Chrome should not
413     // fill where the credentials first need to be created. For saving, the
414     // sign-up username is relevant: if both have values, then the sign-up one
415     // was not filled and hence was typed by the user.
416     result->username =
417         mode == FormDataParser::Mode::kSaving ? sign_up : sign_in;
418   }
419 
420   // If the server suggests there is a confirmation field but no new password,
421   // something went wrong. Sanitize the result.
422   if (result->confirmation_password && !result->new_password)
423     result->confirmation_password = nullptr;
424 
425   // For the use of basic heuristics, also mark CVC fields and NOT_PASSWORD
426   // fields as such.
427   for (const PasswordFieldPrediction& prediction : predictions.fields) {
428     ProcessedField* current_field = FindField(processed_fields, prediction);
429     if (!current_field)
430       continue;
431     if (prediction.type == autofill::CREDIT_CARD_VERIFICATION_CODE ||
432         prediction.type == autofill::NOT_PASSWORD) {
433       current_field->server_hints_not_password = true;
434     } else if (prediction.type == autofill::NOT_USERNAME) {
435       current_field->server_hints_not_username = true;
436     }
437   }
438 }
439 
440 // Looks for autocomplete attributes in |processed_fields| and saves predictions
441 // to |result|. Assumption on the usage autocomplete attributes:
442 // 1. Not more than 1 field with autocomplete=username.
443 // 2. Not more than 1 field with autocomplete=current-password.
444 // 3. Not more than 2 fields with autocomplete=new-password.
445 // 4. Only password fields have "*-password" attribute and only non-password
446 //    fields have the "username" attribute.
447 // If any assumption is violated, the autocomplete attribute is ignored.
ParseUsingAutocomplete(const std::vector<ProcessedField> & processed_fields,SignificantFields * result)448 void ParseUsingAutocomplete(const std::vector<ProcessedField>& processed_fields,
449                             SignificantFields* result) {
450   bool new_password_found_by_server = result->new_password;
451   const FormFieldData* field_marked_as_username = nullptr;
452   int username_fields_found = 0;
453   for (const ProcessedField& processed_field : processed_fields) {
454     if (IsFieldInSignificantFields(*result, processed_field.field)) {
455       // Skip this field because it was already chosen in previous steps.
456       continue;
457     }
458     switch (processed_field.autocomplete_flag) {
459       case AutocompleteFlag::kUsername:
460         if (processed_field.is_password || result->username ||
461             processed_field.server_hints_not_username)
462           continue;
463         username_fields_found++;
464         field_marked_as_username = processed_field.field;
465         break;
466       case AutocompleteFlag::kCurrentPassword:
467         if (!processed_field.is_password || result->password ||
468             processed_field.server_hints_not_password)
469           continue;
470         result->password = processed_field.field;
471         break;
472       case AutocompleteFlag::kNewPassword:
473         if (!processed_field.is_password || new_password_found_by_server ||
474             processed_field.server_hints_not_password)
475           continue;
476         // The first field with autocomplete=new-password is considered to be
477         // new_password and the second is confirmation_password.
478         if (!result->new_password)
479           result->new_password = processed_field.field;
480         else if (!result->confirmation_password)
481           result->confirmation_password = processed_field.field;
482         break;
483       case AutocompleteFlag::kNonPassword:
484       case AutocompleteFlag::kNone:
485         break;
486     }
487   }
488   if (!result->username && username_fields_found == 1)
489     result->username = field_marked_as_username;
490 }
491 
492 // This computes the "likely" condition from the design https://goo.gl/ERvoEN .
493 // The |field| is likely to be a password if it is not a CVC field, not
494 // readonly, etc. |*ignored_readonly| is incremented specifically if this
495 // function returns false because of the |field| being readonly.
IsLikelyPassword(const ProcessedField & field,size_t * ignored_readonly)496 bool IsLikelyPassword(const ProcessedField& field, size_t* ignored_readonly) {
497   // Readonly fields can be an indication that filling is useless (e.g., the
498   // page might use a virtual keyboard). However, if the field was readonly
499   // only temporarily, that makes it still interesting for saving. The fact
500   // that a user typed or Chrome filled into that field in the past is an
501   // indicator that the readonly was only temporary.
502   if (field.field->is_readonly &&
503       !(field.field->properties_mask & (FieldPropertiesFlags::USER_TYPED |
504                                         FieldPropertiesFlags::AUTOFILLED))) {
505     ++*ignored_readonly;
506     return false;
507   }
508   return !IsNotPasswordField(field);
509 }
510 
511 // Filters the available passwords from |processed_fields| using these rules:
512 // (1) Passwords with Interactability below |best_interactability| are removed.
513 // (2) If |mode| == |kSaving|, passwords with empty values are removed.
514 // (3) Passwords for which IsLikelyPassword returns false are removed.
515 // If applying rules (1)-(3) results in a non-empty vector of password fields,
516 // that vector is returned. Otherwise, only rules (1) and (2) are applied and
517 // the result returned (even if it is empty).
518 // Neither of the following output parameters may be null:
519 // |readonly_status| will be updated according to the processing of the parsed
520 // fields.
521 // |is_fallback| is set to true if the filtering rule (3) was not used to
522 // obtain the result.
GetRelevantPasswords(const std::vector<ProcessedField> & processed_fields,FormDataParser::Mode mode,Interactability best_interactability,FormDataParser::ReadonlyPasswordFields * readonly_status,bool * is_fallback)523 std::vector<const FormFieldData*> GetRelevantPasswords(
524     const std::vector<ProcessedField>& processed_fields,
525     FormDataParser::Mode mode,
526     Interactability best_interactability,
527     FormDataParser::ReadonlyPasswordFields* readonly_status,
528     bool* is_fallback) {
529   DCHECK(readonly_status);
530   DCHECK(is_fallback);
531 
532   // Step 0: filter out all non-password fields.
533   std::vector<const ProcessedField*> passwords;
534   passwords.reserve(processed_fields.size());
535   for (const ProcessedField& processed_field : processed_fields) {
536     if (processed_field.is_password)
537       passwords.push_back(&processed_field);
538   }
539   if (passwords.empty())
540     return std::vector<const FormFieldData*>();
541 
542   // These two counters are used to determine the ReadonlyPasswordFields value
543   // corresponding to this form.
544   const size_t all_passwords_seen = passwords.size();
545   size_t ignored_readonly = 0;
546 
547   // Step 1: apply filter criterion (1).
548   base::EraseIf(
549       passwords, [best_interactability](const ProcessedField* processed_field) {
550         return !MatchesInteractability(*processed_field, best_interactability);
551       });
552 
553   if (mode == FormDataParser::Mode::kSaving) {
554     // Step 2: apply filter criterion (2).
555     base::EraseIf(passwords, [](const ProcessedField* processed_field) {
556       return processed_field->field->value.empty();
557     });
558   }
559 
560   // Step 3: apply filter criterion (3). Keep the current content of
561   // |passwords| though, in case it is needed for fallback.
562   std::vector<const ProcessedField*> filtered;
563   filtered.reserve(passwords.size());
564   std::copy_if(passwords.begin(), passwords.end(), std::back_inserter(filtered),
565                [&ignored_readonly](const ProcessedField* processed_field) {
566                  return IsLikelyPassword(*processed_field, &ignored_readonly);
567                });
568   // Compute the readonly statistic for metrics.
569   DCHECK_LE(ignored_readonly, all_passwords_seen);
570   if (ignored_readonly == 0)
571     *readonly_status = FormDataParser::ReadonlyPasswordFields::kNoneIgnored;
572   else if (ignored_readonly < all_passwords_seen)
573     *readonly_status = FormDataParser::ReadonlyPasswordFields::kSomeIgnored;
574   else
575     *readonly_status = FormDataParser::ReadonlyPasswordFields::kAllIgnored;
576 
577   // Ensure that |filtered| contains what needs to be returned...
578   if (filtered.empty()) {
579     filtered = std::move(passwords);
580     *is_fallback = true;
581   }
582 
583   // ...and strip ProcessedFields down to FormFieldData.
584   std::vector<const FormFieldData*> result;
585   result.reserve(filtered.size());
586   for (const ProcessedField* processed_field : filtered)
587     result.push_back(processed_field->field);
588 
589   return result;
590 }
591 
592 // Detects different password fields from |passwords|.
LocateSpecificPasswords(const std::vector<const FormFieldData * > & passwords,const FormFieldData ** current_password,const FormFieldData ** new_password,const FormFieldData ** confirmation_password)593 void LocateSpecificPasswords(const std::vector<const FormFieldData*>& passwords,
594                              const FormFieldData** current_password,
595                              const FormFieldData** new_password,
596                              const FormFieldData** confirmation_password) {
597   DCHECK(current_password);
598   DCHECK(!*current_password);
599   DCHECK(new_password);
600   DCHECK(!*new_password);
601   DCHECK(confirmation_password);
602   DCHECK(!*confirmation_password);
603 
604   switch (passwords.size()) {
605     case 1:
606       *current_password = passwords[0];
607       break;
608     case 2:
609       if (!passwords[0]->value.empty() &&
610           passwords[0]->value == passwords[1]->value) {
611         // Two identical non-empty passwords: assume we are seeing a new
612         // password with a confirmation. This can be either a sign-up form or a
613         // password change form that does not ask for the old password.
614         *new_password = passwords[0];
615         *confirmation_password = passwords[1];
616       } else {
617         // Assume first is old password, second is new (no choice but to guess).
618         // If the passwords are both empty, it is impossible to tell if they
619         // are the old and the new one, or the new one and its confirmation. In
620         // that case Chrome errs on the side of filling and classifies them as
621         // old & new to allow filling of change password forms.
622         *current_password = passwords[0];
623         *new_password = passwords[1];
624       }
625       break;
626     default:
627       // If there are more than 3 passwords it is not very clear what this form
628       // it is. Consider only the first 3 passwords in such case as a
629       // best-effort solution.
630       if (!passwords[0]->value.empty() &&
631           passwords[0]->value == passwords[1]->value &&
632           passwords[0]->value == passwords[2]->value) {
633         // All passwords are the same. Assume that the first field is the
634         // current password.
635         *current_password = passwords[0];
636       } else if (passwords[1]->value == passwords[2]->value) {
637         // New password is the duplicated one, and comes second; or empty form
638         // with at least 3 password fields.
639         *current_password = passwords[0];
640         *new_password = passwords[1];
641         *confirmation_password = passwords[2];
642       } else if (passwords[0]->value == passwords[1]->value) {
643         // It is strange that the new password comes first, but trust more which
644         // fields are duplicated than the ordering of fields. Assume that
645         // any password fields after the new password contain sensitive
646         // information that isn't actually a password (security hint, SSN, etc.)
647         *new_password = passwords[0];
648         *confirmation_password = passwords[1];
649       } else {
650         // Three different passwords, or first and last match with middle
651         // different. No idea which is which. Let's save the first password.
652         // Password selection in a prompt will allow to correct the choice.
653         *current_password = passwords[0];
654       }
655   }
656 }
657 
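// Tries to find username field among text fields from |processed_fields|
// occurring before |first_relevant_password|, preferring the closest focusable
// one. Returns nullptr if the username is not found. If |mode| is SAVING,
// ignores all fields whose values are rejected by IsProbablyNotUsername().
// Ignores all fields with interactability less than |best_interactability|.
// Unless |is_fallback| is true, fields flagged by IsNotPasswordField() or
// IsNotUsernameField() are ignored as well.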
FindUsernameFieldBaseHeuristics(const std::vector<ProcessedField> & processed_fields,const std::vector<ProcessedField>::const_iterator & first_relevant_password,FormDataParser::Mode mode,Interactability best_interactability,bool is_fallback)662 const FormFieldData* FindUsernameFieldBaseHeuristics(
663     const std::vector<ProcessedField>& processed_fields,
664     const std::vector<ProcessedField>::const_iterator& first_relevant_password,
665     FormDataParser::Mode mode,
666     Interactability best_interactability,
667     bool is_fallback) {
668   DCHECK(first_relevant_password != processed_fields.end());
669 
670   // For saving filter out empty fields and fields with values which are not
671   // username.
672   const bool is_saving = mode == FormDataParser::Mode::kSaving;
673 
674   // Search through the text input fields preceding |first_relevant_password|
675   // and find the closest one focusable and the closest one in general.
676 
677   const FormFieldData* focusable_username = nullptr;
678   const FormFieldData* username = nullptr;
679 
680   // Do reverse search to find the closest candidates preceding the password.
681   for (auto it = std::make_reverse_iterator(first_relevant_password);
682        it != processed_fields.rend(); ++it) {
683     if (it->is_password || it->is_predicted_as_password)
684       continue;
685     if (!MatchesInteractability(*it, best_interactability))
686       continue;
687     if (is_saving && IsProbablyNotUsername(it->field->value))
688       continue;
689     if (!is_fallback && IsNotPasswordField(*it))
690       continue;
691     if (!is_fallback && IsNotUsernameField(*it)) {
692       continue;
693     }
694     if (!username)
695       username = it->field;
696     if (it->field->is_focusable) {
697       focusable_username = it->field;
698       break;
699     }
700   }
701 
702   return focusable_username ? focusable_username : username;
703 }
704 
705 // A helper to return a |field|'s unique_renderer_id or
706 // kNotSetRendererId if |field| is null.
ExtractUniqueId(const FormFieldData * field)707 uint32_t ExtractUniqueId(const FormFieldData* field) {
708   return field ? field->unique_renderer_id : FormData::kNotSetRendererId;
709 }
710 
711 // Tries to find the username and password fields in |processed_fields| based
712 // on the structure (how the fields are ordered). If |mode| is SAVING, only
713 // considers non-empty fields. The |found_fields| is both an input and output
714 // argument: if some password field and the username are already present, the
715 // the function exits early. If something is missing, the function tries to
716 // complete it. The result is stored back in |found_fields|. The best
717 // interactability for usernames, which depends on position of the found
718 // passwords as well, is returned through |username_max| to be used in other
719 // kinds of analysis. If password fields end up being parsed, |readonly_status|
720 // will be updated according to that processing.
ParseUsingBaseHeuristics(const std::vector<ProcessedField> & processed_fields,FormDataParser::Mode mode,SignificantFields * found_fields,Interactability * username_max,FormDataParser::ReadonlyPasswordFields * readonly_status)721 void ParseUsingBaseHeuristics(
722     const std::vector<ProcessedField>& processed_fields,
723     FormDataParser::Mode mode,
724     SignificantFields* found_fields,
725     Interactability* username_max,
726     FormDataParser::ReadonlyPasswordFields* readonly_status) {
727   // If there is both the username and the minimal set of fields to build a
728   // PasswordForm, return early -- no more work to do.
729   if (found_fields->HasPasswords() && found_fields->username)
730     return;
731 
732   // Will point to the password included in |found_field| which is first in the
733   // order of fields in |processed_fields|.
734   auto first_relevant_password = processed_fields.end();
735 
736   if (!found_fields->HasPasswords()) {
737     // What is the best interactability among passwords?
738     Interactability password_max = Interactability::kUnlikely;
739     for (const ProcessedField& processed_field : processed_fields) {
740       if (processed_field.is_password && !IsNotPasswordField(processed_field))
741         password_max = std::max(password_max, processed_field.interactability);
742     }
743 
744     // Try to find password elements (current, new, confirmation) among those
745     // with best interactability.
746     std::vector<const FormFieldData*> passwords =
747         GetRelevantPasswords(processed_fields, mode, password_max,
748                              readonly_status, &found_fields->is_fallback);
749     if (passwords.empty())
750       return;
751     LocateSpecificPasswords(passwords, &found_fields->password,
752                             &found_fields->new_password,
753                             &found_fields->confirmation_password);
754     if (!found_fields->HasPasswords())
755       return;
756     for (auto it = processed_fields.begin(); it != processed_fields.end();
757          ++it) {
758       if (it->field == passwords[0]) {
759         first_relevant_password = it;
760         break;
761       }
762     }
763   } else {
764     const uint32_t password_ids[] = {
765         ExtractUniqueId(found_fields->password),
766         ExtractUniqueId(found_fields->new_password),
767         ExtractUniqueId(found_fields->confirmation_password)};
768     for (auto it = processed_fields.begin(); it != processed_fields.end();
769          ++it) {
770       if ((it->is_password || it->is_predicted_as_password) &&
771           base::Contains(password_ids, it->field->unique_renderer_id)) {
772         first_relevant_password = it;
773         break;
774       }
775     }
776   }
777   DCHECK(first_relevant_password != processed_fields.end());
778 
779   if (found_fields->username)
780     return;
781 
782   // What is the best interactability among text fields preceding the passwords?
783   *username_max = Interactability::kUnlikely;
784   for (auto it = processed_fields.begin(); it != first_relevant_password;
785        ++it) {
786     if (!it->is_password && !IsNotPasswordField(*it))
787       *username_max = std::max(*username_max, it->interactability);
788   }
789 
790   found_fields->username = FindUsernameFieldBaseHeuristics(
791       processed_fields, first_relevant_password, mode, *username_max,
792       found_fields->is_fallback);
793   return;
794 }
795 
796 // Helper to get the platform specific identifier by which autofill and password
// manager refer to a field. The fuzzing infrastructure does not run on iOS, so
798 // the iOS specific parts of PasswordForm are also built on fuzzer enabled
799 // platforms. See http://crbug.com/896594
GetPlatformSpecificIdentifier(const FormFieldData & field)800 string16 GetPlatformSpecificIdentifier(const FormFieldData& field) {
801 #if defined(OS_IOS)
802   return field.unique_id;
803 #else
804   return field.name;
805 #endif
806 }
807 
808 // Set username and password fields in |password_form| based on
809 // |significant_fields| .
SetFields(const SignificantFields & significant_fields,PasswordForm * password_form)810 void SetFields(const SignificantFields& significant_fields,
811                PasswordForm* password_form) {
812 #if !defined(OS_IOS)
813   password_form->has_renderer_ids = true;
814 #endif
815   if (significant_fields.username) {
816     password_form->username_element =
817         GetPlatformSpecificIdentifier(*significant_fields.username);
818     password_form->username_value = GetFieldValue(*significant_fields.username);
819     password_form->username_element_renderer_id =
820         significant_fields.username->unique_renderer_id;
821   }
822 
823   if (significant_fields.password) {
824     password_form->password_element =
825         GetPlatformSpecificIdentifier(*significant_fields.password);
826     password_form->password_value = GetFieldValue(*significant_fields.password);
827     password_form->password_element_renderer_id =
828         significant_fields.password->unique_renderer_id;
829   }
830 
831   if (significant_fields.new_password) {
832     password_form->new_password_element =
833         GetPlatformSpecificIdentifier(*significant_fields.new_password);
834     password_form->new_password_value =
835         GetFieldValue(*significant_fields.new_password);
836     password_form->new_password_element_renderer_id =
837         significant_fields.new_password->unique_renderer_id;
838   }
839 
840   if (significant_fields.confirmation_password) {
841     password_form->confirmation_password_element =
842         GetPlatformSpecificIdentifier(
843             *significant_fields.confirmation_password);
844     password_form->confirmation_password_element_renderer_id =
845         significant_fields.confirmation_password->unique_renderer_id;
846   }
847 }
848 
// For each relevant field of |fields| computes additional data useful for
// parsing and wraps that in a ProcessedField. Returns the vector of all those
// ProcessedField instances; fields which are not text input elements are
// skipped. Also, computes the vector of all password values and
// associated element names in |all_possible_passwords|, and similarly for
// usernames and |all_possible_usernames|. If |mode| is |kSaving|, fields with
// empty values are ignored.
ProcessFields(const std::vector<FormFieldData> & fields,autofill::ValueElementVector * all_possible_passwords,autofill::ValueElementVector * all_possible_usernames,FormDataParser::Mode mode)856 std::vector<ProcessedField> ProcessFields(
857     const std::vector<FormFieldData>& fields,
858     autofill::ValueElementVector* all_possible_passwords,
859     autofill::ValueElementVector* all_possible_usernames,
860     FormDataParser::Mode mode) {
861   DCHECK(all_possible_passwords);
862   DCHECK(all_possible_passwords->empty());
863 
864   std::vector<ProcessedField> result;
865   result.reserve(fields.size());
866 
867   // |all_possible_passwords| should only contain each value once.
868   // |seen_password_values| ensures that duplicates are ignored.
869   std::set<base::StringPiece16> seen_password_values;
870   // Similarly for usernames.
871   std::set<base::StringPiece16> seen_username_values;
872 
873   const bool consider_only_non_empty = mode == FormDataParser::Mode::kSaving;
874   for (const FormFieldData& field : fields) {
875     if (!field.IsTextInputElement())
876       continue;
877     if (consider_only_non_empty && field.value.empty())
878       continue;
879 
880     const bool is_password = field.form_control_type == "password";
881 
882     if (!field.value.empty()) {
883       std::set<base::StringPiece16>& seen_values =
884           is_password ? seen_password_values : seen_username_values;
885       autofill::ValueElementVector* all_possible_fields =
886           is_password ? all_possible_passwords : all_possible_usernames;
887       // Only the field name of the first occurrence is added.
888       auto insertion = seen_values.insert(base::StringPiece16(field.value));
889       if (insertion.second) {
890         // There was no such element in |seen_values|.
891         all_possible_fields->push_back({field.value, field.name});
892       }
893     }
894 
895     const AutocompleteFlag flag =
896         ExtractAutocompleteFlag(field.autocomplete_attribute);
897 
898     ProcessedField processed_field = {
899         .field = &field, .autocomplete_flag = flag, .is_password = is_password};
900 
901     if (field.properties_mask & FieldPropertiesFlags::USER_TYPED)
902       processed_field.interactability = Interactability::kCertain;
903     else if (field.is_focusable)
904       processed_field.interactability = Interactability::kPossible;
905 
906     result.push_back(processed_field);
907   }
908 
909   return result;
910 }
911 
912 // Find the first element in |username_predictions| (i.e. the most reliable
913 // prediction) that occurs in |processed_fields| and has interactability level
914 // at least |username_max|.
FindUsernameInPredictions(const std::vector<uint32_t> & username_predictions,const std::vector<ProcessedField> & processed_fields,Interactability username_max)915 const FormFieldData* FindUsernameInPredictions(
916     const std::vector<uint32_t>& username_predictions,
917     const std::vector<ProcessedField>& processed_fields,
918     Interactability username_max) {
919   for (uint32_t predicted_id : username_predictions) {
920     auto iter = std::find_if(
921         processed_fields.begin(), processed_fields.end(),
922         [predicted_id, username_max](const ProcessedField& processed_field) {
923           return processed_field.field->unique_renderer_id == predicted_id &&
924                  MatchesInteractability(processed_field, username_max);
925         });
926     if (iter != processed_fields.end()) {
927       return iter->field;
928     }
929   }
930   return nullptr;
931 }
932 
// Returns true if |significant_fields| has a username field and
934 // |form_predictions| has |may_use_prefilled_placeholder| == true for the
935 // username field.
GetMayUsePrefilledPlaceholder(const base::Optional<FormPredictions> & form_predictions,const SignificantFields & significant_fields)936 bool GetMayUsePrefilledPlaceholder(
937     const base::Optional<FormPredictions>& form_predictions,
938     const SignificantFields& significant_fields) {
939   if (!base::FeatureList::IsEnabled(
940           password_manager::features::kEnableOverwritingPlaceholderUsernames))
941     return false;
942 
943   if (!form_predictions || !significant_fields.username)
944     return false;
945 
946   uint32_t username_id = significant_fields.username->unique_renderer_id;
947   for (const PasswordFieldPrediction& prediction : form_predictions->fields) {
948     if (prediction.renderer_id == username_id)
949       return prediction.may_use_prefilled_placeholder;
950   }
951   return false;
952 }
953 
954 // Puts together a PasswordForm, the result of the parsing, based on the
955 // |form_data| description of the form metadata (e.g., action), the already
956 // parsed information about what are the |significant_fields|, the list
957 // |all_possible_passwords| of all non-empty password values which occurred in
958 // the form and their associated element names, and the list
959 // |all_possible_usernames| of all non-empty username values which
960 // occurred in the form and their associated elements. |form_predictions| is
// used to find fields that may have prefilled placeholders.
AssemblePasswordForm(const FormData & form_data,const SignificantFields & significant_fields,autofill::ValueElementVector all_possible_passwords,autofill::ValueElementVector all_possible_usernames,const base::Optional<FormPredictions> & form_predictions)962 std::unique_ptr<PasswordForm> AssemblePasswordForm(
963     const FormData& form_data,
964     const SignificantFields& significant_fields,
965     autofill::ValueElementVector all_possible_passwords,
966     autofill::ValueElementVector all_possible_usernames,
967     const base::Optional<FormPredictions>& form_predictions) {
968   if (!significant_fields.HasPasswords() &&
969       !significant_fields.is_single_username) {
970     return nullptr;
971   }
972 
973   // Create the PasswordForm and set data not related to specific fields.
974   auto result = std::make_unique<PasswordForm>();
975   result->origin = form_data.url;
976   result->signon_realm = GetSignonRealm(form_data.url);
977   result->action = form_data.action;
978   result->form_data = form_data;
979   result->all_possible_passwords = std::move(all_possible_passwords);
980   result->all_possible_usernames = std::move(all_possible_usernames);
981   result->scheme = PasswordForm::Scheme::kHtml;
982   result->blacklisted_by_user = false;
983   result->type = PasswordForm::Type::kManual;
984   result->username_may_use_prefilled_placeholder =
985       GetMayUsePrefilledPlaceholder(form_predictions, significant_fields);
986   result->is_new_password_reliable =
987       significant_fields.is_new_password_reliable;
988   result->only_for_fallback = significant_fields.is_fallback;
989   result->submission_event = form_data.submission_event;
990 
991   for (const FormFieldData& field : form_data.fields) {
992     if (field.form_control_type == "password" &&
993         (field.properties_mask & FieldPropertiesFlags::AUTOFILLED)) {
994       result->form_has_autofilled_value = true;
995     }
996   }
997 
998   // Set data related to specific fields.
999   SetFields(significant_fields, result.get());
1000   return result;
1001 }
1002 
1003 }  // namespace
1004 
// No custom construction logic is needed, so the compiler-generated
// constructor is used.
FormDataParser::FormDataParser() = default;

// Likewise, the compiler-generated destructor is sufficient.
FormDataParser::~FormDataParser() = default;
1008 
Parse(const FormData & form_data,Mode mode)1009 std::unique_ptr<PasswordForm> FormDataParser::Parse(const FormData& form_data,
1010                                                     Mode mode) {
1011   if (form_data.fields.size() > kMaxParseableFields)
1012     return nullptr;
1013 
1014   readonly_status_ = ReadonlyPasswordFields::kNoHeuristics;
1015   autofill::ValueElementVector all_possible_passwords;
1016   autofill::ValueElementVector all_possible_usernames;
1017   std::vector<ProcessedField> processed_fields = ProcessFields(
1018       form_data.fields, &all_possible_passwords, &all_possible_usernames, mode);
1019 
1020   if (processed_fields.empty())
1021     return nullptr;
1022 
1023   SignificantFields significant_fields;
1024   UsernameDetectionMethod username_detection_method =
1025       UsernameDetectionMethod::kNoUsernameDetected;
1026 
1027   // (1) First, try to parse with server predictions.
1028   if (predictions_) {
1029     ParseUsingPredictions(&processed_fields, *predictions_, mode,
1030                           &significant_fields);
1031     if (significant_fields.username) {
1032       username_detection_method =
1033           UsernameDetectionMethod::kServerSidePrediction;
1034     }
1035   }
1036 
1037   // (2) If that failed, try to parse with autocomplete attributes.
1038   if (!significant_fields.is_single_username) {
1039     ParseUsingAutocomplete(processed_fields, &significant_fields);
1040     if (username_detection_method ==
1041             UsernameDetectionMethod::kNoUsernameDetected &&
1042         significant_fields.username) {
1043       username_detection_method =
1044           UsernameDetectionMethod::kAutocompleteAttribute;
1045     }
1046   }
1047 
1048   // Pass the "reliability" information to mark the new-password fields as
1049   // eligible for automatic password generation. This only makes sense when
1050   // forms are analysed for filling, because no passwords are generated when the
1051   // user saves the already entered one.
1052   if (mode == Mode::kFilling && significant_fields.new_password) {
1053     significant_fields.is_new_password_reliable = true;
1054   }
1055 
1056   // (3) Now try to fill the gaps.
1057   const bool username_found_before_heuristic = significant_fields.username;
1058 
1059   // Try to parse with base heuristic.
1060   if (!significant_fields.is_single_username) {
1061     Interactability username_max = Interactability::kUnlikely;
1062     ParseUsingBaseHeuristics(processed_fields, mode, &significant_fields,
1063                              &username_max, &readonly_status_);
1064     if (username_detection_method ==
1065             UsernameDetectionMethod::kNoUsernameDetected &&
1066         significant_fields.username) {
1067       username_detection_method = UsernameDetectionMethod::kBaseHeuristic;
1068     }
1069 
1070     // Additionally, and based on the best interactability computed by base
1071     // heuristics, try to improve the username based on the context of the
1072     // fields, unless the username already came from more reliable types of
1073     // analysis.
1074     if (!username_found_before_heuristic) {
1075       const FormFieldData* username_field_by_context =
1076           FindUsernameInPredictions(form_data.username_predictions,
1077                                     processed_fields, username_max);
1078       if (username_field_by_context &&
1079           !(mode == FormDataParser::Mode::kSaving &&
1080             username_field_by_context->value.empty())) {
1081         significant_fields.username = username_field_by_context;
1082         if (username_detection_method ==
1083                 UsernameDetectionMethod::kNoUsernameDetected ||
1084             username_detection_method ==
1085                 UsernameDetectionMethod::kBaseHeuristic) {
1086           username_detection_method =
1087               UsernameDetectionMethod::kHtmlBasedClassifier;
1088         }
1089       }
1090     }
1091   }
1092 
1093   UMA_HISTOGRAM_ENUMERATION("PasswordManager.UsernameDetectionMethod",
1094                             username_detection_method,
1095                             UsernameDetectionMethod::kCount);
1096 
1097   return AssemblePasswordForm(form_data, significant_fields,
1098                               std::move(all_possible_passwords),
1099                               std::move(all_possible_usernames), predictions_);
1100 }
1101 
GetSignonRealm(const GURL & url)1102 std::string GetSignonRealm(const GURL& url) {
1103   GURL::Replacements rep;
1104   rep.ClearUsername();
1105   rep.ClearPassword();
1106   rep.ClearQuery();
1107   rep.ClearRef();
1108   rep.SetPathStr(std::string());
1109   return url.ReplaceComponents(rep).spec();
1110 }
1111 
1112 }  // namespace password_manager
1113