1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <vector>
7 
8 #include "base/bind.h"
9 #include "base/command_line.h"
10 #include "base/feature_list.h"
11 #include "base/files/file_enumerator.h"
12 #include "base/files/file_path.h"
13 #include "base/macros.h"
14 #include "base/no_destructor.h"
15 #include "base/path_service.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/test/scoped_feature_list.h"
20 #include "build/build_config.h"
21 #include "chrome/browser/ui/browser.h"
22 #include "chrome/browser/ui/tabs/tab_strip_model.h"
23 #include "chrome/common/chrome_paths.h"
24 #include "chrome/test/base/in_process_browser_test.h"
25 #include "chrome/test/base/ui_test_utils.h"
26 #include "components/autofill/content/browser/content_autofill_driver.h"
27 #include "components/autofill/content/browser/content_autofill_driver_factory.h"
28 #include "components/autofill/core/browser/autofill_experiments.h"
29 #include "components/autofill/core/browser/autofill_manager.h"
30 #include "components/autofill/core/browser/data_driven_test.h"
31 #include "components/autofill/core/browser/form_structure.h"
32 #include "components/autofill/core/browser/pattern_provider/pattern_configuration_parser.h"
33 #include "components/autofill/core/common/autofill_features.h"
34 #include "components/autofill/core/common/renderer_id.h"
35 #include "content/public/common/content_switches.h"
36 #include "content/public/test/browser_test.h"
37 #include "net/http/http_status_code.h"
38 #include "net/test/embedded_test_server/embedded_test_server.h"
39 #include "net/test/embedded_test_server/http_request.h"
40 #include "net/test/embedded_test_server/http_response.h"
41 #include "url/gurl.h"
42 
43 #if defined(OS_MAC)
44 #include "base/mac/foundation_util.h"
45 #endif
46 
47 namespace autofill {
48 namespace {
49 
50 using net::test_server::BasicHttpResponse;
51 using net::test_server::HttpRequest;
52 using net::test_server::HttpResponse;
53 
54 const base::FilePath::CharType kTestName[] = FILE_PATH_LITERAL("heuristics");
55 
GetFailingTestNames()56 const std::set<base::FilePath::StringType>& GetFailingTestNames() {
57   static auto* failing_test_names = new std::set<base::FilePath::StringType>{};
58   return *failing_test_names;
59 }
60 
GetTestDataDir()61 const base::FilePath& GetTestDataDir() {
62   static base::NoDestructor<base::FilePath> dir([]() {
63     base::FilePath dir;
64     base::PathService::Get(base::DIR_SOURCE_ROOT, &dir);
65     dir = dir.AppendASCII("components").AppendASCII("test").AppendASCII("data");
66     return dir;
67   }());
68   return *dir;
69 }
70 
GetInputDir()71 const base::FilePath GetInputDir() {
72   static base::FilePath input_dir = GetTestDataDir()
73                                         .AppendASCII("autofill")
74                                         .Append(kTestName)
75                                         .AppendASCII("input");
76   return input_dir;
77 }
78 
GetTestFiles()79 std::vector<base::FilePath> GetTestFiles() {
80   base::FileEnumerator input_files(GetInputDir(), false,
81                                    base::FileEnumerator::FILES);
82   std::vector<base::FilePath> files;
83   for (base::FilePath input_file = input_files.Next(); !input_file.empty();
84        input_file = input_files.Next()) {
85     files.push_back(input_file);
86   }
87   std::sort(files.begin(), files.end());
88 
89 #if defined(OS_MAC)
90   base::mac::ClearAmIBundledCache();
91 #endif  // defined(OS_MAC)
92 
93   return files;
94 }
95 
FormStructuresToString(const std::map<FormRendererId,std::unique_ptr<FormStructure>> & forms)96 std::string FormStructuresToString(
97     const std::map<FormRendererId, std::unique_ptr<FormStructure>>& forms) {
98   std::string forms_string;
99   // The forms are sorted by renderer ID, which should make the order
100   // deterministic.
101   for (const auto& kv : forms) {
102     const auto* form = kv.second.get();
103     std::map<std::string, int> section_to_index;
104     for (const auto& field : *form) {
105       // Normalize the section by replacing the unique but platform-dependent
106       // integers in |field->section| with consecutive unique integers.
107       std::string section = field->section;
108       size_t last_underscore = section.find_last_of('_');
109       size_t next_dash = section.find_first_of('-', last_underscore);
110       int new_section_index = static_cast<int>(section_to_index.size() + 1);
111       int section_index =
112           section_to_index.insert(std::make_pair(section, new_section_index))
113               .first->second;
114       if (last_underscore != std::string::npos &&
115           next_dash != std::string::npos) {
116         section = base::StringPrintf(
117             "%s%d%s", section.substr(0, last_underscore + 1).c_str(),
118             section_index, section.substr(next_dash).c_str());
119       }
120 
121       forms_string += base::JoinString(
122           {field->Type().ToString(), base::UTF16ToUTF8(field->name),
123            base::UTF16ToUTF8(field->label), base::UTF16ToUTF8(field->value),
124            section},
125           base::StringPiece(" | "));
126       forms_string += "\n";
127     }
128   }
129   return forms_string;
130 }
131 
132 }  // namespace
133 
134 // A data-driven test for verifying Autofill heuristics. Each input is an HTML
135 // file that contains one or more forms. The corresponding output file lists the
136 // heuristically detected type for each field.
137 class FormStructureBrowserTest
138     : public InProcessBrowserTest,
139       public DataDrivenTest,
140       public ::testing::WithParamInterface<base::FilePath> {
141  protected:
142   FormStructureBrowserTest();
143   ~FormStructureBrowserTest() override;
144 
145   // InProcessBrowserTest
146   void SetUpCommandLine(base::CommandLine* command_line) override;
147 
148   // BrowserTestBase
149   void SetUpOnMainThread() override;
150 
151   // DataDrivenTest:
152   void GenerateResults(const std::string& input, std::string* output) override;
153 
154  private:
155   std::unique_ptr<HttpResponse> HandleRequest(const HttpRequest& request);
156 
157   base::test::ScopedFeatureList feature_list_;
158 
159   // The response content to be returned by the embedded test server. Note that
160   // this is populated in the main thread as a part of the setup in the
161   // GenerateResults method but it is consumed later in the IO thread by the
162   // embedded test server to generate the response.
163   std::string html_content_;
164   DISALLOW_COPY_AND_ASSIGN(FormStructureBrowserTest);
165 };
166 
FormStructureBrowserTest()167 FormStructureBrowserTest::FormStructureBrowserTest()
168     : DataDrivenTest(GetTestDataDir()) {
169   feature_list_.InitWithFeatures(
170       // Enabled
171       {// TODO(crbug.com/1098943): Remove once experiment is over.
172        autofill::features::kAutofillEnableSupportForMoreStructureInNames,
173        // TODO(crbug.com/1125978): Remove once launched.
174        autofill::features::kAutofillEnableSupportForMoreStructureInAddresses,
175        // TODO(crbug.com/896689): Remove once launched.
176        autofill::features::kAutofillNameSectionsWithRendererIds,
177        // TODO(crbug.com/1076175) Remove once launched.
178        autofill::features::kAutofillUseNewSectioningMethod},
179       // Disabled
180       {autofill::features::kAutofillRestrictUnownedFieldsToFormlessCheckout});
181 }
182 
~FormStructureBrowserTest()183 FormStructureBrowserTest::~FormStructureBrowserTest() {}
184 
SetUpCommandLine(base::CommandLine * command_line)185 void FormStructureBrowserTest::SetUpCommandLine(
186     base::CommandLine* command_line) {
187   InProcessBrowserTest::SetUpCommandLine(command_line);
188   // Suppress most output logs because we can't really control the output for
189   // arbitrary test sites.
190   command_line->AppendSwitchASCII(switches::kLoggingLevel, "2");
191 }
192 
SetUpOnMainThread()193 void FormStructureBrowserTest::SetUpOnMainThread() {
194   InProcessBrowserTest::SetUpOnMainThread();
195 
196   // Load the MatchingPattern definitions.
197   base::RunLoop run_loop;
198   field_type_parsing::PopulateFromResourceBundle(run_loop.QuitClosure());
199   run_loop.Run();
200 
201   embedded_test_server()->RegisterRequestHandler(base::BindRepeating(
202       &FormStructureBrowserTest::HandleRequest, base::Unretained(this)));
203   ASSERT_TRUE(embedded_test_server()->Start());
204 }
205 
GenerateResults(const std::string & input,std::string * output)206 void FormStructureBrowserTest::GenerateResults(const std::string& input,
207                                                std::string* output) {
208   // Cache the content to be returned by the embedded test server. This data
209   // is readonly after this point.
210   html_content_.clear();
211   html_content_.reserve(input.length());
212   for (const char c : input) {
213     // Strip `\n`, `\t`, `\r` from |html| to match old `data:` URL behavior.
214     // TODO(crbug/239819): the tests expect weird concatenation behavior based
215     //   legacy data URL behavior. Fix this so the the tests better represent
216     //   the parsing being done in the wild.
217     if (c != '\r' && c != '\n' && c != '\t')
218       html_content_.push_back(c);
219   }
220 
221   // Navigate to the test html content.
222   ASSERT_NO_FATAL_FAILURE(ui_test_utils::NavigateToURL(
223       browser(), embedded_test_server()->GetURL("/test.html")));
224 
225   // Dump the form fields (and their inferred field types).
226   content::WebContents* web_contents =
227       browser()->tab_strip_model()->GetActiveWebContents();
228   ContentAutofillDriver* autofill_driver =
229       ContentAutofillDriverFactory::FromWebContents(web_contents)
230           ->DriverForFrame(web_contents->GetMainFrame());
231   ASSERT_NE(nullptr, autofill_driver);
232   AutofillManager* autofill_manager = autofill_driver->autofill_manager();
233   ASSERT_NE(nullptr, autofill_manager);
234   *output = FormStructuresToString(autofill_manager->form_structures());
235 }
236 
HandleRequest(const HttpRequest & request)237 std::unique_ptr<HttpResponse> FormStructureBrowserTest::HandleRequest(
238     const HttpRequest& request) {
239   auto response = std::make_unique<BasicHttpResponse>();
240   response->set_code(net::HTTP_OK);
241   response->set_content(html_content_);
242   response->set_content_type("text/html; charset=utf-8");
243   return std::move(response);
244 }
245 
// Runs the data-driven test for the input file given as test parameter.
IN_PROC_BROWSER_TEST_P(FormStructureBrowserTest, DataDrivenHeuristics) {
  const base::FilePath& test_file = GetParam();

  // Log the path of the input file that is about to be executed.
  LOG(INFO) << test_file.MaybeAsASCII();

  // An input is expected to fail exactly when its base name is listed in
  // GetFailingTestNames().
  const bool is_known_failure =
      base::Contains(GetFailingTestNames(), test_file.BaseName().value());
  RunOneDataDrivenTest(test_file, GetOutputDirectory(kTestName),
                       /*is_expected_to_pass=*/!is_known_failure);
}
254 
255 INSTANTIATE_TEST_SUITE_P(AllForms,
256                          FormStructureBrowserTest,
257                          testing::ValuesIn(GetTestFiles()));
258 
259 }  // namespace autofill
260