1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <string>
7 #include <utility>
8
9 #include "base/files/file_util.h"
10 #include "base/files/scoped_temp_dir.h"
11 #include "base/macros.h"
12 #include "base/run_loop.h"
13 #include "base/strings/string_util.h"
14 #include "base/test/bind.h"
15 #include "base/threading/thread_restrictions.h"
16 #include "content/browser/renderer_host/navigation_request.h"
17 #include "content/browser/renderer_host/render_frame_host_impl.h"
18 #include "content/browser/web_contents/web_contents_impl.h"
19 #include "content/public/test/browser_test.h"
20 #include "content/public/test/browser_test_utils.h"
21 #include "content/public/test/content_browser_test.h"
22 #include "content/public/test/content_browser_test_utils.h"
23 #include "content/public/test/navigation_handle_observer.h"
24 #include "content/public/test/test_utils.h"
25 #include "content/shell/browser/shell.h"
26 #include "mojo/public/c/system/trap.h"
27 #include "mojo/public/c/system/types.h"
28 #include "mojo/public/cpp/system/data_pipe.h"
29 #include "mojo/public/cpp/system/handle_signals_state.h"
30 #include "mojo/public/cpp/system/simple_watcher.h"
31 #include "net/base/filename_util.h"
32 #include "net/dns/mock_host_resolver.h"
33 #include "services/network/public/cpp/web_sandbox_flags.h"
34 #include "url/gurl.h"
35 #include "url/url_constants.h"
36
37 namespace content {
38
39 namespace {
40
41 // Tests about navigations to MHTML archives.
42 class NavigationMhtmlBrowserTest : public ContentBrowserTest {
43 public:
web_contents() const44 WebContentsImpl* web_contents() const {
45 return static_cast<WebContentsImpl*>(shell()->web_contents());
46 }
47
main_frame_host()48 RenderFrameHostImpl* main_frame_host() {
49 return web_contents()->GetFrameTree()->root()->current_frame_host();
50 }
51
52 protected:
SetUpOnMainThread()53 void SetUpOnMainThread() final {
54 ContentBrowserTest::SetUpOnMainThread();
55 host_resolver()->AddRule("*", "127.0.0.1");
56 }
57 };
58
59 // Helper class: Build MHTML documents easily in tests.
60 class MhtmlArchive {
61 public:
62 MhtmlArchive() = default;
~MhtmlArchive()63 ~MhtmlArchive() {
64 base::ScopedAllowBlockingForTesting allow_blocking_;
65 EXPECT_TRUE(file_directory_.Delete());
66 }
67
AddResource(const std::string content)68 void AddResource(const std::string content) {
69 content_ += "\n--MHTML_BOUNDARY\n" + content;
70 }
71
AddHtmlDocument(const GURL & url,const std::string headers,const std::string body)72 void AddHtmlDocument(const GURL& url,
73 const std::string headers,
74 const std::string body) {
75 const char* document_template =
76 "Content-Type: text/html\n"
77 "Content-Location: $1\n"
78 "$2"
79 "\n"
80 "$3";
81 AddResource(base::ReplaceStringPlaceholders(
82 document_template, {url.spec(), headers, body}, nullptr));
83 }
84
AddHtmlDocument(const GURL & url,const std::string body)85 void AddHtmlDocument(const GURL& url, const std::string body) {
86 AddHtmlDocument(url, "" /* headers */, body);
87 }
88
89 // Writes the MHTML archive into a file and returns its URL.
Write(const std::string & file)90 const GURL Write(const std::string& file) {
91 const char* document_header =
92 "From: The chromium developers\n"
93 "Subject: <the subject>\n"
94 "Date: Mon, May 27 2019 11:55:42 GMT+0200\n"
95 "MIME-Version: 1.0\n"
96 "Content-Type: multipart/related;"
97 " boundary=\"MHTML_BOUNDARY\";"
98 " type=\"text/html\"\n";
99 std::string document = document_header + content_ + "\n--MHTML_BOUNDARY--";
100
101 // MHTML uses carriage return before every new lines.
102 base::ReplaceChars(document, "\n", "\r\n", &document);
103
104 base::ScopedAllowBlockingForTesting allow_blocking_;
105 EXPECT_TRUE(file_directory_.CreateUniqueTempDir());
106 base::FilePath file_path = file_directory_.GetPath().AppendASCII(file);
107 EXPECT_TRUE(base::WriteFile(file_path, document));
108 return net::FilePathToFileURL(file_path);
109 }
110
111 private:
112 base::ScopedTempDir file_directory_;
113 std::string content_;
114
115 DISALLOW_COPY_AND_ASSIGN(MhtmlArchive);
116 };
117
118 } // namespace
119
120 // An MHTML document with an iframe. The iframe's document is found in the
121 // archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeFound)122 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeFound) {
123 MhtmlArchive mhtml_archive;
124 mhtml_archive.AddHtmlDocument(
125 GURL("http://example.com"),
126 "<iframe src=\"http://example.com/found.html\"></iframe>");
127 mhtml_archive.AddHtmlDocument(GURL("http://example.com/found.html"),
128 "<iframe></iframe>");
129 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
130
131 NavigationHandleObserver iframe_navigation(
132 web_contents(), GURL("http://example.com/found.html"));
133 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
134
135 RenderFrameHostImpl* main_document = main_frame_host();
136 ASSERT_EQ(1u, main_document->child_count());
137 RenderFrameHostImpl* sub_document =
138 main_document->child_at(0)->current_frame_host();
139
140 EXPECT_TRUE(main_document->is_mhtml_document());
141 EXPECT_TRUE(sub_document->is_mhtml_document());
142
143 // When the iframe's content is loaded from the MHTML archive, a successful
144 // commit using the provided URL happens, even if the resource wasn't loaded
145 // from this URL initially.
146 EXPECT_EQ(GURL("http://example.com/found.html"),
147 sub_document->GetLastCommittedURL());
148 EXPECT_TRUE(iframe_navigation.has_committed());
149 EXPECT_FALSE(iframe_navigation.is_error());
150
151 // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
152 // can't be used, because javascript is disabled. Instead, check it was able
153 // to load an iframe.
154 EXPECT_EQ(1u, sub_document->child_count());
155 }
156
157 // An MHTML document with an iframe. The iframe's document is not found in the
158 // archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeNotFound)159 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeNotFound) {
160 MhtmlArchive mhtml_archive;
161 mhtml_archive.AddHtmlDocument(
162 GURL("http://example.com"),
163 "<iframe src=\"http://example.com/not_found.html\"></iframe>");
164 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
165 NavigationHandleObserver iframe_navigation_observer(
166 web_contents(), GURL("http://example.com/not_found.html"));
167 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
168
169 RenderFrameHostImpl* main_document = main_frame_host();
170 ASSERT_EQ(1u, main_document->child_count());
171 RenderFrameHostImpl* sub_document =
172 main_document->child_at(0)->current_frame_host();
173
174 EXPECT_TRUE(main_document->is_mhtml_document());
175 EXPECT_TRUE(sub_document->is_mhtml_document());
176
177 // This should commit as a failed navigation, but the browser side doesn't
178 // have enough information to make that determination. On the renderer side,
179 // there's no existing way to turn `CommitNavigation()` into
180 // `CommitFailedNavigation()`.
181 // TODO(https://crbug.com/1112965): Fix this by implementing a MHTML
182 // URLLoaderFactory; then failure to find the resource can use the standard
183 // error handling path.
184 EXPECT_TRUE(iframe_navigation_observer.has_committed());
185 EXPECT_FALSE(iframe_navigation_observer.is_error());
186 EXPECT_EQ(GURL("http://example.com/not_found.html"),
187 sub_document->GetLastCommittedURL());
188 }
189
190 // An MHTML document with an iframe using a data-URL. The data-URL is not
191 // defined in the MHTML archive.
192 // TODO(https://crbug.com/967307): Enable this test. It currently reaches a
193 // DCHECK or timeout in release mode.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeDataUrlNotFound)194 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeDataUrlNotFound) {
195 MhtmlArchive mhtml_archive;
196 mhtml_archive.AddHtmlDocument(
197 GURL("http://example.com"),
198 "<iframe src=\"data:text/html,<iframe></iframe>\"></iframe>");
199 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
200
201 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
202 RenderFrameHostImpl* main_document = main_frame_host();
203
204 ASSERT_EQ(1u, main_document->child_count());
205 RenderFrameHostImpl* sub_document =
206 main_document->child_at(0)->current_frame_host();
207 EXPECT_EQ(GURL("data:text/html,<iframe></iframe>"),
208 sub_document->GetLastCommittedURL());
209
210 EXPECT_TRUE(main_document->is_mhtml_document());
211 EXPECT_FALSE(sub_document->is_mhtml_document()); // Served from data-url.
212
213 // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
214 // can't be used, because javascript is disabled. Instead, check it was able
215 // to load an iframe.
216 EXPECT_EQ(1u, sub_document->child_count());
217 }
218
219 // An MHTML document with an iframe using a data-URL. The data-URL IS defined in
220 // the MHTML archive, but isn't used, per https://crbug.com/969696.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeDataUrlFound)221 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeDataUrlFound) {
222 MhtmlArchive mhtml_archive;
223 mhtml_archive.AddHtmlDocument(
224 GURL("http://example.com"),
225 "<iframe src=\"data:text/html,<iframe></iframe>\"></iframe>");
226 mhtml_archive.AddHtmlDocument(GURL("data:text/html,<iframe></iframe>"),
227 "no iframes");
228 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
229
230 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
231 RenderFrameHostImpl* main_document = main_frame_host();
232
233 ASSERT_EQ(1u, main_document->child_count());
234 RenderFrameHostImpl* sub_document =
235 main_document->child_at(0)->current_frame_host();
236 EXPECT_EQ(GURL("data:text/html,<iframe></iframe>"),
237 sub_document->GetLastCommittedURL());
238
239 EXPECT_TRUE(main_document->is_mhtml_document());
240 EXPECT_FALSE(sub_document->is_mhtml_document()); // Served from data-url.
241
242 // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
243 // can't be used, because javascript is disabled. Instead, check it was able
244 // to load an iframe.
245 EXPECT_EQ(1u, sub_document->child_count());
246 }
247
248 // An iframe uses its srcdoc attribute and the about:srcdoc is not defined in
249 // the MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeAboutSrcdocNoFound)250 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeAboutSrcdocNoFound) {
251 MhtmlArchive mhtml_archive;
252 mhtml_archive.AddHtmlDocument(
253 GURL("http://example.com"),
254 "<iframe srcdoc=\"<iframe></iframe>\"></iframe>");
255 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
256 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
257
258 RenderFrameHostImpl* main_document = main_frame_host();
259 ASSERT_EQ(1u, main_document->child_count());
260 RenderFrameHostImpl* sub_document =
261 main_document->child_at(0)->current_frame_host();
262 EXPECT_TRUE(sub_document->GetLastCommittedURL().IsAboutSrcdoc());
263
264 EXPECT_TRUE(main_document->is_mhtml_document());
265 EXPECT_TRUE(sub_document->is_mhtml_document());
266
267 // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
268 // can't be used, because javascript is disabled. Instead, check it was able
269 // to load an iframe.
270 EXPECT_EQ(1u, sub_document->child_count());
271 }
272
273 // An iframe uses its srcdoc attribute and the about:srcdoc IS defined in
274 // the MHTML archive. Its content is NEVER loaded from the MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeAboutSrcdocFound)275 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeAboutSrcdocFound) {
276 MhtmlArchive mhtml_archive;
277 mhtml_archive.AddHtmlDocument(
278 GURL("http://example.com"),
279 "<iframe srcdoc=\"<iframe></iframe>\"></iframe>");
280 mhtml_archive.AddHtmlDocument(GURL("about:srcdoc'"), "no iframe");
281 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
282 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
283
284 RenderFrameHostImpl* main_document = main_frame_host();
285 ASSERT_EQ(1u, main_document->child_count());
286 RenderFrameHostImpl* sub_document =
287 main_document->child_at(0)->current_frame_host();
288 EXPECT_TRUE(sub_document->GetLastCommittedURL().IsAboutSrcdoc());
289
290 EXPECT_TRUE(main_document->is_mhtml_document());
291 EXPECT_TRUE(sub_document->is_mhtml_document());
292
293 // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
294 // can't be used, because javascript is disabled. Instead, check it was able
295 // to load an iframe.
296 EXPECT_EQ(1u, sub_document->child_count());
297 }
298
299 // An MHTML document with an iframe loading the about:blank document. The
300 // about:blank resource is not defined in the MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeAboutBlankNotFound)301 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeAboutBlankNotFound) {
302 MhtmlArchive mhtml_archive;
303 mhtml_archive.AddHtmlDocument(GURL("http://example.com"),
304 "<iframe src=\"about:blank\"></iframe>"
305 // Note: this is actually treated as a
306 // same-document navigation!
307 "<iframe src=\"about:blank#fragment\"></iframe>"
308 "<iframe src=\"about:blank?query\"></iframe>");
309 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
310 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
311
312 RenderFrameHostImpl* main_document = main_frame_host();
313 ASSERT_EQ(3u, main_document->child_count());
314 auto iframe_url = [main_document](int index) {
315 return main_document->child_at(index)
316 ->current_frame_host()
317 ->GetLastCommittedURL();
318 };
319
320 // about:blank in MHTML has some very unusual behavior. When navigating to
321 // about:blank in the context of a MHTML archive, the renderer-side MHTML
322 // handler actually attempts to look up the resource for about:blank<...>" in
323 // the MHTML archive.
324 //
325 // Prior to https://crrev.com/c/2335323, failing to find the resource in the
326 // MHTML archive usually led to the commit being silently dropped (see
327 // `IframeNotFound` and `IframeContentIdNotFound`). However, about:blank
328 // behaved differently, due to a special case in frame_loader.cc's
329 // `ShouldNavigate()` for URLs that will load as an empty document.
330 //
331 // However, after https://crrev.com/c/23335323, loading about:blank without a
332 // corresponding resource in the MHTML archive will be treated as loading
333 // static data rather than loading an empty document. This affects the timing
334 // of load completion; loading an empty document synchronously completes
335 // during `CommitNavigation()`, while loading static data (even if the data is
336 // empty) completes "later".
337 EXPECT_EQ(iframe_url(0), GURL("about:blank"));
338 // Note: unlike the other two subframe navigations, this navigation actually
339 // succeeds as a same-document navigation...
340 // Note 2: this same-document navigation is performed asynchronously. Prior to
341 // https://crrev.com/c/23335323, the test would consider the page as loaded
342 // before the fragment navigation completed, resulting in an empty last
343 // committed URL.
344 EXPECT_EQ(iframe_url(1), GURL("about:blank#fragment"));
345 EXPECT_EQ(iframe_url(2), GURL("about:blank?query"));
346 }
347
348 // An MHTML document with an iframe loading the about:blank document AND the
349 // about:blank document is a resource of the MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeAboutBlankFound)350 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeAboutBlankFound) {
351 MhtmlArchive mhtml_archive;
352 mhtml_archive.AddHtmlDocument(GURL("http://example.com"),
353 "<iframe src=\"about:blank\"></iframe>");
354 mhtml_archive.AddHtmlDocument(
355 GURL(url::kAboutBlankURL),
356 "<iframe src=\"http://example.com/found.html\">/iframe>");
357 mhtml_archive.AddHtmlDocument(GURL("http://example.com/found.html"), "");
358 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
359 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
360
361 RenderFrameHostImpl* main_document = main_frame_host();
362 ASSERT_EQ(1u, main_document->child_count());
363 RenderFrameHostImpl* about_blank_document =
364 main_document->child_at(0)->current_frame_host();
365
366 EXPECT_TRUE(main_document->is_mhtml_document());
367 // TODO(arthursonzogni): This should be true here.
368 EXPECT_FALSE(about_blank_document->is_mhtml_document());
369
370 // about:blank is loaded from the archive, so it has an iframe.
371 // See https://crbug.com/969667
372 ASSERT_EQ(1u, about_blank_document->child_count());
373 }
374
375 // An MHTML document with an iframe trying to load a javascript URL.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeJavascriptUrlNotFound)376 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,
377 IframeJavascriptUrlNotFound) {
378 MhtmlArchive mhtml_archive;
379 mhtml_archive.AddHtmlDocument(
380 GURL("http://example.com"),
381 "<iframe src=\"javascript:console.log('test')\"></iframe>");
382 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
383
384 WebContentsConsoleObserver console_observer(web_contents());
385 console_observer.SetPattern(base::StringPrintf(
386 "Blocked script execution in '%s' because the document's frame "
387 "is sandboxed and the 'allow-scripts' permission is not set.",
388 mhtml_url.spec().c_str()));
389
390 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
391 console_observer.Wait();
392
393 RenderFrameHostImpl* main_document = main_frame_host();
394 ASSERT_EQ(1u, main_document->child_count());
395 RenderFrameHostImpl* sub_document =
396 main_document->child_at(0)->current_frame_host();
397
398 EXPECT_TRUE(main_document->is_mhtml_document());
399
400 // The |sub_document| is the initial empty document.
401 EXPECT_FALSE(sub_document->is_mhtml_document());
402 EXPECT_EQ(GURL(), sub_document->GetLastCommittedURL());
403 }
404
405 // An MHTML document with an iframe trying to load a javascript URL. The
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeJavascriptUrlFound)406 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeJavascriptUrlFound) {
407 MhtmlArchive mhtml_archive;
408 mhtml_archive.AddHtmlDocument(
409 GURL("http://example.com"),
410 "<iframe src=\"javascript:console.log('test')\"></iframe>");
411 mhtml_archive.AddHtmlDocument(GURL("javascript:console.log('test')"),
412 "<iframe></iframe>");
413 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
414
415 WebContentsConsoleObserver console_observer(web_contents());
416 console_observer.SetPattern(base::StringPrintf(
417 "Blocked script execution in '%s' because the document's frame "
418 "is sandboxed and the 'allow-scripts' permission is not set.",
419 mhtml_url.spec().c_str()));
420
421 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
422 console_observer.Wait();
423
424 RenderFrameHostImpl* main_document = main_frame_host();
425 ASSERT_EQ(1u, main_document->child_count());
426 RenderFrameHostImpl* sub_document =
427 main_document->child_at(0)->current_frame_host();
428
429 EXPECT_TRUE(main_document->is_mhtml_document());
430
431 // The |sub_document| is the initial empty document.
432 EXPECT_FALSE(sub_document->is_mhtml_document());
433 EXPECT_EQ(GURL(), sub_document->GetLastCommittedURL());
434
435 EXPECT_EQ(0u, sub_document->child_count());
436 }
437
438 // Load iframe with the content-ID scheme. The resource is found in the MHTML
439 // archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeContentIdFound)440 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeContentIdFound) {
441 MhtmlArchive mhtml_archive;
442 mhtml_archive.AddHtmlDocument(GURL("http://example.com"),
443 "<iframe src=\"cid:iframe\"></iframe>");
444 mhtml_archive.AddHtmlDocument(GURL("http://example.com/found.html"),
445 "Content-ID: <iframe>\n", "<iframe></iframe>");
446 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
447
448 NavigationHandleObserver iframe_navigation(web_contents(),
449 GURL("cid:iframe"));
450 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
451
452 RenderFrameHostImpl* main_document = main_frame_host();
453 ASSERT_EQ(1u, main_document->child_count());
454 RenderFrameHostImpl* sub_document =
455 main_document->child_at(0)->current_frame_host();
456
457 EXPECT_TRUE(main_document->is_mhtml_document());
458 EXPECT_TRUE(sub_document->is_mhtml_document());
459
460 EXPECT_EQ(GURL("cid:iframe"), sub_document->GetLastCommittedURL());
461 EXPECT_TRUE(iframe_navigation.has_committed());
462 EXPECT_FALSE(iframe_navigation.is_error());
463
464 // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
465 // can't be used, because javascript is disabled. Instead, check it was able
466 // to load an iframe.
467 EXPECT_EQ(1u, sub_document->child_count());
468 }
469
470 // Load iframe with the content-ID scheme. The resource is not found in the
471 // MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeContentIdNotFound)472 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeContentIdNotFound) {
473 MhtmlArchive mhtml_archive;
474 mhtml_archive.AddHtmlDocument(GURL("http://example.com"),
475 "<iframe src=\"cid:iframe\"></iframe>");
476 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
477
478 NavigationHandleObserver iframe_navigation(web_contents(),
479 GURL("cid:iframe"));
480 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
481
482 RenderFrameHostImpl* main_document = main_frame_host();
483 ASSERT_EQ(1u, main_document->child_count());
484 RenderFrameHostImpl* sub_document =
485 main_document->child_at(0)->current_frame_host();
486
487 EXPECT_TRUE(main_document->is_mhtml_document());
488 EXPECT_TRUE(sub_document->is_mhtml_document());
489
490 // This should commit as a failed navigation, but the browser side doesn't
491 // have enough information to make that determination. On the renderer side,
492 // there's no existing way to turn `CommitNavigation()` into
493 // `CommitFailedNavigation()`.
494 // TODO(https://crbug.com/1112965): Fix this by implementing a MHTML
495 // URLLoaderFactory; then failure to find the resource can use the standard
496 // error handling path.
497 EXPECT_EQ(GURL("cid:iframe"), sub_document->GetLastCommittedURL());
498 EXPECT_TRUE(iframe_navigation.has_committed());
499 EXPECT_FALSE(iframe_navigation.is_error());
500 }
501
502 // Tests Content-Security-Policy: frame-ancestors enforcement in MHTML
503 // subframes. It isn't enforced currently.
504 // See https://crbug.com/969711.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,CspFrameAncestor)505 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, CspFrameAncestor) {
506 MhtmlArchive mhtml_archive;
507 mhtml_archive.AddHtmlDocument(
508 GURL("http://example.com/main"),
509 "<iframe src=\"http://example.com/subframe\"></iframe>");
510 mhtml_archive.AddHtmlDocument(
511 GURL("http://example.com/subframe"),
512 "Content-Security-Policy: frame-ancestors 'none'\n", "<iframe></iframe>");
513 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
514
515 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
516
517 RenderFrameHostImpl* main_document = main_frame_host();
518 ASSERT_EQ(1u, main_document->child_count());
519 RenderFrameHostImpl* sub_document =
520 main_document->child_at(0)->current_frame_host();
521
522 EXPECT_TRUE(main_document->is_mhtml_document());
523 EXPECT_TRUE(sub_document->is_mhtml_document());
524
525 // Currently, frame-ancestors is not enforced. See https://crbug.com/969711.
526 // Check that the iframe is properly loaded. EvalJs("document.body.innerHTML")
527 // can't be used, because javascript is disabled. Instead, check it was able
528 // to load an iframe.
529 ASSERT_EQ(1u, sub_document->child_count());
530 }
531
// Starts a navigation to a second MHTML archive, holds back its response body
// so that the archive cannot finish loading, and then triggers a same-document
// navigation on the still-committed first archive. The same-document
// navigation is expected to cancel the pending MHTML load and to commit.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,
                       SameDocumentNavigationWhileLoading) {
  // Load a MHTML archive normally so there's a renderer process for file://.
  MhtmlArchive mhtml_archive;
  mhtml_archive.AddHtmlDocument(GURL("http://example.com/main"),
                                "<p>Hello world!</p>");
  const GURL mhtml_url = mhtml_archive.Write("index.mhtml");
  EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));

  const RenderProcessHost* const rph = main_frame_host()->GetProcess();

  // Navigate to another MHTML archive which will reuse the same renderer.
  MhtmlArchive mhtml_archive2;
  mhtml_archive2.AddHtmlDocument(GURL("http://example.com/main2"),
                                 "<p>Hello world again!</p>");
  const GURL mhtml_url2 = mhtml_archive2.Write("index2.mhtml");

  TestNavigationManager manager(web_contents(), mhtml_url2);
  shell()->LoadURL(mhtml_url2);

  EXPECT_TRUE(manager.WaitForResponse());
  // The new navigation should not have committed yet.
  EXPECT_EQ(mhtml_url, main_frame_host()->GetLastCommittedURL());

  // Make sure it actually picked the same process.
  NavigationRequest* request =
      NavigationRequest::From(manager.GetNavigationHandle());
  EXPECT_EQ(rph, request->GetRenderFrameHost()->GetProcess());

  // Delay the response body from being received by the renderer: the request
  // is handed the consumer end of a fresh data pipe, whose producer end stays
  // in this test and is never written to.
  mojo::ScopedDataPipeConsumerHandle consumer;
  mojo::ScopedDataPipeProducerHandle producer;
  ASSERT_EQ(MOJO_RESULT_OK,
            mojo::CreateDataPipe(/* options */ nullptr, &producer, &consumer));
  using std::swap;
  swap(request->mutable_response_body_for_testing(), consumer);

  // Resume the navigation, which should send a |CommitNavigation()| to the
  // renderer.
  manager.ResumeNavigation();

  // Archive loading is split into two phases: first, the entire response body
  // is read and parsed into an MHTML archive by |MHTMLBodyLoaderClient|, and
  // then the renderer commits the response. Since the data pipe for the
  // response body was swapped out above, the renderer should not have committed
  // a navigation to |mhtml_url2|.
  // Note: Ideally, this should resume the navigation and wait for a signal that
  // the renderer is attempting to read the response body. Unfortunately, no
  // such signal exists. As-is, this check is imperfect.
  EXPECT_EQ(mhtml_url, main_frame_host()->GetLastCommittedURL());
  EXPECT_TRUE(web_contents()->IsLoading());

  // While archive loading is still in progress and nothing has been committed,
  // trigger a same-document navigation.
  url::Replacements<char> replacements;
  replacements.SetRef("fragment", url::Component(0, strlen("fragment")));
  const GURL mhtml_url_with_fragment =
      mhtml_url.ReplaceComponents(replacements);
  // TODO(dcheng): Using NavigateToURL() here seems to cause the test to hang.
  // Figure out why.
  shell()->LoadURL(mhtml_url_with_fragment);

  // The same-document navigation should cancel MHTML loading. On the browser
  // side, this can be observed by waiting for the peer handle to be closed by
  // the renderer.
  base::RunLoop run_loop;
  mojo::SimpleWatcher watcher(FROM_HERE,
                              mojo::SimpleWatcher::ArmingPolicy::AUTOMATIC);
  const MojoResult watch_result = watcher.Watch(
      producer.get(), MOJO_HANDLE_SIGNAL_PEER_CLOSED,
      MOJO_TRIGGER_CONDITION_SIGNALS_SATISFIED,
      base::BindLambdaForTesting(
          [&](MojoResult result, const mojo::HandleSignalsState& state) {
            EXPECT_EQ(MOJO_RESULT_OK, result);
            EXPECT_TRUE(state.peer_closed());
            run_loop.Quit();
          }));
  // If the watch could not be installed, the callback would never quit the run
  // loop: fail right away instead of hanging until the test times out.
  ASSERT_EQ(MOJO_RESULT_OK, watch_result);
  run_loop.Run();

  // WaitForLoadStop() reports whether the last navigation succeeded; do not
  // silently ignore a failure.
  EXPECT_TRUE(WaitForLoadStop(web_contents()));
  EXPECT_EQ(mhtml_url_with_fragment, main_frame_host()->GetLastCommittedURL());
}
614
615 // Check RenderFrameHostImpl::is_mhtml_document() stays true after same-document
616 // navigation in MHTML document.
617 // Regression test for https://crbug.com/1126391
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,SameDocumentNavigationPreservesMhtmlFlag)618 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,
619 SameDocumentNavigationPreservesMhtmlFlag) {
620 MhtmlArchive mhtml_archive;
621 mhtml_archive.AddHtmlDocument(GURL("http://a.com/a"), "");
622 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
623 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
624 EXPECT_TRUE(main_frame_host()->is_mhtml_document());
625 EXPECT_TRUE(NavigateToURL(
626 shell(), GURL(main_frame_host()->GetLastCommittedURL().spec() + "#foo")));
627 EXPECT_TRUE(main_frame_host()->is_mhtml_document());
628 }
629
630 // Check RenderFrameHostImpl::is_mhtml_document() is correctly set for history
631 // navigation to MHTML document. It should continue to work when restored from
632 // the BackForwardCache.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,BackNavigationPreservesMhtmlFlag)633 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,
634 BackNavigationPreservesMhtmlFlag) {
635 ASSERT_TRUE(embedded_test_server()->Start());
636
637 MhtmlArchive mhtml_archive;
638 mhtml_archive.AddHtmlDocument(GURL("http://a.com/a"), "");
639 GURL mhtml_url = mhtml_archive.Write("index.mhtml");
640 EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
641 EXPECT_TRUE(main_frame_host()->is_mhtml_document());
642 EXPECT_TRUE(NavigateToURL(
643 shell(), embedded_test_server()->GetURL("b.com", "/title1.html")));
644 EXPECT_FALSE(main_frame_host()->is_mhtml_document());
645 web_contents()->GetController().GoBack();
646 WaitForLoadStop(web_contents());
647 EXPECT_TRUE(main_frame_host()->is_mhtml_document());
648 }
649
// Compares the active sandbox flags of an MHTML main document, of a plain
// iframe and of an iframe carrying the `sandbox` attribute.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, SandboxedIframe) {
  using network::mojom::WebSandboxFlags;

  MhtmlArchive archive;
  archive.AddHtmlDocument(GURL("http://a.com"), "", R"(
    <iframe src="http://a.com/unsandboxed.html" ></iframe>
    <iframe src="http://a.com/sandboxed.html" sandbox></iframe>
  )");
  archive.AddHtmlDocument(GURL("http://a.com/sandboxed.html"), "");
  archive.AddHtmlDocument(GURL("http://a.com/unsandboxed.html"), "");
  const GURL archive_url = archive.Write("index.mhtml");

  EXPECT_TRUE(NavigateToURL(shell(), archive_url));

  RenderFrameHostImpl* const root_rfh = main_frame_host();
  ASSERT_EQ(2u, root_rfh->child_count());
  // Children appear in document order: the unsandboxed iframe comes first.
  RenderFrameHostImpl* const unsandboxed_rfh =
      root_rfh->child_at(0)->current_frame_host();
  RenderFrameHostImpl* const sandboxed_rfh =
      root_rfh->child_at(1)->current_frame_host();

  // Every flag except kPopups and kPropagatesToAuxiliaryBrowsingContexts.
  const auto expected_mhtml_flags =
      ~WebSandboxFlags::kPopups &
      ~WebSandboxFlags::kPropagatesToAuxiliaryBrowsingContexts;
  const auto expected_strict_flags = WebSandboxFlags::kAll;

  EXPECT_EQ(expected_mhtml_flags, root_rfh->active_sandbox_flags());
  EXPECT_EQ(expected_mhtml_flags, unsandboxed_rfh->active_sandbox_flags());
  EXPECT_EQ(expected_strict_flags, sandboxed_rfh->active_sandbox_flags());
}
678
679 } // namespace content
680