1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <string>
7 #include <utility>
8 
9 #include "base/files/file_util.h"
10 #include "base/files/scoped_temp_dir.h"
11 #include "base/macros.h"
12 #include "base/run_loop.h"
13 #include "base/strings/string_util.h"
14 #include "base/test/bind.h"
15 #include "base/threading/thread_restrictions.h"
16 #include "content/browser/renderer_host/navigation_request.h"
17 #include "content/browser/renderer_host/render_frame_host_impl.h"
18 #include "content/browser/web_contents/web_contents_impl.h"
19 #include "content/public/test/browser_test.h"
20 #include "content/public/test/browser_test_utils.h"
21 #include "content/public/test/content_browser_test.h"
22 #include "content/public/test/content_browser_test_utils.h"
23 #include "content/public/test/navigation_handle_observer.h"
24 #include "content/public/test/test_utils.h"
25 #include "content/shell/browser/shell.h"
26 #include "mojo/public/c/system/trap.h"
27 #include "mojo/public/c/system/types.h"
28 #include "mojo/public/cpp/system/data_pipe.h"
29 #include "mojo/public/cpp/system/handle_signals_state.h"
30 #include "mojo/public/cpp/system/simple_watcher.h"
31 #include "net/base/filename_util.h"
32 #include "net/dns/mock_host_resolver.h"
33 #include "services/network/public/cpp/web_sandbox_flags.h"
34 #include "url/gurl.h"
35 #include "url/url_constants.h"
36 
37 namespace content {
38 
39 namespace {
40 
41 // Tests about navigations to MHTML archives.
42 class NavigationMhtmlBrowserTest : public ContentBrowserTest {
43  public:
web_contents() const44   WebContentsImpl* web_contents() const {
45     return static_cast<WebContentsImpl*>(shell()->web_contents());
46   }
47 
main_frame_host()48   RenderFrameHostImpl* main_frame_host() {
49     return web_contents()->GetFrameTree()->root()->current_frame_host();
50   }
51 
52  protected:
SetUpOnMainThread()53   void SetUpOnMainThread() final {
54     ContentBrowserTest::SetUpOnMainThread();
55     host_resolver()->AddRule("*", "127.0.0.1");
56   }
57 };
58 
59 // Helper class: Build MHTML documents easily in tests.
60 class MhtmlArchive {
61  public:
62   MhtmlArchive() = default;
~MhtmlArchive()63   ~MhtmlArchive() {
64     base::ScopedAllowBlockingForTesting allow_blocking_;
65     EXPECT_TRUE(file_directory_.Delete());
66   }
67 
AddResource(const std::string content)68   void AddResource(const std::string content) {
69     content_ += "\n--MHTML_BOUNDARY\n" + content;
70   }
71 
AddHtmlDocument(const GURL & url,const std::string headers,const std::string body)72   void AddHtmlDocument(const GURL& url,
73                        const std::string headers,
74                        const std::string body) {
75     const char* document_template =
76         "Content-Type: text/html\n"
77         "Content-Location: $1\n"
78         "$2"
79         "\n"
80         "$3";
81     AddResource(base::ReplaceStringPlaceholders(
82         document_template, {url.spec(), headers, body}, nullptr));
83   }
84 
AddHtmlDocument(const GURL & url,const std::string body)85   void AddHtmlDocument(const GURL& url, const std::string body) {
86     AddHtmlDocument(url, "" /* headers */, body);
87   }
88 
89   // Writes the MHTML archive into a file and returns its URL.
Write(const std::string & file)90   const GURL Write(const std::string& file) {
91     const char* document_header =
92         "From: The chromium developers\n"
93         "Subject: <the subject>\n"
94         "Date: Mon, May 27 2019 11:55:42 GMT+0200\n"
95         "MIME-Version: 1.0\n"
96         "Content-Type: multipart/related;"
97         "              boundary=\"MHTML_BOUNDARY\";"
98         "              type=\"text/html\"\n";
99     std::string document = document_header + content_ + "\n--MHTML_BOUNDARY--";
100 
101     // MHTML uses carriage return before every new lines.
102     base::ReplaceChars(document, "\n", "\r\n", &document);
103 
104     base::ScopedAllowBlockingForTesting allow_blocking_;
105     EXPECT_TRUE(file_directory_.CreateUniqueTempDir());
106     base::FilePath file_path = file_directory_.GetPath().AppendASCII(file);
107     EXPECT_TRUE(base::WriteFile(file_path, document));
108     return net::FilePathToFileURL(file_path);
109   }
110 
111  private:
112   base::ScopedTempDir file_directory_;
113   std::string content_;
114 
115   DISALLOW_COPY_AND_ASSIGN(MhtmlArchive);
116 };
117 
118 }  // namespace
119 
120 // An MHTML document with an iframe. The iframe's document is found in the
121 // archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeFound)122 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeFound) {
123   MhtmlArchive mhtml_archive;
124   mhtml_archive.AddHtmlDocument(
125       GURL("http://example.com"),
126       "<iframe src=\"http://example.com/found.html\"></iframe>");
127   mhtml_archive.AddHtmlDocument(GURL("http://example.com/found.html"),
128                                 "<iframe></iframe>");
129   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
130 
131   NavigationHandleObserver iframe_navigation(
132       web_contents(), GURL("http://example.com/found.html"));
133   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
134 
135   RenderFrameHostImpl* main_document = main_frame_host();
136   ASSERT_EQ(1u, main_document->child_count());
137   RenderFrameHostImpl* sub_document =
138       main_document->child_at(0)->current_frame_host();
139 
140   EXPECT_TRUE(main_document->is_mhtml_document());
141   EXPECT_TRUE(sub_document->is_mhtml_document());
142 
143   // When the iframe's content is loaded from the MHTML archive, a successful
144   // commit using the provided URL happens, even if the resource wasn't loaded
145   // from this URL initially.
146   EXPECT_EQ(GURL("http://example.com/found.html"),
147             sub_document->GetLastCommittedURL());
148   EXPECT_TRUE(iframe_navigation.has_committed());
149   EXPECT_FALSE(iframe_navigation.is_error());
150 
151   // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
152   // can't be used, because javascript is disabled. Instead, check it was able
153   // to load an iframe.
154   EXPECT_EQ(1u, sub_document->child_count());
155 }
156 
157 // An MHTML document with an iframe. The iframe's document is not found in the
158 // archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeNotFound)159 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeNotFound) {
160   MhtmlArchive mhtml_archive;
161   mhtml_archive.AddHtmlDocument(
162       GURL("http://example.com"),
163       "<iframe src=\"http://example.com/not_found.html\"></iframe>");
164   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
165   NavigationHandleObserver iframe_navigation_observer(
166       web_contents(), GURL("http://example.com/not_found.html"));
167   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
168 
169   RenderFrameHostImpl* main_document = main_frame_host();
170   ASSERT_EQ(1u, main_document->child_count());
171   RenderFrameHostImpl* sub_document =
172       main_document->child_at(0)->current_frame_host();
173 
174   EXPECT_TRUE(main_document->is_mhtml_document());
175   EXPECT_TRUE(sub_document->is_mhtml_document());
176 
177   // This should commit as a failed navigation, but the browser side doesn't
178   // have enough information to make that determination. On the renderer side,
179   // there's no existing way to turn `CommitNavigation()` into
180   // `CommitFailedNavigation()`.
181   // TODO(https://crbug.com/1112965): Fix this by implementing a MHTML
182   // URLLoaderFactory; then failure to find the resource can use the standard
183   // error handling path.
184   EXPECT_TRUE(iframe_navigation_observer.has_committed());
185   EXPECT_FALSE(iframe_navigation_observer.is_error());
186   EXPECT_EQ(GURL("http://example.com/not_found.html"),
187             sub_document->GetLastCommittedURL());
188 }
189 
190 // An MHTML document with an iframe using a data-URL. The data-URL is not
191 // defined in the MHTML archive.
192 // TODO(https://crbug.com/967307): Enable this test. It currently reaches a
193 // DCHECK or timeout in release mode.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeDataUrlNotFound)194 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeDataUrlNotFound) {
195   MhtmlArchive mhtml_archive;
196   mhtml_archive.AddHtmlDocument(
197       GURL("http://example.com"),
198       "<iframe src=\"data:text/html,<iframe></iframe>\"></iframe>");
199   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
200 
201   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
202   RenderFrameHostImpl* main_document = main_frame_host();
203 
204   ASSERT_EQ(1u, main_document->child_count());
205   RenderFrameHostImpl* sub_document =
206       main_document->child_at(0)->current_frame_host();
207   EXPECT_EQ(GURL("data:text/html,<iframe></iframe>"),
208             sub_document->GetLastCommittedURL());
209 
210   EXPECT_TRUE(main_document->is_mhtml_document());
211   EXPECT_FALSE(sub_document->is_mhtml_document());  // Served from data-url.
212 
213   // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
214   // can't be used, because javascript is disabled. Instead, check it was able
215   // to load an iframe.
216   EXPECT_EQ(1u, sub_document->child_count());
217 }
218 
219 // An MHTML document with an iframe using a data-URL. The data-URL IS defined in
220 // the MHTML archive, but isn't used, per https://crbug.com/969696.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeDataUrlFound)221 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeDataUrlFound) {
222   MhtmlArchive mhtml_archive;
223   mhtml_archive.AddHtmlDocument(
224       GURL("http://example.com"),
225       "<iframe src=\"data:text/html,<iframe></iframe>\"></iframe>");
226   mhtml_archive.AddHtmlDocument(GURL("data:text/html,<iframe></iframe>"),
227                                 "no iframes");
228   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
229 
230   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
231   RenderFrameHostImpl* main_document = main_frame_host();
232 
233   ASSERT_EQ(1u, main_document->child_count());
234   RenderFrameHostImpl* sub_document =
235       main_document->child_at(0)->current_frame_host();
236   EXPECT_EQ(GURL("data:text/html,<iframe></iframe>"),
237             sub_document->GetLastCommittedURL());
238 
239   EXPECT_TRUE(main_document->is_mhtml_document());
240   EXPECT_FALSE(sub_document->is_mhtml_document());  // Served from data-url.
241 
242   // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
243   // can't be used, because javascript is disabled. Instead, check it was able
244   // to load an iframe.
245   EXPECT_EQ(1u, sub_document->child_count());
246 }
247 
248 // An iframe uses its srcdoc attribute and the about:srcdoc is not defined in
249 // the MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeAboutSrcdocNoFound)250 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeAboutSrcdocNoFound) {
251   MhtmlArchive mhtml_archive;
252   mhtml_archive.AddHtmlDocument(
253       GURL("http://example.com"),
254       "<iframe srcdoc=\"<iframe></iframe>\"></iframe>");
255   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
256   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
257 
258   RenderFrameHostImpl* main_document = main_frame_host();
259   ASSERT_EQ(1u, main_document->child_count());
260   RenderFrameHostImpl* sub_document =
261       main_document->child_at(0)->current_frame_host();
262   EXPECT_TRUE(sub_document->GetLastCommittedURL().IsAboutSrcdoc());
263 
264   EXPECT_TRUE(main_document->is_mhtml_document());
265   EXPECT_TRUE(sub_document->is_mhtml_document());
266 
267   // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
268   // can't be used, because javascript is disabled. Instead, check it was able
269   // to load an iframe.
270   EXPECT_EQ(1u, sub_document->child_count());
271 }
272 
273 // An iframe uses its srcdoc attribute and the about:srcdoc IS defined in
274 // the MHTML archive. Its content is NEVER loaded from the MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeAboutSrcdocFound)275 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeAboutSrcdocFound) {
276   MhtmlArchive mhtml_archive;
277   mhtml_archive.AddHtmlDocument(
278       GURL("http://example.com"),
279       "<iframe srcdoc=\"<iframe></iframe>\"></iframe>");
280   mhtml_archive.AddHtmlDocument(GURL("about:srcdoc'"), "no iframe");
281   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
282   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
283 
284   RenderFrameHostImpl* main_document = main_frame_host();
285   ASSERT_EQ(1u, main_document->child_count());
286   RenderFrameHostImpl* sub_document =
287       main_document->child_at(0)->current_frame_host();
288   EXPECT_TRUE(sub_document->GetLastCommittedURL().IsAboutSrcdoc());
289 
290   EXPECT_TRUE(main_document->is_mhtml_document());
291   EXPECT_TRUE(sub_document->is_mhtml_document());
292 
293   // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
294   // can't be used, because javascript is disabled. Instead, check it was able
295   // to load an iframe.
296   EXPECT_EQ(1u, sub_document->child_count());
297 }
298 
299 // An MHTML document with an iframe loading the about:blank document. The
300 // about:blank resource is not defined in the MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeAboutBlankNotFound)301 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeAboutBlankNotFound) {
302   MhtmlArchive mhtml_archive;
303   mhtml_archive.AddHtmlDocument(GURL("http://example.com"),
304                                 "<iframe src=\"about:blank\"></iframe>"
305                                 // Note: this is actually treated as a
306                                 // same-document navigation!
307                                 "<iframe src=\"about:blank#fragment\"></iframe>"
308                                 "<iframe src=\"about:blank?query\"></iframe>");
309   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
310   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
311 
312   RenderFrameHostImpl* main_document = main_frame_host();
313   ASSERT_EQ(3u, main_document->child_count());
314   auto iframe_url = [main_document](int index) {
315     return main_document->child_at(index)
316         ->current_frame_host()
317         ->GetLastCommittedURL();
318   };
319 
320   // about:blank in MHTML has some very unusual behavior. When navigating to
321   // about:blank in the context of a MHTML archive, the renderer-side MHTML
322   // handler actually attempts to look up the resource for about:blank<...>" in
323   // the MHTML archive.
324   //
325   // Prior to https://crrev.com/c/2335323, failing to find the resource in the
326   // MHTML archive usually led to the commit being silently dropped (see
327   // `IframeNotFound` and `IframeContentIdNotFound`). However, about:blank
328   // behaved differently, due to a special case in frame_loader.cc's
329   // `ShouldNavigate()` for URLs that will load as an empty document.
330   //
331   // However, after https://crrev.com/c/23335323, loading about:blank without a
332   // corresponding resource in the MHTML archive will be treated as loading
333   // static data rather than loading an empty document. This affects the timing
334   // of load completion; loading an empty document synchronously completes
335   // during `CommitNavigation()`, while loading static data (even if the data is
336   // empty) completes "later".
337   EXPECT_EQ(iframe_url(0), GURL("about:blank"));
338   // Note: unlike the other two subframe navigations, this navigation actually
339   // succeeds as a same-document navigation...
340   // Note 2: this same-document navigation is performed asynchronously. Prior to
341   // https://crrev.com/c/23335323, the test would consider the page as loaded
342   // before the fragment navigation completed, resulting in an empty last
343   // committed URL.
344   EXPECT_EQ(iframe_url(1), GURL("about:blank#fragment"));
345   EXPECT_EQ(iframe_url(2), GURL("about:blank?query"));
346 }
347 
348 // An MHTML document with an iframe loading the about:blank document AND the
349 // about:blank document is a resource of the MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeAboutBlankFound)350 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeAboutBlankFound) {
351   MhtmlArchive mhtml_archive;
352   mhtml_archive.AddHtmlDocument(GURL("http://example.com"),
353                                 "<iframe src=\"about:blank\"></iframe>");
354   mhtml_archive.AddHtmlDocument(
355       GURL(url::kAboutBlankURL),
356       "<iframe src=\"http://example.com/found.html\">/iframe>");
357   mhtml_archive.AddHtmlDocument(GURL("http://example.com/found.html"), "");
358   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
359   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
360 
361   RenderFrameHostImpl* main_document = main_frame_host();
362   ASSERT_EQ(1u, main_document->child_count());
363   RenderFrameHostImpl* about_blank_document =
364       main_document->child_at(0)->current_frame_host();
365 
366   EXPECT_TRUE(main_document->is_mhtml_document());
367   // TODO(arthursonzogni): This should be true here.
368   EXPECT_FALSE(about_blank_document->is_mhtml_document());
369 
370   // about:blank is loaded from the archive, so it has an iframe.
371   // See https://crbug.com/969667
372   ASSERT_EQ(1u, about_blank_document->child_count());
373 }
374 
375 // An MHTML document with an iframe trying to load a javascript URL.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeJavascriptUrlNotFound)376 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,
377                        IframeJavascriptUrlNotFound) {
378   MhtmlArchive mhtml_archive;
379   mhtml_archive.AddHtmlDocument(
380       GURL("http://example.com"),
381       "<iframe src=\"javascript:console.log('test')\"></iframe>");
382   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
383 
384   WebContentsConsoleObserver console_observer(web_contents());
385   console_observer.SetPattern(base::StringPrintf(
386       "Blocked script execution in '%s' because the document's frame "
387       "is sandboxed and the 'allow-scripts' permission is not set.",
388       mhtml_url.spec().c_str()));
389 
390   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
391   console_observer.Wait();
392 
393   RenderFrameHostImpl* main_document = main_frame_host();
394   ASSERT_EQ(1u, main_document->child_count());
395   RenderFrameHostImpl* sub_document =
396       main_document->child_at(0)->current_frame_host();
397 
398   EXPECT_TRUE(main_document->is_mhtml_document());
399 
400   // The |sub_document| is the initial empty document.
401   EXPECT_FALSE(sub_document->is_mhtml_document());
402   EXPECT_EQ(GURL(), sub_document->GetLastCommittedURL());
403 }
404 
405 // An MHTML document with an iframe trying to load a javascript URL. The
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeJavascriptUrlFound)406 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeJavascriptUrlFound) {
407   MhtmlArchive mhtml_archive;
408   mhtml_archive.AddHtmlDocument(
409       GURL("http://example.com"),
410       "<iframe src=\"javascript:console.log('test')\"></iframe>");
411   mhtml_archive.AddHtmlDocument(GURL("javascript:console.log('test')"),
412                                 "<iframe></iframe>");
413   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
414 
415   WebContentsConsoleObserver console_observer(web_contents());
416   console_observer.SetPattern(base::StringPrintf(
417       "Blocked script execution in '%s' because the document's frame "
418       "is sandboxed and the 'allow-scripts' permission is not set.",
419       mhtml_url.spec().c_str()));
420 
421   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
422   console_observer.Wait();
423 
424   RenderFrameHostImpl* main_document = main_frame_host();
425   ASSERT_EQ(1u, main_document->child_count());
426   RenderFrameHostImpl* sub_document =
427       main_document->child_at(0)->current_frame_host();
428 
429   EXPECT_TRUE(main_document->is_mhtml_document());
430 
431   // The |sub_document| is the initial empty document.
432   EXPECT_FALSE(sub_document->is_mhtml_document());
433   EXPECT_EQ(GURL(), sub_document->GetLastCommittedURL());
434 
435   EXPECT_EQ(0u, sub_document->child_count());
436 }
437 
438 // Load iframe with the content-ID scheme. The resource is found in the MHTML
439 // archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeContentIdFound)440 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeContentIdFound) {
441   MhtmlArchive mhtml_archive;
442   mhtml_archive.AddHtmlDocument(GURL("http://example.com"),
443                                 "<iframe src=\"cid:iframe\"></iframe>");
444   mhtml_archive.AddHtmlDocument(GURL("http://example.com/found.html"),
445                                 "Content-ID: <iframe>\n", "<iframe></iframe>");
446   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
447 
448   NavigationHandleObserver iframe_navigation(web_contents(),
449                                              GURL("cid:iframe"));
450   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
451 
452   RenderFrameHostImpl* main_document = main_frame_host();
453   ASSERT_EQ(1u, main_document->child_count());
454   RenderFrameHostImpl* sub_document =
455       main_document->child_at(0)->current_frame_host();
456 
457   EXPECT_TRUE(main_document->is_mhtml_document());
458   EXPECT_TRUE(sub_document->is_mhtml_document());
459 
460   EXPECT_EQ(GURL("cid:iframe"), sub_document->GetLastCommittedURL());
461   EXPECT_TRUE(iframe_navigation.has_committed());
462   EXPECT_FALSE(iframe_navigation.is_error());
463 
464   // Check the iframe is properly loaded. EvalJs("document.body.innerHTML")
465   // can't be used, because javascript is disabled. Instead, check it was able
466   // to load an iframe.
467   EXPECT_EQ(1u, sub_document->child_count());
468 }
469 
470 // Load iframe with the content-ID scheme. The resource is not found in the
471 // MHTML archive.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,IframeContentIdNotFound)472 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, IframeContentIdNotFound) {
473   MhtmlArchive mhtml_archive;
474   mhtml_archive.AddHtmlDocument(GURL("http://example.com"),
475                                 "<iframe src=\"cid:iframe\"></iframe>");
476   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
477 
478   NavigationHandleObserver iframe_navigation(web_contents(),
479                                              GURL("cid:iframe"));
480   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
481 
482   RenderFrameHostImpl* main_document = main_frame_host();
483   ASSERT_EQ(1u, main_document->child_count());
484   RenderFrameHostImpl* sub_document =
485       main_document->child_at(0)->current_frame_host();
486 
487   EXPECT_TRUE(main_document->is_mhtml_document());
488   EXPECT_TRUE(sub_document->is_mhtml_document());
489 
490   // This should commit as a failed navigation, but the browser side doesn't
491   // have enough information to make that determination. On the renderer side,
492   // there's no existing way to turn `CommitNavigation()` into
493   // `CommitFailedNavigation()`.
494   // TODO(https://crbug.com/1112965): Fix this by implementing a MHTML
495   // URLLoaderFactory; then failure to find the resource can use the standard
496   // error handling path.
497   EXPECT_EQ(GURL("cid:iframe"), sub_document->GetLastCommittedURL());
498   EXPECT_TRUE(iframe_navigation.has_committed());
499   EXPECT_FALSE(iframe_navigation.is_error());
500 }
501 
502 // Tests Content-Security-Policy: frame-ancestors enforcement in MHTML
503 // subframes. It isn't enforced currently.
504 // See https://crbug.com/969711.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,CspFrameAncestor)505 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, CspFrameAncestor) {
506   MhtmlArchive mhtml_archive;
507   mhtml_archive.AddHtmlDocument(
508       GURL("http://example.com/main"),
509       "<iframe src=\"http://example.com/subframe\"></iframe>");
510   mhtml_archive.AddHtmlDocument(
511       GURL("http://example.com/subframe"),
512       "Content-Security-Policy: frame-ancestors 'none'\n", "<iframe></iframe>");
513   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
514 
515   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
516 
517   RenderFrameHostImpl* main_document = main_frame_host();
518   ASSERT_EQ(1u, main_document->child_count());
519   RenderFrameHostImpl* sub_document =
520       main_document->child_at(0)->current_frame_host();
521 
522   EXPECT_TRUE(main_document->is_mhtml_document());
523   EXPECT_TRUE(sub_document->is_mhtml_document());
524 
525   // Currently, frame-ancestors is not enforced. See https://crbug.com/969711.
526   // Check that the iframe is properly loaded. EvalJs("document.body.innerHTML")
527   // can't be used, because javascript is disabled. Instead, check it was able
528   // to load an iframe.
529   ASSERT_EQ(1u, sub_document->child_count());
530 }
531 
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,SameDocumentNavigationWhileLoading)532 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,
533                        SameDocumentNavigationWhileLoading) {
534   // Load a MHTML archive normally so there's a renderer process for file://.
535   MhtmlArchive mhtml_archive;
536   mhtml_archive.AddHtmlDocument(GURL("http://example.com/main"),
537                                 "<p>Hello world!</p>");
538   const GURL mhtml_url = mhtml_archive.Write("index.mhtml");
539   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
540 
541   const RenderProcessHost* const rph = main_frame_host()->GetProcess();
542 
543   // Navigate to another MHTML archive which will reuse the same renderer.
544   MhtmlArchive mhtml_archive2;
545   mhtml_archive2.AddHtmlDocument(GURL("http://example.com/main2"),
546                                  "<p>Hello world again!</p>");
547   const GURL mhtml_url2 = mhtml_archive2.Write("index2.mhtml");
548 
549   TestNavigationManager manager(web_contents(), mhtml_url2);
550   shell()->LoadURL(mhtml_url2);
551 
552   EXPECT_TRUE(manager.WaitForResponse());
553   // The new navigation should not have committed yet.
554   EXPECT_EQ(mhtml_url, main_frame_host()->GetLastCommittedURL());
555 
556   // Make sure it actually picked the same process.
557   NavigationRequest* request =
558       NavigationRequest::From(manager.GetNavigationHandle());
559   EXPECT_EQ(rph, request->GetRenderFrameHost()->GetProcess());
560 
561   // Delay the response body from being received by the renderer.
562   mojo::ScopedDataPipeConsumerHandle consumer;
563   mojo::ScopedDataPipeProducerHandle producer;
564   ASSERT_EQ(MOJO_RESULT_OK,
565             mojo::CreateDataPipe(/* options */ nullptr, &producer, &consumer));
566   using std::swap;
567   swap(request->mutable_response_body_for_testing(), consumer);
568 
569   // Resume the navigation, which should send a |CommitNavigation()| to the
570   // renderer.
571   manager.ResumeNavigation();
572 
573   // Archive loading is split into two phases: first, the entire response body
574   // is read and parsed into an MHTML archive by |MHTMLBodyLoaderClient|, and
575   // then the renderer commits the response. Since the data pipe for the
576   // response body was swapped out above, the renderer should not have committed
577   // a navigation to |mhtml_url2|.
578   // Note: Ideally, this should resume the navigation and wait for a signal that
579   // the renderer is attempting to read the response body. Unfortunately, no
580   // such signal exsts. As-is, this check is imperfect.
581   EXPECT_EQ(mhtml_url, main_frame_host()->GetLastCommittedURL());
582   EXPECT_TRUE(web_contents()->IsLoading());
583 
584   // While archive loading is still in progress and nothing has been committed,
585   // trigger a same-document navigation.
586   url::Replacements<char> replacements;
587   replacements.SetRef("fragment", url::Component(0, strlen("fragment")));
588   const GURL mhtml_url_with_fragment =
589       mhtml_url.ReplaceComponents(replacements);
590   // TODO(dcheng): Using NavigateToURL() here seems to cause the test to hang.
591   // Figure out why.
592   shell()->LoadURL(mhtml_url_with_fragment);
593 
594   // The same-document navigation should cancel MHTML loading. On the browser
595   // side, this can be observed by waiting for the peer handle to be closed by
596   // the renderer.
597   base::RunLoop run_loop;
598   mojo::SimpleWatcher watcher(FROM_HERE,
599                               mojo::SimpleWatcher::ArmingPolicy::AUTOMATIC);
600   watcher.Watch(
601       producer.get(), MOJO_HANDLE_SIGNAL_PEER_CLOSED,
602       MOJO_TRIGGER_CONDITION_SIGNALS_SATISFIED,
603       base::BindLambdaForTesting(
604           [&](MojoResult result, const mojo::HandleSignalsState& state) {
605             EXPECT_EQ(MOJO_RESULT_OK, result);
606             EXPECT_TRUE(state.peer_closed());
607             run_loop.Quit();
608           }));
609   run_loop.Run();
610 
611   WaitForLoadStop(web_contents());
612   EXPECT_EQ(mhtml_url_with_fragment, main_frame_host()->GetLastCommittedURL());
613 }
614 
615 // Check RenderFrameHostImpl::is_mhtml_document() stays true after same-document
616 // navigation in MHTML document.
617 // Regression test for https://crbug.com/1126391
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,SameDocumentNavigationPreservesMhtmlFlag)618 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,
619                        SameDocumentNavigationPreservesMhtmlFlag) {
620   MhtmlArchive mhtml_archive;
621   mhtml_archive.AddHtmlDocument(GURL("http://a.com/a"), "");
622   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
623   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
624   EXPECT_TRUE(main_frame_host()->is_mhtml_document());
625   EXPECT_TRUE(NavigateToURL(
626       shell(), GURL(main_frame_host()->GetLastCommittedURL().spec() + "#foo")));
627   EXPECT_TRUE(main_frame_host()->is_mhtml_document());
628 }
629 
630 // Check RenderFrameHostImpl::is_mhtml_document() is correctly set for history
631 // navigation to MHTML document. It should continue to work when restored from
632 // the BackForwardCache.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,BackNavigationPreservesMhtmlFlag)633 IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest,
634                        BackNavigationPreservesMhtmlFlag) {
635   ASSERT_TRUE(embedded_test_server()->Start());
636 
637   MhtmlArchive mhtml_archive;
638   mhtml_archive.AddHtmlDocument(GURL("http://a.com/a"), "");
639   GURL mhtml_url = mhtml_archive.Write("index.mhtml");
640   EXPECT_TRUE(NavigateToURL(shell(), mhtml_url));
641   EXPECT_TRUE(main_frame_host()->is_mhtml_document());
642   EXPECT_TRUE(NavigateToURL(
643       shell(), embedded_test_server()->GetURL("b.com", "/title1.html")));
644   EXPECT_FALSE(main_frame_host()->is_mhtml_document());
645   web_contents()->GetController().GoBack();
646   WaitForLoadStop(web_contents());
647   EXPECT_TRUE(main_frame_host()->is_mhtml_document());
648 }
649 
// Compares the active sandbox flags of an MHTML main document, of a plain
// subframe, and of a subframe carrying the `sandbox` attribute.
IN_PROC_BROWSER_TEST_F(NavigationMhtmlBrowserTest, SandboxedIframe) {
  using network::mojom::WebSandboxFlags;

  MhtmlArchive archive;
  archive.AddHtmlDocument(GURL("http://a.com"), "", R"(
    <iframe src="http://a.com/unsandboxed.html"        ></iframe>
    <iframe src="http://a.com/sandboxed.html"   sandbox></iframe>
  )");
  archive.AddHtmlDocument(GURL("http://a.com/sandboxed.html"), "");
  archive.AddHtmlDocument(GURL("http://a.com/unsandboxed.html"), "");
  const GURL archive_url = archive.Write("index.mhtml");

  EXPECT_TRUE(NavigateToURL(shell(), archive_url));

  RenderFrameHostImpl* const top_frame = main_frame_host();
  ASSERT_EQ(2u, top_frame->child_count());
  // Children appear in markup order: the plain iframe first, then the one
  // with the `sandbox` attribute.
  RenderFrameHostImpl* const plain_frame =
      top_frame->child_at(0)->current_frame_host();
  RenderFrameHostImpl* const sandboxed_frame =
      top_frame->child_at(1)->current_frame_host();

  // Expected flags for MHTML documents without an explicit `sandbox`
  // attribute: every flag except kPopups and
  // kPropagatesToAuxiliaryBrowsingContexts.
  const auto default_mhtml_sandbox =
      ~WebSandboxFlags::kPopups &
      ~WebSandboxFlags::kPropagatesToAuxiliaryBrowsingContexts;
  // Expected flags for the iframe with the bare `sandbox` attribute.
  const auto strict_sandbox = WebSandboxFlags::kAll;

  EXPECT_EQ(default_mhtml_sandbox, top_frame->active_sandbox_flags());
  EXPECT_EQ(default_mhtml_sandbox, plain_frame->active_sandbox_flags());
  EXPECT_EQ(strict_sandbox, sandboxed_frame->active_sandbox_flags());
}
678 
679 }  // namespace content
680