1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <stdint.h>
6 #include <memory>
7 
8 #include "base/bind.h"
9 #include "base/callback.h"
10 #include "base/files/file_path.h"
11 #include "base/files/file_util.h"
12 #include "base/files/scoped_temp_dir.h"
13 #include "base/macros.h"
14 #include "base/path_service.h"
15 #include "base/run_loop.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/test/metrics/histogram_tester.h"
18 #include "base/threading/thread_restrictions.h"
19 #include "build/build_config.h"
20 #include "components/download/public/common/download_task_runner.h"
21 #include "content/browser/download/mhtml_generation_manager.h"
22 #include "content/browser/renderer_host/render_process_host_impl.h"
23 #include "content/common/download/mhtml_file_writer.mojom.h"
24 #include "content/public/browser/browser_task_traits.h"
25 #include "content/public/browser/browser_thread.h"
26 #include "content/public/browser/mhtml_extra_parts.h"
27 #include "content/public/browser/mhtml_generation_result.h"
28 #include "content/public/browser/render_frame_host.h"
29 #include "content/public/browser/render_process_host.h"
30 #include "content/public/browser/web_contents.h"
31 #include "content/public/common/content_paths.h"
32 #include "content/public/common/mhtml_generation_params.h"
33 #include "content/public/test/browser_test.h"
34 #include "content/public/test/browser_test_utils.h"
35 #include "content/public/test/content_browser_test.h"
36 #include "content/public/test/content_browser_test_utils.h"
37 #include "content/public/test/test_utils.h"
38 #include "content/shell/browser/shell.h"
39 #include "crypto/secure_hash.h"
40 #include "crypto/sha2.h"
41 #include "mojo/public/cpp/bindings/associated_receiver.h"
42 #include "mojo/public/cpp/bindings/pending_associated_receiver.h"
43 #include "net/base/filename_util.h"
44 #include "net/dns/mock_host_resolver.h"
45 #include "net/test/embedded_test_server/embedded_test_server.h"
46 #include "testing/gmock/include/gmock/gmock.h"
47 #include "testing/gtest/include/gtest/gtest.h"
48 #include "third_party/blink/public/common/associated_interfaces/associated_interface_provider.h"
49 #include "third_party/blink/public/mojom/frame/find_in_page.mojom.h"
50 
51 using testing::ContainsRegex;
52 using testing::HasSubstr;
53 using testing::Not;
54 
55 namespace content {
56 
57 namespace {
58 
59 // A dummy WebContentsDelegate which tracks the results of a find operation.
60 class FindTrackingDelegate : public WebContentsDelegate {
61  public:
FindTrackingDelegate(const std::string & search)62   explicit FindTrackingDelegate(const std::string& search)
63       : search_(search), matches_(-1) {}
64 
65   // Returns number of results.
Wait(WebContents * web_contents)66   int Wait(WebContents* web_contents) {
67     WebContentsDelegate* old_delegate = web_contents->GetDelegate();
68     web_contents->SetDelegate(this);
69 
70     auto options = blink::mojom::FindOptions::New();
71     options->run_synchronously_for_testing = true;
72     options->match_case = false;
73 
74     web_contents->Find(global_request_id++, base::UTF8ToUTF16(search_),
75                        std::move(options));
76     run_loop_.Run();
77 
78     web_contents->SetDelegate(old_delegate);
79 
80     return matches_;
81   }
82 
FindReply(WebContents * web_contents,int request_id,int number_of_matches,const gfx::Rect & selection_rect,int active_match_ordinal,bool final_update)83   void FindReply(WebContents* web_contents,
84                  int request_id,
85                  int number_of_matches,
86                  const gfx::Rect& selection_rect,
87                  int active_match_ordinal,
88                  bool final_update) override {
89     if (final_update) {
90       matches_ = number_of_matches;
91       run_loop_.Quit();
92     }
93   }
94 
95   static int global_request_id;
96 
97  private:
98   std::string search_;
99   int matches_;
100   base::RunLoop run_loop_;
101 
102   DISALLOW_COPY_AND_ASSIGN(FindTrackingDelegate);
103 };
104 
105 // static
106 int FindTrackingDelegate::global_request_id = 0;
107 
// Payload the mock writers emit in place of real serialized MHTML output.
const char kTestData[] =
    "Sample Text to write on a generated MHTML file for tests to validate "
    "whether the implementation is able to access and write to the file.";
112 
113 class MockWriterBase : public mojom::MhtmlFileWriter {
114  public:
115   MockWriterBase() = default;
116   ~MockWriterBase() override = default;
117 
BindReceiver(mojo::ScopedInterfaceEndpointHandle handle)118   void BindReceiver(mojo::ScopedInterfaceEndpointHandle handle) {
119     receiver_.Bind(mojo::PendingAssociatedReceiver<mojom::MhtmlFileWriter>(
120         std::move(handle)));
121   }
122 
123  protected:
SendResponse(SerializeAsMHTMLCallback callback)124   void SendResponse(SerializeAsMHTMLCallback callback) {
125     std::vector<std::string> dummy_digests;
126     base::TimeDelta dummy_time_delta = base::TimeDelta::FromMilliseconds(100);
127     std::move(callback).Run(mojom::MhtmlSaveStatus::kSuccess, dummy_digests,
128                             dummy_time_delta);
129   }
130 
WriteDataToDestinationFile(base::File & destination_file)131   void WriteDataToDestinationFile(base::File& destination_file) {
132     base::ScopedAllowBlockingForTesting allow_blocking;
133     destination_file.WriteAtCurrentPos(kTestData, strlen(kTestData));
134     destination_file.Close();
135   }
136 
WriteDataToProducerPipe(mojo::ScopedDataPipeProducerHandle producer_pipe)137   void WriteDataToProducerPipe(
138       mojo::ScopedDataPipeProducerHandle producer_pipe) {
139     base::ScopedAllowBlockingForTesting allow_blocking;
140     uint32_t size = strlen(kTestData);
141     producer_pipe->WriteData(kTestData, &size, MOJO_WRITE_DATA_FLAG_NONE);
142     producer_pipe.reset();
143   }
144 
145   mojo::AssociatedReceiver<mojom::MhtmlFileWriter> receiver_{this};
146 
147  private:
148   DISALLOW_COPY_AND_ASSIGN(MockWriterBase);
149 };
150 
151 // This Mock injects our overwritten interface, running the callback
152 // SerializeAsMHTMLResponse and immediately disconnecting the message pipe.
153 class RespondAndDisconnectMockWriter
154     : public MockWriterBase,
155       public base::RefCountedThreadSafe<RespondAndDisconnectMockWriter> {
156  public:
RespondAndDisconnectMockWriter()157   RespondAndDisconnectMockWriter() {}
158 
SerializeAsMHTML(mojom::SerializeAsMHTMLParamsPtr params,SerializeAsMHTMLCallback callback)159   void SerializeAsMHTML(mojom::SerializeAsMHTMLParamsPtr params,
160                         SerializeAsMHTMLCallback callback) override {
161     // Upon using the overridden mock interface implementation, this will be
162     // handled by the product code as illustrated below.  (1), (2), (3) depict
163     // points in time when product code runs on UI thread and download sequence.
164     // For the repro, the message pipe disconnection needs to happen between (1)
165     // and (3).
166     //
167     //   Test instance     UI thread         download sequence
168     //     ---------       ---------           -----------
169     //        |                |                     |
170     //    WE ARE HERE          |                     |
171     //        |                |                     |
172     //        |                |                     |
173     //        +--------------->+                     |
174     //        |                |                     |
175     //        |                |                     |
176     //        |                |                     |
177     //        |                |                     |
178     //        |                |                     |
179     //        |                |                     |
180     // (1)    |      MHTMLGenerationManager::Job     |
181     //        |      ::SerializeAsMHTMLResponse      |
182     //        |                +-------------------->+
183     //        |                |                     |
184     //        |                |                     |
185     //        |                |                     |
186     // (2)    |                |          MHTMLGenerationManager::Job
187     //        |                |          ::CloseFileOnFileThread
188     //        |                |                     |
189     //        |                |                     |
190     //        |           test needs to              |
191     //        |       disconnect message pipe        |
192     //        |      HERE - between (1) and (3)      |
193     //        |                |                     |
194     //        |                |                     |
195     //        |                +<--------------------+
196     //        |                |                     |
197     // (3)    |      MHTMLGenerationManager          |
198     //        |      Job::OnFinished                 |
199     //        |                |                     |
200     //
201     // We hope that the error handler is invoked between (1) and (3) by doing
202     // the following:
203     // - From here, run the callback response to the UI thread. This queues
204     //   the response message onto the bound message pipe.
205     // - After running the callback response, immediately unbind the message
206     //   pipe in order to queue a message onto the bound message pipe to notify
207     //   the Browser the connection was closed and invoke the error handler.
208     // - Upon resuming operation, the FIFO ordering property of associated
209     //   interfaces guarantees the execution of (1) before the error handler.
210     //   (1) posts (2) to the download sequence and terminates. The client end
211     //   then accepts the error notification and invokes the connection error
212     //   handler, guaranteeing its execution before (3).
213 
214     bool compute_contents_hash = params->output_handle->is_producer_handle();
215 
216     // Write a valid MHTML file to its respective handle, since we are not
217     // actively running a serialization pipeline in the mock implementation.
218     if (compute_contents_hash) {
219       WriteDataToProducerPipe(
220           std::move(params->output_handle->get_producer_handle()));
221     } else {
222       WriteDataToDestinationFile(params->output_handle->get_file_handle());
223     }
224 
225     SendResponse(std::move(callback));
226 
227     // Reset the message pipe connection to invoke the disconnect callback. The
228     // disconnect handler from here will finalize the Job and attempt to call
229     // MHTMLGenerationManager::Job::CloseFile a second time. If this situation
230     // is handled correctly, the browser file should be invalidated and
231     // idempotent.
232     if (!compute_contents_hash) {
233       receiver_.reset();
234       return;
235     }
236 
237     // In the case we are using a data pipe to stream serialized MHTML data,
238     // we must ensure the write complete notification arrives before the
239     // connection error notification, otherwise the Browser will report
240     // an MhtmlSaveStatus != kSuccess. We can guarantee this by potentially
241     // running tasks after each watcher invocation to send notifications that
242     // it has been completed. We need at least two tasks to guarantee this,
243     // as there can be at most two watcher invocations to write a block of
244     // data smaller than the data pipe buffer to file.
245     download::GetDownloadTaskRunner()->PostTask(
246         FROM_HERE,
247         base::BindOnce(&RespondAndDisconnectMockWriter::TaskX,
248                        scoped_refptr<RespondAndDisconnectMockWriter>(this)));
249   }
250 
TaskX()251   void TaskX() {
252     download::GetDownloadTaskRunner()->PostTask(
253         FROM_HERE,
254         base::BindOnce(&RespondAndDisconnectMockWriter::TaskY,
255                        scoped_refptr<RespondAndDisconnectMockWriter>(this)));
256   }
257 
TaskY()258   void TaskY() {
259     GetUIThreadTaskRunner({})->PostTask(
260         FROM_HERE,
261         base::BindOnce(&RespondAndDisconnectMockWriter::TaskZ,
262                        scoped_refptr<RespondAndDisconnectMockWriter>(this)));
263   }
264 
TaskZ()265   void TaskZ() { receiver_.reset(); }
266 
267  private:
268   friend base::RefCountedThreadSafe<RespondAndDisconnectMockWriter>;
269 
270   ~RespondAndDisconnectMockWriter() override = default;
271 
272   DISALLOW_COPY_AND_ASSIGN(RespondAndDisconnectMockWriter);
273 };
274 
275 }  // namespace
276 
277 class MHTMLGenerationTest : public ContentBrowserTest,
278                             public testing::WithParamInterface<bool> {
279  public:
MHTMLGenerationTest()280   MHTMLGenerationTest()
281       : has_mhtml_callback_run_(false),
282         file_size_(0),
283         file_digest_(base::nullopt),
284         well_formedness_check_(true) {}
285 
286   enum TaskOrder { WriteThenRespond, RespondThenWrite };
287 
288  protected:
SetUpOnMainThread()289   void SetUpOnMainThread() override {
290     ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
291     ASSERT_TRUE(embedded_test_server()->Start());
292     ContentBrowserTest::SetUpOnMainThread();
293   }
294 
OverrideInterface(MockWriterBase * mock_writer)295   void OverrideInterface(MockWriterBase* mock_writer) {
296     blink::AssociatedInterfaceProvider* remote_interfaces =
297         shell()
298             ->web_contents()
299             ->GetMainFrame()
300             ->GetRemoteAssociatedInterfaces();
301     remote_interfaces->OverrideBinderForTesting(
302         mojom::MhtmlFileWriter::Name_,
303         base::BindRepeating(&MockWriterBase::BindReceiver,
304                             base::Unretained(mock_writer)));
305   }
306 
GenerateMHTML(base::FilePath & path,const GURL & url)307   void GenerateMHTML(base::FilePath& path, const GURL& url) {
308     MHTMLGenerationParams params(path);
309     GenerateMHTML(params, url);
310   }
311 
GenerateMHTML(MHTMLGenerationParams & params,const GURL & url)312   void GenerateMHTML(MHTMLGenerationParams& params, const GURL& url) {
313     EXPECT_TRUE(NavigateToURL(shell(), url));
314     GenerateMHTMLForCurrentPage(params);
315   }
316 
GenerateMHTMLForCurrentPage(MHTMLGenerationParams & params)317   void GenerateMHTMLForCurrentPage(MHTMLGenerationParams& params) {
318     base::RunLoop run_loop;
319     histogram_tester_.reset(new base::HistogramTester());
320 
321     bool use_result_callback = GetParam();
322 
323     if (use_result_callback) {
324       shell()->web_contents()->GenerateMHTMLWithResult(
325           params,
326           base::BindOnce(&MHTMLGenerationTest::MHTMLGeneratedWithResult,
327                          base::Unretained(this), run_loop.QuitClosure()));
328     } else {
329       shell()->web_contents()->GenerateMHTML(
330           params,
331           base::BindOnce(&MHTMLGenerationTest::MHTMLGenerated,
332                          base::Unretained(this), run_loop.QuitClosure()));
333     }
334 
335     // Block until the MHTML is generated.
336     run_loop.Run();
337 
338     ASSERT_TRUE(has_mhtml_callback_run())
339         << "Unexpected error generating MHTML file";
340 
341     // TODO(crbug.com/997408): Add tests which will let MHTMLGeneration manager
342     // fail during file write operation. This will allow us to actually test if
343     // we receive a bogus hash instead of a base::nullopt.
344     EXPECT_EQ(base::nullopt, file_digest());
345 
346     // Skip well formedness check if explicitly disabled or there was a
347     // generation error.
348     bool generation_failed = file_size() == -1;
349     if (!well_formedness_check_ || generation_failed)
350       return;
351 
352     // Loads the generated file to check if it is well formed.
353     WebContentsConsoleObserver console_observer(shell()->web_contents());
354     console_observer.SetPattern("Malformed multipart archive: *");
355 
356     EXPECT_TRUE(
357         NavigateToURL(shell(), net::FilePathToFileURL(params.file_path)))
358         << "Error navigating to the generated MHTML file";
359     EXPECT_TRUE(console_observer.messages().empty())
360         << "The generated MHTML file is malformed";
361   }
362 
363   void TwoStepSyncTestFor(const TaskOrder order);
364 
ReadFileSizeFromDisk(base::FilePath path)365   int64_t ReadFileSizeFromDisk(base::FilePath path) {
366     base::ScopedAllowBlockingForTesting allow_blocking;
367     int64_t file_size;
368     if (!base::GetFileSize(path, &file_size)) return -1;
369     return file_size;
370   }
371 
TestOriginalVsSavedPage(const GURL & url,MHTMLGenerationParams params,int expected_number_of_frames,const std::vector<std::string> & expected_substrings,const std::vector<std::string> & forbidden_substrings_in_saved_page,bool skip_verification_of_original_page=false)372   void TestOriginalVsSavedPage(
373       const GURL& url,
374       MHTMLGenerationParams params,
375       int expected_number_of_frames,
376       const std::vector<std::string>& expected_substrings,
377       const std::vector<std::string>& forbidden_substrings_in_saved_page,
378       bool skip_verification_of_original_page = false) {
379     // Navigate to the test page and verify if test expectations
380     // are met (this is mostly a sanity check - a failure to meet
381     // expectations would probably mean that there is a test bug
382     // (i.e. that we got called with wrong expected_foo argument).
383     EXPECT_TRUE(NavigateToURL(shell(), url));
384     if (!skip_verification_of_original_page) {
385       AssertExpectationsAboutCurrentTab(expected_number_of_frames,
386                                         expected_substrings,
387                                         std::vector<std::string>());
388     }
389 
390     GenerateMHTML(params, url);
391 
392     // Stop the test server (to make sure the locally saved page
393     // is self-contained / won't try to open original resources).
394     ASSERT_TRUE(embedded_test_server()->ShutdownAndWaitUntilComplete());
395 
396     // Open the saved page and verify if test expectations are
397     // met (i.e. if the same expectations are met for "after"
398     // [saved version of the page] as for the "before"
399     // [the original version of the page].
400     EXPECT_TRUE(
401         NavigateToURL(shell(), net::FilePathToFileURL(params.file_path)));
402     AssertExpectationsAboutCurrentTab(expected_number_of_frames,
403                                       expected_substrings,
404                                       forbidden_substrings_in_saved_page);
405   }
406 
AssertExpectationsAboutCurrentTab(int expected_number_of_frames,const std::vector<std::string> & expected_substrings,const std::vector<std::string> & forbidden_substrings)407   void AssertExpectationsAboutCurrentTab(
408       int expected_number_of_frames,
409       const std::vector<std::string>& expected_substrings,
410       const std::vector<std::string>& forbidden_substrings) {
411     int actual_number_of_frames =
412         shell()->web_contents()->GetAllFrames().size();
413     EXPECT_EQ(expected_number_of_frames, actual_number_of_frames);
414 
415     for (const auto& expected_substring : expected_substrings) {
416       FindTrackingDelegate delegate(expected_substring);
417       int actual_number_of_matches = delegate.Wait(shell()->web_contents());
418       EXPECT_EQ(1, actual_number_of_matches)
419           << "Verifying that \"" << expected_substring << "\" appears "
420           << "exactly once in the text of web contents of "
421           << shell()->web_contents()->GetURL().spec();
422     }
423 
424     for (const auto& forbidden_substring : forbidden_substrings) {
425       FindTrackingDelegate delegate(forbidden_substring);
426       int actual_number_of_matches = delegate.Wait(shell()->web_contents());
427       EXPECT_EQ(0, actual_number_of_matches)
428           << "Verifying that \"" << forbidden_substring << "\" doesn't "
429           << "appear in the text of web contents of "
430           << shell()->web_contents()->GetURL().spec();
431     }
432   }
433 
434   // Tests that the result of setting compute_contents_hash is the same as
435   // manually hashing the file. Because MHTMLGenerationManager depends on
436   // net::GenerateMimeMultipartBoundary() to write the boundary, we cannot
437   // compute the digest in advance. Therefore, we must compute the hash of the
438   // whole file and assert that the computed hash is the same as the hash
439   // produced here.
TestComputeContentsHash(base::FilePath & path)440   void TestComputeContentsHash(base::FilePath& path) {
441     base::ScopedAllowBlockingForTesting allow_blocking;
442 
443     // Reload the file to an mhtml string for hashing
444     std::string test_mhtml;
445     ASSERT_TRUE(base::ReadFileToString(path, &test_mhtml));
446 
447     // Hash the file in one big step. This is not recommended to do outside of
448     // tests because the files being hashed could be too large.
449     std::unique_ptr<crypto::SecureHash> secure_hash =
450         crypto::SecureHash::Create(crypto::SecureHash::Algorithm::SHA256);
451     secure_hash->Update(test_mhtml.c_str(), test_mhtml.size());
452     std::string expected_digest(secure_hash->GetHashLength(), 0);
453     secure_hash->Finish(&(expected_digest[0]), expected_digest.size());
454     secure_hash.reset();
455 
456     ASSERT_TRUE(file_digest());
457     EXPECT_EQ(file_digest().value(), expected_digest);
458   }
459 
460   // In the case that we are using a pre-generated .mhtml file, we do
461   // not have any control over the final mhtml_boundary_marker write
462   // operation. This results in the post-generation verification tests
463   // reporting a malformed multipart archive, unintentionally failing the
464   // test.
DisableWellformednessCheck()465   void DisableWellformednessCheck() { well_formedness_check_ = false; }
466 
has_mhtml_callback_run() const467   bool has_mhtml_callback_run() const { return has_mhtml_callback_run_; }
file_size() const468   int64_t file_size() const { return file_size_; }
file_digest() const469   base::Optional<std::string> file_digest() const { return file_digest_; }
histogram_tester()470   base::HistogramTester* histogram_tester() { return histogram_tester_.get(); }
471 
472   base::ScopedTempDir temp_dir_;
473 
474  private:
MHTMLGenerated(base::OnceClosure quit_closure,int64_t size)475   void MHTMLGenerated(base::OnceClosure quit_closure, int64_t size) {
476     has_mhtml_callback_run_ = true;
477     file_size_ = size;
478     std::move(quit_closure).Run();
479   }
MHTMLGeneratedWithResult(base::OnceClosure quit_closure,const MHTMLGenerationResult & result)480   void MHTMLGeneratedWithResult(base::OnceClosure quit_closure,
481                                 const MHTMLGenerationResult& result) {
482     has_mhtml_callback_run_ = true;
483     file_size_ = result.file_size;
484     file_digest_ = result.file_digest;
485     std::move(quit_closure).Run();
486   }
487 
488   bool has_mhtml_callback_run_;
489   int64_t file_size_;
490   base::Optional<std::string> file_digest_;
491   bool well_formedness_check_;
492   std::unique_ptr<base::HistogramTester> histogram_tester_;
493 };
494 
495 // Tests that generating a MHTML does create contents.
496 // Note that the actual content of the file is not tested, the purpose of this
497 // test is to ensure we were successful in creating the MHTML data from the
498 // renderer.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,GenerateMHTML)499 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTML) {
500   base::FilePath path(temp_dir_.GetPath());
501   path = path.Append(FILE_PATH_LITERAL("test.mht"));
502 
503   GenerateMHTML(path, embedded_test_server()->GetURL("/simple_page.html"));
504 
505   // Make sure the actual generated file has some contents.
506   EXPECT_GT(file_size(), 0);  // Verify the size reported by the callback.
507   EXPECT_GT(ReadFileSizeFromDisk(path), 100);  // Verify the actual file size.
508 
509   {
510     base::ScopedAllowBlockingForTesting allow_blocking;
511     std::string mhtml;
512     ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
513     EXPECT_THAT(mhtml,
514                 HasSubstr("Content-Transfer-Encoding: quoted-printable"));
515   }
516 
517   // Checks that the final status reported to UMA is correct.
518   histogram_tester()->ExpectUniqueSample(
519       "PageSerialization.MhtmlGeneration.FinalSaveStatus",
520       static_cast<int>(mojom::MhtmlSaveStatus::kSuccess), 1);
521 }
522 
523 // Regression test for the crash/race from https://crbug.com/612098.
524 //
525 // TODO(crbug.com/959435): Flaky on Android.
526 #if defined(OS_ANDROID)
527 #define MAYBE_GenerateMHTMLAndCloseConnection \
528   DISABLED_GenerateMHTMLAndCloseConnection
529 #else
530 #define MAYBE_GenerateMHTMLAndCloseConnection GenerateMHTMLAndCloseConnection
531 #endif
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,MAYBE_GenerateMHTMLAndCloseConnection)532 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,
533                        MAYBE_GenerateMHTMLAndCloseConnection) {
534   scoped_refptr<RespondAndDisconnectMockWriter> mock_writer =
535       base::MakeRefCounted<RespondAndDisconnectMockWriter>();
536 
537   EXPECT_TRUE(NavigateToURL(
538       shell(), embedded_test_server()->GetURL("/simple_page.html")));
539   base::FilePath path(temp_dir_.GetPath());
540   path = path.Append(FILE_PATH_LITERAL("test.mht"));
541 
542   OverrideInterface(mock_writer.get());
543   DisableWellformednessCheck();
544 
545   MHTMLGenerationParams params(path);
546   GenerateMHTMLForCurrentPage(params);
547 
548   // Verify the file has some contents written to it.
549   EXPECT_GT(ReadFileSizeFromDisk(path), 100);
550   // Verify the reported file size matches the file written to disk.
551   EXPECT_EQ(ReadFileSizeFromDisk(path), file_size());
552 }
553 
554 // TODO(crbug.com/672313): Flaky on Windows.
555 #if defined(OS_WIN)
556 #define MAYBE_InvalidPath DISABLED_InvalidPath
557 #else
558 #define MAYBE_InvalidPath InvalidPath
559 #endif
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,MAYBE_InvalidPath)560 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, MAYBE_InvalidPath) {
561   base::FilePath path(FILE_PATH_LITERAL("/invalid/file/path"));
562 
563   GenerateMHTML(path, embedded_test_server()->GetURL("/page_with_image.html"));
564 
565   EXPECT_EQ(file_size(), -1);  // Expecting that the callback reported failure.
566 
567   // Checks that the final status reported to UMA is correct.
568   histogram_tester()->ExpectUniqueSample(
569       "PageSerialization.MhtmlGeneration.FinalSaveStatus",
570       static_cast<int>(mojom::MhtmlSaveStatus::kFileCreationError), 1);
571 }
572 
573 // Tests that MHTML generated using the default 'quoted-printable' encoding does
574 // not contain the 'binary' Content-Transfer-Encoding header, and generates
575 // base64 encoding for the image part.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,GenerateNonBinaryMHTMLWithImage)576 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateNonBinaryMHTMLWithImage) {
577   base::FilePath path(temp_dir_.GetPath());
578   path = path.Append(FILE_PATH_LITERAL("test_binary.mht"));
579 
580   GURL url(embedded_test_server()->GetURL("/page_with_image.html"));
581   GenerateMHTML(path, url);
582   EXPECT_GT(file_size(), 0);  // Verify the size reported by the callback.
583   EXPECT_GT(ReadFileSizeFromDisk(path), 100);  // Verify the actual file size.
584 
585   {
586     base::ScopedAllowBlockingForTesting allow_blocking;
587     std::string mhtml;
588     ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
589     EXPECT_THAT(mhtml, HasSubstr("Content-Transfer-Encoding: base64"));
590     EXPECT_THAT(mhtml, Not(HasSubstr("Content-Transfer-Encoding: binary")));
591     EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*blank.jpg"));
592     // Verify the boundary should start with CRLF.
593     EXPECT_THAT(mhtml, HasSubstr("\r\n------MultipartBoundary"));
594   }
595 }
596 
597 // Tests that MHTML generated using the binary encoding contains the 'binary'
598 // Content-Transfer-Encoding header, and does not contain any base64 encoded
599 // parts.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,GenerateBinaryMHTMLWithImage)600 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateBinaryMHTMLWithImage) {
601   base::FilePath path(temp_dir_.GetPath());
602   path = path.Append(FILE_PATH_LITERAL("test_binary.mht"));
603 
604   GURL url(embedded_test_server()->GetURL("/page_with_image.html"));
605   MHTMLGenerationParams params(path);
606   params.use_binary_encoding = true;
607 
608   GenerateMHTML(params, url);
609   EXPECT_GT(file_size(), 0);  // Verify the size reported by the callback.
610   EXPECT_GT(ReadFileSizeFromDisk(path), 100);  // Verify the actual file size.
611 
612   {
613     base::ScopedAllowBlockingForTesting allow_blocking;
614     std::string mhtml;
615     ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
616     EXPECT_THAT(mhtml, HasSubstr("Content-Transfer-Encoding: binary"));
617     EXPECT_THAT(mhtml, Not(HasSubstr("Content-Transfer-Encoding: base64")));
618     EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*blank.jpg"));
619     // Verify the boundary should start with CRLF.
620     EXPECT_THAT(mhtml, HasSubstr("\r\n------MultipartBoundary"));
621   }
622 }
623 
// Saves /nostore.html without specifying the FailForNoStoreMainFrame policy
// and checks that both the page body and its URL end up in the MHTML.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLIgnoreNoStore) {
  base::FilePath path =
      temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"));
  const GURL page_url = embedded_test_server()->GetURL("/nostore.html");

  GenerateMHTML(path, page_url);

  std::string saved_contents;
  {
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(path, &saved_contents));
  }

  // The body text must have been serialized.
  EXPECT_THAT(saved_contents, HasSubstr("test body"));

  // The URL of the content must be recorded as well.
  EXPECT_THAT(saved_contents,
              ContainsRegex("Content-Location:.*/nostore.html"));
}
645 
646 // TODO(crbug.com/615291): These fail on Android under some circumstances.
647 #if defined(OS_ANDROID)
648 #define MAYBE_ViewedMHTMLContainsNoStoreContent \
649   DISABLED_ViewedMHTMLContainsNoStoreContent
650 #else
651 #define MAYBE_ViewedMHTMLContainsNoStoreContent \
652   ViewedMHTMLContainsNoStoreContent
653 #endif
654 
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,MAYBE_ViewedMHTMLContainsNoStoreContent)655 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,
656                        MAYBE_ViewedMHTMLContainsNoStoreContent) {
657   // Generate MHTML.
658   base::FilePath path(temp_dir_.GetPath());
659   path = path.Append(FILE_PATH_LITERAL("test.mht"));
660   MHTMLGenerationParams params(path);
661 
662   // We should see both frames.
663   std::vector<std::string> expectations = {
664       "Main Frame, normal headers.", "Cache-Control: no-store test body",
665   };
666   std::vector<std::string> forbidden;
667   TestOriginalVsSavedPage(
668       embedded_test_server()->GetURL("/page_with_nostore_iframe.html"), params,
669       2 /* expected number of frames */, expectations, forbidden);
670 }
671 
672 // Test suite that allows testing --site-per-process against cross-site frames.
673 // See http://dev.chromium.org/developers/design-documents/site-isolation.
674 class MHTMLGenerationSitePerProcessTest : public MHTMLGenerationTest {
675  public:
MHTMLGenerationSitePerProcessTest()676   MHTMLGenerationSitePerProcessTest() {}
677 
678  protected:
SetUpCommandLine(base::CommandLine * command_line)679   void SetUpCommandLine(base::CommandLine* command_line) override {
680     MHTMLGenerationTest::SetUpCommandLine(command_line);
681 
682     // Append --site-per-process flag.
683     content::IsolateAllSitesForTesting(command_line);
684   }
685 
SetUpOnMainThread()686   void SetUpOnMainThread() override {
687     host_resolver()->AddRule("*", "127.0.0.1");
688     content::SetupCrossSiteRedirector(embedded_test_server());
689 
690     MHTMLGenerationTest::SetUpOnMainThread();
691   }
692 
693  private:
694   DISALLOW_COPY_AND_ASSIGN(MHTMLGenerationSitePerProcessTest);
695 };
696 
697 // Test for crbug.com/538766.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationSitePerProcessTest,GenerateMHTML)698 IN_PROC_BROWSER_TEST_P(MHTMLGenerationSitePerProcessTest, GenerateMHTML) {
699   base::FilePath path(temp_dir_.GetPath());
700   path = path.Append(FILE_PATH_LITERAL("test.mht"));
701 
702   GURL url(embedded_test_server()->GetURL(
703       "a.com", "/frame_tree/page_with_one_frame.html"));
704   GenerateMHTML(path, url);
705 
706   std::string mhtml;
707   {
708     base::ScopedAllowBlockingForTesting allow_blocking;
709     ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
710   }
711 
712   // Make sure the contents of both frames are present.
713   EXPECT_THAT(mhtml, HasSubstr("This page has one cross-site iframe"));
714   EXPECT_THAT(mhtml, HasSubstr("This page has no title"));  // From title1.html.
715 
716   // Make sure that URLs of both frames are present
717   // (note that these are single-line regexes).
718   EXPECT_THAT(
719       mhtml,
720       ContainsRegex("Content-Location:.*/frame_tree/page_with_one_frame.html"));
721   EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*/title1.html"));
722 }
723 
// Generates MHTML for /popup.html with |remove_popup_overlay| enabled and
// checks that neither the overlay nor the modal class shows up in the output.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, RemovePopupOverlay) {
  base::FilePath path =
      temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"));

  MHTMLGenerationParams params(path);
  params.remove_popup_overlay = true;

  const GURL url = embedded_test_server()->GetURL("/popup.html");
  GenerateMHTML(params, url);

  std::string mhtml;
  {
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
  }

  // Make sure the overlay is removed. NOTE(review): "=3D" looks like the
  // quoted-printable form of "=" in the saved markup -- confirm.
  for (const char* removed_markup :
       {"class=3D\"overlay", "class=3D\"modal"}) {
    EXPECT_THAT(mhtml, Not(HasSubstr(removed_markup)));
  }
}
745 
// Registers two extra parts on the WebContents through MHTMLExtraParts and
// checks that the body of each one ends up in the generated MHTML file.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLWithExtraData) {
  constexpr char kFakeSignalData1[] = "FakeSignalData1";
  constexpr char kFakeSignalData2[] = "OtherMockDataForSignals";
  constexpr char kFakeContentType[] = "text/plain";
  constexpr char kFakeContentLocation[] =
      "cid:signal-data-62691-645341c4-62b3-478e-a8c5-e0dfccc3ca02@mhtml.blink";

  base::FilePath path =
      temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"));
  const GURL url = embedded_test_server()->GetURL("/page_with_image.html");

  // Headers shared by both extra parts; no additional headers are supplied.
  const std::string content_type = kFakeContentType;
  const std::string content_location = kFakeContentLocation;
  const std::string extra_headers;

  // The extra parts live in the web contents user data.
  MHTMLExtraParts* const extra_parts =
      MHTMLExtraParts::FromWebContents(shell()->web_contents());

  // Add the two payloads, in order, as separate parts.
  for (const char* body : {kFakeSignalData1, kFakeSignalData2}) {
    extra_parts->AddExtraMHTMLPart(content_type, content_location,
                                   extra_headers, body);
  }
  EXPECT_EQ(extra_parts->size(), 2);

  GenerateMHTML(path, url);
  EXPECT_TRUE(has_mhtml_callback_run());

  std::string mhtml;
  {
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
  }

  // Both payloads must have made it into the archive.
  EXPECT_THAT(mhtml, HasSubstr(kFakeSignalData1));
  EXPECT_THAT(mhtml, HasSubstr(kFakeSignalData2));
}
785 
// Saves /page_with_multiple_iframes.html (11 frames expected) and checks
// that the resulting file is non-trivial and carries a Content-Location
// header for each of the listed subframe documents.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLWithMultipleFrames) {
  base::FilePath path =
      temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"));

  // Single-line regexes, one per document that must be part of the archive.
  const std::vector<std::string> content_location_patterns = {
      "Content-Location:.*/page_with_image.html",
      "Content-Location:.*/page_with_popup.html",
      "Content-Location:.*/page_with_frameset.html",
      "Content-Location:.*/page_with_allowfullscreen_frame.html",
      "Content-Location:.*/page_with_iframe_and_link.html",
  };

  // No expected or forbidden substrings are passed for the comparison.
  const std::vector<std::string> no_substrings;
  const GURL url =
      embedded_test_server()->GetURL("/page_with_multiple_iframes.html");

  MHTMLGenerationParams params(path);
  TestOriginalVsSavedPage(url, params, 11 /* expected number of frames */,
                          no_substrings, no_substrings);

  // Generation succeeded if the callback reported a positive size and the
  // file on disk is larger than 100 bytes.
  EXPECT_GT(file_size(), 0);
  EXPECT_GT(ReadFileSizeFromDisk(path), 100);

  std::string mhtml;
  {
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
  }

  // Every listed frame document has to be present in the generated file.
  for (const std::string& pattern : content_location_patterns) {
    EXPECT_THAT(mhtml, ContainsRegex(pattern));
  }
}
817 
818 // We instantiate the MHTML Generation Tests both using and not using the
819 // GenerateMHTMLWithResults callback.
820 INSTANTIATE_TEST_SUITE_P(MHTMLGenerationTest,
821                          MHTMLGenerationTest,
822                          testing::Bool());
823 INSTANTIATE_TEST_SUITE_P(MHTMLGenerationSitePerProcessTest,
824                          MHTMLGenerationSitePerProcessTest,
825                          testing::Bool());
826 
827 }  // namespace content
828