1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stdint.h>
6 #include <memory>
7
8 #include "base/bind.h"
9 #include "base/callback.h"
10 #include "base/files/file_path.h"
11 #include "base/files/file_util.h"
12 #include "base/files/scoped_temp_dir.h"
13 #include "base/macros.h"
14 #include "base/path_service.h"
15 #include "base/run_loop.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/test/metrics/histogram_tester.h"
18 #include "base/threading/thread_restrictions.h"
19 #include "build/build_config.h"
20 #include "components/download/public/common/download_task_runner.h"
21 #include "content/browser/download/mhtml_generation_manager.h"
22 #include "content/browser/renderer_host/render_process_host_impl.h"
23 #include "content/common/download/mhtml_file_writer.mojom.h"
24 #include "content/public/browser/browser_task_traits.h"
25 #include "content/public/browser/browser_thread.h"
26 #include "content/public/browser/mhtml_extra_parts.h"
27 #include "content/public/browser/mhtml_generation_result.h"
28 #include "content/public/browser/render_frame_host.h"
29 #include "content/public/browser/render_process_host.h"
30 #include "content/public/browser/web_contents.h"
31 #include "content/public/common/content_paths.h"
32 #include "content/public/common/mhtml_generation_params.h"
33 #include "content/public/test/browser_test.h"
34 #include "content/public/test/browser_test_utils.h"
35 #include "content/public/test/content_browser_test.h"
36 #include "content/public/test/content_browser_test_utils.h"
37 #include "content/public/test/test_utils.h"
38 #include "content/shell/browser/shell.h"
39 #include "crypto/secure_hash.h"
40 #include "crypto/sha2.h"
41 #include "mojo/public/cpp/bindings/associated_receiver.h"
42 #include "mojo/public/cpp/bindings/pending_associated_receiver.h"
43 #include "net/base/filename_util.h"
44 #include "net/dns/mock_host_resolver.h"
45 #include "net/test/embedded_test_server/embedded_test_server.h"
46 #include "testing/gmock/include/gmock/gmock.h"
47 #include "testing/gtest/include/gtest/gtest.h"
48 #include "third_party/blink/public/common/associated_interfaces/associated_interface_provider.h"
49 #include "third_party/blink/public/mojom/frame/find_in_page.mojom.h"
50
51 using testing::ContainsRegex;
52 using testing::HasSubstr;
53 using testing::Not;
54
55 namespace content {
56
57 namespace {
58
59 // A dummy WebContentsDelegate which tracks the results of a find operation.
60 class FindTrackingDelegate : public WebContentsDelegate {
61 public:
FindTrackingDelegate(const std::string & search)62 explicit FindTrackingDelegate(const std::string& search)
63 : search_(search), matches_(-1) {}
64
65 // Returns number of results.
Wait(WebContents * web_contents)66 int Wait(WebContents* web_contents) {
67 WebContentsDelegate* old_delegate = web_contents->GetDelegate();
68 web_contents->SetDelegate(this);
69
70 auto options = blink::mojom::FindOptions::New();
71 options->run_synchronously_for_testing = true;
72 options->match_case = false;
73
74 web_contents->Find(global_request_id++, base::UTF8ToUTF16(search_),
75 std::move(options));
76 run_loop_.Run();
77
78 web_contents->SetDelegate(old_delegate);
79
80 return matches_;
81 }
82
FindReply(WebContents * web_contents,int request_id,int number_of_matches,const gfx::Rect & selection_rect,int active_match_ordinal,bool final_update)83 void FindReply(WebContents* web_contents,
84 int request_id,
85 int number_of_matches,
86 const gfx::Rect& selection_rect,
87 int active_match_ordinal,
88 bool final_update) override {
89 if (final_update) {
90 matches_ = number_of_matches;
91 run_loop_.Quit();
92 }
93 }
94
95 static int global_request_id;
96
97 private:
98 std::string search_;
99 int matches_;
100 base::RunLoop run_loop_;
101
102 DISALLOW_COPY_AND_ASSIGN(FindTrackingDelegate);
103 };
104
105 // static
106 int FindTrackingDelegate::global_request_id = 0;
107
// Canned payload that the mock writers put into the output file or data pipe
// in place of a real serialized page.
const char kTestData[] =
    "Sample Text to write on a generated MHTML file for tests "
    "to validate whether the implementation is able to access and "
    "write to the file.";
112
113 class MockWriterBase : public mojom::MhtmlFileWriter {
114 public:
115 MockWriterBase() = default;
116 ~MockWriterBase() override = default;
117
BindReceiver(mojo::ScopedInterfaceEndpointHandle handle)118 void BindReceiver(mojo::ScopedInterfaceEndpointHandle handle) {
119 receiver_.Bind(mojo::PendingAssociatedReceiver<mojom::MhtmlFileWriter>(
120 std::move(handle)));
121 }
122
123 protected:
SendResponse(SerializeAsMHTMLCallback callback)124 void SendResponse(SerializeAsMHTMLCallback callback) {
125 std::vector<std::string> dummy_digests;
126 base::TimeDelta dummy_time_delta = base::TimeDelta::FromMilliseconds(100);
127 std::move(callback).Run(mojom::MhtmlSaveStatus::kSuccess, dummy_digests,
128 dummy_time_delta);
129 }
130
WriteDataToDestinationFile(base::File & destination_file)131 void WriteDataToDestinationFile(base::File& destination_file) {
132 base::ScopedAllowBlockingForTesting allow_blocking;
133 destination_file.WriteAtCurrentPos(kTestData, strlen(kTestData));
134 destination_file.Close();
135 }
136
WriteDataToProducerPipe(mojo::ScopedDataPipeProducerHandle producer_pipe)137 void WriteDataToProducerPipe(
138 mojo::ScopedDataPipeProducerHandle producer_pipe) {
139 base::ScopedAllowBlockingForTesting allow_blocking;
140 uint32_t size = strlen(kTestData);
141 producer_pipe->WriteData(kTestData, &size, MOJO_WRITE_DATA_FLAG_NONE);
142 producer_pipe.reset();
143 }
144
145 mojo::AssociatedReceiver<mojom::MhtmlFileWriter> receiver_{this};
146
147 private:
148 DISALLOW_COPY_AND_ASSIGN(MockWriterBase);
149 };
150
151 // This Mock injects our overwritten interface, running the callback
152 // SerializeAsMHTMLResponse and immediately disconnecting the message pipe.
153 class RespondAndDisconnectMockWriter
154 : public MockWriterBase,
155 public base::RefCountedThreadSafe<RespondAndDisconnectMockWriter> {
156 public:
RespondAndDisconnectMockWriter()157 RespondAndDisconnectMockWriter() {}
158
SerializeAsMHTML(mojom::SerializeAsMHTMLParamsPtr params,SerializeAsMHTMLCallback callback)159 void SerializeAsMHTML(mojom::SerializeAsMHTMLParamsPtr params,
160 SerializeAsMHTMLCallback callback) override {
161 // Upon using the overridden mock interface implementation, this will be
162 // handled by the product code as illustrated below. (1), (2), (3) depict
163 // points in time when product code runs on UI thread and download sequence.
164 // For the repro, the message pipe disconnection needs to happen between (1)
165 // and (3).
166 //
167 // Test instance UI thread download sequence
168 // --------- --------- -----------
169 // | | |
170 // WE ARE HERE | |
171 // | | |
172 // | | |
173 // +--------------->+ |
174 // | | |
175 // | | |
176 // | | |
177 // | | |
178 // | | |
179 // | | |
180 // (1) | MHTMLGenerationManager::Job |
181 // | ::SerializeAsMHTMLResponse |
182 // | +-------------------->+
183 // | | |
184 // | | |
185 // | | |
186 // (2) | | MHTMLGenerationManager::Job
187 // | | ::CloseFileOnFileThread
188 // | | |
189 // | | |
190 // | test needs to |
191 // | disconnect message pipe |
192 // | HERE - between (1) and (3) |
193 // | | |
194 // | | |
195 // | +<--------------------+
196 // | | |
197 // (3) | MHTMLGenerationManager |
198 // | Job::OnFinished |
199 // | | |
200 //
201 // We hope that the error handler is invoked between (1) and (3) by doing
202 // the following:
203 // - From here, run the callback response to the UI thread. This queues
204 // the response message onto the bound message pipe.
205 // - After running the callback response, immediately unbind the message
206 // pipe in order to queue a message onto the bound message pipe to notify
207 // the Browser the connection was closed and invoke the error handler.
208 // - Upon resuming operation, the FIFO ordering property of associated
209 // interfaces guarantees the execution of (1) before the error handler.
210 // (1) posts (2) to the download sequence and terminates. The client end
211 // then accepts the error notification and invokes the connection error
212 // handler, guaranteeing its execution before (3).
213
214 bool compute_contents_hash = params->output_handle->is_producer_handle();
215
216 // Write a valid MHTML file to its respective handle, since we are not
217 // actively running a serialization pipeline in the mock implementation.
218 if (compute_contents_hash) {
219 WriteDataToProducerPipe(
220 std::move(params->output_handle->get_producer_handle()));
221 } else {
222 WriteDataToDestinationFile(params->output_handle->get_file_handle());
223 }
224
225 SendResponse(std::move(callback));
226
227 // Reset the message pipe connection to invoke the disconnect callback. The
228 // disconnect handler from here will finalize the Job and attempt to call
229 // MHTMLGenerationManager::Job::CloseFile a second time. If this situation
230 // is handled correctly, the browser file should be invalidated and
231 // idempotent.
232 if (!compute_contents_hash) {
233 receiver_.reset();
234 return;
235 }
236
237 // In the case we are using a data pipe to stream serialized MHTML data,
238 // we must ensure the write complete notification arrives before the
239 // connection error notification, otherwise the Browser will report
240 // an MhtmlSaveStatus != kSuccess. We can guarantee this by potentially
241 // running tasks after each watcher invocation to send notifications that
242 // it has been completed. We need at least two tasks to guarantee this,
243 // as there can be at most two watcher invocations to write a block of
244 // data smaller than the data pipe buffer to file.
245 download::GetDownloadTaskRunner()->PostTask(
246 FROM_HERE,
247 base::BindOnce(&RespondAndDisconnectMockWriter::TaskX,
248 scoped_refptr<RespondAndDisconnectMockWriter>(this)));
249 }
250
TaskX()251 void TaskX() {
252 download::GetDownloadTaskRunner()->PostTask(
253 FROM_HERE,
254 base::BindOnce(&RespondAndDisconnectMockWriter::TaskY,
255 scoped_refptr<RespondAndDisconnectMockWriter>(this)));
256 }
257
TaskY()258 void TaskY() {
259 GetUIThreadTaskRunner({})->PostTask(
260 FROM_HERE,
261 base::BindOnce(&RespondAndDisconnectMockWriter::TaskZ,
262 scoped_refptr<RespondAndDisconnectMockWriter>(this)));
263 }
264
TaskZ()265 void TaskZ() { receiver_.reset(); }
266
267 private:
268 friend base::RefCountedThreadSafe<RespondAndDisconnectMockWriter>;
269
270 ~RespondAndDisconnectMockWriter() override = default;
271
272 DISALLOW_COPY_AND_ASSIGN(RespondAndDisconnectMockWriter);
273 };
274
275 } // namespace
276
277 class MHTMLGenerationTest : public ContentBrowserTest,
278 public testing::WithParamInterface<bool> {
279 public:
MHTMLGenerationTest()280 MHTMLGenerationTest()
281 : has_mhtml_callback_run_(false),
282 file_size_(0),
283 file_digest_(base::nullopt),
284 well_formedness_check_(true) {}
285
286 enum TaskOrder { WriteThenRespond, RespondThenWrite };
287
288 protected:
SetUpOnMainThread()289 void SetUpOnMainThread() override {
290 ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
291 ASSERT_TRUE(embedded_test_server()->Start());
292 ContentBrowserTest::SetUpOnMainThread();
293 }
294
OverrideInterface(MockWriterBase * mock_writer)295 void OverrideInterface(MockWriterBase* mock_writer) {
296 blink::AssociatedInterfaceProvider* remote_interfaces =
297 shell()
298 ->web_contents()
299 ->GetMainFrame()
300 ->GetRemoteAssociatedInterfaces();
301 remote_interfaces->OverrideBinderForTesting(
302 mojom::MhtmlFileWriter::Name_,
303 base::BindRepeating(&MockWriterBase::BindReceiver,
304 base::Unretained(mock_writer)));
305 }
306
GenerateMHTML(base::FilePath & path,const GURL & url)307 void GenerateMHTML(base::FilePath& path, const GURL& url) {
308 MHTMLGenerationParams params(path);
309 GenerateMHTML(params, url);
310 }
311
GenerateMHTML(MHTMLGenerationParams & params,const GURL & url)312 void GenerateMHTML(MHTMLGenerationParams& params, const GURL& url) {
313 EXPECT_TRUE(NavigateToURL(shell(), url));
314 GenerateMHTMLForCurrentPage(params);
315 }
316
GenerateMHTMLForCurrentPage(MHTMLGenerationParams & params)317 void GenerateMHTMLForCurrentPage(MHTMLGenerationParams& params) {
318 base::RunLoop run_loop;
319 histogram_tester_.reset(new base::HistogramTester());
320
321 bool use_result_callback = GetParam();
322
323 if (use_result_callback) {
324 shell()->web_contents()->GenerateMHTMLWithResult(
325 params,
326 base::BindOnce(&MHTMLGenerationTest::MHTMLGeneratedWithResult,
327 base::Unretained(this), run_loop.QuitClosure()));
328 } else {
329 shell()->web_contents()->GenerateMHTML(
330 params,
331 base::BindOnce(&MHTMLGenerationTest::MHTMLGenerated,
332 base::Unretained(this), run_loop.QuitClosure()));
333 }
334
335 // Block until the MHTML is generated.
336 run_loop.Run();
337
338 ASSERT_TRUE(has_mhtml_callback_run())
339 << "Unexpected error generating MHTML file";
340
341 // TODO(crbug.com/997408): Add tests which will let MHTMLGeneration manager
342 // fail during file write operation. This will allow us to actually test if
343 // we receive a bogus hash instead of a base::nullopt.
344 EXPECT_EQ(base::nullopt, file_digest());
345
346 // Skip well formedness check if explicitly disabled or there was a
347 // generation error.
348 bool generation_failed = file_size() == -1;
349 if (!well_formedness_check_ || generation_failed)
350 return;
351
352 // Loads the generated file to check if it is well formed.
353 WebContentsConsoleObserver console_observer(shell()->web_contents());
354 console_observer.SetPattern("Malformed multipart archive: *");
355
356 EXPECT_TRUE(
357 NavigateToURL(shell(), net::FilePathToFileURL(params.file_path)))
358 << "Error navigating to the generated MHTML file";
359 EXPECT_TRUE(console_observer.messages().empty())
360 << "The generated MHTML file is malformed";
361 }
362
363 void TwoStepSyncTestFor(const TaskOrder order);
364
ReadFileSizeFromDisk(base::FilePath path)365 int64_t ReadFileSizeFromDisk(base::FilePath path) {
366 base::ScopedAllowBlockingForTesting allow_blocking;
367 int64_t file_size;
368 if (!base::GetFileSize(path, &file_size)) return -1;
369 return file_size;
370 }
371
TestOriginalVsSavedPage(const GURL & url,MHTMLGenerationParams params,int expected_number_of_frames,const std::vector<std::string> & expected_substrings,const std::vector<std::string> & forbidden_substrings_in_saved_page,bool skip_verification_of_original_page=false)372 void TestOriginalVsSavedPage(
373 const GURL& url,
374 MHTMLGenerationParams params,
375 int expected_number_of_frames,
376 const std::vector<std::string>& expected_substrings,
377 const std::vector<std::string>& forbidden_substrings_in_saved_page,
378 bool skip_verification_of_original_page = false) {
379 // Navigate to the test page and verify if test expectations
380 // are met (this is mostly a sanity check - a failure to meet
381 // expectations would probably mean that there is a test bug
382 // (i.e. that we got called with wrong expected_foo argument).
383 EXPECT_TRUE(NavigateToURL(shell(), url));
384 if (!skip_verification_of_original_page) {
385 AssertExpectationsAboutCurrentTab(expected_number_of_frames,
386 expected_substrings,
387 std::vector<std::string>());
388 }
389
390 GenerateMHTML(params, url);
391
392 // Stop the test server (to make sure the locally saved page
393 // is self-contained / won't try to open original resources).
394 ASSERT_TRUE(embedded_test_server()->ShutdownAndWaitUntilComplete());
395
396 // Open the saved page and verify if test expectations are
397 // met (i.e. if the same expectations are met for "after"
398 // [saved version of the page] as for the "before"
399 // [the original version of the page].
400 EXPECT_TRUE(
401 NavigateToURL(shell(), net::FilePathToFileURL(params.file_path)));
402 AssertExpectationsAboutCurrentTab(expected_number_of_frames,
403 expected_substrings,
404 forbidden_substrings_in_saved_page);
405 }
406
AssertExpectationsAboutCurrentTab(int expected_number_of_frames,const std::vector<std::string> & expected_substrings,const std::vector<std::string> & forbidden_substrings)407 void AssertExpectationsAboutCurrentTab(
408 int expected_number_of_frames,
409 const std::vector<std::string>& expected_substrings,
410 const std::vector<std::string>& forbidden_substrings) {
411 int actual_number_of_frames =
412 shell()->web_contents()->GetAllFrames().size();
413 EXPECT_EQ(expected_number_of_frames, actual_number_of_frames);
414
415 for (const auto& expected_substring : expected_substrings) {
416 FindTrackingDelegate delegate(expected_substring);
417 int actual_number_of_matches = delegate.Wait(shell()->web_contents());
418 EXPECT_EQ(1, actual_number_of_matches)
419 << "Verifying that \"" << expected_substring << "\" appears "
420 << "exactly once in the text of web contents of "
421 << shell()->web_contents()->GetURL().spec();
422 }
423
424 for (const auto& forbidden_substring : forbidden_substrings) {
425 FindTrackingDelegate delegate(forbidden_substring);
426 int actual_number_of_matches = delegate.Wait(shell()->web_contents());
427 EXPECT_EQ(0, actual_number_of_matches)
428 << "Verifying that \"" << forbidden_substring << "\" doesn't "
429 << "appear in the text of web contents of "
430 << shell()->web_contents()->GetURL().spec();
431 }
432 }
433
434 // Tests that the result of setting compute_contents_hash is the same as
435 // manually hashing the file. Because MHTMLGenerationManager depends on
436 // net::GenerateMimeMultipartBoundary() to write the boundary, we cannot
437 // compute the digest in advance. Therefore, we must compute the hash of the
438 // whole file and assert that the computed hash is the same as the hash
439 // produced here.
TestComputeContentsHash(base::FilePath & path)440 void TestComputeContentsHash(base::FilePath& path) {
441 base::ScopedAllowBlockingForTesting allow_blocking;
442
443 // Reload the file to an mhtml string for hashing
444 std::string test_mhtml;
445 ASSERT_TRUE(base::ReadFileToString(path, &test_mhtml));
446
447 // Hash the file in one big step. This is not recommended to do outside of
448 // tests because the files being hashed could be too large.
449 std::unique_ptr<crypto::SecureHash> secure_hash =
450 crypto::SecureHash::Create(crypto::SecureHash::Algorithm::SHA256);
451 secure_hash->Update(test_mhtml.c_str(), test_mhtml.size());
452 std::string expected_digest(secure_hash->GetHashLength(), 0);
453 secure_hash->Finish(&(expected_digest[0]), expected_digest.size());
454 secure_hash.reset();
455
456 ASSERT_TRUE(file_digest());
457 EXPECT_EQ(file_digest().value(), expected_digest);
458 }
459
460 // In the case that we are using a pre-generated .mhtml file, we do
461 // not have any control over the final mhtml_boundary_marker write
462 // operation. This results in the post-generation verification tests
463 // reporting a malformed multipart archive, unintentionally failing the
464 // test.
DisableWellformednessCheck()465 void DisableWellformednessCheck() { well_formedness_check_ = false; }
466
has_mhtml_callback_run() const467 bool has_mhtml_callback_run() const { return has_mhtml_callback_run_; }
file_size() const468 int64_t file_size() const { return file_size_; }
file_digest() const469 base::Optional<std::string> file_digest() const { return file_digest_; }
histogram_tester()470 base::HistogramTester* histogram_tester() { return histogram_tester_.get(); }
471
472 base::ScopedTempDir temp_dir_;
473
474 private:
MHTMLGenerated(base::OnceClosure quit_closure,int64_t size)475 void MHTMLGenerated(base::OnceClosure quit_closure, int64_t size) {
476 has_mhtml_callback_run_ = true;
477 file_size_ = size;
478 std::move(quit_closure).Run();
479 }
MHTMLGeneratedWithResult(base::OnceClosure quit_closure,const MHTMLGenerationResult & result)480 void MHTMLGeneratedWithResult(base::OnceClosure quit_closure,
481 const MHTMLGenerationResult& result) {
482 has_mhtml_callback_run_ = true;
483 file_size_ = result.file_size;
484 file_digest_ = result.file_digest;
485 std::move(quit_closure).Run();
486 }
487
488 bool has_mhtml_callback_run_;
489 int64_t file_size_;
490 base::Optional<std::string> file_digest_;
491 bool well_formedness_check_;
492 std::unique_ptr<base::HistogramTester> histogram_tester_;
493 };
494
495 // Tests that generating a MHTML does create contents.
496 // Note that the actual content of the file is not tested, the purpose of this
497 // test is to ensure we were successful in creating the MHTML data from the
498 // renderer.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,GenerateMHTML)499 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTML) {
500 base::FilePath path(temp_dir_.GetPath());
501 path = path.Append(FILE_PATH_LITERAL("test.mht"));
502
503 GenerateMHTML(path, embedded_test_server()->GetURL("/simple_page.html"));
504
505 // Make sure the actual generated file has some contents.
506 EXPECT_GT(file_size(), 0); // Verify the size reported by the callback.
507 EXPECT_GT(ReadFileSizeFromDisk(path), 100); // Verify the actual file size.
508
509 {
510 base::ScopedAllowBlockingForTesting allow_blocking;
511 std::string mhtml;
512 ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
513 EXPECT_THAT(mhtml,
514 HasSubstr("Content-Transfer-Encoding: quoted-printable"));
515 }
516
517 // Checks that the final status reported to UMA is correct.
518 histogram_tester()->ExpectUniqueSample(
519 "PageSerialization.MhtmlGeneration.FinalSaveStatus",
520 static_cast<int>(mojom::MhtmlSaveStatus::kSuccess), 1);
521 }
522
523 // Regression test for the crash/race from https://crbug.com/612098.
524 //
525 // TODO(crbug.com/959435): Flaky on Android.
526 #if defined(OS_ANDROID)
527 #define MAYBE_GenerateMHTMLAndCloseConnection \
528 DISABLED_GenerateMHTMLAndCloseConnection
529 #else
530 #define MAYBE_GenerateMHTMLAndCloseConnection GenerateMHTMLAndCloseConnection
531 #endif
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,MAYBE_GenerateMHTMLAndCloseConnection)532 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,
533 MAYBE_GenerateMHTMLAndCloseConnection) {
534 scoped_refptr<RespondAndDisconnectMockWriter> mock_writer =
535 base::MakeRefCounted<RespondAndDisconnectMockWriter>();
536
537 EXPECT_TRUE(NavigateToURL(
538 shell(), embedded_test_server()->GetURL("/simple_page.html")));
539 base::FilePath path(temp_dir_.GetPath());
540 path = path.Append(FILE_PATH_LITERAL("test.mht"));
541
542 OverrideInterface(mock_writer.get());
543 DisableWellformednessCheck();
544
545 MHTMLGenerationParams params(path);
546 GenerateMHTMLForCurrentPage(params);
547
548 // Verify the file has some contents written to it.
549 EXPECT_GT(ReadFileSizeFromDisk(path), 100);
550 // Verify the reported file size matches the file written to disk.
551 EXPECT_EQ(ReadFileSizeFromDisk(path), file_size());
552 }
553
554 // TODO(crbug.com/672313): Flaky on Windows.
555 #if defined(OS_WIN)
556 #define MAYBE_InvalidPath DISABLED_InvalidPath
557 #else
558 #define MAYBE_InvalidPath InvalidPath
559 #endif
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,MAYBE_InvalidPath)560 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, MAYBE_InvalidPath) {
561 base::FilePath path(FILE_PATH_LITERAL("/invalid/file/path"));
562
563 GenerateMHTML(path, embedded_test_server()->GetURL("/page_with_image.html"));
564
565 EXPECT_EQ(file_size(), -1); // Expecting that the callback reported failure.
566
567 // Checks that the final status reported to UMA is correct.
568 histogram_tester()->ExpectUniqueSample(
569 "PageSerialization.MhtmlGeneration.FinalSaveStatus",
570 static_cast<int>(mojom::MhtmlSaveStatus::kFileCreationError), 1);
571 }
572
573 // Tests that MHTML generated using the default 'quoted-printable' encoding does
574 // not contain the 'binary' Content-Transfer-Encoding header, and generates
575 // base64 encoding for the image part.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,GenerateNonBinaryMHTMLWithImage)576 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateNonBinaryMHTMLWithImage) {
577 base::FilePath path(temp_dir_.GetPath());
578 path = path.Append(FILE_PATH_LITERAL("test_binary.mht"));
579
580 GURL url(embedded_test_server()->GetURL("/page_with_image.html"));
581 GenerateMHTML(path, url);
582 EXPECT_GT(file_size(), 0); // Verify the size reported by the callback.
583 EXPECT_GT(ReadFileSizeFromDisk(path), 100); // Verify the actual file size.
584
585 {
586 base::ScopedAllowBlockingForTesting allow_blocking;
587 std::string mhtml;
588 ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
589 EXPECT_THAT(mhtml, HasSubstr("Content-Transfer-Encoding: base64"));
590 EXPECT_THAT(mhtml, Not(HasSubstr("Content-Transfer-Encoding: binary")));
591 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*blank.jpg"));
592 // Verify the boundary should start with CRLF.
593 EXPECT_THAT(mhtml, HasSubstr("\r\n------MultipartBoundary"));
594 }
595 }
596
597 // Tests that MHTML generated using the binary encoding contains the 'binary'
598 // Content-Transfer-Encoding header, and does not contain any base64 encoded
599 // parts.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,GenerateBinaryMHTMLWithImage)600 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateBinaryMHTMLWithImage) {
601 base::FilePath path(temp_dir_.GetPath());
602 path = path.Append(FILE_PATH_LITERAL("test_binary.mht"));
603
604 GURL url(embedded_test_server()->GetURL("/page_with_image.html"));
605 MHTMLGenerationParams params(path);
606 params.use_binary_encoding = true;
607
608 GenerateMHTML(params, url);
609 EXPECT_GT(file_size(), 0); // Verify the size reported by the callback.
610 EXPECT_GT(ReadFileSizeFromDisk(path), 100); // Verify the actual file size.
611
612 {
613 base::ScopedAllowBlockingForTesting allow_blocking;
614 std::string mhtml;
615 ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
616 EXPECT_THAT(mhtml, HasSubstr("Content-Transfer-Encoding: binary"));
617 EXPECT_THAT(mhtml, Not(HasSubstr("Content-Transfer-Encoding: base64")));
618 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*blank.jpg"));
619 // Verify the boundary should start with CRLF.
620 EXPECT_THAT(mhtml, HasSubstr("\r\n------MultipartBoundary"));
621 }
622 }
623
// Saving /nostore.html with default parameters must still capture the page
// body and record its URL.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLIgnoreNoStore) {
  base::FilePath mhtml_path =
      temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"));
  const GURL page_url = embedded_test_server()->GetURL("/nostore.html");

  // Generate MHTML without specifying the FailForNoStoreMainFrame policy.
  GenerateMHTML(mhtml_path, page_url);

  std::string contents;
  {
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(mhtml_path, &contents));
  }

  // The body text must be present ...
  EXPECT_THAT(contents, HasSubstr("test body"));

  // ... and so must the URL of the content.
  EXPECT_THAT(contents, ContainsRegex("Content-Location:.*/nostore.html"));
}
645
646 // TODO(crbug.com/615291): These fail on Android under some circumstances.
647 #if defined(OS_ANDROID)
648 #define MAYBE_ViewedMHTMLContainsNoStoreContent \
649 DISABLED_ViewedMHTMLContainsNoStoreContent
650 #else
651 #define MAYBE_ViewedMHTMLContainsNoStoreContent \
652 ViewedMHTMLContainsNoStoreContent
653 #endif
654
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,MAYBE_ViewedMHTMLContainsNoStoreContent)655 IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest,
656 MAYBE_ViewedMHTMLContainsNoStoreContent) {
657 // Generate MHTML.
658 base::FilePath path(temp_dir_.GetPath());
659 path = path.Append(FILE_PATH_LITERAL("test.mht"));
660 MHTMLGenerationParams params(path);
661
662 // We should see both frames.
663 std::vector<std::string> expectations = {
664 "Main Frame, normal headers.", "Cache-Control: no-store test body",
665 };
666 std::vector<std::string> forbidden;
667 TestOriginalVsSavedPage(
668 embedded_test_server()->GetURL("/page_with_nostore_iframe.html"), params,
669 2 /* expected number of frames */, expectations, forbidden);
670 }
671
672 // Test suite that allows testing --site-per-process against cross-site frames.
673 // See http://dev.chromium.org/developers/design-documents/site-isolation.
674 class MHTMLGenerationSitePerProcessTest : public MHTMLGenerationTest {
675 public:
MHTMLGenerationSitePerProcessTest()676 MHTMLGenerationSitePerProcessTest() {}
677
678 protected:
SetUpCommandLine(base::CommandLine * command_line)679 void SetUpCommandLine(base::CommandLine* command_line) override {
680 MHTMLGenerationTest::SetUpCommandLine(command_line);
681
682 // Append --site-per-process flag.
683 content::IsolateAllSitesForTesting(command_line);
684 }
685
SetUpOnMainThread()686 void SetUpOnMainThread() override {
687 host_resolver()->AddRule("*", "127.0.0.1");
688 content::SetupCrossSiteRedirector(embedded_test_server());
689
690 MHTMLGenerationTest::SetUpOnMainThread();
691 }
692
693 private:
694 DISALLOW_COPY_AND_ASSIGN(MHTMLGenerationSitePerProcessTest);
695 };
696
697 // Test for crbug.com/538766.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationSitePerProcessTest,GenerateMHTML)698 IN_PROC_BROWSER_TEST_P(MHTMLGenerationSitePerProcessTest, GenerateMHTML) {
699 base::FilePath path(temp_dir_.GetPath());
700 path = path.Append(FILE_PATH_LITERAL("test.mht"));
701
702 GURL url(embedded_test_server()->GetURL(
703 "a.com", "/frame_tree/page_with_one_frame.html"));
704 GenerateMHTML(path, url);
705
706 std::string mhtml;
707 {
708 base::ScopedAllowBlockingForTesting allow_blocking;
709 ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
710 }
711
712 // Make sure the contents of both frames are present.
713 EXPECT_THAT(mhtml, HasSubstr("This page has one cross-site iframe"));
714 EXPECT_THAT(mhtml, HasSubstr("This page has no title")); // From title1.html.
715
716 // Make sure that URLs of both frames are present
717 // (note that these are single-line regexes).
718 EXPECT_THAT(
719 mhtml,
720 ContainsRegex("Content-Location:.*/frame_tree/page_with_one_frame.html"));
721 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*/title1.html"));
722 }
723
// With remove_popup_overlay set, the saved file must not contain the overlay
// or modal elements of /popup.html.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, RemovePopupOverlay) {
  base::FilePath mhtml_path =
      temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"));
  const GURL page_url = embedded_test_server()->GetURL("/popup.html");

  MHTMLGenerationParams generation_params(mhtml_path);
  generation_params.remove_popup_overlay = true;
  GenerateMHTML(generation_params, page_url);

  std::string contents;
  {
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(mhtml_path, &contents));
  }

  // Neither class attribute may appear; "=3D" is the quoted-printable form of
  // '=' in the saved markup.
  EXPECT_THAT(contents, Not(HasSubstr("class=3D\"overlay")));
  EXPECT_THAT(contents, Not(HasSubstr("class=3D\"modal")));
}
745
// Extra parts registered on the WebContents through MHTMLExtraParts must end
// up in the generated file.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLWithExtraData) {
  const char kFakeSignalData1[] = "FakeSignalData1";
  const char kFakeSignalData2[] = "OtherMockDataForSignals";
  const char kFakeContentType[] = "text/plain";
  const char kFakeContentLocation[] =
      "cid:signal-data-62691-645341c4-62b3-478e-a8c5-e0dfccc3ca02@mhtml.blink";

  base::FilePath mhtml_path =
      temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"));
  const GURL page_url = embedded_test_server()->GetURL("/page_with_image.html");

  // Metadata shared by both extra parts; no extra headers are supplied.
  const std::string content_type(kFakeContentType);
  const std::string content_location(kFakeContentLocation);
  const std::string extra_headers;

  // The extra parts live in the web contents user data.
  MHTMLExtraParts* extra_parts =
      MHTMLExtraParts::FromWebContents(shell()->web_contents());

  // Register two parts that differ only in their body.
  extra_parts->AddExtraMHTMLPart(content_type, content_location, extra_headers,
                                 kFakeSignalData1);
  extra_parts->AddExtraMHTMLPart(content_type, content_location, extra_headers,
                                 kFakeSignalData2);
  EXPECT_EQ(extra_parts->size(), 2);

  GenerateMHTML(mhtml_path, page_url);
  EXPECT_TRUE(has_mhtml_callback_run());

  std::string contents;
  {
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(mhtml_path, &contents));
  }

  // Both bodies must have made it into the file.
  EXPECT_THAT(contents, HasSubstr(kFakeSignalData1));
  EXPECT_THAT(contents, HasSubstr(kFakeSignalData2));
}
785
// A page made of many nested frames must be saved with a Content-Location
// entry for each of the listed frame documents.
IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLWithMultipleFrames) {
  // Single-line regexes, one per frame document expected in the output.
  const std::string kExpectedContentLocations[] = {
      "Content-Location:.*/page_with_image.html",
      "Content-Location:.*/page_with_popup.html",
      "Content-Location:.*/page_with_frameset.html",
      "Content-Location:.*/page_with_allowfullscreen_frame.html",
      "Content-Location:.*/page_with_iframe_and_link.html"};

  base::FilePath mhtml_path =
      temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"));
  MHTMLGenerationParams generation_params(mhtml_path);

  // Only the frame count is compared between the original and saved page; no
  // text is required or forbidden.
  const std::vector<std::string> no_substrings;
  const GURL page_url =
      embedded_test_server()->GetURL("/page_with_multiple_iframes.html");
  TestOriginalVsSavedPage(page_url, generation_params,
                          11 /* expected number of frames */, no_substrings,
                          no_substrings);

  // Generation succeeded: size reported by the callback, then size on disk.
  EXPECT_GT(file_size(), 0);
  EXPECT_GT(ReadFileSizeFromDisk(mhtml_path), 100);

  std::string contents;
  {
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(mhtml_path, &contents));
  }

  // Every frame document listed above must be part of the generated file.
  for (const auto& pattern : kExpectedContentLocations)
    EXPECT_THAT(contents, ContainsRegex(pattern));
}
817
818 // We instantiate the MHTML Generation Tests both using and not using the
819 // GenerateMHTMLWithResults callback.
820 INSTANTIATE_TEST_SUITE_P(MHTMLGenerationTest,
821 MHTMLGenerationTest,
822 testing::Bool());
823 INSTANTIATE_TEST_SUITE_P(MHTMLGenerationSitePerProcessTest,
824 MHTMLGenerationSitePerProcessTest,
825 testing::Bool());
826
827 } // namespace content
828