1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/download/mhtml_generation_manager.h"
6
7 #include <utility>
8
9 #include "base/bind.h"
10 #include "base/containers/queue.h"
11 #include "base/files/file.h"
12 #include "base/guid.h"
13 #include "base/macros.h"
14 #include "base/memory/ptr_util.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "base/scoped_observer.h"
17 #include "base/stl_util.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/task_runner_util.h"
21 #include "base/time/time.h"
22 #include "base/trace_event/trace_event.h"
23 #include "components/download/public/common/download_task_runner.h"
24 #include "content/browser/bad_message.h"
25 #include "content/browser/download/mhtml_extra_parts_impl.h"
26 #include "content/browser/frame_host/frame_tree_node.h"
27 #include "content/browser/frame_host/render_frame_host_impl.h"
28 #include "content/common/download/mhtml_file_writer.mojom.h"
29 #include "content/public/browser/browser_thread.h"
30 #include "content/public/browser/mhtml_extra_parts.h"
31 #include "content/public/browser/mhtml_generation_result.h"
32 #include "content/public/browser/render_frame_host.h"
33 #include "content/public/browser/render_process_host.h"
34 #include "content/public/browser/web_contents.h"
35 #include "content/public/common/mhtml_generation_params.h"
36 #include "crypto/secure_hash.h"
37 #include "crypto/sha2.h"
38 #include "mojo/core/embedder/embedder.h"
39 #include "mojo/public/cpp/bindings/associated_remote.h"
40 #include "net/base/mime_util.h"
41 #include "third_party/blink/public/common/associated_interfaces/associated_interface_provider.h"
42
43 namespace {
44
45 // Callback to notify the UI thread that writing to the MHTML file is complete.
46 using MHTMLWriteCompleteCallback =
47 base::RepeatingCallback<void(content::mojom::MhtmlSaveStatus)>;
48
49 const char kContentLocation[] = "Content-Location: ";
50 const char kContentType[] = "Content-Type: ";
51 int kInvalidFileSize = -1;
52
53 // CloseFileResult holds the result of closing the generated file using the
54 // status of the operation, a file size and a pointer to a file digest. It
55 // stores the values of the status and size directly, and makes a copy of the
56 // digest if present.
57 struct CloseFileResult {
CloseFileResult__anon30b841df0111::CloseFileResult58 CloseFileResult(content::mojom::MhtmlSaveStatus status,
59 int64_t size,
60 std::string* digest)
61 : save_status(status), file_size(size) {
62 if (digest)
63 file_digest = base::Optional<std::string>(*digest);
64 }
65
66 content::mojom::MhtmlSaveStatus save_status;
67 int64_t file_size;
68 base::Optional<std::string> file_digest;
69
toMHTMLGenerationResult__anon30b841df0111::CloseFileResult70 content::MHTMLGenerationResult toMHTMLGenerationResult() const {
71 return content::MHTMLGenerationResult(file_size,
72 base::OptionalOrNullptr(file_digest));
73 }
74 };
75
// Opens |file_path| for writing on the download sequence and returns the
// resulting base::File. The returned file is invalid if the open failed;
// callers are expected to check IsValid().
base::File CreateMHTMLFile(const base::FilePath& file_path) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());

  // SECURITY NOTE: A file descriptor to the file created below will be passed
  // to multiple renderer processes which (in out-of-process iframes mode) can
  // act on behalf of separate web principals. The file is therefore opened
  // write-only: allowing reads would let one renderer read content generated
  // by other renderers / other web principals.
  const uint32_t write_only_flags =
      base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;

  base::File browser_file(file_path, write_only_flags);
  if (browser_file.IsValid())
    return browser_file;

  DLOG(ERROR) << "Failed to create file to save MHTML at: "
              << file_path.value();
  return browser_file;
}
94
95 } // namespace
96
97 namespace content {
98
99 // The class and all of its members live on the UI thread. Only static methods
100 // are executed on other threads.
101 // Job instances are created in MHTMLGenerationManager::Job::StartNewJob(),
102 // proceeding with the MHTML saving process unmanaged. Every instance is
103 // self-owned and responsible for deleting itself upon invoking OnFinished.
104 // With self-ownership lifetime concerns, we make the following precautions:
105 // - SerializeAsMHTMLResponse() always proceeds with finalizing upon detecting
106 // Job completion/cancellation.
107 // - Jobs are prematurely finalized and deleted upon detecting a connection
108 // error with the message pipe during serialization.
109 // - Any pending callbacks after deletion are invalidated using weak pointers.
110 class MHTMLGenerationManager::Job {
111 public:
112 // Creates and registers a new job.
113 static void StartNewJob(
114 WebContents* web_contents,
115 const MHTMLGenerationParams& params,
116 MHTMLGenerationResult::GenerateMHTMLCallback callback);
117
118 private:
119 Job(WebContents* web_contents,
120 const MHTMLGenerationParams& params,
121 MHTMLGenerationResult::GenerateMHTMLCallback callback);
122 ~Job();
123
124 // Begins queuing frames from web_contents, creates a new MHTML file and
125 // begins page serialization to created file.
126 void initializeJob(WebContents* web_contents);
127
128 // Writes the string |to_write| to the file. If successful, updates hash and
129 // returns true, otherwise, returns false. Does not take ownership of |file|
130 // nor |raw_secure_hash|.
131 static bool WriteToFileAndUpdateHash(base::File* file,
132 crypto::SecureHash* secure_hash,
133 std::string to_write);
134
135 // Writes the MHTML footer to the file and closes it. It also receives the
136 // SimpleWatcher instance used to watch the data pipe and the current hash
137 // state for safe destruction on the IO thread.
138 //
139 // Note: The same |boundary| marker must be used for all "boundaries" -- in
140 // the header, parts and footer -- that belong to the same MHTML document (see
141 // also rfc1341, section 7.2.1, "boundary" description).
142 static CloseFileResult FinalizeOnFileThread(
143 mojom::MhtmlSaveStatus save_status,
144 const std::string& boundary,
145 base::File file,
146 const std::vector<MHTMLExtraDataPart>& extra_data_parts,
147 std::unique_ptr<mojo::SimpleWatcher> watcher,
148 std::unique_ptr<crypto::SecureHash> secure_hash);
149
150 void AddFrame(RenderFrameHost* render_frame_host);
151
152 // Creates a string that encompasses any remaining extra data parts to write
153 // to the file.
154 static std::string CreateExtraDataParts(
155 const std::string& boundary,
156 const std::vector<MHTMLExtraDataPart>& extra_data_parts);
157
158 // Creates a string with the contents if htem MHTML file footer.
159 static std::string CreateFooter(const std::string& boundary);
160
161 // Called on the UI thread when the file that should hold the MHTML data has
162 // been created.
163 void OnFileAvailable(base::File browser_file);
164
165 // Called on the UI thread after the file got finalized and we have its size,
166 // or an error occurred while creating a new file.
167 void OnFinished(const CloseFileResult& result);
168
169 // Starts watching a handle on the file thread. Instantiates a new instance
170 // of |watcher_| upon call.
171 void BeginWatchingHandle(MHTMLWriteCompleteCallback callback);
172
173 // Writes data from the consumer handle to the new MHTML file. Only done
174 // with on the fly hash computation.
175 // Bound to the data pipe watcher and called upon notification of write
176 // completion to producer pipe sent to the Renderer.
177 // TODO(https://crbug.com/915966): Eventually simplify this implementation
178 // with a DataPipeDrainer once error signalling is implemented there.
179 void WriteMHTMLToDisk(MHTMLWriteCompleteCallback callback,
180 MojoResult result,
181 const mojo::HandleSignalsState& state);
182
183 // Destroys |watcher_| instance and notifies UI thread of write completion.
184 void OnWriteComplete(MHTMLWriteCompleteCallback callback,
185 mojom::MhtmlSaveStatus save_status);
186
187 // Notifies Job of frame write completion and sends request to next render
188 // frame if the response was blocked by the write operation.
189 void DoneWritingToDisk(mojom::MhtmlSaveStatus save_status);
190
191 // Called when the message pipe to the renderer is disconnected.
192 void OnConnectionError();
193
194 // Handler for the Mojo interface callback (a notification from the
195 // renderer that the MHTML generation for previous frame has finished).
196 void SerializeAsMHTMLResponse(
197 mojom::MhtmlSaveStatus save_status,
198 const std::vector<std::string>& digests_of_uris_of_serialized_resources,
199 base::TimeDelta renderer_main_thread_time);
200
201 // Records newly serialized resource digests into
202 // |digests_of_already_serialized_uris_|.
203 void RecordDigests(
204 const std::vector<std::string>& digests_of_uris_of_serialized_resources);
205
206 // Continues sending serialization requests to the next frame if ready and
207 // there are more frames to be serialized.
208 void MaybeSendToNextRenderFrame(mojom::MhtmlSaveStatus save_status);
209
210 // Packs up the current status of the MHTML file save operation into a Mojo
211 // struct to send to the renderer process.
212 mojom::SerializeAsMHTMLParamsPtr CreateMojoParams();
213
214 // Sends Mojo interface call to the renderer, asking for MHTML
215 // generation of the next frame. Returns MhtmlSaveStatus::kSuccess or a
216 // specific error status.
217 mojom::MhtmlSaveStatus SendToNextRenderFrame();
218
219 // Indicates if the writing operation on the IO thread is complete, and
220 // we have received a response from the Renderer.
221 // This check is necessary to provide synchronization between file writing
222 // operations and MHTML serialization.
223 bool CurrentFrameDone() const;
224
225 // Called on the UI thread when a job has been finished.
226 void Finalize(mojom::MhtmlSaveStatus save_status);
227
228 // Write the MHTML footer and close the file on the file thread and respond
229 // back on the UI thread with the updated status and file size (which will be
230 // negative in case of errors).
231 void CloseFile(mojom::MhtmlSaveStatus save_status);
232
233 // Marks the Job as completed, preventing any further notifications from the
234 // Renderer. This prevents the race/crash from https://crbug.com/612098.
235 void MarkAsFinished();
236
237 void ReportRendererMainThreadTime(base::TimeDelta renderer_main_thread_time);
238
239 // Close the MHTML file if it looks good, setting the size param. Returns
240 // false for failure.
241 static bool CloseFileIfValid(base::File& file, int64_t* file_size);
242
243 // Time tracking for performance metrics reporting.
244 const base::TimeTicks creation_time_;
245 base::TimeTicks wait_on_renderer_start_time_;
246 base::TimeDelta all_renderers_wait_time_;
247 base::TimeDelta all_renderers_main_thread_time_;
248 base::TimeDelta longest_renderer_main_thread_time_;
249
250 // User-configurable parameters. Includes the file location, binary encoding
251 // choices.
252 MHTMLGenerationParams params_;
253
254 // The IDs of frames that still need to be processed.
255 base::queue<int> pending_frame_tree_node_ids_;
256
257 // Identifies a frame to which we've sent through
258 // MhtmlFileWriter::SerializeAsMHTML but for which we didn't yet process
259 // the response via SerializeAsMHTMLResponse.
260 int frame_tree_node_id_of_busy_frame_;
261
262 // The handle to the file the MHTML is saved to for the browser process.
263 base::File browser_file_;
264
265 // MIME multipart boundary to use in the MHTML doc.
266 const std::string mhtml_boundary_marker_;
267
268 // Digests of URIs of already generated MHTML parts.
269 std::set<std::string> digests_of_already_serialized_uris_;
270 std::string salt_;
271
272 // The callback to call once generation is complete.
273 MHTMLGenerationResult::GenerateMHTMLCallback callback_;
274
275 // Whether the job is finished (set to true only for the short duration of
276 // time between MHTMLGenerationManager::Job::Finalize is called and the job is
277 // destroyed by MHTMLGenerationManager::Job::OnFinished).
278 bool is_finished_;
279
280 // Any extra data parts that should be emitted into the output MHTML.
281 std::vector<MHTMLExtraDataPart> extra_data_parts_;
282
283 // MHTMLFileWriter instance for the frame being currently serialized.
284 mojo::AssociatedRemote<mojom::MhtmlFileWriter> writer_;
285
286 // Watcher to detect new data written to |mhtml_data_consumer_|.
287 // This is instantiated and destroyed in the download sequence for each frame.
288 std::unique_ptr<mojo::SimpleWatcher> watcher_;
289
290 // Consumer handle for data pipe streaming.
291 mojo::ScopedDataPipeConsumerHandle mhtml_data_consumer_;
292
293 // Indicates whether there is currently data being streamed from the Renderer.
294 // Not used when the renderer is writing directly to file.
295 bool waiting_on_data_streaming_;
296
297 // Current state of contents hash computation.
298 // This is updated upon every successful file write and finalized in the
299 // download sequence.
300 std::unique_ptr<crypto::SecureHash> secure_hash_;
301
302 base::WeakPtrFactory<Job> weak_factory_{this};
303
304 DISALLOW_COPY_AND_ASSIGN(Job);
305 };
306
Job(WebContents * web_contents,const MHTMLGenerationParams & params,MHTMLGenerationResult::GenerateMHTMLCallback callback)307 MHTMLGenerationManager::Job::Job(
308 WebContents* web_contents,
309 const MHTMLGenerationParams& params,
310 MHTMLGenerationResult::GenerateMHTMLCallback callback)
311 : creation_time_(base::TimeTicks::Now()),
312 params_(params),
313 frame_tree_node_id_of_busy_frame_(FrameTreeNode::kFrameTreeNodeInvalidId),
314 mhtml_boundary_marker_(net::GenerateMimeMultipartBoundary()),
315 salt_(base::GenerateGUID()),
316 callback_(std::move(callback)),
317 is_finished_(false),
318 waiting_on_data_streaming_(false) {
319 initializeJob(web_contents);
320 }
321
~Job()322 MHTMLGenerationManager::Job::~Job() {
323 DCHECK_CURRENTLY_ON(BrowserThread::UI);
324 DCHECK(!watcher_);
325 }
326
initializeJob(WebContents * web_contents)327 void MHTMLGenerationManager::Job::initializeJob(WebContents* web_contents) {
328 DCHECK_CURRENTLY_ON(BrowserThread::UI);
329
330 TRACE_EVENT_NESTABLE_ASYNC_BEGIN2(
331 "page-serialization", "SavingMhtmlJob", this, "url",
332 web_contents->GetLastCommittedURL().possibly_invalid_spec(), "file",
333 params_.file_path.AsUTF8Unsafe());
334
335 web_contents->ForEachFrame(base::BindRepeating(
336 &MHTMLGenerationManager::Job::AddFrame,
337 base::Unretained(this))); // Safe because ForEachFrame() is synchronous.
338
339 // Main frame needs to be processed first.
340 DCHECK(!pending_frame_tree_node_ids_.empty());
341 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front())
342 ->parent() == nullptr);
343
344 // Save off any extra data.
345 auto* extra_parts = static_cast<MHTMLExtraPartsImpl*>(
346 MHTMLExtraParts::FromWebContents(web_contents));
347 if (extra_parts)
348 extra_data_parts_ = extra_parts->parts();
349
350 base::PostTaskAndReplyWithResult(
351 download::GetDownloadTaskRunner().get(), FROM_HERE,
352 base::BindOnce(&CreateMHTMLFile, params_.file_path),
353 base::BindOnce(&Job::OnFileAvailable, weak_factory_.GetWeakPtr()));
354 }
355
356 mojom::SerializeAsMHTMLParamsPtr
CreateMojoParams()357 MHTMLGenerationManager::Job::CreateMojoParams() {
358 mojom::SerializeAsMHTMLParamsPtr mojo_params =
359 mojom::SerializeAsMHTMLParams::New();
360 mojo_params->mhtml_boundary_marker = mhtml_boundary_marker_;
361 mojo_params->mhtml_binary_encoding = params_.use_binary_encoding;
362 mojo_params->mhtml_popup_overlay_removal = params_.remove_popup_overlay;
363 mojo_params->mhtml_problem_detection = params_.use_page_problem_detectors;
364
365 // Tell the renderer to skip (= deduplicate) already covered MHTML parts.
366 mojo_params->salt = salt_;
367 mojo_params->digests_of_uris_to_skip.assign(
368 digests_of_already_serialized_uris_.begin(),
369 digests_of_already_serialized_uris_.end());
370
371 return mojo_params;
372 }
373
SendToNextRenderFrame()374 mojom::MhtmlSaveStatus MHTMLGenerationManager::Job::SendToNextRenderFrame() {
375 DCHECK(browser_file_.IsValid());
376 DCHECK(!pending_frame_tree_node_ids_.empty());
377
378 int frame_tree_node_id = pending_frame_tree_node_ids_.front();
379 pending_frame_tree_node_ids_.pop();
380
381 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id);
382 if (!ftn) // The contents went away.
383 return mojom::MhtmlSaveStatus::kFrameNoLongerExists;
384 RenderFrameHost* rfh = ftn->current_frame_host();
385
386 if (writer_) {
387 // If we reached here, means the work for previous frame is done, so it is
388 // safe to cut the connection to the previous frame.
389 writer_.reset();
390 }
391
392 // Bind Mojo interface to the RenderFrame
393 rfh->GetRemoteAssociatedInterfaces()->GetInterface(&writer_);
394
395 // Safe, as |writer_| is owned by this Job instance.
396 auto error_callback =
397 base::BindOnce(&Job::OnConnectionError, base::Unretained(this));
398 writer_.set_disconnect_handler(std::move(error_callback));
399
400 mojom::SerializeAsMHTMLParamsPtr params(CreateMojoParams());
401
402 // Initialize method of file writing depending on |compute_contents_hash|
403 // flag.
404 params->output_handle = mojom::MhtmlOutputHandle::New();
405 if (params_.compute_contents_hash) {
406 // Create and set up the data pipe.
407 mojo::ScopedDataPipeProducerHandle producer;
408 if (mojo::CreateDataPipe(nullptr, &producer, &mhtml_data_consumer_) !=
409 MOJO_RESULT_OK) {
410 DLOG(ERROR) << "Failed to create Mojo Data Pipe.";
411 return mojom::MhtmlSaveStatus::kStreamingError;
412 }
413 MHTMLWriteCompleteCallback write_complete_callback = base::BindRepeating(
414 &Job::DoneWritingToDisk, weak_factory_.GetWeakPtr());
415 download::GetDownloadTaskRunner().get()->PostTask(
416 FROM_HERE,
417 base::BindOnce(&Job::BeginWatchingHandle, base::Unretained(this),
418 std::move(write_complete_callback)));
419 waiting_on_data_streaming_ = true;
420 params->output_handle->set_producer_handle(std::move(producer));
421 } else {
422 // File::Duplicate() creates a reference to this file for use in the
423 // Renderer.
424 params->output_handle->set_file_handle(browser_file_.Duplicate());
425 }
426
427 // Send a Mojo request to Renderer to serialize its frame.
428 DCHECK_EQ(FrameTreeNode::kFrameTreeNodeInvalidId,
429 frame_tree_node_id_of_busy_frame_);
430 frame_tree_node_id_of_busy_frame_ = frame_tree_node_id;
431
432 auto response_callback = base::BindOnce(&Job::SerializeAsMHTMLResponse,
433 weak_factory_.GetWeakPtr());
434 writer_->SerializeAsMHTML(std::move(params), std::move(response_callback));
435
436 TRACE_EVENT_NESTABLE_ASYNC_BEGIN1("page-serialization", "WaitingOnRenderer",
437 this, "frame tree node id",
438 frame_tree_node_id_of_busy_frame_);
439 DCHECK(wait_on_renderer_start_time_.is_null());
440 wait_on_renderer_start_time_ = base::TimeTicks::Now();
441 return mojom::MhtmlSaveStatus::kSuccess;
442 }
443
BeginWatchingHandle(MHTMLWriteCompleteCallback callback)444 void MHTMLGenerationManager::Job::BeginWatchingHandle(
445 MHTMLWriteCompleteCallback callback) {
446 DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
447
448 DCHECK(!watcher_);
449 watcher_ = std::make_unique<mojo::SimpleWatcher>(
450 FROM_HERE, mojo::SimpleWatcher::ArmingPolicy::AUTOMATIC,
451 download::GetDownloadTaskRunner());
452 // It is entirely possible for BeginWatchingHandle to get bound multiple times
453 // if we have to serialize multiple render frames, but we will only ever want
454 // one secure hash instance created.
455 if (params_.compute_contents_hash && !secure_hash_) {
456 secure_hash_ =
457 crypto::SecureHash::Create(crypto::SecureHash::Algorithm::SHA256);
458 }
459
460 // base::Unretained is safe, as |this| owns |mhtml_data_consumer_|, which
461 // is responsible for invoking |watcher_| callbacks.
462 if (watcher_->Watch(
463 mhtml_data_consumer_.get(),
464 MOJO_HANDLE_SIGNAL_NEW_DATA_READABLE | MOJO_HANDLE_SIGNAL_PEER_CLOSED,
465 MOJO_WATCH_CONDITION_SATISFIED,
466 base::BindRepeating(&Job::WriteMHTMLToDisk, base::Unretained(this),
467 callback)) != MOJO_RESULT_OK) {
468 DLOG(ERROR) << "Failed to strap watcher to consumer handle.";
469 OnWriteComplete(callback, mojom::MhtmlSaveStatus::kStreamingError);
470 }
471 }
472
WriteMHTMLToDisk(MHTMLWriteCompleteCallback callback,MojoResult result,const mojo::HandleSignalsState & state)473 void MHTMLGenerationManager::Job::WriteMHTMLToDisk(
474 MHTMLWriteCompleteCallback callback,
475 MojoResult result,
476 const mojo::HandleSignalsState& state) {
477 DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
478 DCHECK_NE(result, MOJO_RESULT_FAILED_PRECONDITION);
479 // Begin consumer data pipe handle read and file write loop.
480 char buffer[1024];
481 uint32_t num_bytes = sizeof(buffer);
482 while (result == MOJO_RESULT_OK && state.readable()) {
483 result = mhtml_data_consumer_->ReadData(&buffer, &num_bytes,
484 MOJO_READ_DATA_FLAG_NONE);
485 if (result == MOJO_RESULT_OK) {
486 if (secure_hash_)
487 secure_hash_->Update(&buffer, num_bytes);
488 if (browser_file_.WriteAtCurrentPos(buffer, num_bytes) < 0) {
489 DLOG(ERROR) << "Error writing to file handle.";
490 OnWriteComplete(std::move(callback),
491 mojom::MhtmlSaveStatus::kFileWritingError);
492 return;
493 }
494 }
495 }
496
497 if (result != MOJO_RESULT_OK && result != MOJO_RESULT_FAILED_PRECONDITION &&
498 result != MOJO_RESULT_SHOULD_WAIT) {
499 DLOG(ERROR) << "Error streaming MHTML data to the Browser.";
500 OnWriteComplete(std::move(callback),
501 mojom::MhtmlSaveStatus::kStreamingError);
502 return;
503 }
504
505 // Only notify successful write completion if peer handle is closed without
506 // any errors.
507 if (state.peer_closed())
508 OnWriteComplete(std::move(callback), mojom::MhtmlSaveStatus::kSuccess);
509 }
510
OnWriteComplete(MHTMLWriteCompleteCallback callback,mojom::MhtmlSaveStatus save_status)511 void MHTMLGenerationManager::Job::OnWriteComplete(
512 MHTMLWriteCompleteCallback callback,
513 mojom::MhtmlSaveStatus save_status) {
514 DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
515
516 watcher_.reset();
517 base::PostTask(FROM_HERE, {BrowserThread::UI},
518 base::BindOnce(std::move(callback), save_status));
519 }
520
DoneWritingToDisk(mojom::MhtmlSaveStatus save_status)521 void MHTMLGenerationManager::Job::DoneWritingToDisk(
522 mojom::MhtmlSaveStatus save_status) {
523 DCHECK_CURRENTLY_ON(BrowserThread::UI);
524
525 // If the Job has prematurely finalized and marked as finished, make this
526 // response no-op.
527 if (is_finished_)
528 return;
529
530 waiting_on_data_streaming_ = false;
531 MaybeSendToNextRenderFrame(save_status);
532 }
533
OnConnectionError()534 void MHTMLGenerationManager::Job::OnConnectionError() {
535 DCHECK_CURRENTLY_ON(BrowserThread::UI);
536 // If message pipe end closes, then it is an unexpected crash.
537 DLOG(ERROR) << "Message pipe to renderer closed while expecting response";
538 Finalize(mojom::MhtmlSaveStatus::kRenderProcessExited);
539 }
540
OnFileAvailable(base::File browser_file)541 void MHTMLGenerationManager::Job::OnFileAvailable(base::File browser_file) {
542 DCHECK_CURRENTLY_ON(BrowserThread::UI);
543
544 if (!browser_file.IsValid()) {
545 DLOG(ERROR) << "Failed to create file";
546 Finalize(mojom::MhtmlSaveStatus::kFileCreationError);
547 return;
548 }
549
550 browser_file_ = std::move(browser_file);
551
552 mojom::MhtmlSaveStatus save_status = SendToNextRenderFrame();
553 if (save_status != mojom::MhtmlSaveStatus::kSuccess)
554 Finalize(save_status);
555 }
556
OnFinished(const CloseFileResult & close_file_result)557 void MHTMLGenerationManager::Job::OnFinished(
558 const CloseFileResult& close_file_result) {
559 DCHECK_CURRENTLY_ON(BrowserThread::UI);
560 mojom::MhtmlSaveStatus save_status = close_file_result.save_status;
561 int64_t file_size = close_file_result.file_size;
562
563 TRACE_EVENT_NESTABLE_ASYNC_END2("page-serialization", "SavingMhtmlJob", this,
564 "job save status", save_status, "file size",
565 file_size);
566 UMA_HISTOGRAM_TIMES("PageSerialization.MhtmlGeneration.FullPageSavingTime",
567 base::TimeTicks::Now() - creation_time_);
568 UMA_HISTOGRAM_ENUMERATION("PageSerialization.MhtmlGeneration.FinalSaveStatus",
569 save_status);
570
571 std::move(callback_).Run(close_file_result.toMHTMLGenerationResult());
572
573 delete this; // This is the last time the Job is referenced.
574 }
575
MarkAsFinished()576 void MHTMLGenerationManager::Job::MarkAsFinished() {
577 // MarkAsFinished() may be called twice only in the case which
578 // writer_.reset() does not correctly stop OnConnectionError
579 // notifications for the case described in https://crbug.com/612098.
580 if (is_finished_) {
581 NOTREACHED();
582 return;
583 }
584 is_finished_ = true;
585 writer_.reset();
586
587 // Additionally, |watcher_| may also invoke DoneWritingToDisk() from
588 // the download sequence, potentially calling this twice. We cannot disable
589 // |watcher_| notifications similar to |writer_|, since it exists in
590 // the download sequence, so we handle the case in DoneWritingToDisk().
591
592 TRACE_EVENT_NESTABLE_ASYNC_INSTANT0("page-serialization", "JobFinished",
593 this);
594
595 // End of job timing reports.
596 if (!wait_on_renderer_start_time_.is_null()) {
597 base::TimeDelta renderer_wait_time =
598 base::TimeTicks::Now() - wait_on_renderer_start_time_;
599 UMA_HISTOGRAM_TIMES(
600 "PageSerialization.MhtmlGeneration.BrowserWaitForRendererTime."
601 "SingleFrame",
602 renderer_wait_time);
603 all_renderers_wait_time_ += renderer_wait_time;
604 }
605 if (!all_renderers_wait_time_.is_zero()) {
606 UMA_HISTOGRAM_TIMES(
607 "PageSerialization.MhtmlGeneration.BrowserWaitForRendererTime."
608 "FrameTree",
609 all_renderers_wait_time_);
610 }
611 if (!all_renderers_main_thread_time_.is_zero()) {
612 UMA_HISTOGRAM_TIMES(
613 "PageSerialization.MhtmlGeneration.RendererMainThreadTime.FrameTree",
614 all_renderers_main_thread_time_);
615 }
616 if (!longest_renderer_main_thread_time_.is_zero()) {
617 UMA_HISTOGRAM_TIMES(
618 "PageSerialization.MhtmlGeneration.RendererMainThreadTime.SlowestFrame",
619 longest_renderer_main_thread_time_);
620 }
621 }
622
ReportRendererMainThreadTime(base::TimeDelta renderer_main_thread_time)623 void MHTMLGenerationManager::Job::ReportRendererMainThreadTime(
624 base::TimeDelta renderer_main_thread_time) {
625 DCHECK(renderer_main_thread_time > base::TimeDelta());
626 if (renderer_main_thread_time > base::TimeDelta())
627 all_renderers_main_thread_time_ += renderer_main_thread_time;
628 if (renderer_main_thread_time > longest_renderer_main_thread_time_)
629 longest_renderer_main_thread_time_ = renderer_main_thread_time;
630 }
631
AddFrame(RenderFrameHost * render_frame_host)632 void MHTMLGenerationManager::Job::AddFrame(RenderFrameHost* render_frame_host) {
633 auto* rfhi = static_cast<RenderFrameHostImpl*>(render_frame_host);
634 int frame_tree_node_id = rfhi->frame_tree_node()->frame_tree_node_id();
635 pending_frame_tree_node_ids_.push(frame_tree_node_id);
636 }
637
CloseFile(mojom::MhtmlSaveStatus save_status)638 void MHTMLGenerationManager::Job::CloseFile(
639 mojom::MhtmlSaveStatus save_status) {
640 DCHECK_CURRENTLY_ON(BrowserThread::UI);
641 DCHECK(!mhtml_boundary_marker_.empty());
642
643 // Only update the status if that won't hide an earlier error.
644 if (!browser_file_.IsValid() &&
645 save_status == mojom::MhtmlSaveStatus::kSuccess)
646 save_status = mojom::MhtmlSaveStatus::kFileWritingError;
647
648 // If no previous error occurred the boundary should be sent.
649 base::PostTaskAndReplyWithResult(
650 download::GetDownloadTaskRunner().get(), FROM_HERE,
651 base::BindOnce(&MHTMLGenerationManager::Job::FinalizeOnFileThread,
652 save_status, mhtml_boundary_marker_,
653 std::move(browser_file_), std::move(extra_data_parts_),
654 std::move(watcher_), std::move(secure_hash_)),
655 base::BindOnce(&Job::OnFinished, weak_factory_.GetWeakPtr()));
656 }
657
SerializeAsMHTMLResponse(mojom::MhtmlSaveStatus save_status,const std::vector<std::string> & digests_of_uris_of_serialized_resources,base::TimeDelta renderer_main_thread_time)658 void MHTMLGenerationManager::Job::SerializeAsMHTMLResponse(
659 mojom::MhtmlSaveStatus save_status,
660 const std::vector<std::string>& digests_of_uris_of_serialized_resources,
661 base::TimeDelta renderer_main_thread_time) {
662 DCHECK_CURRENTLY_ON(BrowserThread::UI);
663
664 TRACE_EVENT_NESTABLE_ASYNC_END0("page-serialization", "WaitingOnRenderer",
665 this);
666 ReportRendererMainThreadTime(renderer_main_thread_time);
667
668 frame_tree_node_id_of_busy_frame_ = FrameTreeNode::kFrameTreeNodeInvalidId;
669
670 // If the renderer succeeded, update the resource digests.
671 if (save_status == mojom::MhtmlSaveStatus::kSuccess)
672 RecordDigests(digests_of_uris_of_serialized_resources);
673
674 MaybeSendToNextRenderFrame(save_status);
675 }
676
RecordDigests(const std::vector<std::string> & digests_of_uris_of_serialized_resources)677 void MHTMLGenerationManager::Job::RecordDigests(
678 const std::vector<std::string>& digests_of_uris_of_serialized_resources) {
679 DCHECK(!wait_on_renderer_start_time_.is_null());
680 base::TimeDelta renderer_wait_time =
681 base::TimeTicks::Now() - wait_on_renderer_start_time_;
682 UMA_HISTOGRAM_TIMES(
683 "PageSerialization.MhtmlGeneration.BrowserWaitForRendererTime."
684 "SingleFrame",
685 renderer_wait_time);
686 all_renderers_wait_time_ += renderer_wait_time;
687 wait_on_renderer_start_time_ = base::TimeTicks();
688
689 // Renderer should be deduping resources with the same uris.
690 DCHECK_EQ(0u, base::STLSetIntersection<std::set<std::string>>(
691 digests_of_already_serialized_uris_,
692 std::set<std::string>(
693 digests_of_uris_of_serialized_resources.begin(),
694 digests_of_uris_of_serialized_resources.end()))
695 .size());
696 digests_of_already_serialized_uris_.insert(
697 digests_of_uris_of_serialized_resources.begin(),
698 digests_of_uris_of_serialized_resources.end());
699 }
700
MaybeSendToNextRenderFrame(mojom::MhtmlSaveStatus save_status)701 void MHTMLGenerationManager::Job::MaybeSendToNextRenderFrame(
702 mojom::MhtmlSaveStatus save_status) {
703 // If current operation is successful and there are more frames to process,
704 // let save status depend on the result of sending the next request.
705 if (save_status == mojom::MhtmlSaveStatus::kSuccess &&
706 !pending_frame_tree_node_ids_.empty() && CurrentFrameDone()) {
707 save_status = SendToNextRenderFrame();
708 }
709
710 // If there was a failure (either from the renderer or from the job) then
711 // terminate the job and return.
712 if (save_status != mojom::MhtmlSaveStatus::kSuccess) {
713 Finalize(save_status);
714 return;
715 }
716
717 // Otherwise report completion if there are no more frames to process
718 // and Job is done processing the current frame.
719 if (pending_frame_tree_node_ids_.empty() && CurrentFrameDone())
720 Finalize(mojom::MhtmlSaveStatus::kSuccess);
721 }
722
CurrentFrameDone() const723 bool MHTMLGenerationManager::Job::CurrentFrameDone() const {
724 bool waiting_for_response_from_renderer =
725 frame_tree_node_id_of_busy_frame_ !=
726 FrameTreeNode::kFrameTreeNodeInvalidId;
727 return !waiting_for_response_from_renderer && !waiting_on_data_streaming_;
728 }
729
Finalize(mojom::MhtmlSaveStatus save_status)730 void MHTMLGenerationManager::Job::Finalize(mojom::MhtmlSaveStatus save_status) {
731 DCHECK_CURRENTLY_ON(BrowserThread::UI);
732 MarkAsFinished();
733 CloseFile(save_status);
734 }
735
736 // static
StartNewJob(WebContents * web_contents,const MHTMLGenerationParams & params,MHTMLGenerationResult::GenerateMHTMLCallback callback)737 void MHTMLGenerationManager::Job::StartNewJob(
738 WebContents* web_contents,
739 const MHTMLGenerationParams& params,
740 MHTMLGenerationResult::GenerateMHTMLCallback callback) {
741 // Creates a new Job.
742 // The constructor starts the serialization process and it will delete
743 // itself upon finishing.
744 new Job(web_contents, params, std::move(callback));
745 }
746
747 // static
WriteToFileAndUpdateHash(base::File * file,crypto::SecureHash * secure_hash,std::string to_write)748 bool MHTMLGenerationManager::Job::WriteToFileAndUpdateHash(
749 base::File* file,
750 crypto::SecureHash* secure_hash,
751 std::string to_write) {
752 bool result = file->WriteAtCurrentPos(to_write.data(), to_write.size()) >= 0;
753 if (result && secure_hash) {
754 secure_hash->Update(to_write.data(), to_write.size());
755 }
756 return result;
757 }
758
759 // static
FinalizeOnFileThread(mojom::MhtmlSaveStatus save_status,const std::string & boundary,base::File file,const std::vector<MHTMLExtraDataPart> & extra_data_parts,std::unique_ptr<mojo::SimpleWatcher> watcher,std::unique_ptr<crypto::SecureHash> secure_hash)760 CloseFileResult MHTMLGenerationManager::Job::FinalizeOnFileThread(
761 mojom::MhtmlSaveStatus save_status,
762 const std::string& boundary,
763 base::File file,
764 const std::vector<MHTMLExtraDataPart>& extra_data_parts,
765 std::unique_ptr<mojo::SimpleWatcher> watcher,
766 std::unique_ptr<crypto::SecureHash> secure_hash) {
767 DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
768
769 watcher.reset();
770 DCHECK(!boundary.empty());
771
772 if (save_status == mojom::MhtmlSaveStatus::kSuccess) {
773 TRACE_EVENT0("page-serialization",
774 "MHTMLGenerationManager::Job MHTML footer writing");
775
776 // Write the extra data into a part of its own, if we have any.
777 std::string serialized_extra_data_parts =
778 CreateExtraDataParts(boundary, extra_data_parts);
779 // Short circuit to prevent file IO if nothing to write.
780 if (!serialized_extra_data_parts.empty() &&
781 !WriteToFileAndUpdateHash(&file, secure_hash.get(),
782 serialized_extra_data_parts)) {
783 save_status = mojom::MhtmlSaveStatus::kFileWritingError;
784 }
785
786 // Write out the footer at the bottom of the file.
787 std::string footer = CreateFooter(boundary);
788 if (save_status == mojom::MhtmlSaveStatus::kSuccess &&
789 !WriteToFileAndUpdateHash(&file, secure_hash.get(), footer)) {
790 save_status = mojom::MhtmlSaveStatus::kFileWritingError;
791 }
792 }
793
794 // If the file is still valid try to close it. Only update the status if that
795 // won't hide an earlier error.
796 int64_t file_size;
797 if (!CloseFileIfValid(file, &file_size) &&
798 save_status == mojom::MhtmlSaveStatus::kSuccess) {
799 save_status = mojom::MhtmlSaveStatus::kFileClosingError;
800 }
801
802 file_size = save_status == mojom::MhtmlSaveStatus::kSuccess
803 ? file_size
804 : kInvalidFileSize;
805 // If we do not have a pending hash or the file is invalid, finalize operation
806 // with an empty digest result.
807 if (!secure_hash || file_size == kInvalidFileSize)
808 return CloseFileResult(save_status, file_size, nullptr);
809
810 // Record hash and finish operation.
811 std::string file_digest = std::string(secure_hash->GetHashLength(), 0);
812 secure_hash->Finish(&(file_digest[0]), file_digest.size());
813 secure_hash.reset();
814 return CloseFileResult(save_status, file_size, &file_digest);
815 }
816
817 // static
CreateExtraDataParts(const std::string & boundary,const std::vector<MHTMLExtraDataPart> & extra_data_parts)818 std::string MHTMLGenerationManager::Job::CreateExtraDataParts(
819 const std::string& boundary,
820 const std::vector<MHTMLExtraDataPart>& extra_data_parts) {
821 DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
822 std::string serialized_extra_data_parts;
823
824 // Don't write an extra data part if there is none.
825 if (extra_data_parts.empty())
826 return serialized_extra_data_parts;
827
828 // For each extra part, serialize that part and add to our accumulator
829 // string.
830 for (const auto& part : extra_data_parts) {
831 // Write a newline, then a boundary, a newline, then the content
832 // location, a newline, the content type, a newline, extra_headers,
833 // two newlines, the body, and end with a newline.
834 std::string serialized_extra_data_part = base::StringPrintf(
835 "\r\n--%s\r\n%s%s\r\n%s%s\r\n%s\r\n\r\n%s\r\n", boundary.c_str(),
836 kContentLocation, part.content_location.c_str(), kContentType,
837 part.content_type.c_str(), part.extra_headers.c_str(),
838 part.body.c_str());
839 DCHECK(base::IsStringASCII(serialized_extra_data_part));
840
841 serialized_extra_data_parts += serialized_extra_data_part;
842 }
843 return serialized_extra_data_parts;
844 }
845
846 // static
CreateFooter(const std::string & boundary)847 std::string MHTMLGenerationManager::Job::CreateFooter(
848 const std::string& boundary) {
849 DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
850 // Per the spec, the boundary must occur at the beginning of a line.
851 std::string footer = base::StringPrintf("\r\n--%s--\r\n", boundary.c_str());
852 DCHECK(base::IsStringASCII(footer));
853 return footer;
854 }
855
856 // static
CloseFileIfValid(base::File & file,int64_t * file_size)857 bool MHTMLGenerationManager::Job::CloseFileIfValid(base::File& file,
858 int64_t* file_size) {
859 DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
860 DCHECK(file_size);
861 if (file.IsValid()) {
862 *file_size = file.GetLength();
863 file.Close();
864 return true;
865 }
866
867 return false;
868 }
869
GetInstance()870 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() {
871 return base::Singleton<MHTMLGenerationManager>::get();
872 }
873
874 MHTMLGenerationManager::MHTMLGenerationManager() = default;
875
876 MHTMLGenerationManager::~MHTMLGenerationManager() = default;
877
SaveMHTML(WebContents * web_contents,const MHTMLGenerationParams & params,MHTMLGenerationResult::GenerateMHTMLCallback callback)878 void MHTMLGenerationManager::SaveMHTML(
879 WebContents* web_contents,
880 const MHTMLGenerationParams& params,
881 MHTMLGenerationResult::GenerateMHTMLCallback callback) {
882 DCHECK_CURRENTLY_ON(BrowserThread::UI);
883 Job::StartNewJob(web_contents, params, std::move(callback));
884 }
885
886 } // namespace content
887