1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/offline_pages/offline_page_mhtml_archiver.h"
6 
7 #include <utility>
8 
9 #include "base/bind.h"
10 #include "base/callback_helpers.h"
11 #include "base/files/file_path.h"
12 #include "base/files/file_util.h"
13 #include "base/guid.h"
14 #include "base/location.h"
15 #include "base/logging.h"
16 #include "base/metrics/histogram_functions.h"
17 #include "base/strings/string16.h"
18 #include "base/task/post_task.h"
19 #include "base/task/thread_pool.h"
20 #include "base/threading/thread_task_runner_handle.h"
21 #include "chrome/browser/offline_pages/offline_page_utils.h"
22 #include "components/offline_pages/core/archive_validator.h"
23 #include "components/offline_pages/core/model/offline_page_model_utils.h"
24 #include "components/offline_pages/core/offline_clock.h"
25 #include "content/public/browser/browser_thread.h"
26 #include "content/public/browser/web_contents.h"
27 #include "content/public/common/mhtml_generation_params.h"
28 #include "net/base/filename_util.h"
29 
30 namespace offline_pages {
31 namespace {
DeleteFileOnFileThread(const base::FilePath & file_path,base::OnceClosure callback)32 void DeleteFileOnFileThread(const base::FilePath& file_path,
33                             base::OnceClosure callback) {
34   base::ThreadPool::PostTaskAndReply(
35       FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT},
36       base::BindOnce(base::GetDeleteFileCallback(), file_path),
37       std::move(callback));
38 }
39 
40 // Compute a SHA256 digest using a background thread. The computed digest will
41 // be returned in the callback parameter. If it is empty, the digest calculation
42 // fails.
ComputeDigestOnFileThread(const base::FilePath & file_path,base::OnceCallback<void (const std::string &)> callback)43 void ComputeDigestOnFileThread(
44     const base::FilePath& file_path,
45     base::OnceCallback<void(const std::string&)> callback) {
46   base::ThreadPool::PostTaskAndReplyWithResult(
47       FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT},
48       base::BindOnce(&ArchiveValidator::ComputeDigest, file_path),
49       std::move(callback));
50 }
51 }  // namespace
52 
53 // static
OfflinePageMHTMLArchiver()54 OfflinePageMHTMLArchiver::OfflinePageMHTMLArchiver() {}
55 
~OfflinePageMHTMLArchiver()56 OfflinePageMHTMLArchiver::~OfflinePageMHTMLArchiver() {
57 }
58 
CreateArchive(const base::FilePath & archives_dir,const CreateArchiveParams & create_archive_params,content::WebContents * web_contents,CreateArchiveCallback callback)59 void OfflinePageMHTMLArchiver::CreateArchive(
60     const base::FilePath& archives_dir,
61     const CreateArchiveParams& create_archive_params,
62     content::WebContents* web_contents,
63     CreateArchiveCallback callback) {
64   DCHECK(callback_.is_null());
65   DCHECK(!callback.is_null());
66   callback_ = std::move(callback);
67 
68   GenerateMHTML(archives_dir, web_contents, create_archive_params);
69 }
70 
GenerateMHTML(const base::FilePath & archives_dir,content::WebContents * web_contents,const CreateArchiveParams & create_archive_params)71 void OfflinePageMHTMLArchiver::GenerateMHTML(
72     const base::FilePath& archives_dir,
73     content::WebContents* web_contents,
74     const CreateArchiveParams& create_archive_params) {
75   if (archives_dir.empty()) {
76     DVLOG(1) << "Archive path was empty. Can't create archive.";
77     ReportFailure(ArchiverResult::ERROR_ARCHIVE_CREATION_FAILED);
78     return;
79   }
80 
81   if (!web_contents) {
82     DVLOG(1) << "WebContents is missing. Can't create archive.";
83     ReportFailure(ArchiverResult::ERROR_CONTENT_UNAVAILABLE);
84     return;
85   }
86 
87   if (!web_contents->GetRenderViewHost()) {
88     DVLOG(1) << "RenderViewHost is not created yet. Can't create archive.";
89     ReportFailure(ArchiverResult::ERROR_CONTENT_UNAVAILABLE);
90     return;
91   }
92 
93   GURL url(web_contents->GetLastCommittedURL());
94   base::string16 title(web_contents->GetTitle());
95   base::FilePath file_path(
96       archives_dir.Append(base::GenerateGUID())
97           .AddExtension(OfflinePageUtils::kMHTMLExtension));
98   content::MHTMLGenerationParams params(file_path);
99   params.use_binary_encoding = true;
100   params.remove_popup_overlay = create_archive_params.remove_popup_overlay;
101   params.use_page_problem_detectors =
102       create_archive_params.use_page_problem_detectors;
103 
104   web_contents->GenerateMHTMLWithResult(
105       params,
106       base::BindOnce(&OfflinePageMHTMLArchiver::OnGenerateMHTMLDone,
107                      weak_ptr_factory_.GetWeakPtr(), url, file_path, title,
108                      create_archive_params.name_space, OfflineTimeNow()));
109 }
110 
OnGenerateMHTMLDone(const GURL & url,const base::FilePath & file_path,const base::string16 & title,const std::string & name_space,base::Time mhtml_start_time,const content::MHTMLGenerationResult & result)111 void OfflinePageMHTMLArchiver::OnGenerateMHTMLDone(
112     const GURL& url,
113     const base::FilePath& file_path,
114     const base::string16& title,
115     const std::string& name_space,
116     base::Time mhtml_start_time,
117     const content::MHTMLGenerationResult& result) {
118   if (result.file_size < 0) {
119     DeleteFileAndReportFailure(file_path,
120                                ArchiverResult::ERROR_ARCHIVE_CREATION_FAILED);
121     return;
122   }
123 
124   const base::Time digest_start_time = OfflineTimeNow();
125   base::UmaHistogramTimes(
126       model_utils::AddHistogramSuffix(
127           name_space, "OfflinePages.SavePage.CreateArchiveTime"),
128       digest_start_time - mhtml_start_time);
129 
130   if (result.file_digest) {
131     OnComputeDigestDone(url, file_path, title, name_space, base::Time(),
132                         result.file_size, result.file_digest.value());
133   } else {
134     ComputeDigestOnFileThread(
135         file_path,
136         base::BindOnce(&OfflinePageMHTMLArchiver::OnComputeDigestDone,
137                        weak_ptr_factory_.GetWeakPtr(), url, file_path, title,
138                        name_space, digest_start_time, result.file_size));
139   }
140 }
141 
OnComputeDigestDone(const GURL & url,const base::FilePath & file_path,const base::string16 & title,const std::string & name_space,base::Time digest_start_time,int64_t file_size,const std::string & digest)142 void OfflinePageMHTMLArchiver::OnComputeDigestDone(
143     const GURL& url,
144     const base::FilePath& file_path,
145     const base::string16& title,
146     const std::string& name_space,
147     base::Time digest_start_time,
148     int64_t file_size,
149     const std::string& digest) {
150   if (digest.empty()) {
151     DeleteFileAndReportFailure(file_path,
152                                ArchiverResult::ERROR_DIGEST_CALCULATION_FAILED);
153     return;
154   }
155 
156   if (!digest_start_time.is_null()) {
157     base::UmaHistogramTimes(
158         model_utils::AddHistogramSuffix(
159             name_space, "OfflinePages.SavePage.ComputeDigestTime"),
160         OfflineTimeNow() - digest_start_time);
161   }
162 
163   base::ThreadTaskRunnerHandle::Get()->PostTask(
164       FROM_HERE,
165       base::BindOnce(std::move(callback_), ArchiverResult::SUCCESSFULLY_CREATED,
166                      url, file_path, title, file_size, digest));
167 }
168 
DeleteFileAndReportFailure(const base::FilePath & file_path,ArchiverResult result)169 void OfflinePageMHTMLArchiver::DeleteFileAndReportFailure(
170     const base::FilePath& file_path,
171     ArchiverResult result) {
172   DeleteFileOnFileThread(
173       file_path, base::BindOnce(&OfflinePageMHTMLArchiver::ReportFailure,
174                                 weak_ptr_factory_.GetWeakPtr(), result));
175 }
176 
ReportFailure(ArchiverResult result)177 void OfflinePageMHTMLArchiver::ReportFailure(ArchiverResult result) {
178   DCHECK(result != ArchiverResult::SUCCESSFULLY_CREATED);
179   base::ThreadTaskRunnerHandle::Get()->PostTask(
180       FROM_HERE,
181       base::BindOnce(std::move(callback_), result, GURL(), base::FilePath(),
182                      base::string16(), 0, std::string()));
183 }
184 
185 }  // namespace offline_pages
186