1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include "components/dom_distiller/core/distiller_page.h"

#include <stddef.h>

#include <memory>
#include <utility>

#include "base/bind.h"
#include "base/json/json_writer.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/single_thread_task_runner.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/time/time.h"
#include "components/grit/components_resources.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "third_party/dom_distiller_js/dom_distiller_json_converter.h"
#include "ui/base/resource/resource_bundle.h"
#include "url/gurl.h"
26
27 namespace dom_distiller {
28
29 namespace {
30
// Tokens embedded in the distiller JavaScript resource that are substituted
// before the script is handed to the page. Declared as constant arrays (not
// mutable pointers) so they cannot be reassigned and need no extra pointer
// storage.
constexpr char kOptionsPlaceholder[] = "$$OPTIONS";
constexpr char kStringifyPlaceholder[] = "$$STRINGIFY";
33
GetDistillerScriptWithOptions(const dom_distiller::proto::DomDistillerOptions & options,bool stringify_output)34 std::string GetDistillerScriptWithOptions(
35 const dom_distiller::proto::DomDistillerOptions& options,
36 bool stringify_output) {
37 std::string script =
38 ui::ResourceBundle::GetSharedInstance().LoadDataResourceString(
39 IDR_DISTILLER_JS);
40 if (script.empty()) {
41 return "";
42 }
43
44 std::unique_ptr<base::Value> options_value(
45 dom_distiller::proto::json::DomDistillerOptions::WriteToValue(options));
46 std::string options_json;
47 if (!base::JSONWriter::Write(*options_value, &options_json)) {
48 NOTREACHED();
49 }
50 size_t options_offset = script.find(kOptionsPlaceholder);
51 DCHECK_NE(std::string::npos, options_offset);
52 DCHECK_EQ(std::string::npos,
53 script.find(kOptionsPlaceholder, options_offset + 1));
54 script =
55 script.replace(options_offset, strlen(kOptionsPlaceholder), options_json);
56
57 std::string stringify = stringify_output ? "true" : "false";
58 size_t stringify_offset = script.find(kStringifyPlaceholder);
59 DCHECK_NE(std::string::npos, stringify_offset);
60 DCHECK_EQ(std::string::npos,
61 script.find(kStringifyPlaceholder, stringify_offset + 1));
62 script = script.replace(stringify_offset, strlen(kStringifyPlaceholder),
63 stringify);
64
65 return script;
66 }
67
68 } // namespace
69
70 DistillerPageFactory::~DistillerPageFactory() = default;
71
DistillerPage()72 DistillerPage::DistillerPage() : ready_(true) {}
73
74 DistillerPage::~DistillerPage() = default;
75
DistillPage(const GURL & gurl,const dom_distiller::proto::DomDistillerOptions options,DistillerPageCallback callback)76 void DistillerPage::DistillPage(
77 const GURL& gurl,
78 const dom_distiller::proto::DomDistillerOptions options,
79 DistillerPageCallback callback) {
80 DCHECK(ready_);
81 // It is only possible to distill one page at a time. |ready_| is reset when
82 // the callback to OnDistillationDone happens.
83 ready_ = false;
84 distiller_page_callback_ = std::move(callback);
85 distillation_start_ = base::TimeTicks::Now();
86 DistillPageImpl(gurl,
87 GetDistillerScriptWithOptions(options, StringifyOutput()));
88 }
89
OnDistillationDone(const GURL & page_url,const base::Value * value)90 void DistillerPage::OnDistillationDone(const GURL& page_url,
91 const base::Value* value) {
92 DCHECK(!ready_);
93 ready_ = true;
94
95 std::unique_ptr<dom_distiller::proto::DomDistillerResult> distiller_result(
96 new dom_distiller::proto::DomDistillerResult());
97 bool found_content;
98 if (value->is_none()) {
99 found_content = false;
100 } else {
101 found_content =
102 dom_distiller::proto::json::DomDistillerResult::ReadFromValue(
103 value, distiller_result.get());
104 if (!found_content) {
105 DVLOG(1) << "Unable to parse DomDistillerResult.";
106 } else {
107 base::TimeDelta distillation_time =
108 base::TimeTicks::Now() - distillation_start_;
109 UMA_HISTOGRAM_TIMES("DomDistiller.Time.DistillPage", distillation_time);
110 VLOG(1) << "DomDistiller.Time.DistillPage = " << distillation_time;
111
112 if (distiller_result->has_timing_info()) {
113 const dom_distiller::proto::TimingInfo& timing =
114 distiller_result->timing_info();
115 if (timing.has_markup_parsing_time()) {
116 UMA_HISTOGRAM_TIMES(
117 "DomDistiller.Time.MarkupParsing",
118 base::TimeDelta::FromMillisecondsD(timing.markup_parsing_time()));
119 }
120 if (timing.has_document_construction_time()) {
121 UMA_HISTOGRAM_TIMES("DomDistiller.Time.DocumentConstruction",
122 base::TimeDelta::FromMillisecondsD(
123 timing.document_construction_time()));
124 }
125 if (timing.has_article_processing_time()) {
126 UMA_HISTOGRAM_TIMES("DomDistiller.Time.ArticleProcessing",
127 base::TimeDelta::FromMillisecondsD(
128 timing.article_processing_time()));
129 }
130 if (timing.has_formatting_time()) {
131 UMA_HISTOGRAM_TIMES(
132 "DomDistiller.Time.Formatting",
133 base::TimeDelta::FromMillisecondsD(timing.formatting_time()));
134 }
135 if (timing.has_total_time()) {
136 UMA_HISTOGRAM_TIMES(
137 "DomDistiller.Time.DistillationTotal",
138 base::TimeDelta::FromMillisecondsD(timing.total_time()));
139 VLOG(1) << "DomDistiller.Time.DistillationTotal = "
140 << base::TimeDelta::FromMillisecondsD(timing.total_time());
141 }
142 }
143 if (distiller_result->has_statistics_info()) {
144 const dom_distiller::proto::StatisticsInfo& statistics =
145 distiller_result->statistics_info();
146 if (statistics.has_word_count()) {
147 UMA_HISTOGRAM_CUSTOM_COUNTS("DomDistiller.Statistics.WordCount",
148 statistics.word_count(), 1, 4000, 50);
149 }
150 }
151 }
152 }
153
154 base::ThreadTaskRunnerHandle::Get()->PostTask(
155 FROM_HERE, base::BindOnce(std::move(distiller_page_callback_),
156 std::move(distiller_result), found_content));
157 }
158
159 } // namespace dom_distiller
160