1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/dom_distiller/core/distiller_page.h"
6 
7 #include <stddef.h>
8 
9 #include <utility>
10 
11 #include "base/bind.h"
12 #include "base/json/json_writer.h"
13 #include "base/location.h"
14 #include "base/logging.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "base/single_thread_task_runner.h"
17 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/threading/thread_task_runner_handle.h"
20 #include "base/time/time.h"
21 #include "components/grit/components_resources.h"
22 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
23 #include "third_party/dom_distiller_js/dom_distiller_json_converter.h"
24 #include "ui/base/resource/resource_bundle.h"
25 #include "url/gurl.h"
26 
27 namespace dom_distiller {
28 
29 namespace {
30 
// Placeholder tokens that GetDistillerScriptWithOptions() looks up in the
// distiller script and substitutes with real values. They are declared as
// character arrays (not `const char*`) so that the constants themselves are
// immutable rather than reassignable pointers.
constexpr char kOptionsPlaceholder[] = "$$OPTIONS";
constexpr char kStringifyPlaceholder[] = "$$STRINGIFY";
33 
GetDistillerScriptWithOptions(const dom_distiller::proto::DomDistillerOptions & options,bool stringify_output)34 std::string GetDistillerScriptWithOptions(
35     const dom_distiller::proto::DomDistillerOptions& options,
36     bool stringify_output) {
37   std::string script =
38       ui::ResourceBundle::GetSharedInstance().LoadDataResourceString(
39           IDR_DISTILLER_JS);
40   if (script.empty()) {
41     return "";
42   }
43 
44   std::unique_ptr<base::Value> options_value(
45       dom_distiller::proto::json::DomDistillerOptions::WriteToValue(options));
46   std::string options_json;
47   if (!base::JSONWriter::Write(*options_value, &options_json)) {
48     NOTREACHED();
49   }
50   size_t options_offset = script.find(kOptionsPlaceholder);
51   DCHECK_NE(std::string::npos, options_offset);
52   DCHECK_EQ(std::string::npos,
53             script.find(kOptionsPlaceholder, options_offset + 1));
54   script =
55       script.replace(options_offset, strlen(kOptionsPlaceholder), options_json);
56 
57   std::string stringify = stringify_output ? "true" : "false";
58   size_t stringify_offset = script.find(kStringifyPlaceholder);
59   DCHECK_NE(std::string::npos, stringify_offset);
60   DCHECK_EQ(std::string::npos,
61             script.find(kStringifyPlaceholder, stringify_offset + 1));
62   script = script.replace(stringify_offset, strlen(kStringifyPlaceholder),
63                           stringify);
64 
65   return script;
66 }
67 
68 }  // namespace
69 
70 DistillerPageFactory::~DistillerPageFactory() = default;
71 
DistillerPage()72 DistillerPage::DistillerPage() : ready_(true) {}
73 
74 DistillerPage::~DistillerPage() = default;
75 
DistillPage(const GURL & gurl,const dom_distiller::proto::DomDistillerOptions options,DistillerPageCallback callback)76 void DistillerPage::DistillPage(
77     const GURL& gurl,
78     const dom_distiller::proto::DomDistillerOptions options,
79     DistillerPageCallback callback) {
80   DCHECK(ready_);
81   // It is only possible to distill one page at a time. |ready_| is reset when
82   // the callback to OnDistillationDone happens.
83   ready_ = false;
84   distiller_page_callback_ = std::move(callback);
85   distillation_start_ = base::TimeTicks::Now();
86   DistillPageImpl(gurl,
87                   GetDistillerScriptWithOptions(options, StringifyOutput()));
88 }
89 
OnDistillationDone(const GURL & page_url,const base::Value * value)90 void DistillerPage::OnDistillationDone(const GURL& page_url,
91                                        const base::Value* value) {
92   DCHECK(!ready_);
93   ready_ = true;
94 
95   std::unique_ptr<dom_distiller::proto::DomDistillerResult> distiller_result(
96       new dom_distiller::proto::DomDistillerResult());
97   bool found_content;
98   if (value->is_none()) {
99     found_content = false;
100   } else {
101     found_content =
102         dom_distiller::proto::json::DomDistillerResult::ReadFromValue(
103             value, distiller_result.get());
104     if (!found_content) {
105       DVLOG(1) << "Unable to parse DomDistillerResult.";
106     } else {
107       base::TimeDelta distillation_time =
108           base::TimeTicks::Now() - distillation_start_;
109       UMA_HISTOGRAM_TIMES("DomDistiller.Time.DistillPage", distillation_time);
110       VLOG(1) << "DomDistiller.Time.DistillPage = " << distillation_time;
111 
112       if (distiller_result->has_timing_info()) {
113         const dom_distiller::proto::TimingInfo& timing =
114             distiller_result->timing_info();
115         if (timing.has_markup_parsing_time()) {
116           UMA_HISTOGRAM_TIMES(
117               "DomDistiller.Time.MarkupParsing",
118               base::TimeDelta::FromMillisecondsD(timing.markup_parsing_time()));
119         }
120         if (timing.has_document_construction_time()) {
121           UMA_HISTOGRAM_TIMES("DomDistiller.Time.DocumentConstruction",
122                               base::TimeDelta::FromMillisecondsD(
123                                   timing.document_construction_time()));
124         }
125         if (timing.has_article_processing_time()) {
126           UMA_HISTOGRAM_TIMES("DomDistiller.Time.ArticleProcessing",
127                               base::TimeDelta::FromMillisecondsD(
128                                   timing.article_processing_time()));
129         }
130         if (timing.has_formatting_time()) {
131           UMA_HISTOGRAM_TIMES(
132               "DomDistiller.Time.Formatting",
133               base::TimeDelta::FromMillisecondsD(timing.formatting_time()));
134         }
135         if (timing.has_total_time()) {
136           UMA_HISTOGRAM_TIMES(
137               "DomDistiller.Time.DistillationTotal",
138               base::TimeDelta::FromMillisecondsD(timing.total_time()));
139           VLOG(1) << "DomDistiller.Time.DistillationTotal = "
140                   << base::TimeDelta::FromMillisecondsD(timing.total_time());
141         }
142       }
143       if (distiller_result->has_statistics_info()) {
144         const dom_distiller::proto::StatisticsInfo& statistics =
145             distiller_result->statistics_info();
146         if (statistics.has_word_count()) {
147           UMA_HISTOGRAM_CUSTOM_COUNTS("DomDistiller.Statistics.WordCount",
148                                       statistics.word_count(), 1, 4000, 50);
149         }
150       }
151     }
152   }
153 
154   base::ThreadTaskRunnerHandle::Get()->PostTask(
155       FROM_HERE, base::BindOnce(std::move(distiller_page_callback_),
156                                 std::move(distiller_result), found_content));
157 }
158 
159 }  // namespace dom_distiller
160