1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/blink/renderer/core/frame/savable_resources.h"
6
7 #include "third_party/blink/public/mojom/frame/frame.mojom-blink.h"
8 #include "third_party/blink/public/platform/platform.h"
9 #include "third_party/blink/renderer/core/dom/element.h"
10 #include "third_party/blink/renderer/core/frame/local_frame.h"
11 #include "third_party/blink/renderer/core/html/forms/html_input_element.h"
12 #include "third_party/blink/renderer/core/html/html_all_collection.h"
13 #include "third_party/blink/renderer/core/html/html_frame_owner_element.h"
14 #include "third_party/blink/renderer/core/html_names.h"
15 #include "third_party/blink/renderer/core/input_type_names.h"
16 #include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
17
18 namespace blink {
19 namespace {
20
21 // Returns |true| if |frame| contains (or should be assumed to contain)
22 // a html document.
DoesFrameContainHtmlDocument(Frame * frame,Element * element)23 bool DoesFrameContainHtmlDocument(Frame* frame, Element* element) {
24 if (frame->IsLocalFrame()) {
25 Document* document =
26 LocalFrame::FromFrameToken(frame->GetFrameToken())->GetDocument();
27 return document->IsHTMLDocument() || document->IsXHTMLDocument();
28 }
29
30 // Cannot inspect contents of a remote frame, so we use a heuristic:
31 // Assume that <iframe> and <frame> elements contain a html document,
32 // and other elements (i.e. <object>) contain plugins or other resources.
33 // If the heuristic is wrong (i.e. the remote frame in <object> does
34 // contain an html document), then things will still work, but with the
35 // following caveats: 1) original frame content will be saved and 2) links
36 // in frame's html doc will not be rewritten to point to locally saved
37 // files.
38 return element->HasTagName(html_names::kIFrameTag) ||
39 element->HasTagName(html_names::kFrameTag);
40 }
41
42 // If present and valid, then push the link associated with |element|
43 // into either SavableResources::Result::subframes_ or
44 // SavableResources::Result::resources_list_.
GetSavableResourceLinkForElement(Element * element,const Document & current_document,SavableResources::Result * result)45 void GetSavableResourceLinkForElement(Element* element,
46 const Document& current_document,
47 SavableResources::Result* result) {
48 // Get absolute URL.
49 String link_attribute_value =
50 SavableResources::GetSubResourceLinkFromElement(element);
51 KURL element_url = current_document.CompleteURL(link_attribute_value);
52
53 // See whether to report this element as a subframe.
54 if (auto* frame_owner = DynamicTo<HTMLFrameOwnerElement>(element)) {
55 Frame* content_frame = frame_owner->ContentFrame();
56 if (content_frame && DoesFrameContainHtmlDocument(content_frame, element)) {
57 mojom::blink::SavableSubframePtr subframe =
58 mojom::blink::SavableSubframe::New(element_url,
59 content_frame->GetFrameToken());
60 result->AppendSubframe(std::move(subframe));
61 return;
62 }
63 }
64
65 // Check whether the node has sub resource URL or not.
66 if (link_attribute_value.IsNull())
67 return;
68
69 // Ignore invalid URL.
70 if (!element_url.IsValid())
71 return;
72
73 // Ignore those URLs which are not standard protocols. Because FTP
74 // protocol does no have cache mechanism, we will skip all
75 // sub-resources if they use FTP protocol.
76 if (!element_url.ProtocolIsInHTTPFamily() &&
77 !element_url.ProtocolIs(url::kFileScheme))
78 return;
79
80 result->AppendResourceLink(element_url);
81 }
82
83 } // namespace
84
85 // static
GetSavableResourceLinksForFrame(LocalFrame * current_frame,SavableResources::Result * result)86 bool SavableResources::GetSavableResourceLinksForFrame(
87 LocalFrame* current_frame,
88 SavableResources::Result* result) {
89 // Get current frame's URL.
90 KURL current_frame_url = current_frame->GetDocument()->Url();
91
92 // If url of current frame is invalid, ignore it.
93 if (!current_frame_url.IsValid())
94 return false;
95
96 // If url of current frame is not a savable protocol, ignore it.
97 if (!Platform::Current()->IsURLSavableForSavableResource(current_frame_url))
98 return false;
99
100 // Get current using document.
101 Document* current_document = current_frame->GetDocument();
102 DCHECK(current_document);
103
104 // Go through all descent nodes.
105 HTMLAllCollection* collection = current_document->all();
106
107 // Go through all elements in this frame.
108 for (unsigned i = 0; i < collection->length(); ++i) {
109 GetSavableResourceLinkForElement(collection->item(i), *current_document,
110 result);
111 }
112
113 return true;
114 }
115
116 // static
GetSubResourceLinkFromElement(Element * element)117 String SavableResources::GetSubResourceLinkFromElement(Element* element) {
118 const char* attribute_name = nullptr;
119 if (element->HasTagName(html_names::kImgTag) ||
120 element->HasTagName(html_names::kFrameTag) ||
121 element->HasTagName(html_names::kIFrameTag) ||
122 element->HasTagName(html_names::kScriptTag)) {
123 attribute_name = "src";
124 } else if (element->HasTagName(html_names::kInputTag)) {
125 HTMLInputElement* input = To<HTMLInputElement>(element);
126 if (input->type() == input_type_names::kImage) {
127 attribute_name = "src";
128 }
129 } else if (element->HasTagName(html_names::kBodyTag) ||
130 element->HasTagName(html_names::kTableTag) ||
131 element->HasTagName(html_names::kTrTag) ||
132 element->HasTagName(html_names::kTdTag)) {
133 attribute_name = "background";
134 } else if (element->HasTagName(html_names::kBlockquoteTag) ||
135 element->HasTagName(html_names::kQTag) ||
136 element->HasTagName(html_names::kDelTag) ||
137 element->HasTagName(html_names::kInsTag)) {
138 attribute_name = "cite";
139 } else if (element->HasTagName(html_names::kObjectTag)) {
140 attribute_name = "data";
141 } else if (element->HasTagName(html_names::kLinkTag)) {
142 // If the link element is not linked to css, ignore it.
143 String type = element->getAttribute("type");
144 String rel = element->getAttribute("rel");
145 if ((type.ContainsOnlyASCIIOrEmpty() && type.LowerASCII() == "text/css") ||
146 (rel.ContainsOnlyASCIIOrEmpty() && rel.LowerASCII() == "stylesheet")) {
147 // TODO(jnd): Add support for extracting links of sub-resources which
148 // are inside style-sheet such as @import, url(), etc.
149 // See bug: http://b/issue?id=1111667.
150 attribute_name = "href";
151 }
152 }
153 if (!attribute_name)
154 return String();
155 String value = element->getAttribute(attribute_name);
156 // If value has content and not start with "javascript:" then return it,
157 // otherwise return an empty string.
158 if (!value.IsNull() && !value.IsEmpty() &&
159 !value.StartsWith("javascript:", kTextCaseASCIIInsensitive))
160 return value;
161
162 return String();
163 }
164
AppendSubframe(mojom::blink::SavableSubframePtr subframe)165 void SavableResources::Result::AppendSubframe(
166 mojom::blink::SavableSubframePtr subframe) {
167 subframes_->emplace_back(std::move(subframe));
168 }
169
AppendResourceLink(const KURL & url)170 void SavableResources::Result::AppendResourceLink(const KURL& url) {
171 resources_list_->emplace_back(url);
172 }
173
174 } // namespace blink
175