1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/blink/renderer/core/frame/savable_resources.h"
6 
7 #include "third_party/blink/public/mojom/frame/frame.mojom-blink.h"
8 #include "third_party/blink/public/platform/platform.h"
9 #include "third_party/blink/renderer/core/dom/element.h"
10 #include "third_party/blink/renderer/core/frame/local_frame.h"
11 #include "third_party/blink/renderer/core/html/forms/html_input_element.h"
12 #include "third_party/blink/renderer/core/html/html_all_collection.h"
13 #include "third_party/blink/renderer/core/html/html_frame_owner_element.h"
14 #include "third_party/blink/renderer/core/html_names.h"
15 #include "third_party/blink/renderer/core/input_type_names.h"
16 #include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
17 
18 namespace blink {
19 namespace {
20 
21 // Returns |true| if |frame| contains (or should be assumed to contain)
22 // a html document.
DoesFrameContainHtmlDocument(Frame * frame,Element * element)23 bool DoesFrameContainHtmlDocument(Frame* frame, Element* element) {
24   if (frame->IsLocalFrame()) {
25     Document* document =
26         LocalFrame::FromFrameToken(frame->GetFrameToken())->GetDocument();
27     return document->IsHTMLDocument() || document->IsXHTMLDocument();
28   }
29 
30   // Cannot inspect contents of a remote frame, so we use a heuristic:
31   // Assume that <iframe> and <frame> elements contain a html document,
32   // and other elements (i.e. <object>) contain plugins or other resources.
33   // If the heuristic is wrong (i.e. the remote frame in <object> does
34   // contain an html document), then things will still work, but with the
35   // following caveats: 1) original frame content will be saved and 2) links
36   // in frame's html doc will not be rewritten to point to locally saved
37   // files.
38   return element->HasTagName(html_names::kIFrameTag) ||
39          element->HasTagName(html_names::kFrameTag);
40 }
41 
42 // If present and valid, then push the link associated with |element|
43 // into either SavableResources::Result::subframes_ or
44 // SavableResources::Result::resources_list_.
GetSavableResourceLinkForElement(Element * element,const Document & current_document,SavableResources::Result * result)45 void GetSavableResourceLinkForElement(Element* element,
46                                       const Document& current_document,
47                                       SavableResources::Result* result) {
48   // Get absolute URL.
49   String link_attribute_value =
50       SavableResources::GetSubResourceLinkFromElement(element);
51   KURL element_url = current_document.CompleteURL(link_attribute_value);
52 
53   // See whether to report this element as a subframe.
54   if (auto* frame_owner = DynamicTo<HTMLFrameOwnerElement>(element)) {
55     Frame* content_frame = frame_owner->ContentFrame();
56     if (content_frame && DoesFrameContainHtmlDocument(content_frame, element)) {
57       mojom::blink::SavableSubframePtr subframe =
58           mojom::blink::SavableSubframe::New(element_url,
59                                              content_frame->GetFrameToken());
60       result->AppendSubframe(std::move(subframe));
61       return;
62     }
63   }
64 
65   // Check whether the node has sub resource URL or not.
66   if (link_attribute_value.IsNull())
67     return;
68 
69   // Ignore invalid URL.
70   if (!element_url.IsValid())
71     return;
72 
73   // Ignore those URLs which are not standard protocols. Because FTP
74   // protocol does no have cache mechanism, we will skip all
75   // sub-resources if they use FTP protocol.
76   if (!element_url.ProtocolIsInHTTPFamily() &&
77       !element_url.ProtocolIs(url::kFileScheme))
78     return;
79 
80   result->AppendResourceLink(element_url);
81 }
82 
83 }  // namespace
84 
85 // static
GetSavableResourceLinksForFrame(LocalFrame * current_frame,SavableResources::Result * result)86 bool SavableResources::GetSavableResourceLinksForFrame(
87     LocalFrame* current_frame,
88     SavableResources::Result* result) {
89   // Get current frame's URL.
90   KURL current_frame_url = current_frame->GetDocument()->Url();
91 
92   // If url of current frame is invalid, ignore it.
93   if (!current_frame_url.IsValid())
94     return false;
95 
96   // If url of current frame is not a savable protocol, ignore it.
97   if (!Platform::Current()->IsURLSavableForSavableResource(current_frame_url))
98     return false;
99 
100   // Get current using document.
101   Document* current_document = current_frame->GetDocument();
102   DCHECK(current_document);
103 
104   // Go through all descent nodes.
105   HTMLAllCollection* collection = current_document->all();
106 
107   // Go through all elements in this frame.
108   for (unsigned i = 0; i < collection->length(); ++i) {
109     GetSavableResourceLinkForElement(collection->item(i), *current_document,
110                                      result);
111   }
112 
113   return true;
114 }
115 
116 // static
GetSubResourceLinkFromElement(Element * element)117 String SavableResources::GetSubResourceLinkFromElement(Element* element) {
118   const char* attribute_name = nullptr;
119   if (element->HasTagName(html_names::kImgTag) ||
120       element->HasTagName(html_names::kFrameTag) ||
121       element->HasTagName(html_names::kIFrameTag) ||
122       element->HasTagName(html_names::kScriptTag)) {
123     attribute_name = "src";
124   } else if (element->HasTagName(html_names::kInputTag)) {
125     HTMLInputElement* input = To<HTMLInputElement>(element);
126     if (input->type() == input_type_names::kImage) {
127       attribute_name = "src";
128     }
129   } else if (element->HasTagName(html_names::kBodyTag) ||
130              element->HasTagName(html_names::kTableTag) ||
131              element->HasTagName(html_names::kTrTag) ||
132              element->HasTagName(html_names::kTdTag)) {
133     attribute_name = "background";
134   } else if (element->HasTagName(html_names::kBlockquoteTag) ||
135              element->HasTagName(html_names::kQTag) ||
136              element->HasTagName(html_names::kDelTag) ||
137              element->HasTagName(html_names::kInsTag)) {
138     attribute_name = "cite";
139   } else if (element->HasTagName(html_names::kObjectTag)) {
140     attribute_name = "data";
141   } else if (element->HasTagName(html_names::kLinkTag)) {
142     // If the link element is not linked to css, ignore it.
143     String type = element->getAttribute("type");
144     String rel = element->getAttribute("rel");
145     if ((type.ContainsOnlyASCIIOrEmpty() && type.LowerASCII() == "text/css") ||
146         (rel.ContainsOnlyASCIIOrEmpty() && rel.LowerASCII() == "stylesheet")) {
147       // TODO(jnd): Add support for extracting links of sub-resources which
148       // are inside style-sheet such as @import, url(), etc.
149       // See bug: http://b/issue?id=1111667.
150       attribute_name = "href";
151     }
152   }
153   if (!attribute_name)
154     return String();
155   String value = element->getAttribute(attribute_name);
156   // If value has content and not start with "javascript:" then return it,
157   // otherwise return an empty string.
158   if (!value.IsNull() && !value.IsEmpty() &&
159       !value.StartsWith("javascript:", kTextCaseASCIIInsensitive))
160     return value;
161 
162   return String();
163 }
164 
AppendSubframe(mojom::blink::SavableSubframePtr subframe)165 void SavableResources::Result::AppendSubframe(
166     mojom::blink::SavableSubframePtr subframe) {
167   subframes_->emplace_back(std::move(subframe));
168 }
169 
AppendResourceLink(const KURL & url)170 void SavableResources::Result::AppendResourceLink(const KURL& url) {
171   resources_list_->emplace_back(url);
172 }
173 
174 }  // namespace blink
175