1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/blink/renderer/core/frame/frame_serializer_delegate_impl.h"
6
7 #include "third_party/blink/public/web/web_frame_serializer.h"
8 #include "third_party/blink/renderer/core/dom/attribute.h"
9 #include "third_party/blink/renderer/core/dom/document.h"
10 #include "third_party/blink/renderer/core/dom/element.h"
11 #include "third_party/blink/renderer/core/dom/element_traversal.h"
12 #include "third_party/blink/renderer/core/dom/shadow_root.h"
13 #include "third_party/blink/renderer/core/frame/frame.h"
14 #include "third_party/blink/renderer/core/frame/local_dom_window.h"
15 #include "third_party/blink/renderer/core/html/forms/html_input_element.h"
16 #include "third_party/blink/renderer/core/html/html_anchor_element.h"
17 #include "third_party/blink/renderer/core/html/html_frame_element_base.h"
18 #include "third_party/blink/renderer/core/html/html_frame_owner_element.h"
19 #include "third_party/blink/renderer/core/html/html_head_element.h"
20 #include "third_party/blink/renderer/core/html/html_iframe_element.h"
21 #include "third_party/blink/renderer/core/html/html_image_element.h"
22 #include "third_party/blink/renderer/core/html/html_link_element.h"
23 #include "third_party/blink/renderer/core/html/html_meta_element.h"
24 #include "third_party/blink/renderer/core/html/html_template_element.h"
25 #include "third_party/blink/renderer/core/html/link_rel_attribute.h"
26 #include "third_party/blink/renderer/core/html_names.h"
27 #include "third_party/blink/renderer/core/input_type_names.h"
28 #include "third_party/blink/renderer/core/layout/layout_box.h"
29 #include "third_party/blink/renderer/core/layout/layout_object.h"
30 #include "third_party/blink/renderer/core/loader/resource/image_resource_content.h"
31 #include "third_party/blink/renderer/core/page/chrome_client.h"
32 #include "third_party/blink/renderer/core/page/page.h"
33 #include "third_party/blink/renderer/platform/geometry/layout_point.h"
34 #include "third_party/blink/renderer/platform/geometry/layout_rect.h"
35 #include "third_party/blink/renderer/platform/heap/heap.h"
36 #include "third_party/blink/renderer/platform/mhtml/mhtml_parser.h"
37 #include "third_party/blink/renderer/platform/weborigin/kurl.h"
38 #include "third_party/blink/renderer/platform/wtf/assertions.h"
39 #include "third_party/blink/renderer/platform/wtf/text/atomic_string.h"
40
41 namespace blink {
42
namespace {

// z-index compared against an element's computed z-index when deciding
// whether it is treated as a popup overlay: values below this threshold are
// never treated as one.
constexpr int kPopupOverlayZIndexThreshold = 50;

// Names of the special attributes written on the <template> element that
// carries serialized shadow DOM content.
constexpr char kShadowModeAttributeName[] = "shadowmode";
constexpr char kShadowDelegatesFocusAttributeName[] = "shadowdelegatesfocus";

}  // namespace
50
51 // static
GetContentID(Frame * frame)52 String FrameSerializerDelegateImpl::GetContentID(Frame* frame) {
53 DCHECK(frame);
54 String frame_id = String(frame->ToTraceValue().data());
55 return "<frame-" + frame_id + "@mhtml.blink>";
56 }
57
FrameSerializerDelegateImpl(WebFrameSerializer::MHTMLPartsGenerationDelegate & web_delegate,HeapHashSet<WeakMember<const Element>> & shadow_template_elements)58 FrameSerializerDelegateImpl::FrameSerializerDelegateImpl(
59 WebFrameSerializer::MHTMLPartsGenerationDelegate& web_delegate,
60 HeapHashSet<WeakMember<const Element>>& shadow_template_elements)
61 : web_delegate_(web_delegate),
62 shadow_template_elements_(shadow_template_elements),
63 popup_overlays_skipped_(false) {}
64
ShouldIgnoreElement(const Element & element)65 bool FrameSerializerDelegateImpl::ShouldIgnoreElement(const Element& element) {
66 if (ShouldIgnoreHiddenElement(element))
67 return true;
68 if (ShouldIgnoreMetaElement(element))
69 return true;
70 if (web_delegate_.RemovePopupOverlay() &&
71 ShouldIgnorePopupOverlayElement(element)) {
72 return true;
73 }
74 // Remove <link> for stylesheets that do not load.
75 auto* html_link_element = DynamicTo<HTMLLinkElement>(element);
76 if (html_link_element && html_link_element->RelAttribute().IsStyleSheet() &&
77 !html_link_element->sheet()) {
78 return true;
79 }
80 return false;
81 }
82
ShouldIgnoreHiddenElement(const Element & element)83 bool FrameSerializerDelegateImpl::ShouldIgnoreHiddenElement(
84 const Element& element) {
85 // If an iframe is in the head, it will be moved to the body when the page is
86 // being loaded. But if an iframe is injected into the head later, it will
87 // stay there and not been displayed. To prevent it from being brought to the
88 // saved page and cause it being displayed, we should not include it.
89 if (IsA<HTMLIFrameElement>(element) &&
90 Traversal<HTMLHeadElement>::FirstAncestor(element)) {
91 return true;
92 }
93
94 // Do not include the element that is marked with hidden attribute.
95 if (element.FastHasAttribute(html_names::kHiddenAttr))
96 return true;
97
98 // Do not include the hidden form element.
99 auto* html_element_element = DynamicTo<HTMLInputElement>(&element);
100 return html_element_element &&
101 html_element_element->type() == input_type_names::kHidden;
102 }
103
ShouldIgnoreMetaElement(const Element & element)104 bool FrameSerializerDelegateImpl::ShouldIgnoreMetaElement(
105 const Element& element) {
106 // Do not include meta elements that declare Content-Security-Policy
107 // directives. They should have already been enforced when the original
108 // document is loaded. Since only the rendered resources are encapsulated in
109 // the saved MHTML page, there is no need to carry the directives. If they
110 // are still kept in the MHTML, child frames that are referred to using cid:
111 // scheme could be prevented from loading.
112 if (!IsA<HTMLMetaElement>(element))
113 return false;
114 if (!element.FastHasAttribute(html_names::kContentAttr))
115 return false;
116 const AtomicString& http_equiv =
117 element.FastGetAttribute(html_names::kHttpEquivAttr);
118 return http_equiv == "Content-Security-Policy";
119 }
120
ShouldIgnorePopupOverlayElement(const Element & element)121 bool FrameSerializerDelegateImpl::ShouldIgnorePopupOverlayElement(
122 const Element& element) {
123 // The element should be visible.
124 LayoutBox* box = element.GetLayoutBox();
125 if (!box)
126 return false;
127
128 // The bounding box of the element should contain center point of the
129 // viewport.
130 LocalDOMWindow* window = element.GetDocument().domWindow();
131 DCHECK(window);
132 int center_x = window->innerWidth() / 2;
133 int center_y = window->innerHeight() / 2;
134 if (Page* page = element.GetDocument().GetPage()) {
135 center_x = page->GetChromeClient().WindowToViewportScalar(
136 window->GetFrame(), center_x);
137 center_y = page->GetChromeClient().WindowToViewportScalar(
138 window->GetFrame(), center_y);
139 }
140 LayoutPoint center_point(center_x, center_y);
141 if (!box->FrameRect().Contains(center_point))
142 return false;
143
144 // The z-index should be greater than the threshold.
145 if (box->Style()->ZIndex() < kPopupOverlayZIndexThreshold)
146 return false;
147
148 popup_overlays_skipped_ = true;
149
150 return true;
151 }
152
ShouldIgnoreAttribute(const Element & element,const Attribute & attribute)153 bool FrameSerializerDelegateImpl::ShouldIgnoreAttribute(
154 const Element& element,
155 const Attribute& attribute) {
156 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display
157 // images, as only the value of src is pulled into the archive. Discarding
158 // srcset prevents the problem. Long term we should make sure to MHTML plays
159 // nicely with srcset.
160 if (IsA<HTMLImageElement>(element) &&
161 (attribute.LocalName() == html_names::kSrcsetAttr ||
162 attribute.LocalName() == html_names::kSizesAttr)) {
163 return true;
164 }
165
166 // Do not save ping attribute since anyway the ping will be blocked from
167 // MHTML.
168 if (IsA<HTMLAnchorElement>(element) &&
169 attribute.LocalName() == html_names::kPingAttr) {
170 return true;
171 }
172
173 // The special attribute in a template element to denote the shadow DOM
174 // should only be generated from MHTML serialization. If it is found in the
175 // original page, it should be ignored.
176 if (IsA<HTMLTemplateElement>(element) &&
177 (attribute.LocalName() == kShadowModeAttributeName ||
178 attribute.LocalName() == kShadowDelegatesFocusAttributeName) &&
179 !shadow_template_elements_.Contains(&element)) {
180 return true;
181 }
182
183 // If srcdoc attribute for frame elements will be rewritten as src attribute
184 // containing link instead of html contents, don't ignore the attribute.
185 // Bail out now to avoid the check in Element::isScriptingAttribute.
186 bool is_src_doc_attribute = IsA<HTMLFrameElementBase>(element) &&
187 attribute.GetName() == html_names::kSrcdocAttr;
188 String new_link_for_the_element;
189 if (is_src_doc_attribute && RewriteLink(element, new_link_for_the_element))
190 return false;
191
192 // Drop integrity attribute for those links with subresource loaded.
193 auto* html_link_element = DynamicTo<HTMLLinkElement>(element);
194 if (attribute.LocalName() == html_names::kIntegrityAttr &&
195 html_link_element && html_link_element->sheet()) {
196 return true;
197 }
198
199 // Do not include attributes that contain javascript. This is because the
200 // script will not be executed when a MHTML page is being loaded.
201 return element.IsScriptingAttribute(attribute);
202 }
203
RewriteLink(const Element & element,String & rewritten_link)204 bool FrameSerializerDelegateImpl::RewriteLink(const Element& element,
205 String& rewritten_link) {
206 auto* frame_owner = DynamicTo<HTMLFrameOwnerElement>(element);
207 if (!frame_owner)
208 return false;
209
210 Frame* frame = frame_owner->ContentFrame();
211 if (!frame)
212 return false;
213
214 WebString content_id = GetContentID(frame);
215 KURL cid_uri = MHTMLParser::ConvertContentIDToURI(content_id);
216 DCHECK(cid_uri.IsValid());
217 rewritten_link = cid_uri.GetString();
218 return true;
219 }
220
ShouldSkipResourceWithURL(const KURL & url)221 bool FrameSerializerDelegateImpl::ShouldSkipResourceWithURL(const KURL& url) {
222 return web_delegate_.ShouldSkipResource(url);
223 }
224
GetCustomAttributes(const Element & element)225 Vector<Attribute> FrameSerializerDelegateImpl::GetCustomAttributes(
226 const Element& element) {
227 Vector<Attribute> attributes;
228
229 if (auto* image = DynamicTo<HTMLImageElement>(element)) {
230 GetCustomAttributesForImageElement(*image, &attributes);
231 }
232
233 return attributes;
234 }
235
ShouldCollectProblemMetric()236 bool FrameSerializerDelegateImpl::ShouldCollectProblemMetric() {
237 return web_delegate_.UsePageProblemDetectors();
238 }
239
GetCustomAttributesForImageElement(const HTMLImageElement & element,Vector<Attribute> * attributes)240 void FrameSerializerDelegateImpl::GetCustomAttributesForImageElement(
241 const HTMLImageElement& element,
242 Vector<Attribute>* attributes) {
243 // Currently only the value of src is pulled into the archive and the srcset
244 // attribute is ignored (see shouldIgnoreAttribute() above). If the device
245 // has a higher DPR, a different image from srcset could be loaded instead.
246 // When this occurs, we should provide the rendering width and height for
247 // <img> element if not set.
248
249 // The image should be loaded and participate the layout.
250 ImageResourceContent* image = element.CachedImage();
251 if (!image || !image->HasImage() || image->ErrorOccurred() ||
252 !element.GetLayoutObject()) {
253 return;
254 }
255
256 // The width and height attributes should not be set.
257 if (element.FastHasAttribute(html_names::kWidthAttr) ||
258 element.FastHasAttribute(html_names::kHeightAttr)) {
259 return;
260 }
261
262 // Check if different image is loaded. naturalWidth/naturalHeight will return
263 // the image size adjusted with current DPR.
264 if ((static_cast<int>(element.naturalWidth())) ==
265 image->GetImage()->width() &&
266 (static_cast<int>(element.naturalHeight())) ==
267 image->GetImage()->height()) {
268 return;
269 }
270
271 Attribute width_attribute(html_names::kWidthAttr,
272 AtomicString::Number(element.LayoutBoxWidth()));
273 attributes->push_back(width_attribute);
274 Attribute height_attribute(html_names::kHeightAttr,
275 AtomicString::Number(element.LayoutBoxHeight()));
276 attributes->push_back(height_attribute);
277 }
278
GetAuxiliaryDOMTree(const Element & element) const279 std::pair<Node*, Element*> FrameSerializerDelegateImpl::GetAuxiliaryDOMTree(
280 const Element& element) const {
281 ShadowRoot* shadow_root = element.GetShadowRoot();
282 if (!shadow_root)
283 return std::pair<Node*, Element*>();
284
285 String shadow_mode;
286 switch (shadow_root->GetType()) {
287 case ShadowRootType::kUserAgent:
288 // No need to serialize.
289 return std::pair<Node*, Element*>();
290 case ShadowRootType::V0:
291 shadow_mode = "v0";
292 break;
293 case ShadowRootType::kOpen:
294 shadow_mode = "open";
295 break;
296 case ShadowRootType::kClosed:
297 shadow_mode = "closed";
298 break;
299 }
300
301 // Put the shadow DOM content inside a template element. A special attribute
302 // is set to tell the mode of the shadow DOM.
303 auto* template_element = MakeGarbageCollected<Element>(
304 html_names::kTemplateTag, &(element.GetDocument()));
305 template_element->setAttribute(
306 QualifiedName(g_null_atom, kShadowModeAttributeName, g_null_atom),
307 AtomicString(shadow_mode));
308 if (shadow_root->GetType() != ShadowRootType::V0 &&
309 shadow_root->delegatesFocus()) {
310 template_element->setAttribute(
311 QualifiedName(g_null_atom, kShadowDelegatesFocusAttributeName,
312 g_null_atom),
313 g_empty_atom);
314 }
315 shadow_template_elements_.insert(template_element);
316
317 return std::pair<Node*, Element*>(shadow_root, template_element);
318 }
319
320 } // namespace blink
321