1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/blink/renderer/core/frame/frame_serializer_delegate_impl.h"
6 
7 #include "third_party/blink/public/web/web_frame_serializer.h"
8 #include "third_party/blink/renderer/core/dom/attribute.h"
9 #include "third_party/blink/renderer/core/dom/document.h"
10 #include "third_party/blink/renderer/core/dom/element.h"
11 #include "third_party/blink/renderer/core/dom/element_traversal.h"
12 #include "third_party/blink/renderer/core/dom/shadow_root.h"
13 #include "third_party/blink/renderer/core/frame/frame.h"
14 #include "third_party/blink/renderer/core/frame/local_dom_window.h"
15 #include "third_party/blink/renderer/core/html/forms/html_input_element.h"
16 #include "third_party/blink/renderer/core/html/html_anchor_element.h"
17 #include "third_party/blink/renderer/core/html/html_frame_element_base.h"
18 #include "third_party/blink/renderer/core/html/html_frame_owner_element.h"
19 #include "third_party/blink/renderer/core/html/html_head_element.h"
20 #include "third_party/blink/renderer/core/html/html_iframe_element.h"
21 #include "third_party/blink/renderer/core/html/html_image_element.h"
22 #include "third_party/blink/renderer/core/html/html_link_element.h"
23 #include "third_party/blink/renderer/core/html/html_meta_element.h"
24 #include "third_party/blink/renderer/core/html/html_template_element.h"
25 #include "third_party/blink/renderer/core/html/link_rel_attribute.h"
26 #include "third_party/blink/renderer/core/html_names.h"
27 #include "third_party/blink/renderer/core/input_type_names.h"
28 #include "third_party/blink/renderer/core/layout/layout_box.h"
29 #include "third_party/blink/renderer/core/layout/layout_object.h"
30 #include "third_party/blink/renderer/core/loader/resource/image_resource_content.h"
31 #include "third_party/blink/renderer/core/page/chrome_client.h"
32 #include "third_party/blink/renderer/core/page/page.h"
33 #include "third_party/blink/renderer/platform/geometry/layout_point.h"
34 #include "third_party/blink/renderer/platform/geometry/layout_rect.h"
35 #include "third_party/blink/renderer/platform/heap/heap.h"
36 #include "third_party/blink/renderer/platform/mhtml/mhtml_parser.h"
37 #include "third_party/blink/renderer/platform/weborigin/kurl.h"
38 #include "third_party/blink/renderer/platform/wtf/assertions.h"
39 #include "third_party/blink/renderer/platform/wtf/text/atomic_string.h"
40 
41 namespace blink {
42 
namespace {

// Elements whose z-index is below this value are never treated as popup
// overlays.
constexpr int kPopupOverlayZIndexThreshold = 50;

// Names of the special attributes placed on the <template> element that
// carries serialized shadow DOM content.
constexpr char kShadowModeAttributeName[] = "shadowmode";
constexpr char kShadowDelegatesFocusAttributeName[] = "shadowdelegatesfocus";

}  // namespace
50 
51 // static
GetContentID(Frame * frame)52 String FrameSerializerDelegateImpl::GetContentID(Frame* frame) {
53   DCHECK(frame);
54   String frame_id = String(frame->ToTraceValue().data());
55   return "<frame-" + frame_id + "@mhtml.blink>";
56 }
57 
// Binds this delegate to the embedder-supplied |web_delegate| and to the
// caller-supplied |shadow_template_elements| set, and starts with no popup
// overlay recorded as skipped.
// NOTE(review): both arguments are presumably kept as references (the member
// declarations live in the header) -- confirm they outlive this object.
FrameSerializerDelegateImpl::FrameSerializerDelegateImpl(
    WebFrameSerializer::MHTMLPartsGenerationDelegate& web_delegate,
    HeapHashSet<WeakMember<const Element>>& shadow_template_elements)
    : web_delegate_(web_delegate),
      shadow_template_elements_(shadow_template_elements),
      popup_overlays_skipped_(false) {}
64 
ShouldIgnoreElement(const Element & element)65 bool FrameSerializerDelegateImpl::ShouldIgnoreElement(const Element& element) {
66   if (ShouldIgnoreHiddenElement(element))
67     return true;
68   if (ShouldIgnoreMetaElement(element))
69     return true;
70   if (web_delegate_.RemovePopupOverlay() &&
71       ShouldIgnorePopupOverlayElement(element)) {
72     return true;
73   }
74   // Remove <link> for stylesheets that do not load.
75   auto* html_link_element = DynamicTo<HTMLLinkElement>(element);
76   if (html_link_element && html_link_element->RelAttribute().IsStyleSheet() &&
77       !html_link_element->sheet()) {
78     return true;
79   }
80   return false;
81 }
82 
ShouldIgnoreHiddenElement(const Element & element)83 bool FrameSerializerDelegateImpl::ShouldIgnoreHiddenElement(
84     const Element& element) {
85   // If an iframe is in the head, it will be moved to the body when the page is
86   // being loaded. But if an iframe is injected into the head later, it will
87   // stay there and not been displayed. To prevent it from being brought to the
88   // saved page and cause it being displayed, we should not include it.
89   if (IsA<HTMLIFrameElement>(element) &&
90       Traversal<HTMLHeadElement>::FirstAncestor(element)) {
91     return true;
92   }
93 
94   // Do not include the element that is marked with hidden attribute.
95   if (element.FastHasAttribute(html_names::kHiddenAttr))
96     return true;
97 
98   // Do not include the hidden form element.
99   auto* html_element_element = DynamicTo<HTMLInputElement>(&element);
100   return html_element_element &&
101          html_element_element->type() == input_type_names::kHidden;
102 }
103 
ShouldIgnoreMetaElement(const Element & element)104 bool FrameSerializerDelegateImpl::ShouldIgnoreMetaElement(
105     const Element& element) {
106   // Do not include meta elements that declare Content-Security-Policy
107   // directives. They should have already been enforced when the original
108   // document is loaded. Since only the rendered resources are encapsulated in
109   // the saved MHTML page, there is no need to carry the directives. If they
110   // are still kept in the MHTML, child frames that are referred to using cid:
111   // scheme could be prevented from loading.
112   if (!IsA<HTMLMetaElement>(element))
113     return false;
114   if (!element.FastHasAttribute(html_names::kContentAttr))
115     return false;
116   const AtomicString& http_equiv =
117       element.FastGetAttribute(html_names::kHttpEquivAttr);
118   return http_equiv == "Content-Security-Policy";
119 }
120 
ShouldIgnorePopupOverlayElement(const Element & element)121 bool FrameSerializerDelegateImpl::ShouldIgnorePopupOverlayElement(
122     const Element& element) {
123   // The element should be visible.
124   LayoutBox* box = element.GetLayoutBox();
125   if (!box)
126     return false;
127 
128   // The bounding box of the element should contain center point of the
129   // viewport.
130   LocalDOMWindow* window = element.GetDocument().domWindow();
131   DCHECK(window);
132   int center_x = window->innerWidth() / 2;
133   int center_y = window->innerHeight() / 2;
134   if (Page* page = element.GetDocument().GetPage()) {
135     center_x = page->GetChromeClient().WindowToViewportScalar(
136         window->GetFrame(), center_x);
137     center_y = page->GetChromeClient().WindowToViewportScalar(
138         window->GetFrame(), center_y);
139   }
140   LayoutPoint center_point(center_x, center_y);
141   if (!box->FrameRect().Contains(center_point))
142     return false;
143 
144   // The z-index should be greater than the threshold.
145   if (box->Style()->ZIndex() < kPopupOverlayZIndexThreshold)
146     return false;
147 
148   popup_overlays_skipped_ = true;
149 
150   return true;
151 }
152 
ShouldIgnoreAttribute(const Element & element,const Attribute & attribute)153 bool FrameSerializerDelegateImpl::ShouldIgnoreAttribute(
154     const Element& element,
155     const Attribute& attribute) {
156   // TODO(fgorski): Presence of srcset attribute causes MHTML to not display
157   // images, as only the value of src is pulled into the archive. Discarding
158   // srcset prevents the problem. Long term we should make sure to MHTML plays
159   // nicely with srcset.
160   if (IsA<HTMLImageElement>(element) &&
161       (attribute.LocalName() == html_names::kSrcsetAttr ||
162        attribute.LocalName() == html_names::kSizesAttr)) {
163     return true;
164   }
165 
166   // Do not save ping attribute since anyway the ping will be blocked from
167   // MHTML.
168   if (IsA<HTMLAnchorElement>(element) &&
169       attribute.LocalName() == html_names::kPingAttr) {
170     return true;
171   }
172 
173   // The special attribute in a template element to denote the shadow DOM
174   // should only be generated from MHTML serialization. If it is found in the
175   // original page, it should be ignored.
176   if (IsA<HTMLTemplateElement>(element) &&
177       (attribute.LocalName() == kShadowModeAttributeName ||
178        attribute.LocalName() == kShadowDelegatesFocusAttributeName) &&
179       !shadow_template_elements_.Contains(&element)) {
180     return true;
181   }
182 
183   // If srcdoc attribute for frame elements will be rewritten as src attribute
184   // containing link instead of html contents, don't ignore the attribute.
185   // Bail out now to avoid the check in Element::isScriptingAttribute.
186   bool is_src_doc_attribute = IsA<HTMLFrameElementBase>(element) &&
187                               attribute.GetName() == html_names::kSrcdocAttr;
188   String new_link_for_the_element;
189   if (is_src_doc_attribute && RewriteLink(element, new_link_for_the_element))
190     return false;
191 
192   //  Drop integrity attribute for those links with subresource loaded.
193   auto* html_link_element = DynamicTo<HTMLLinkElement>(element);
194   if (attribute.LocalName() == html_names::kIntegrityAttr &&
195       html_link_element && html_link_element->sheet()) {
196     return true;
197   }
198 
199   // Do not include attributes that contain javascript. This is because the
200   // script will not be executed when a MHTML page is being loaded.
201   return element.IsScriptingAttribute(attribute);
202 }
203 
RewriteLink(const Element & element,String & rewritten_link)204 bool FrameSerializerDelegateImpl::RewriteLink(const Element& element,
205                                               String& rewritten_link) {
206   auto* frame_owner = DynamicTo<HTMLFrameOwnerElement>(element);
207   if (!frame_owner)
208     return false;
209 
210   Frame* frame = frame_owner->ContentFrame();
211   if (!frame)
212     return false;
213 
214   WebString content_id = GetContentID(frame);
215   KURL cid_uri = MHTMLParser::ConvertContentIDToURI(content_id);
216   DCHECK(cid_uri.IsValid());
217   rewritten_link = cid_uri.GetString();
218   return true;
219 }
220 
// Asks the web delegate whether the resource at |url| should be skipped and
// returns its answer unchanged.
bool FrameSerializerDelegateImpl::ShouldSkipResourceWithURL(const KURL& url) {
  return web_delegate_.ShouldSkipResource(url);
}
224 
GetCustomAttributes(const Element & element)225 Vector<Attribute> FrameSerializerDelegateImpl::GetCustomAttributes(
226     const Element& element) {
227   Vector<Attribute> attributes;
228 
229   if (auto* image = DynamicTo<HTMLImageElement>(element)) {
230     GetCustomAttributesForImageElement(*image, &attributes);
231   }
232 
233   return attributes;
234 }
235 
// Returns whatever the web delegate reports from UsePageProblemDetectors().
bool FrameSerializerDelegateImpl::ShouldCollectProblemMetric() {
  return web_delegate_.UsePageProblemDetectors();
}
239 
GetCustomAttributesForImageElement(const HTMLImageElement & element,Vector<Attribute> * attributes)240 void FrameSerializerDelegateImpl::GetCustomAttributesForImageElement(
241     const HTMLImageElement& element,
242     Vector<Attribute>* attributes) {
243   // Currently only the value of src is pulled into the archive and the srcset
244   // attribute is ignored (see shouldIgnoreAttribute() above). If the device
245   // has a higher DPR, a different image from srcset could be loaded instead.
246   // When this occurs, we should provide the rendering width and height for
247   // <img> element if not set.
248 
249   // The image should be loaded and participate the layout.
250   ImageResourceContent* image = element.CachedImage();
251   if (!image || !image->HasImage() || image->ErrorOccurred() ||
252       !element.GetLayoutObject()) {
253     return;
254   }
255 
256   // The width and height attributes should not be set.
257   if (element.FastHasAttribute(html_names::kWidthAttr) ||
258       element.FastHasAttribute(html_names::kHeightAttr)) {
259     return;
260   }
261 
262   // Check if different image is loaded. naturalWidth/naturalHeight will return
263   // the image size adjusted with current DPR.
264   if ((static_cast<int>(element.naturalWidth())) ==
265           image->GetImage()->width() &&
266       (static_cast<int>(element.naturalHeight())) ==
267           image->GetImage()->height()) {
268     return;
269   }
270 
271   Attribute width_attribute(html_names::kWidthAttr,
272                             AtomicString::Number(element.LayoutBoxWidth()));
273   attributes->push_back(width_attribute);
274   Attribute height_attribute(html_names::kHeightAttr,
275                              AtomicString::Number(element.LayoutBoxHeight()));
276   attributes->push_back(height_attribute);
277 }
278 
GetAuxiliaryDOMTree(const Element & element) const279 std::pair<Node*, Element*> FrameSerializerDelegateImpl::GetAuxiliaryDOMTree(
280     const Element& element) const {
281   ShadowRoot* shadow_root = element.GetShadowRoot();
282   if (!shadow_root)
283     return std::pair<Node*, Element*>();
284 
285   String shadow_mode;
286   switch (shadow_root->GetType()) {
287     case ShadowRootType::kUserAgent:
288       // No need to serialize.
289       return std::pair<Node*, Element*>();
290     case ShadowRootType::V0:
291       shadow_mode = "v0";
292       break;
293     case ShadowRootType::kOpen:
294       shadow_mode = "open";
295       break;
296     case ShadowRootType::kClosed:
297       shadow_mode = "closed";
298       break;
299   }
300 
301   // Put the shadow DOM content inside a template element. A special attribute
302   // is set to tell the mode of the shadow DOM.
303   auto* template_element = MakeGarbageCollected<Element>(
304       html_names::kTemplateTag, &(element.GetDocument()));
305   template_element->setAttribute(
306       QualifiedName(g_null_atom, kShadowModeAttributeName, g_null_atom),
307       AtomicString(shadow_mode));
308   if (shadow_root->GetType() != ShadowRootType::V0 &&
309       shadow_root->delegatesFocus()) {
310     template_element->setAttribute(
311         QualifiedName(g_null_atom, kShadowDelegatesFocusAttributeName,
312                       g_null_atom),
313         g_empty_atom);
314   }
315   shadow_template_elements_.insert(template_element);
316 
317   return std::pair<Node*, Element*>(shadow_root, template_element);
318 }
319 
320 }  // namespace blink
321