1// Copyright (c) 2014, David Kitchen <david@buro9.com>
2//
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice, this
9//   list of conditions and the following disclaimer.
10//
11// * Redistributions in binary form must reproduce the above copyright notice,
12//   this list of conditions and the following disclaimer in the documentation
13//   and/or other materials provided with the distribution.
14//
15// * Neither the name of the organisation (Microcosm) nor the names of its
16//   contributors may be used to endorse or promote products derived from
17//   this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30package bluemonday
31
32//TODO sgutzwiller create map of styles to default handlers
33//TODO sgutzwiller create handlers for various attributes
34import (
35	"net/url"
36	"regexp"
37	"strings"
38
39	"github.com/microcosm-cc/bluemonday/css"
40)
41
// Policy encapsulates the allowlist of HTML elements and attributes that will
// be applied to the sanitised HTML.
//
// You should use bluemonday.NewPolicy() to create a blank policy as the
// unexported fields contain maps that need to be initialized.
type Policy struct {

	// Declares whether the maps have been initialized, used as a cheap check to
	// ensure that those using Policy{} directly won't cause nil pointer
	// exceptions. Set to true by init().
	initialized bool

	// If true then we add spaces when stripping tags, specifically the closing
	// tag is replaced by a space character.
	addSpaces bool

	// When true, add rel="nofollow" to HTML a, area, and link tags
	requireNoFollow bool

	// When true, add rel="nofollow" to HTML a, area, and link tags
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	requireNoFollowFullyQualifiedLinks bool

	// When true, add rel="noreferrer" to HTML a, area, and link tags
	requireNoReferrer bool

	// When true, add rel="noreferrer" to HTML a, area, and link tags
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	requireNoReferrerFullyQualifiedLinks bool

	// When true, add crossorigin="anonymous" to HTML audio, img, link, script, and video tags
	requireCrossOriginAnonymous bool

	// When set, add and filter the sandbox attribute on iframe tags.
	// This is a map rather than a flag: RequireSandboxOnIFrame() fills it with
	// the sandbox tokens (e.g. "allow-forms") that are permitted, each mapped
	// to true. It stays nil until RequireSandboxOnIFrame() is called.
	requireSandboxOnIFrame map[string]bool

	// When true add target="_blank" to fully qualified links
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	addTargetBlankToFullyQualifiedLinks bool

	// When true, URLs must be parseable by "net/url" url.Parse()
	requireParseableURLs bool

	// When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
	allowRelativeURLs bool

	// When true, allow data attributes.
	allowDataAttributes bool

	// When true, allow comments.
	allowComments bool

	// map[htmlElementName]map[htmlAttributeName][]attrPolicy
	elsAndAttrs map[string]map[string][]attrPolicy

	// elsMatchingAndAttrs stores regex based element matches along with attributes
	// map[regex]map[htmlAttributeName][]attrPolicy
	elsMatchingAndAttrs map[*regexp.Regexp]map[string][]attrPolicy

	// map[htmlAttributeName][]attrPolicy
	globalAttrs map[string][]attrPolicy

	// map[htmlElementName]map[cssPropertyName][]stylePolicy
	elsAndStyles map[string]map[string][]stylePolicy

	// map[regex]map[cssPropertyName][]stylePolicy
	elsMatchingAndStyles map[*regexp.Regexp]map[string][]stylePolicy

	// map[cssPropertyName][]stylePolicy
	globalStyles map[string][]stylePolicy

	// map[urlScheme][]urlPolicy
	//
	// If urlPolicy is nil, all URLs with matching schema are allowed.
	// Otherwise, only the URLs with matching schema and urlPolicy(url)
	// returning true are allowed.
	allowURLSchemes map[string][]urlPolicy

	// If an element has had all attributes removed as a result of a policy
	// being applied, then the element would be removed from the output.
	//
	// However some elements are valid and have strong layout meaning without
	// any attributes, i.e. <table>. To prevent those being removed we maintain
	// a list of elements that are allowed to have no attributes and that will
	// be maintained in the output HTML.
	setOfElementsAllowedWithoutAttrs map[string]struct{}

	// If an element has had all attributes removed as a result of a policy
	// being applied, then the element would be removed from the output.
	//
	// However some elements are valid and have strong layout meaning without
	// any attributes, i.e. <table>.
	//
	// In this case, any element matching a regular expression will be accepted without
	// attributes added.
	setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp

	// Set of element names whose character content should not be rendered
	// when the element itself is not allowed (see
	// addDefaultSkipElementContent for the defaults). Entries are added by
	// SkipElementsContent() and removed by AllowElementsContent().
	setOfElementsToSkipContent map[string]struct{}

	// Permits fundamentally unsafe elements.
	//
	// If false (default) then elements such as `style` and `script` will not be
	// permitted even if declared in a policy. These elements when combined with
	// untrusted input cannot be safely handled by bluemonday at this point in
	// time.
	//
	// If true then `style` and `script` would be permitted by bluemonday if a
	// policy declares them. However this is not recommended under any circumstance
	// and can lead to XSS being rendered thus defeating the purpose of using a
	// HTML sanitizer.
	allowUnsafe bool
}
154
// attrPolicy is a single rule stored against an allowed attribute name, either
// for a specific element, for elements matching a pattern, or globally.
type attrPolicy struct {

	// optional pattern to match, when not nil the regexp needs to match
	// otherwise the attribute is removed
	regexp *regexp.Regexp
}
161
// stylePolicy is a single rule stored against an allowed CSS property name.
//
// The style policy builders in this file populate exactly one of handler, enum
// or regexp for each stylePolicy they create.
type stylePolicy struct {
	// handler to validate
	handler func(string) bool

	// optional pattern to match, when not nil the regexp needs to match
	// otherwise the property is removed
	regexp *regexp.Regexp

	// optional list of allowed property values, for properties which
	// have a defined list of allowed values; property will be removed
	// if the value is not allowed
	enum []string
}
175
// attrPolicyBuilder accumulates the description of an attribute rule. Nothing
// is written to the policy until OnElements(), OnElementsMatching() or
// Globally() is called on the builder.
type attrPolicyBuilder struct {
	// the policy that the finished rule will be added to
	p *Policy

	// lower-cased attribute names the rule applies to
	attrNames []string
	// optional pattern copied onto each attrPolicy that is created
	regexp *regexp.Regexp
	// when true the target elements are also recorded as being allowed
	// without any attributes
	allowEmpty bool
}
183
// stylePolicyBuilder accumulates the description of a CSS property rule.
// Nothing is written to the policy until OnElements(), OnElementsMatching()
// or Globally() is called on the builder.
type stylePolicyBuilder struct {
	// the policy that the finished rule will be added to
	p *Policy

	// lower-cased CSS property names the rule applies to
	propertyNames []string
	// optional pattern, used only when neither handler nor enum is set
	regexp *regexp.Regexp
	// optional list of allowed values, used only when handler is not set
	enum []string
	// optional validation function, takes precedence over enum and regexp
	handler func(string) bool
}
192
// urlPolicy is a custom check applied to a parsed URL; it reports whether the
// URL is allowed. Values are stored per scheme in Policy.allowURLSchemes.
type urlPolicy func(url *url.URL) (allowUrl bool)
194
// SandboxValue identifies a token of the iframe sandbox attribute that can be
// passed to RequireSandboxOnIFrame.
type SandboxValue int64

// The sandbox tokens understood by RequireSandboxOnIFrame. The trailing
// comment on each constant is the attribute token it is translated to.
// Values are assigned with iota, so SandboxAllowDownloads is the zero value.
const (
	SandboxAllowDownloads                      SandboxValue = iota // allow-downloads
	SandboxAllowDownloadsWithoutUserActivation                     // allow-downloads-without-user-activation
	SandboxAllowForms                                              // allow-forms
	SandboxAllowModals                                             // allow-modals
	SandboxAllowOrientationLock                                    // allow-orientation-lock
	SandboxAllowPointerLock                                        // allow-pointer-lock
	SandboxAllowPopups                                             // allow-popups
	SandboxAllowPopupsToEscapeSandbox                              // allow-popups-to-escape-sandbox
	SandboxAllowPresentation                                       // allow-presentation
	SandboxAllowSameOrigin                                         // allow-same-origin
	SandboxAllowScripts                                            // allow-scripts
	SandboxAllowStorageAccessByUserActivation                      // allow-storage-access-by-user-activation
	SandboxAllowTopNavigation                                      // allow-top-navigation
	SandboxAllowTopNavigationByUserActivation                      // allow-top-navigation-by-user-activation
)
213
214// init initializes the maps if this has not been done already
215func (p *Policy) init() {
216	if !p.initialized {
217		p.elsAndAttrs = make(map[string]map[string][]attrPolicy)
218		p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string][]attrPolicy)
219		p.globalAttrs = make(map[string][]attrPolicy)
220		p.elsAndStyles = make(map[string]map[string][]stylePolicy)
221		p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string][]stylePolicy)
222		p.globalStyles = make(map[string][]stylePolicy)
223		p.allowURLSchemes = make(map[string][]urlPolicy)
224		p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
225		p.setOfElementsToSkipContent = make(map[string]struct{})
226		p.initialized = true
227	}
228}
229
230// NewPolicy returns a blank policy with nothing allowed or permitted. This
231// is the recommended way to start building a policy and you should now use
232// AllowAttrs() and/or AllowElements() to construct the allowlist of HTML
233// elements and attributes.
234func NewPolicy() *Policy {
235
236	p := Policy{}
237
238	p.addDefaultElementsWithoutAttrs()
239	p.addDefaultSkipElementContent()
240
241	return &p
242}
243
244// AllowAttrs takes a range of HTML attribute names and returns an
245// attribute policy builder that allows you to specify the pattern and scope of
246// the allowed attribute.
247//
248// The attribute policy is only added to the core policy when either Globally()
249// or OnElements(...) are called.
250func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
251
252	p.init()
253
254	abp := attrPolicyBuilder{
255		p:          p,
256		allowEmpty: false,
257	}
258
259	for _, attrName := range attrNames {
260		abp.attrNames = append(abp.attrNames, strings.ToLower(attrName))
261	}
262
263	return &abp
264}
265
266// AllowDataAttributes permits all data attributes. We can't specify the name
267// of each attribute exactly as they are customized.
268//
269// NOTE: These values are not sanitized and applications that evaluate or process
270// them without checking and verification of the input may be at risk if this option
271// is enabled. This is a 'caveat emptor' option and the person enabling this option
272// needs to fully understand the potential impact with regards to whatever application
273// will be consuming the sanitized HTML afterwards, i.e. if you know you put a link in a
274// data attribute and use that to automatically load some new window then you're giving
275// the author of a HTML fragment the means to open a malicious destination automatically.
276// Use with care!
277func (p *Policy) AllowDataAttributes() {
278	p.allowDataAttributes = true
279}
280
281// AllowComments allows comments.
282//
283// Please note that only one type of comment will be allowed by this, this is the
284// the standard HTML comment <!-- --> which includes the use of that to permit
285// conditionals as per https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN
286//
287// What is not permitted are CDATA XML comments, as the x/net/html package we depend
288// on does not handle this fully and we are not choosing to take on that work:
289// https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . If the x/net/html
290// package changes this then these will be considered, otherwise if you AllowComments
291// but provide a CDATA comment, then as per the documentation in x/net/html this will
292// be treated as a plain HTML comment.
293func (p *Policy) AllowComments() {
294	p.allowComments = true
295}
296
297// AllowNoAttrs says that attributes on element are optional.
298//
299// The attribute policy is only added to the core policy when OnElements(...)
300// are called.
301func (p *Policy) AllowNoAttrs() *attrPolicyBuilder {
302
303	p.init()
304
305	abp := attrPolicyBuilder{
306		p:          p,
307		allowEmpty: true,
308	}
309	return &abp
310}
311
312// AllowNoAttrs says that attributes on element are optional.
313//
314// The attribute policy is only added to the core policy when OnElements(...)
315// are called.
316func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {
317
318	abp.allowEmpty = true
319
320	return abp
321}
322
323// Matching allows a regular expression to be applied to a nascent attribute
324// policy, and returns the attribute policy.
325func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {
326
327	abp.regexp = regex
328
329	return abp
330}
331
332// OnElements will bind an attribute policy to a given range of HTML elements
333// and return the updated policy
334func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
335
336	for _, element := range elements {
337		element = strings.ToLower(element)
338
339		for _, attr := range abp.attrNames {
340
341			if _, ok := abp.p.elsAndAttrs[element]; !ok {
342				abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
343			}
344
345			ap := attrPolicy{}
346			if abp.regexp != nil {
347				ap.regexp = abp.regexp
348			}
349
350			abp.p.elsAndAttrs[element][attr] = append(abp.p.elsAndAttrs[element][attr], ap)
351		}
352
353		if abp.allowEmpty {
354			abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{}
355
356			if _, ok := abp.p.elsAndAttrs[element]; !ok {
357				abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
358			}
359		}
360	}
361
362	return abp.p
363}
364
365// OnElementsMatching will bind an attribute policy to all elements matching a given regex
366// and return the updated policy
367func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
368	for _, attr := range abp.attrNames {
369		if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
370			abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
371		}
372		ap := attrPolicy{}
373		if abp.regexp != nil {
374			ap.regexp = abp.regexp
375		}
376		abp.p.elsMatchingAndAttrs[regex][attr] = append(abp.p.elsMatchingAndAttrs[regex][attr], ap)
377	}
378
379	if abp.allowEmpty {
380		abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
381		if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
382			abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
383		}
384	}
385
386	return abp.p
387}
388
389// Globally will bind an attribute policy to all HTML elements and return the
390// updated policy
391func (abp *attrPolicyBuilder) Globally() *Policy {
392
393	for _, attr := range abp.attrNames {
394		if _, ok := abp.p.globalAttrs[attr]; !ok {
395			abp.p.globalAttrs[attr] = []attrPolicy{}
396		}
397
398		ap := attrPolicy{}
399		if abp.regexp != nil {
400			ap.regexp = abp.regexp
401		}
402
403		abp.p.globalAttrs[attr] = append(abp.p.globalAttrs[attr], ap)
404	}
405
406	return abp.p
407}
408
409// AllowStyles takes a range of CSS property names and returns a
410// style policy builder that allows you to specify the pattern and scope of
411// the allowed property.
412//
413// The style policy is only added to the core policy when either Globally()
414// or OnElements(...) are called.
415func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {
416
417	p.init()
418
419	abp := stylePolicyBuilder{
420		p: p,
421	}
422
423	for _, propertyName := range propertyNames {
424		abp.propertyNames = append(abp.propertyNames, strings.ToLower(propertyName))
425	}
426
427	return &abp
428}
429
430// Matching allows a regular expression to be applied to a nascent style
431// policy, and returns the style policy.
432func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {
433
434	spb.regexp = regex
435
436	return spb
437}
438
439// MatchingEnum allows a list of allowed values to be applied to a nascent style
440// policy, and returns the style policy.
441func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {
442
443	spb.enum = enum
444
445	return spb
446}
447
448// MatchingHandler allows a handler to be applied to a nascent style
449// policy, and returns the style policy.
450func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {
451
452	spb.handler = handler
453
454	return spb
455}
456
457// OnElements will bind a style policy to a given range of HTML elements
458// and return the updated policy
459func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
460
461	for _, element := range elements {
462		element = strings.ToLower(element)
463
464		for _, attr := range spb.propertyNames {
465
466			if _, ok := spb.p.elsAndStyles[element]; !ok {
467				spb.p.elsAndStyles[element] = make(map[string][]stylePolicy)
468			}
469
470			sp := stylePolicy{}
471			if spb.handler != nil {
472				sp.handler = spb.handler
473			} else if len(spb.enum) > 0 {
474				sp.enum = spb.enum
475			} else if spb.regexp != nil {
476				sp.regexp = spb.regexp
477			} else {
478				sp.handler = css.GetDefaultHandler(attr)
479			}
480			spb.p.elsAndStyles[element][attr] = append(spb.p.elsAndStyles[element][attr], sp)
481		}
482	}
483
484	return spb.p
485}
486
487// OnElementsMatching will bind a style policy to any HTML elements matching the pattern
488// and return the updated policy
489func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
490
491	for _, attr := range spb.propertyNames {
492
493		if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
494			spb.p.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy)
495		}
496
497		sp := stylePolicy{}
498		if spb.handler != nil {
499			sp.handler = spb.handler
500		} else if len(spb.enum) > 0 {
501			sp.enum = spb.enum
502		} else if spb.regexp != nil {
503			sp.regexp = spb.regexp
504		} else {
505			sp.handler = css.GetDefaultHandler(attr)
506		}
507		spb.p.elsMatchingAndStyles[regex][attr] = append(spb.p.elsMatchingAndStyles[regex][attr], sp)
508	}
509
510	return spb.p
511}
512
513// Globally will bind a style policy to all HTML elements and return the
514// updated policy
515func (spb *stylePolicyBuilder) Globally() *Policy {
516
517	for _, attr := range spb.propertyNames {
518		if _, ok := spb.p.globalStyles[attr]; !ok {
519			spb.p.globalStyles[attr] = []stylePolicy{}
520		}
521
522		// Use only one strategy for validating styles, fallback to default
523		sp := stylePolicy{}
524		if spb.handler != nil {
525			sp.handler = spb.handler
526		} else if len(spb.enum) > 0 {
527			sp.enum = spb.enum
528		} else if spb.regexp != nil {
529			sp.regexp = spb.regexp
530		} else {
531			sp.handler = css.GetDefaultHandler(attr)
532		}
533		spb.p.globalStyles[attr] = append(spb.p.globalStyles[attr], sp)
534	}
535
536	return spb.p
537}
538
539// AllowElements will append HTML elements to the allowlist without applying an
540// attribute policy to those elements (the elements are permitted
541// sans-attributes)
542func (p *Policy) AllowElements(names ...string) *Policy {
543	p.init()
544
545	for _, element := range names {
546		element = strings.ToLower(element)
547
548		if _, ok := p.elsAndAttrs[element]; !ok {
549			p.elsAndAttrs[element] = make(map[string][]attrPolicy)
550		}
551	}
552
553	return p
554}
555
556// AllowElementsMatching will append HTML elements to the allowlist if they
557// match a regexp.
558func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
559	p.init()
560	if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
561		p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
562	}
563	return p
564}
565
566// RequireNoFollowOnLinks will result in all a, area, link tags having a
567// rel="nofollow"added to them if one does not already exist
568//
569// Note: This requires p.RequireParseableURLs(true) and will enable it.
570func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {
571
572	p.requireNoFollow = require
573	p.requireParseableURLs = true
574
575	return p
576}
577
578// RequireNoFollowOnFullyQualifiedLinks will result in all a, area, and link
579// tags that point to a non-local destination (i.e. starts with a protocol and
580// has a host) having a rel="nofollow" added to them if one does not already
581// exist
582//
583// Note: This requires p.RequireParseableURLs(true) and will enable it.
584func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {
585
586	p.requireNoFollowFullyQualifiedLinks = require
587	p.requireParseableURLs = true
588
589	return p
590}
591
592// RequireNoReferrerOnLinks will result in all a, area, and link tags having a
593// rel="noreferrrer" added to them if one does not already exist
594//
595// Note: This requires p.RequireParseableURLs(true) and will enable it.
596func (p *Policy) RequireNoReferrerOnLinks(require bool) *Policy {
597
598	p.requireNoReferrer = require
599	p.requireParseableURLs = true
600
601	return p
602}
603
604// RequireNoReferrerOnFullyQualifiedLinks will result in all a, area, and link
605// tags that point to a non-local destination (i.e. starts with a protocol and
606// has a host) having a rel="noreferrer" added to them if one does not already
607// exist
608//
609// Note: This requires p.RequireParseableURLs(true) and will enable it.
610func (p *Policy) RequireNoReferrerOnFullyQualifiedLinks(require bool) *Policy {
611
612	p.requireNoReferrerFullyQualifiedLinks = require
613	p.requireParseableURLs = true
614
615	return p
616}
617
618// RequireCrossOriginAnonymous will result in all audio, img, link, script, and
619// video tags having a crossorigin="anonymous" added to them if one does not
620// already exist
621func (p *Policy) RequireCrossOriginAnonymous(require bool) *Policy {
622
623	p.requireCrossOriginAnonymous = require
624
625	return p
626}
627
628// AddTargetBlankToFullyQualifiedLinks will result in all a, area and link tags
629// that point to a non-local destination (i.e. starts with a protocol and has a
630// host) having a target="_blank" added to them if one does not already exist
631//
632// Note: This requires p.RequireParseableURLs(true) and will enable it.
633func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy {
634
635	p.addTargetBlankToFullyQualifiedLinks = require
636	p.requireParseableURLs = true
637
638	return p
639}
640
641// RequireParseableURLs will result in all URLs requiring that they be parseable
642// by "net/url" url.Parse()
643// This applies to:
644// - a.href
645// - area.href
646// - blockquote.cite
647// - img.src
648// - link.href
649// - script.src
650func (p *Policy) RequireParseableURLs(require bool) *Policy {
651
652	p.requireParseableURLs = require
653
654	return p
655}
656
657// AllowRelativeURLs enables RequireParseableURLs and then permits URLs that
658// are parseable, have no schema information and url.IsAbs() returns false
659// This permits local URLs
660func (p *Policy) AllowRelativeURLs(require bool) *Policy {
661
662	p.RequireParseableURLs(true)
663	p.allowRelativeURLs = require
664
665	return p
666}
667
668// AllowURLSchemes will append URL schemes to the allowlist
669// Example: p.AllowURLSchemes("mailto", "http", "https")
670func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
671	p.init()
672
673	p.RequireParseableURLs(true)
674
675	for _, scheme := range schemes {
676		scheme = strings.ToLower(scheme)
677
678		// Allow all URLs with matching scheme.
679		p.allowURLSchemes[scheme] = nil
680	}
681
682	return p
683}
684
685// AllowURLSchemeWithCustomPolicy will append URL schemes with
686// a custom URL policy to the allowlist.
687// Only the URLs with matching schema and urlPolicy(url)
688// returning true will be allowed.
689func (p *Policy) AllowURLSchemeWithCustomPolicy(
690	scheme string,
691	urlPolicy func(url *url.URL) (allowUrl bool),
692) *Policy {
693
694	p.init()
695
696	p.RequireParseableURLs(true)
697
698	scheme = strings.ToLower(scheme)
699
700	p.allowURLSchemes[scheme] = append(p.allowURLSchemes[scheme], urlPolicy)
701
702	return p
703}
704
705// RequireSandboxOnIFrame will result in all iframe tags having a sandbox="" tag
706// Any sandbox values not specified here will be filtered from the generated HTML
707func (p *Policy) RequireSandboxOnIFrame(vals ...SandboxValue) {
708	p.requireSandboxOnIFrame = make(map[string]bool)
709
710	for _, val := range vals {
711		switch SandboxValue(val) {
712		case SandboxAllowDownloads:
713			p.requireSandboxOnIFrame["allow-downloads"] = true
714
715		case SandboxAllowDownloadsWithoutUserActivation:
716			p.requireSandboxOnIFrame["allow-downloads-without-user-activation"] = true
717
718		case SandboxAllowForms:
719			p.requireSandboxOnIFrame["allow-forms"] = true
720
721		case SandboxAllowModals:
722			p.requireSandboxOnIFrame["allow-modals"] = true
723
724		case SandboxAllowOrientationLock:
725			p.requireSandboxOnIFrame["allow-orientation-lock"] = true
726
727		case SandboxAllowPointerLock:
728			p.requireSandboxOnIFrame["allow-pointer-lock"] = true
729
730		case SandboxAllowPopups:
731			p.requireSandboxOnIFrame["allow-popups"] = true
732
733		case SandboxAllowPopupsToEscapeSandbox:
734			p.requireSandboxOnIFrame["allow-popups-to-escape-sandbox"] = true
735
736		case SandboxAllowPresentation:
737			p.requireSandboxOnIFrame["allow-presentation"] = true
738
739		case SandboxAllowSameOrigin:
740			p.requireSandboxOnIFrame["allow-same-origin"] = true
741
742		case SandboxAllowScripts:
743			p.requireSandboxOnIFrame["allow-scripts"] = true
744
745		case SandboxAllowStorageAccessByUserActivation:
746			p.requireSandboxOnIFrame["allow-storage-access-by-user-activation"] = true
747
748		case SandboxAllowTopNavigation:
749			p.requireSandboxOnIFrame["allow-top-navigation"] = true
750
751		case SandboxAllowTopNavigationByUserActivation:
752			p.requireSandboxOnIFrame["allow-top-navigation-by-user-activation"] = true
753		}
754	}
755}
756
757// AddSpaceWhenStrippingTag states whether to add a single space " " when
758// removing tags that are not allowed by the policy.
759//
760// This is useful if you expect to strip tags in dense markup and may lose the
761// value of whitespace.
762//
763// For example: "<p>Hello</p><p>World</p>"" would be sanitized to "HelloWorld"
764// with the default value of false, but you may wish to sanitize this to
765// " Hello  World " by setting AddSpaceWhenStrippingTag to true as this would
766// retain the intent of the text.
767func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy {
768
769	p.addSpaces = allow
770
771	return p
772}
773
774// SkipElementsContent adds the HTML elements whose tags is needed to be removed
775// with its content.
776func (p *Policy) SkipElementsContent(names ...string) *Policy {
777
778	p.init()
779
780	for _, element := range names {
781		element = strings.ToLower(element)
782
783		if _, ok := p.setOfElementsToSkipContent[element]; !ok {
784			p.setOfElementsToSkipContent[element] = struct{}{}
785		}
786	}
787
788	return p
789}
790
791// AllowElementsContent marks the HTML elements whose content should be
792// retained after removing the tag.
793func (p *Policy) AllowElementsContent(names ...string) *Policy {
794
795	p.init()
796
797	for _, element := range names {
798		delete(p.setOfElementsToSkipContent, strings.ToLower(element))
799	}
800
801	return p
802}
803
804// AllowUnsafe permits fundamentally unsafe elements.
805//
806// If false (default) then elements such as `style` and `script` will not be
807// permitted even if declared in a policy. These elements when combined with
808// untrusted input cannot be safely handled by bluemonday at this point in
809// time.
810//
811// If true then `style` and `script` would be permitted by bluemonday if a
812// policy declares them. However this is not recommended under any circumstance
813// and can lead to XSS being rendered thus defeating the purpose of using a
814// HTML sanitizer.
815func (p *Policy) AllowUnsafe(allowUnsafe bool) *Policy {
816	p.init()
817	p.allowUnsafe = allowUnsafe
818	return p
819}
820
821// addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid
822// without any attributes to an internal map.
823// i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr
824// is mandatory
825func (p *Policy) addDefaultElementsWithoutAttrs() {
826	p.init()
827
828	p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{}
829	p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{}
830	p.setOfElementsAllowedWithoutAttrs["address"] = struct{}{}
831	p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{}
832	p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{}
833	p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{}
834	p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{}
835	p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{}
836	p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{}
837	p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{}
838	p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{}
839	p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{}
840	p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{}
841	p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{}
842	p.setOfElementsAllowedWithoutAttrs["center"] = struct{}{}
843	p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{}
844	p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{}
845	p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{}
846	p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{}
847	p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{}
848	p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{}
849	p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{}
850	p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{}
851	p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{}
852	p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{}
853	p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{}
854	p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{}
855	p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{}
856	p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{}
857	p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{}
858	p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{}
859	p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{}
860	p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{}
861	p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{}
862	p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{}
863	p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{}
864	p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{}
865	p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{}
866	p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{}
867	p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{}
868	p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{}
869	p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{}
870	p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{}
871	p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{}
872	p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{}
873	p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{}
874	p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{}
875	p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{}
876	p.setOfElementsAllowedWithoutAttrs["marquee"] = struct{}{}
877	p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{}
878	p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{}
879	p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{}
880	p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{}
881	p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{}
882	p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{}
883	p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{}
884	p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{}
885	p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{}
886	p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{}
887	p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{}
888	p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{}
889	p.setOfElementsAllowedWithoutAttrs["script"] = struct{}{}
890	p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{}
891	p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{}
892	p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{}
893	p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{}
894	p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{}
895	p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{}
896	p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{}
897	p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{}
898	p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{}
899	p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{}
900	p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{}
901	p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{}
902	p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{}
903	p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{}
904	p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{}
905	p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{}
906	p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{}
907	p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{}
908	p.setOfElementsAllowedWithoutAttrs["title"] = struct{}{}
909	p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{}
910	p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{}
911	p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{}
912	p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{}
913	p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{}
914	p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{}
915	p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{}
916	p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{}
917
918}
919
920// addDefaultSkipElementContent adds the HTML elements that we should skip
921// rendering the character content of, if the element itself is not allowed.
922// This is all character data that the end user would not normally see.
923// i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or
924// anything else until we encounter the closing </script> tag.
925func (p *Policy) addDefaultSkipElementContent() {
926	p.init()
927
928	p.setOfElementsToSkipContent["frame"] = struct{}{}
929	p.setOfElementsToSkipContent["frameset"] = struct{}{}
930	p.setOfElementsToSkipContent["iframe"] = struct{}{}
931	p.setOfElementsToSkipContent["noembed"] = struct{}{}
932	p.setOfElementsToSkipContent["noframes"] = struct{}{}
933	p.setOfElementsToSkipContent["noscript"] = struct{}{}
934	p.setOfElementsToSkipContent["nostyle"] = struct{}{}
935	p.setOfElementsToSkipContent["object"] = struct{}{}
936	p.setOfElementsToSkipContent["script"] = struct{}{}
937	p.setOfElementsToSkipContent["style"] = struct{}{}
938	p.setOfElementsToSkipContent["title"] = struct{}{}
939}
940