1// Copyright (c) 2014, David Kitchen <david@buro9.com>
2//
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice, this
9//   list of conditions and the following disclaimer.
10//
11// * Redistributions in binary form must reproduce the above copyright notice,
12//   this list of conditions and the following disclaimer in the documentation
13//   and/or other materials provided with the distribution.
14//
15// * Neither the name of the organisation (Microcosm) nor the names of its
16//   contributors may be used to endorse or promote products derived from
17//   this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30package bluemonday
31
32//TODO sgutzwiller create map of styles to default handlers
33//TODO sgutzwiller create handlers for various attributes
34import (
35	"net/url"
36	"regexp"
37	"strings"
38
39	"github.com/microcosm-cc/bluemonday/css"
40)
41
// Policy encapsulates the allowlist of HTML elements and attributes that will
// be applied to the sanitised HTML.
//
// You should use bluemonday.NewPolicy() to create a blank policy as the
// unexported fields contain maps that need to be initialized. The Policy
// methods in this file that write to those maps call init() first.
type Policy struct {

	// Declares whether the maps have been initialized, used as a cheap check to
	// ensure that those using Policy{} directly won't cause nil pointer
	// exceptions. Set by init().
	initialized bool

	// If true then we add spaces when stripping tags, specifically the closing
	// tag is replaced by a space character.
	addSpaces bool

	// When true, add rel="nofollow" to HTML a, area, and link tags
	requireNoFollow bool

	// When true, add rel="nofollow" to HTML a, area, and link tags, but only
	// for fully qualified links:
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	requireNoFollowFullyQualifiedLinks bool

	// When true, add rel="noreferrer" to HTML a, area, and link tags
	requireNoReferrer bool

	// When true, add rel="noreferrer" to HTML a, area, and link tags, but only
	// for fully qualified links:
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	requireNoReferrerFullyQualifiedLinks bool

	// When true, add crossorigin="anonymous" to HTML audio, img, link, script, and video tags
	requireCrossOriginAnonymous bool

	// When true add target="_blank" to fully qualified links
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	addTargetBlankToFullyQualifiedLinks bool

	// When true, URLs must be parseable by "net/url" url.Parse()
	requireParseableURLs bool

	// When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
	allowRelativeURLs bool

	// When true, allow data attributes.
	allowDataAttributes bool

	// When true, allow comments.
	allowComments bool

	// map[htmlElementName]map[htmlAttributeName][]attrPolicy
	elsAndAttrs map[string]map[string][]attrPolicy

	// elsMatchingAndAttrs stores regex based element matches along with attributes
	// map[regex]map[htmlAttributeName][]attrPolicy
	elsMatchingAndAttrs map[*regexp.Regexp]map[string][]attrPolicy

	// map[htmlAttributeName][]attrPolicy
	globalAttrs map[string][]attrPolicy

	// map[htmlElementName]map[cssPropertyName][]stylePolicy
	elsAndStyles map[string]map[string][]stylePolicy

	// map[regex]map[cssPropertyName][]stylePolicy
	elsMatchingAndStyles map[*regexp.Regexp]map[string][]stylePolicy

	// map[cssPropertyName][]stylePolicy
	globalStyles map[string][]stylePolicy

	// Keyed by lower-cased URL scheme.
	// If urlPolicy is nil, all URLs with matching schema are allowed.
	// Otherwise, only the URLs with matching schema and urlPolicy(url)
	// returning true are allowed.
	allowURLSchemes map[string][]urlPolicy

	// If an element has had all attributes removed as a result of a policy
	// being applied, then the element would be removed from the output.
	//
	// However some elements are valid and have strong layout meaning without
	// any attributes, i.e. <table>. To prevent those being removed we maintain
	// a list of elements that are allowed to have no attributes and that will
	// be maintained in the output HTML.
	setOfElementsAllowedWithoutAttrs map[string]struct{}

	// If an element has had all attributes removed as a result of a policy
	// being applied, then the element would be removed from the output.
	//
	// However some elements are valid and have strong layout meaning without
	// any attributes, i.e. <table>.
	//
	// In this case, any element matching a regular expression will be accepted without
	// attributes added.
	setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp

	// Set of element names whose character content should not be rendered
	// when the element itself is not allowed (see
	// addDefaultSkipElementContent for the defaults).
	setOfElementsToSkipContent map[string]struct{}

	// Permits fundamentally unsafe elements.
	//
	// If false (default) then elements such as `style` and `script` will not be
	// permitted even if declared in a policy. These elements when combined with
	// untrusted input cannot be safely handled by bluemonday at this point in
	// time.
	//
	// If true then `style` and `script` would be permitted by bluemonday if a
	// policy declares them. However this is not recommended under any circumstance
	// and can lead to XSS being rendered thus defeating the purpose of using a
	// HTML sanitizer.
	allowUnsafe bool
}
151
// attrPolicy is a single rule attached to an allowed attribute. Rules are
// stored in slices keyed by attribute name (per element, per element-matching
// regexp, or globally).
type attrPolicy struct {

	// optional pattern to match, when not nil the regexp needs to match
	// otherwise the attribute is removed
	regexp *regexp.Regexp
}
158
// stylePolicy is a single rule attached to an allowed CSS property. The
// builders in this file populate exactly one of handler, enum or regexp for
// each rule they create.
type stylePolicy struct {
	// handler to validate
	handler func(string) bool

	// optional pattern to match, when not nil the regexp needs to match
	// otherwise the property is removed
	regexp *regexp.Regexp

	// optional list of allowed property values, for properties which
	// have a defined list of allowed values; property will be removed
	// if the value is not allowed
	enum []string
}
172
// attrPolicyBuilder accumulates the description of an attribute rule
// (attribute names, optional value pattern, whether the element may appear
// without attributes) until it is bound to the policy by OnElements,
// OnElementsMatching or Globally.
type attrPolicyBuilder struct {
	// policy that the rule will be written to
	p *Policy

	// lower-cased attribute names the rule applies to
	attrNames []string
	// optional pattern copied into each attrPolicy that is created
	regexp *regexp.Regexp
	// when true, bound elements are also recorded as allowed without attributes
	allowEmpty bool
}
180
// stylePolicyBuilder accumulates the description of a CSS property rule until
// it is bound to the policy by OnElements, OnElementsMatching or Globally.
// When binding, the first of handler, enum, regexp that is set is used; if
// none is set the css package's default handler for the property is used.
type stylePolicyBuilder struct {
	// policy that the rule will be written to
	p *Policy

	// lower-cased CSS property names the rule applies to
	propertyNames []string
	// optional value pattern, set by Matching
	regexp *regexp.Regexp
	// optional list of allowed values, set by MatchingEnum
	enum []string
	// optional validation function, set by MatchingHandler
	handler func(string) bool
}
189
// urlPolicy is a predicate over a parsed URL; it returns true when the URL
// should be allowed.
type urlPolicy func(url *url.URL) (allowUrl bool)
191
192// init initializes the maps if this has not been done already
193func (p *Policy) init() {
194	if !p.initialized {
195		p.elsAndAttrs = make(map[string]map[string][]attrPolicy)
196		p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string][]attrPolicy)
197		p.globalAttrs = make(map[string][]attrPolicy)
198		p.elsAndStyles = make(map[string]map[string][]stylePolicy)
199		p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string][]stylePolicy)
200		p.globalStyles = make(map[string][]stylePolicy)
201		p.allowURLSchemes = make(map[string][]urlPolicy)
202		p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
203		p.setOfElementsToSkipContent = make(map[string]struct{})
204		p.initialized = true
205	}
206}
207
208// NewPolicy returns a blank policy with nothing allowed or permitted. This
209// is the recommended way to start building a policy and you should now use
210// AllowAttrs() and/or AllowElements() to construct the allowlist of HTML
211// elements and attributes.
212func NewPolicy() *Policy {
213
214	p := Policy{}
215
216	p.addDefaultElementsWithoutAttrs()
217	p.addDefaultSkipElementContent()
218
219	return &p
220}
221
222// AllowAttrs takes a range of HTML attribute names and returns an
223// attribute policy builder that allows you to specify the pattern and scope of
224// the allowed attribute.
225//
226// The attribute policy is only added to the core policy when either Globally()
227// or OnElements(...) are called.
228func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
229
230	p.init()
231
232	abp := attrPolicyBuilder{
233		p:          p,
234		allowEmpty: false,
235	}
236
237	for _, attrName := range attrNames {
238		abp.attrNames = append(abp.attrNames, strings.ToLower(attrName))
239	}
240
241	return &abp
242}
243
// AllowDataAttributes permits all data attributes. We can't specify the name
// of each attribute exactly as they are customized.
//
// NOTE: These values are not sanitized and applications that evaluate or process
// them without checking and verification of the input may be at risk if this option
// is enabled. This is a 'caveat emptor' option and the person enabling this option
// needs to fully understand the potential impact with regards to whatever application
// will be consuming the sanitized HTML afterwards, i.e. if you know you put a link in a
// data attribute and use that to automatically load some new window then you're giving
// the author of a HTML fragment the means to open a malicious destination automatically.
// Use with care!
//
// Unlike most other policy methods this one does not return the policy, so
// it cannot be chained.
func (p *Policy) AllowDataAttributes() {
	p.allowDataAttributes = true
}
258
// AllowComments allows comments.
//
// Please note that only one type of comment will be allowed by this: the
// standard HTML comment <!-- -->, which includes the use of that to permit
// conditionals as per https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN
//
// What is not permitted are CDATA XML comments, as the x/net/html package we depend
// on does not handle this fully and we are not choosing to take on that work:
// https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . If the x/net/html
// package changes this then these will be considered, otherwise if you AllowComments
// but provide a CDATA comment, then as per the documentation in x/net/html this will
// be treated as a plain HTML comment.
//
// Unlike most other policy methods this one does not return the policy, so
// it cannot be chained.
func (p *Policy) AllowComments() {
	p.allowComments = true
}
274
275// AllowNoAttrs says that attributes on element are optional.
276//
277// The attribute policy is only added to the core policy when OnElements(...)
278// are called.
279func (p *Policy) AllowNoAttrs() *attrPolicyBuilder {
280
281	p.init()
282
283	abp := attrPolicyBuilder{
284		p:          p,
285		allowEmpty: true,
286	}
287	return &abp
288}
289
// AllowNoAttrs says that attributes on element are optional. It sets the
// allowEmpty flag on an existing builder and returns the same builder so
// that calls can be chained.
//
// The attribute policy is only added to the core policy when OnElements(...)
// or OnElementsMatching(...) is called.
func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {

	abp.allowEmpty = true

	return abp
}
300
// Matching allows a regular expression to be applied to a nascent attribute
// policy, and returns the attribute policy. A later call replaces the
// pattern set by an earlier one.
func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {

	abp.regexp = regex

	return abp
}
309
310// OnElements will bind an attribute policy to a given range of HTML elements
311// and return the updated policy
312func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
313
314	for _, element := range elements {
315		element = strings.ToLower(element)
316
317		for _, attr := range abp.attrNames {
318
319			if _, ok := abp.p.elsAndAttrs[element]; !ok {
320				abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
321			}
322
323			ap := attrPolicy{}
324			if abp.regexp != nil {
325				ap.regexp = abp.regexp
326			}
327
328			abp.p.elsAndAttrs[element][attr] = append(abp.p.elsAndAttrs[element][attr], ap)
329		}
330
331		if abp.allowEmpty {
332			abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{}
333
334			if _, ok := abp.p.elsAndAttrs[element]; !ok {
335				abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
336			}
337		}
338	}
339
340	return abp.p
341}
342
343// OnElementsMatching will bind an attribute policy to all elements matching a given regex
344// and return the updated policy
345func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
346	for _, attr := range abp.attrNames {
347		if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
348			abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
349		}
350		ap := attrPolicy{}
351		if abp.regexp != nil {
352			ap.regexp = abp.regexp
353		}
354		abp.p.elsMatchingAndAttrs[regex][attr] = append(abp.p.elsMatchingAndAttrs[regex][attr], ap)
355	}
356
357	if abp.allowEmpty {
358		abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
359		if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
360			abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
361		}
362	}
363
364	return abp.p
365}
366
367// Globally will bind an attribute policy to all HTML elements and return the
368// updated policy
369func (abp *attrPolicyBuilder) Globally() *Policy {
370
371	for _, attr := range abp.attrNames {
372		if _, ok := abp.p.globalAttrs[attr]; !ok {
373			abp.p.globalAttrs[attr] = []attrPolicy{}
374		}
375
376		ap := attrPolicy{}
377		if abp.regexp != nil {
378			ap.regexp = abp.regexp
379		}
380
381		abp.p.globalAttrs[attr] = append(abp.p.globalAttrs[attr], ap)
382	}
383
384	return abp.p
385}
386
387// AllowStyles takes a range of CSS property names and returns a
388// style policy builder that allows you to specify the pattern and scope of
389// the allowed property.
390//
391// The style policy is only added to the core policy when either Globally()
392// or OnElements(...) are called.
393func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {
394
395	p.init()
396
397	abp := stylePolicyBuilder{
398		p: p,
399	}
400
401	for _, propertyName := range propertyNames {
402		abp.propertyNames = append(abp.propertyNames, strings.ToLower(propertyName))
403	}
404
405	return &abp
406}
407
// Matching allows a regular expression to be applied to a nascent style
// policy, and returns the style policy.
//
// When the policy is bound, the regexp is only used if neither a handler nor
// a non-empty enum has been set on the builder.
func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {

	spb.regexp = regex

	return spb
}
416
// MatchingEnum allows a list of allowed values to be applied to a nascent style
// policy, and returns the style policy.
//
// When the policy is bound, the enum is only used if no handler has been set
// on the builder; it takes precedence over a regexp. The slice is stored as
// passed, without copying.
func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {

	spb.enum = enum

	return spb
}
425
// MatchingHandler allows a handler to be applied to a nascent style
// policy, and returns the style policy.
//
// When the policy is bound, a non-nil handler takes precedence over any enum
// or regexp set on the builder.
func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {

	spb.handler = handler

	return spb
}
434
435// OnElements will bind a style policy to a given range of HTML elements
436// and return the updated policy
437func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
438
439	for _, element := range elements {
440		element = strings.ToLower(element)
441
442		for _, attr := range spb.propertyNames {
443
444			if _, ok := spb.p.elsAndStyles[element]; !ok {
445				spb.p.elsAndStyles[element] = make(map[string][]stylePolicy)
446			}
447
448			sp := stylePolicy{}
449			if spb.handler != nil {
450				sp.handler = spb.handler
451			} else if len(spb.enum) > 0 {
452				sp.enum = spb.enum
453			} else if spb.regexp != nil {
454				sp.regexp = spb.regexp
455			} else {
456				sp.handler = css.GetDefaultHandler(attr)
457			}
458			spb.p.elsAndStyles[element][attr] = append(spb.p.elsAndStyles[element][attr], sp)
459		}
460	}
461
462	return spb.p
463}
464
465// OnElementsMatching will bind a style policy to any HTML elements matching the pattern
466// and return the updated policy
467func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
468
469	for _, attr := range spb.propertyNames {
470
471		if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
472			spb.p.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy)
473		}
474
475		sp := stylePolicy{}
476		if spb.handler != nil {
477			sp.handler = spb.handler
478		} else if len(spb.enum) > 0 {
479			sp.enum = spb.enum
480		} else if spb.regexp != nil {
481			sp.regexp = spb.regexp
482		} else {
483			sp.handler = css.GetDefaultHandler(attr)
484		}
485		spb.p.elsMatchingAndStyles[regex][attr] = append(spb.p.elsMatchingAndStyles[regex][attr], sp)
486	}
487
488	return spb.p
489}
490
491// Globally will bind a style policy to all HTML elements and return the
492// updated policy
493func (spb *stylePolicyBuilder) Globally() *Policy {
494
495	for _, attr := range spb.propertyNames {
496		if _, ok := spb.p.globalStyles[attr]; !ok {
497			spb.p.globalStyles[attr] = []stylePolicy{}
498		}
499
500		// Use only one strategy for validating styles, fallback to default
501		sp := stylePolicy{}
502		if spb.handler != nil {
503			sp.handler = spb.handler
504		} else if len(spb.enum) > 0 {
505			sp.enum = spb.enum
506		} else if spb.regexp != nil {
507			sp.regexp = spb.regexp
508		} else {
509			sp.handler = css.GetDefaultHandler(attr)
510		}
511		spb.p.globalStyles[attr] = append(spb.p.globalStyles[attr], sp)
512	}
513
514	return spb.p
515}
516
517// AllowElements will append HTML elements to the allowlist without applying an
518// attribute policy to those elements (the elements are permitted
519// sans-attributes)
520func (p *Policy) AllowElements(names ...string) *Policy {
521	p.init()
522
523	for _, element := range names {
524		element = strings.ToLower(element)
525
526		if _, ok := p.elsAndAttrs[element]; !ok {
527			p.elsAndAttrs[element] = make(map[string][]attrPolicy)
528		}
529	}
530
531	return p
532}
533
534// AllowElementsMatching will append HTML elements to the allowlist if they
535// match a regexp.
536func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
537	p.init()
538	if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
539		p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
540	}
541	return p
542}
543
544// RequireNoFollowOnLinks will result in all a, area, link tags having a
545// rel="nofollow"added to them if one does not already exist
546//
547// Note: This requires p.RequireParseableURLs(true) and will enable it.
548func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {
549
550	p.requireNoFollow = require
551	p.requireParseableURLs = true
552
553	return p
554}
555
556// RequireNoFollowOnFullyQualifiedLinks will result in all a, area, and link
557// tags that point to a non-local destination (i.e. starts with a protocol and
558// has a host) having a rel="nofollow" added to them if one does not already
559// exist
560//
561// Note: This requires p.RequireParseableURLs(true) and will enable it.
562func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {
563
564	p.requireNoFollowFullyQualifiedLinks = require
565	p.requireParseableURLs = true
566
567	return p
568}
569
570// RequireNoReferrerOnLinks will result in all a, area, and link tags having a
571// rel="noreferrrer" added to them if one does not already exist
572//
573// Note: This requires p.RequireParseableURLs(true) and will enable it.
574func (p *Policy) RequireNoReferrerOnLinks(require bool) *Policy {
575
576	p.requireNoReferrer = require
577	p.requireParseableURLs = true
578
579	return p
580}
581
582// RequireNoReferrerOnFullyQualifiedLinks will result in all a, area, and link
583// tags that point to a non-local destination (i.e. starts with a protocol and
584// has a host) having a rel="noreferrer" added to them if one does not already
585// exist
586//
587// Note: This requires p.RequireParseableURLs(true) and will enable it.
588func (p *Policy) RequireNoReferrerOnFullyQualifiedLinks(require bool) *Policy {
589
590	p.requireNoReferrerFullyQualifiedLinks = require
591	p.requireParseableURLs = true
592
593	return p
594}
595
// RequireCrossOriginAnonymous will result in all audio, img, link, script, and
// video tags having a crossorigin="anonymous" added to them if one does not
// already exist.
//
// It only records the flag on the policy and returns the policy for chaining.
func (p *Policy) RequireCrossOriginAnonymous(require bool) *Policy {

	p.requireCrossOriginAnonymous = require

	return p
}
605
606// AddTargetBlankToFullyQualifiedLinks will result in all a, area and link tags
607// that point to a non-local destination (i.e. starts with a protocol and has a
608// host) having a target="_blank" added to them if one does not already exist
609//
610// Note: This requires p.RequireParseableURLs(true) and will enable it.
611func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy {
612
613	p.addTargetBlankToFullyQualifiedLinks = require
614	p.requireParseableURLs = true
615
616	return p
617}
618
// RequireParseableURLs will result in all URLs requiring that they be parseable
// by "net/url" url.Parse()
// This applies to:
// - a.href
// - area.href
// - blockquote.cite
// - img.src
// - link.href
// - script.src
//
// Several other policy methods in this file enable this flag as a side
// effect.
func (p *Policy) RequireParseableURLs(require bool) *Policy {

	p.requireParseableURLs = require

	return p
}
634
635// AllowRelativeURLs enables RequireParseableURLs and then permits URLs that
636// are parseable, have no schema information and url.IsAbs() returns false
637// This permits local URLs
638func (p *Policy) AllowRelativeURLs(require bool) *Policy {
639
640	p.RequireParseableURLs(true)
641	p.allowRelativeURLs = require
642
643	return p
644}
645
646// AllowURLSchemes will append URL schemes to the allowlist
647// Example: p.AllowURLSchemes("mailto", "http", "https")
648func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
649	p.init()
650
651	p.RequireParseableURLs(true)
652
653	for _, scheme := range schemes {
654		scheme = strings.ToLower(scheme)
655
656		// Allow all URLs with matching scheme.
657		p.allowURLSchemes[scheme] = nil
658	}
659
660	return p
661}
662
663// AllowURLSchemeWithCustomPolicy will append URL schemes with
664// a custom URL policy to the allowlist.
665// Only the URLs with matching schema and urlPolicy(url)
666// returning true will be allowed.
667func (p *Policy) AllowURLSchemeWithCustomPolicy(
668	scheme string,
669	urlPolicy func(url *url.URL) (allowUrl bool),
670) *Policy {
671
672	p.init()
673
674	p.RequireParseableURLs(true)
675
676	scheme = strings.ToLower(scheme)
677
678	p.allowURLSchemes[scheme] = append(p.allowURLSchemes[scheme], urlPolicy)
679
680	return p
681}
682
// AddSpaceWhenStrippingTag states whether to add a single space " " when
// removing tags that are not allowed by the policy.
//
// This is useful if you expect to strip tags in dense markup and may lose the
// value of whitespace.
//
// For example: "<p>Hello</p><p>World</p>" would be sanitized to "HelloWorld"
// with the default value of false, but you may wish to sanitize this to
// " Hello  World " by setting AddSpaceWhenStrippingTag to true as this would
// retain the intent of the text.
func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy {

	p.addSpaces = allow

	return p
}
699
700// SkipElementsContent adds the HTML elements whose tags is needed to be removed
701// with its content.
702func (p *Policy) SkipElementsContent(names ...string) *Policy {
703
704	p.init()
705
706	for _, element := range names {
707		element = strings.ToLower(element)
708
709		if _, ok := p.setOfElementsToSkipContent[element]; !ok {
710			p.setOfElementsToSkipContent[element] = struct{}{}
711		}
712	}
713
714	return p
715}
716
717// AllowElementsContent marks the HTML elements whose content should be
718// retained after removing the tag.
719func (p *Policy) AllowElementsContent(names ...string) *Policy {
720
721	p.init()
722
723	for _, element := range names {
724		delete(p.setOfElementsToSkipContent, strings.ToLower(element))
725	}
726
727	return p
728}
729
// AllowUnsafe permits fundamentally unsafe elements.
//
// If false (default) then elements such as `style` and `script` will not be
// permitted even if declared in a policy. These elements when combined with
// untrusted input cannot be safely handled by bluemonday at this point in
// time.
//
// If true then `style` and `script` would be permitted by bluemonday if a
// policy declares them. However this is not recommended under any circumstance
// and can lead to XSS being rendered thus defeating the purpose of using a
// HTML sanitizer.
//
// The policy is returned to allow chaining.
func (p *Policy) AllowUnsafe(allowUnsafe bool) *Policy {
	p.init()
	p.allowUnsafe = allowUnsafe
	return p
}
746
747// addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid
748// without any attributes to an internal map.
749// i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr
750// is mandatory
751func (p *Policy) addDefaultElementsWithoutAttrs() {
752	p.init()
753
754	p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{}
755	p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{}
756	p.setOfElementsAllowedWithoutAttrs["address"] = struct{}{}
757	p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{}
758	p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{}
759	p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{}
760	p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{}
761	p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{}
762	p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{}
763	p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{}
764	p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{}
765	p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{}
766	p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{}
767	p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{}
768	p.setOfElementsAllowedWithoutAttrs["center"] = struct{}{}
769	p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{}
770	p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{}
771	p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{}
772	p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{}
773	p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{}
774	p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{}
775	p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{}
776	p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{}
777	p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{}
778	p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{}
779	p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{}
780	p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{}
781	p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{}
782	p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{}
783	p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{}
784	p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{}
785	p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{}
786	p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{}
787	p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{}
788	p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{}
789	p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{}
790	p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{}
791	p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{}
792	p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{}
793	p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{}
794	p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{}
795	p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{}
796	p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{}
797	p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{}
798	p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{}
799	p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{}
800	p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{}
801	p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{}
802	p.setOfElementsAllowedWithoutAttrs["marquee"] = struct{}{}
803	p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{}
804	p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{}
805	p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{}
806	p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{}
807	p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{}
808	p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{}
809	p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{}
810	p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{}
811	p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{}
812	p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{}
813	p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{}
814	p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{}
815	p.setOfElementsAllowedWithoutAttrs["script"] = struct{}{}
816	p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{}
817	p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{}
818	p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{}
819	p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{}
820	p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{}
821	p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{}
822	p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{}
823	p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{}
824	p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{}
825	p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{}
826	p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{}
827	p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{}
828	p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{}
829	p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{}
830	p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{}
831	p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{}
832	p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{}
833	p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{}
834	p.setOfElementsAllowedWithoutAttrs["title"] = struct{}{}
835	p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{}
836	p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{}
837	p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{}
838	p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{}
839	p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{}
840	p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{}
841	p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{}
842	p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{}
843
844}
845
846// addDefaultSkipElementContent adds the HTML elements that we should skip
847// rendering the character content of, if the element itself is not allowed.
848// This is all character data that the end user would not normally see.
849// i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or
850// anything else until we encounter the closing </script> tag.
851func (p *Policy) addDefaultSkipElementContent() {
852	p.init()
853
854	p.setOfElementsToSkipContent["frame"] = struct{}{}
855	p.setOfElementsToSkipContent["frameset"] = struct{}{}
856	p.setOfElementsToSkipContent["iframe"] = struct{}{}
857	p.setOfElementsToSkipContent["noembed"] = struct{}{}
858	p.setOfElementsToSkipContent["noframes"] = struct{}{}
859	p.setOfElementsToSkipContent["noscript"] = struct{}{}
860	p.setOfElementsToSkipContent["nostyle"] = struct{}{}
861	p.setOfElementsToSkipContent["object"] = struct{}{}
862	p.setOfElementsToSkipContent["script"] = struct{}{}
863	p.setOfElementsToSkipContent["style"] = struct{}{}
864	p.setOfElementsToSkipContent["title"] = struct{}{}
865}
866