1// Copyright (c) 2014, David Kitchen <david@buro9.com>
2//
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice, this
9//   list of conditions and the following disclaimer.
10//
11// * Redistributions in binary form must reproduce the above copyright notice,
12//   this list of conditions and the following disclaimer in the documentation
13//   and/or other materials provided with the distribution.
14//
15// * Neither the name of the organisation (Microcosm) nor the names of its
16//   contributors may be used to endorse or promote products derived from
17//   this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30package bluemonday
31
32import (
33	"regexp"
34)
35
36// StrictPolicy returns an empty policy, which will effectively strip all HTML
37// elements and their attributes from a document.
38func StrictPolicy() *Policy {
39	return NewPolicy()
40}
41
42// StripTagsPolicy is DEPRECATED. Use StrictPolicy instead.
43func StripTagsPolicy() *Policy {
44	return StrictPolicy()
45}
46
47// UGCPolicy returns a policy aimed at user generated content that is a result
48// of HTML WYSIWYG tools and Markdown conversions.
49//
50// This is expected to be a fairly rich document where as much markup as
51// possible should be retained. Markdown permits raw HTML so we are basically
52// providing a policy to sanitise HTML5 documents safely but with the
53// least intrusion on the formatting expectations of the user.
54func UGCPolicy() *Policy {
55
56	p := NewPolicy()
57
58	///////////////////////
59	// Global attributes //
60	///////////////////////
61
62	// "class" is not permitted as we are not allowing users to style their own
63	// content
64
65	p.AllowStandardAttributes()
66
67	//////////////////////////////
68	// Global URL format policy //
69	//////////////////////////////
70
71	p.AllowStandardURLs()
72
73	////////////////////////////////
74	// Declarations and structure //
75	////////////////////////////////
76
77	// "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
78	// expecting user generated content to be a fragment of HTML and not a full
79	// document.
80
81	//////////////////////////
82	// Sectioning root tags //
83	//////////////////////////
84
85	// "article" and "aside" are permitted and takes no attributes
86	p.AllowElements("article", "aside")
87
88	// "body" is not permitted as we are expecting user generated content to be a fragment
89	// of HTML and not a full document.
90
91	// "details" is permitted, including the "open" attribute which can either
92	// be blank or the value "open".
93	p.AllowAttrs(
94		"open",
95	).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
96
97	// "fieldset" is not permitted as we are not allowing forms to be created.
98
99	// "figure" is permitted and takes no attributes
100	p.AllowElements("figure")
101
102	// "nav" is not permitted as it is assumed that the site (and not the user)
103	// has defined navigation elements
104
105	// "section" is permitted and takes no attributes
106	p.AllowElements("section")
107
108	// "summary" is permitted and takes no attributes
109	p.AllowElements("summary")
110
111	//////////////////////////
112	// Headings and footers //
113	//////////////////////////
114
115	// "footer" is not permitted as we expect user content to be a fragment and
116	// not structural to this extent
117
118	// "h1" through "h6" are permitted and take no attributes
119	p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
120
121	// "header" is not permitted as we expect user content to be a fragment and
122	// not structural to this extent
123
124	// "hgroup" is permitted and takes no attributes
125	p.AllowElements("hgroup")
126
127	/////////////////////////////////////
128	// Content grouping and separating //
129	/////////////////////////////////////
130
131	// "blockquote" is permitted, including the "cite" attribute which must be
132	// a standard URL.
133	p.AllowAttrs("cite").OnElements("blockquote")
134
135	// "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
136	p.AllowElements("br", "div", "hr", "p", "span", "wbr")
137
138	///////////
139	// Links //
140	///////////
141
142	// "a" is permitted
143	p.AllowAttrs("href").OnElements("a")
144
145	// "area" is permitted along with the attributes that map image maps work
146	p.AllowAttrs("name").Matching(
147		regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
148	).OnElements("map")
149	p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
150	p.AllowAttrs("coords").Matching(
151		regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
152	).OnElements("area")
153	p.AllowAttrs("href").OnElements("area")
154	p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
155	p.AllowAttrs("shape").Matching(
156		regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
157	).OnElements("area")
158	p.AllowAttrs("usemap").Matching(
159		regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
160	).OnElements("img")
161
162	// "link" is not permitted
163
164	/////////////////////
165	// Phrase elements //
166	/////////////////////
167
168	// The following are all inline phrasing elements
169	p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
170		"figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
171
172	// "q" is permitted and "cite" is a URL and handled by URL policies
173	p.AllowAttrs("cite").OnElements("q")
174
175	// "time" is permitted
176	p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
177
178	////////////////////
179	// Style elements //
180	////////////////////
181
182	// block and inline elements that impart no semantic meaning but style the
183	// document
184	p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
185
186	// "style" is not permitted as we are not yet sanitising CSS and it is an
187	// XSS attack vector
188
189	//////////////////////
190	// HTML5 Formatting //
191	//////////////////////
192
193	// "bdi" "bdo" are permitted
194	p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
195
196	// "rp" "rt" "ruby" are permitted
197	p.AllowElements("rp", "rt", "ruby")
198
199	///////////////////////////
200	// HTML5 Change tracking //
201	///////////////////////////
202
203	// "del" "ins" are permitted
204	p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
205	p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
206
207	///////////
208	// Lists //
209	///////////
210
211	p.AllowLists()
212
213	////////////
214	// Tables //
215	////////////
216
217	p.AllowTables()
218
219	///////////
220	// Forms //
221	///////////
222
223	// By and large, forms are not permitted. However there are some form
224	// elements that can be used to present data, and we do permit those
225	//
226	// "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
227	// "textarea" "optgroup" "option" are all not permitted
228
229	// "meter" is permitted
230	p.AllowAttrs(
231		"value",
232		"min",
233		"max",
234		"low",
235		"high",
236		"optimum",
237	).Matching(Number).OnElements("meter")
238
239	// "progress" is permitted
240	p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
241
242	//////////////////////
243	// Embedded content //
244	//////////////////////
245
246	// Vast majority not permitted
247	// "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
248	// "video" are all not permitted
249
250	p.AllowImages()
251
252	return p
253}
254