1{-# LANGUAGE OverloadedStrings #-} 2-- | Sanatize HTML to prevent XSS attacks. 3-- 4-- See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details. 5module Text.HTML.SanitizeXSS 6 ( 7 -- * Sanitize 8 sanitize 9 , sanitizeBalance 10 , sanitizeXSS 11 12 -- * Custom filtering 13 , filterTags 14 , safeTags 15 , safeTagsCustom 16 , balanceTags 17 18 -- * Utilities 19 , safeTagName 20 , sanitizeAttribute 21 , sanitaryURI 22 ) where 23 24import Text.HTML.SanitizeXSS.Css 25 26import Text.HTML.TagSoup 27 28import Data.Set (Set(), member, notMember, (\\), fromList, fromAscList) 29import Data.Char ( toLower ) 30import Data.Text (Text) 31import qualified Data.Text as T 32 33import Network.URI ( parseURIReference, URI (..), 34 isAllowedInURI, escapeURIString, uriScheme ) 35import Codec.Binary.UTF8.String ( encodeString ) 36 37import Data.Maybe (mapMaybe) 38 39 40-- | Sanitize HTML to prevent XSS attacks. This is equivalent to @filterTags safeTags@. 41sanitize :: Text -> Text 42sanitize = sanitizeXSS 43 44-- | alias of sanitize function 45sanitizeXSS :: Text -> Text 46sanitizeXSS = filterTags safeTags 47 48-- | Sanitize HTML to prevent XSS attacks and also make sure the tags are balanced. 49-- This is equivalent to @filterTags (balanceTags . safeTags)@. 50sanitizeBalance :: Text -> Text 51sanitizeBalance = filterTags (balanceTags . safeTags) 52 53-- | Filter which makes sure the tags are balanced. Use with 'filterTags' and 'safeTags' to create a custom filter. 54balanceTags :: [Tag Text] -> [Tag Text] 55balanceTags = balance [] 56 57-- | Parse the given text to a list of tags, apply the given filtering 58-- function, and render back to HTML. You can insert your own custom 59-- filtering, but make sure you compose your filtering function with 60-- 'safeTags' or 'safeTagsCustom'. 61filterTags :: ([Tag Text] -> [Tag Text]) -> Text -> Text 62filterTags f = renderTagsOptions renderOptions { 63 optMinimize = \x -> x `member` voidElems -- <img><img> converts to <img />, <a/> converts to <a></a> 64 } . f . canonicalizeTags . parseTags 65 66voidElems :: Set T.Text 67voidElems = fromAscList $ T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr" 68 69balance :: [Text] -- ^ unclosed tags 70 -> [Tag Text] -> [Tag Text] 71balance unclosed [] = map TagClose $ filter (`notMember` voidElems) unclosed 72balance (x:xs) tags'@(TagClose name:tags) 73 | x == name = TagClose name : balance xs tags 74 | x `member` voidElems = balance xs tags' 75 | otherwise = TagOpen name [] : TagClose name : balance (x:xs) tags 76balance unclosed (TagOpen name as : tags) = 77 TagOpen name as : balance (name : unclosed) tags 78balance unclosed (t:ts) = t : balance unclosed ts 79 80-- | Filters out unsafe tags and sanitizes attributes. Use with 81-- filterTags to create a custom filter. 82safeTags :: [Tag Text] -> [Tag Text] 83safeTags = safeTagsCustom safeTagName sanitizeAttribute 84 85-- | Filters out unsafe tags and sanitizes attributes, like 86-- 'safeTags', but uses custom functions for determining which tags 87-- are safe and for sanitizing attributes. This allows you to add or 88-- remove specific tags or attributes on the white list, or to use 89-- your own white list. 90-- 91-- @safeTagsCustom safeTagName sanitizeAttribute@ is equivalent to 92-- 'safeTags'. 93-- 94-- @since 0.3.6 95safeTagsCustom :: 96 (Text -> Bool) -- ^ Select safe tags, like 97 -- 'safeTagName' 98 -> ((Text, Text) -> Maybe (Text, Text)) -- ^ Sanitize attributes, 99 -- like 'sanitizeAttribute' 100 -> [Tag Text] -> [Tag Text] 101safeTagsCustom _ _ [] = [] 102safeTagsCustom safeName sanitizeAttr (t@(TagClose name):tags) 103 | safeName name = t : safeTagsCustom safeName sanitizeAttr tags 104 | otherwise = safeTagsCustom safeName sanitizeAttr tags 105safeTagsCustom safeName sanitizeAttr (TagOpen name attributes:tags) 106 | safeName name = TagOpen name (mapMaybe sanitizeAttr attributes) : 107 safeTagsCustom safeName sanitizeAttr tags 108 | otherwise = safeTagsCustom safeName sanitizeAttr tags 109safeTagsCustom n a (t:tags) = t : safeTagsCustom n a tags 110 111safeTagName :: Text -> Bool 112safeTagName tagname = tagname `member` sanitaryTags 113 114safeAttribute :: (Text, Text) -> Bool 115safeAttribute (name, value) = name `member` sanitaryAttributes && 116 (name `notMember` uri_attributes || sanitaryURI value) 117 118-- | low-level API if you have your own HTML parser. Used by safeTags. 119sanitizeAttribute :: (Text, Text) -> Maybe (Text, Text) 120sanitizeAttribute ("style", value) = 121 let css = sanitizeCSS value 122 in if T.null css then Nothing else Just ("style", css) 123sanitizeAttribute attr | safeAttribute attr = Just attr 124 | otherwise = Nothing 125 126 127-- | Returns @True@ if the specified URI is not a potential security risk. 128sanitaryURI :: Text -> Bool 129sanitaryURI u = 130 case parseURIReference (escapeURI $ T.unpack u) of 131 Just p -> (null (uriScheme p)) || 132 ((map toLower $ init $ uriScheme p) `member` safeURISchemes) 133 Nothing -> False 134 135 136-- | Escape unicode characters in a URI. Characters that are 137-- already valid in a URI, including % and ?, are left alone. 138escapeURI :: String -> String 139escapeURI = escapeURIString isAllowedInURI . encodeString 140 141safeURISchemes :: Set String 142safeURISchemes = fromList acceptable_protocols 143 144sanitaryTags :: Set Text 145sanitaryTags = fromList (acceptable_elements ++ mathml_elements ++ svg_elements) 146 \\ (fromList svg_allow_local_href) -- extra filtering not implemented 147 148sanitaryAttributes :: Set Text 149sanitaryAttributes = fromList (allowed_html_uri_attributes ++ acceptable_attributes ++ mathml_attributes ++ svg_attributes) 150 \\ (fromList svg_attr_val_allows_ref) -- extra unescaping not implemented 151 152allowed_html_uri_attributes :: [Text] 153allowed_html_uri_attributes = ["href", "src", "cite", "action", "longdesc"] 154 155uri_attributes :: Set Text 156uri_attributes = fromList $ allowed_html_uri_attributes ++ ["xlink:href", "xml:base"] 157 158acceptable_elements :: [Text] 159acceptable_elements = ["a", "abbr", "acronym", "address", "area", 160 "article", "aside", "audio", "b", "big", "blockquote", "br", "button", 161 "canvas", "caption", "center", "cite", "code", "col", "colgroup", 162 "command", "datagrid", "datalist", "dd", "del", "details", "dfn", 163 "dialog", "dir", "div", "dl", "dt", "em", "event-source", "fieldset", 164 "figcaption", "figure", "footer", "font", "form", "header", "h1", "h2", 165 "h3", "h4", "h5", "h6", "hr", "i", "img", "input", "ins", "keygen", 166 "kbd", "label", "legend", "li", "m", "main", "map", "menu", "meter", "multicol", 167 "nav", "nextid", "ol", "output", "optgroup", "option", "p", "pre", 168 "progress", "q", "s", "samp", "section", "select", "small", "sound", 169 "source", "spacer", "span", "strike", "strong", "sub", "sup", "table", 170 "tbody", "td", "textarea", "time", "tfoot", "th", "thead", "tr", "tt", 171 "u", "ul", "var", "video"] 172 173mathml_elements :: [Text] 174mathml_elements = ["maction", "math", "merror", "mfrac", "mi", 175 "mmultiscripts", "mn", "mo", "mover", "mpadded", "mphantom", 176 "mprescripts", "mroot", "mrow", "mspace", "msqrt", "mstyle", "msub", 177 "msubsup", "msup", "mtable", "mtd", "mtext", "mtr", "munder", 178 "munderover", "none"] 179 180-- this should include altGlyph I think 181svg_elements :: [Text] 182svg_elements = ["a", "animate", "animateColor", "animateMotion", 183 "animateTransform", "clipPath", "circle", "defs", "desc", "ellipse", 184 "font-face", "font-face-name", "font-face-src", "g", "glyph", "hkern", 185 "linearGradient", "line", "marker", "metadata", "missing-glyph", 186 "mpath", "path", "polygon", "polyline", "radialGradient", "rect", 187 "set", "stop", "svg", "switch", "text", "title", "tspan", "use"] 188 189acceptable_attributes :: [Text] 190acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey", 191 "align", "alt", "autocomplete", "autofocus", "axis", 192 "background", "balance", "bgcolor", "bgproperties", "border", 193 "bordercolor", "bordercolordark", "bordercolorlight", "bottompadding", 194 "cellpadding", "cellspacing", "ch", "challenge", "char", "charoff", 195 "choff", "charset", "checked", "class", "clear", "color", 196 "cols", "colspan", "compact", "contenteditable", "controls", "coords", 197 -- "data", TODO: allow this with further filtering 198 "datafld", "datapagesize", "datasrc", "datetime", "default", 199 "delay", "dir", "disabled", "draggable", "dynsrc", "enctype", "end", 200 "face", "for", "form", "frame", "galleryimg", "gutter", "headers", 201 "height", "hidefocus", "hidden", "high", "hreflang", "hspace", 202 "icon", "id", "inputmode", "ismap", "keytype", "label", "leftspacing", 203 "lang", "list", "loop", "loopcount", "loopend", 204 "loopstart", "low", "lowsrc", "max", "maxlength", "media", "method", 205 "min", "multiple", "name", "nohref", "noshade", "nowrap", "open", 206 "optimum", "pattern", "ping", "point-size", "prompt", "pqg", 207 "radiogroup", "readonly", "rel", "repeat-max", "repeat-min", 208 "replace", "required", "rev", "rightspacing", "rows", "rowspan", 209 "rules", "scope", "selected", "shape", "size", "span", "start", 210 "step", 211 "style", -- gets further filtering 212 "summary", "suppress", "tabindex", "target", 213 "template", "title", "toppadding", "type", "unselectable", "usemap", 214 "urn", "valign", "value", "variable", "volume", "vspace", "vrml", 215 "width", "wrap", "xml:lang"] 216 217acceptable_protocols :: [String] 218acceptable_protocols = [ "ed2k", "ftp", "http", "https", "irc", 219 "mailto", "news", "gopher", "nntp", "telnet", "webcal", 220 "xmpp", "callto", "feed", "urn", "aim", "rsync", "tag", 221 "ssh", "sftp", "rtsp", "afs" ] 222 223mathml_attributes :: [Text] 224mathml_attributes = ["actiontype", "align", "columnalign", "columnalign", 225 "columnalign", "columnlines", "columnspacing", "columnspan", "depth", 226 "display", "displaystyle", "equalcolumns", "equalrows", "fence", 227 "fontstyle", "fontweight", "frame", "height", "linethickness", "lspace", 228 "mathbackground", "mathcolor", "mathvariant", "mathvariant", "maxsize", 229 "minsize", "other", "rowalign", "rowalign", "rowalign", "rowlines", 230 "rowspacing", "rowspan", "rspace", "scriptlevel", "selection", 231 "separator", "stretchy", "width", "width", "xlink:href", "xlink:show", 232 "xlink:type", "xmlns", "xmlns:xlink"] 233 234svg_attributes :: [Text] 235svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic", 236 "arabic-form", "ascent", "attributeName", "attributeType", 237 "baseProfile", "bbox", "begin", "by", "calcMode", "cap-height", 238 "class", "clip-path", "color", "color-rendering", "content", "cx", 239 "cy", "d", "dx", "dy", "descent", "display", "dur", "end", "fill", 240 "fill-opacity", "fill-rule", "font-family", "font-size", 241 "font-stretch", "font-style", "font-variant", "font-weight", "from", 242 "fx", "fy", "g1", "g2", "glyph-name", "gradientUnits", "hanging", 243 "height", "horiz-adv-x", "horiz-origin-x", "id", "ideographic", "k", 244 "keyPoints", "keySplines", "keyTimes", "lang", "marker-end", 245 "marker-mid", "marker-start", "markerHeight", "markerUnits", 246 "markerWidth", "mathematical", "max", "min", "name", "offset", 247 "opacity", "orient", "origin", "overline-position", 248 "overline-thickness", "panose-1", "path", "pathLength", "points", 249 "preserveAspectRatio", "r", "refX", "refY", "repeatCount", 250 "repeatDur", "requiredExtensions", "requiredFeatures", "restart", 251 "rotate", "rx", "ry", "slope", "stemh", "stemv", "stop-color", 252 "stop-opacity", "strikethrough-position", "strikethrough-thickness", 253 "stroke", "stroke-dasharray", "stroke-dashoffset", "stroke-linecap", 254 "stroke-linejoin", "stroke-miterlimit", "stroke-opacity", 255 "stroke-width", "systemLanguage", "target", "text-anchor", "to", 256 "transform", "type", "u1", "u2", "underline-position", 257 "underline-thickness", "unicode", "unicode-range", "units-per-em", 258 "values", "version", "viewBox", "visibility", "width", "widths", "x", 259 "x-height", "x1", "x2", "xlink:actuate", "xlink:arcrole", 260 "xlink:href", "xlink:role", "xlink:show", "xlink:title", "xlink:type", 261 "xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", "y", 262 "y1", "y2", "zoomAndPan"] 263 264-- the values for these need to be escaped 265svg_attr_val_allows_ref :: [Text] 266svg_attr_val_allows_ref = ["clip-path", "color-profile", "cursor", "fill", 267 "filter", "marker", "marker-start", "marker-mid", "marker-end", 268 "mask", "stroke"] 269 270svg_allow_local_href :: [Text] 271svg_allow_local_href = ["altGlyph", "animate", "animateColor", 272 "animateMotion", "animateTransform", "cursor", "feImage", "filter", 273 "linearGradient", "pattern", "radialGradient", "textpath", "tref", 274 "set", "use"] 275 276