1{-# LANGUAGE OverloadedStrings, CPP #-}
2module Text.HTML.SanitizeXSS.Css (
3  sanitizeCSS
4#ifdef TEST
5, allowedCssAttributeValue
6#endif
7  ) where
8
9import Data.Text (Text)
10import qualified Data.Text as T
11import Data.Attoparsec.Text
12import Data.Text.Lazy.Builder (toLazyText)
13import Data.Text.Lazy (toStrict)
14import Data.Set (member, fromList, Set)
15import Data.Char (isDigit)
16import Control.Applicative ((<|>), pure)
17import Text.CSS.Render (renderAttrs)
18import Text.CSS.Parse (parseAttrs)
19import Prelude hiding (takeWhile)
20
21-- import FileLocation (debug, debugM)
22
23
24-- this is a direct translation from sanitizer.py, except
25--   sanitizer.py filters out url(), but this is redundant
26sanitizeCSS :: Text -> Text
27sanitizeCSS css = toStrict . toLazyText .
28    renderAttrs . filter isSanitaryAttr . filterUrl $ parseAttributes
29  where
30    filterUrl :: [(Text,Text)] -> [(Text,Text)]
31    filterUrl = map filterUrlAttribute
32      where
33        filterUrlAttribute :: (Text, Text) -> (Text, Text)
34        filterUrlAttribute (prop,value) =
35            case parseOnly rejectUrl value of
36              Left _ -> (prop,value)
37              Right noUrl -> filterUrlAttribute (prop, noUrl)
38
39        rejectUrl = do
40          pre <- manyTill anyChar (string "url")
41          skipMany space
42          _<-char '('
43          skipWhile (/= ')')
44          _<-char ')'
45          rest <- takeText
46          return $ T.append (T.pack pre) rest
47
48
49    parseAttributes = case parseAttrs css of
50      Left _ -> []
51      Right as -> as
52
53    isSanitaryAttr (_, "") = False
54    isSanitaryAttr ("",_)  = False
55    isSanitaryAttr (prop, value)
56      | prop `member` allowed_css_properties = True
57      | (T.takeWhile (/= '-') prop) `member` allowed_css_unit_properties &&
58          all allowedCssAttributeValue (T.words value) = True
59      | prop `member` allowed_svg_properties = True
60      | otherwise = False
61
62    allowed_css_unit_properties :: Set Text
63    allowed_css_unit_properties = fromList ["background","border","margin","padding"]
64
65allowedCssAttributeValue :: Text -> Bool
66allowedCssAttributeValue val =
67  val `member` allowed_css_keywords ||
68    case parseOnly allowedCssAttributeParser val of
69        Left _ -> False
70        Right b -> b
71  where
72    allowedCssAttributeParser = do
73      rgb <|> hex <|> rgb <|> cssUnit
74
75    aToF = fromList "abcdef"
76
77    hex = do
78      _ <- char '#'
79      hx <- takeText
80      return $ T.all (\c -> isDigit c || (c `member` aToF)) hx
81
82    -- should have used sepBy (symbol ",")
83    rgb = do
84      _<- string "rgb("
85      skipMany1 digit >> skipOk (== '%')
86      skip (== ',')
87      skipMany digit >> skipOk (== '%')
88      skip (== ',')
89      skipMany digit >> skipOk (== '%')
90      skip (== ')')
91      return True
92
93    cssUnit = do
94      skip isDigit
95      skipOk isDigit
96      skipOk (== '.')
97      skipOk isDigit >> skipOk isDigit
98      skipSpace
99      unit <- takeText
100      return $ T.null unit || unit `member` allowed_css_attribute_value_units
101
102skipOk :: (Char -> Bool) -> Parser ()
103skipOk p = skip p <|> pure ()
104
105allowed_css_attribute_value_units :: Set Text
106allowed_css_attribute_value_units = fromList
107  [ "cm", "em", "ex", "in", "mm", "pc", "pt", "px", "%", ",", "\\"]
108
109allowed_css_properties :: Set Text
110allowed_css_properties = fromList acceptable_css_properties
111  where
112    acceptable_css_properties = ["azimuth", "background-color",
113      "border-bottom-color", "border-collapse", "border-color",
114      "border-left-color", "border-right-color", "border-top-color", "clear",
115      "color", "cursor", "direction", "display", "elevation", "float", "font",
116      "font-family", "font-size", "font-style", "font-variant", "font-weight",
117      "height", "letter-spacing", "line-height", "overflow", "pause",
118      "pause-after", "pause-before", "pitch", "pitch-range", "richness",
119      "speak", "speak-header", "speak-numeral", "speak-punctuation",
120      "speech-rate", "stress", "text-align", "text-decoration", "text-indent",
121      "unicode-bidi", "vertical-align", "voice-family", "volume",
122      "white-space", "width"]
123
124allowed_css_keywords :: Set Text
125allowed_css_keywords = fromList acceptable_css_keywords
126  where
127    acceptable_css_keywords = ["auto", "aqua", "black", "block", "blue",
128      "bold", "both", "bottom", "brown", "center", "collapse", "dashed",
129      "dotted", "fuchsia", "gray", "green", "!important", "italic", "left",
130      "lime", "maroon", "medium", "none", "navy", "normal", "nowrap", "olive",
131      "pointer", "purple", "red", "right", "solid", "silver", "teal", "top",
132      "transparent", "underline", "white", "yellow"]
133
134-- used in css filtering
135allowed_svg_properties :: Set Text
136allowed_svg_properties = fromList acceptable_svg_properties
137  where
138    acceptable_svg_properties = [ "fill", "fill-opacity", "fill-rule",
139        "stroke", "stroke-width", "stroke-linecap", "stroke-linejoin",
140        "stroke-opacity"]
141