1from __future__ import unicode_literals 2 3from functools import partial 4from timeit import timeit 5 6import pytest 7 8from bleach import clean 9 10 11clean = partial(clean, tags=['p'], attributes=['style']) 12 13 14@pytest.mark.parametrize('data, styles, expected', [ 15 ( 16 '<p style="font-family: Arial; color: red; float: left; background-color: red;">bar</p>', 17 ['color'], 18 '<p style="color: red;">bar</p>' 19 ), 20 ( 21 '<p style="border: 1px solid blue; color: red; float: left;">bar</p>', 22 ['color'], 23 '<p style="color: red;">bar</p>' 24 ), 25 ( 26 '<p style="border: 1px solid blue; color: red; float: left;">bar</p>', 27 ['color', 'float'], 28 '<p style="color: red; float: left;">bar</p>' 29 ), 30 ( 31 '<p style="color: red; float: left; padding: 1em;">bar</p>', 32 ['color', 'float'], 33 '<p style="color: red; float: left;">bar</p>' 34 ), 35 ( 36 '<p style="color: red; float: left; padding: 1em;">bar</p>', 37 ['color'], 38 '<p style="color: red;">bar</p>' 39 ), 40 # Handle leading - in attributes 41 # regressed with the fix for bug 1623633 42 pytest.param( 43 '<p style="cursor: -moz-grab;">bar</p>', 44 ['cursor'], 45 '<p style="cursor: -moz-grab;">bar</p>', 46 marks=pytest.mark.xfail, 47 ), 48 # Handle () in attributes 49 ( 50 '<p style="color: hsl(30,100%,50%);">bar</p>', 51 ['color'], 52 '<p style="color: hsl(30,100%,50%);">bar</p>', 53 ), 54 ( 55 '<p style="color: rgba(255,0,0,0.4);">bar</p>', 56 ['color'], 57 '<p style="color: rgba(255,0,0,0.4);">bar</p>', 58 ), 59 # Handle ' in attributes 60 # regressed with the fix for bug 1623633 61 pytest.param( 62 '<p style="text-overflow: \',\' ellipsis;">bar</p>', 63 ['text-overflow'], 64 '<p style="text-overflow: \',\' ellipsis;">bar</p>', 65 marks=pytest.mark.xfail, 66 ), 67 # Handle " in attributes 68 # regressed with the fix for bug 1623633 69 pytest.param( 70 '<p style=\'text-overflow: "," ellipsis;\'>bar</p>', 71 ['text-overflow'], 72 '<p style=\'text-overflow: "," ellipsis;\'>bar</p>', 73 marks=pytest.mark.xfail, 74 ), 75 ( 76 '<p style=\'font-family: "Arial";\'>bar</p>', 77 ['font-family'], 78 '<p style=\'font-family: "Arial";\'>bar</p>' 79 ), 80 # Handle non-ascii characters in attributes 81 ( 82 '<p style="font-family: \u30e1\u30a4\u30ea\u30aa; color: blue;">bar</p>', 83 ['color'], 84 '<p style="color: blue;">bar</p>' 85 ), 86]) 87def test_allowed_css(data, styles, expected): 88 assert clean(data, styles=styles) == expected 89 90 91def test_valid_css(): 92 """The sanitizer should fix missing CSS values.""" 93 styles = ['color', 'float'] 94 assert ( 95 clean('<p style="float: left; color: ">foo</p>', styles=styles) == 96 '<p style="float: left;">foo</p>' 97 ) 98 assert ( 99 clean('<p style="color: float: left;">foo</p>', styles=styles) == 100 '<p style="">foo</p>' 101 ) 102 103 104@pytest.mark.parametrize('data, expected', [ 105 # No url--unchanged 106 ( 107 '<p style="background: #00D;">foo</p>', 108 '<p style="background: #00D;">foo</p>' 109 ), 110 111 # Verify urls with no quotes, single quotes, and double quotes are all dropped 112 ( 113 '<p style="background: url(topbanner.png) #00D;">foo</p>', 114 '<p style="background: #00D;">foo</p>' 115 ), 116 ( 117 '<p style="background: url(\'topbanner.png\') #00D;">foo</p>', 118 '<p style="background: #00D;">foo</p>' 119 ), 120 ( 121 '<p style=\'background: url("topbanner.png") #00D;\'>foo</p>', 122 '<p style="background: #00D;">foo</p>' 123 ), 124 125 # Verify urls with spacing 126 ( 127 '<p style="background: url( \'topbanner.png\') #00D;">foo</p>', 128 '<p style="background: #00D;">foo</p>' 129 ), 130 ( 131 '<p style="background: url(\'topbanner.png\' ) #00D;">foo</p>', 132 '<p style="background: #00D;">foo</p>' 133 ), 134 ( 135 '<p style="background: url( \'topbanner.png\' ) #00D;">foo</p>', 136 '<p style="background: #00D;">foo</p>' 137 ), 138 ( 139 '<p style="background: url ( \'topbanner.png\' ) #00D;">foo</p>', 140 '<p style="background: #00D;">foo</p>' 141 ), 142 143 # Verify urls with character entities 144 ( 145 '<p style="background: url	(\'topbanner.png\') #00D;">foo</p>', 146 '<p style="background: #00D;">foo</p>' 147 ), 148 149]) 150def test_urls(data, expected): 151 assert clean(data, styles=['background']) == expected 152 153 154def test_style_hang(): 155 """The sanitizer should not hang on any inline styles""" 156 style = [ 157 'margin-top: 0px;', 158 'margin-right: 0px;', 159 'margin-bottom: 1.286em;', 160 'margin-left: 0px;', 161 'padding-top: 15px;', 162 'padding-right: 15px;', 163 'padding-bottom: 15px;', 164 'padding-left: 15px;', 165 'border-top-width: 1px;', 166 'border-right-width: 1px;', 167 'border-bottom-width: 1px;', 168 'border-left-width: 1px;', 169 'border-top-style: dotted;', 170 'border-right-style: dotted;', 171 'border-bottom-style: dotted;', 172 'border-left-style: dotted;', 173 'border-top-color: rgb(203, 200, 185);', 174 'border-right-color: rgb(203, 200, 185);', 175 'border-bottom-color: rgb(203, 200, 185);', 176 'border-left-color: rgb(203, 200, 185);', 177 'background-image: initial;', 178 'background-attachment: initial;', 179 'background-origin: initial;', 180 'background-clip: initial;', 181 'background-color: rgb(246, 246, 242);', 182 'overflow-x: auto;', 183 'overflow-y: auto;', 184 'font: italic small-caps bolder condensed 16px/3 cursive;', 185 'background-position: initial initial;', 186 'background-repeat: initial initial;' 187 ] 188 html = '<p style="%s">Hello world</p>' % ' '.join(style) 189 styles = [ 190 'border', 'float', 'overflow', 'min-height', 'vertical-align', 191 'white-space', 192 'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right', 193 'padding', 'padding-left', 'padding-top', 'padding-bottom', 194 'padding-right', 195 'background', 196 'background-color', 197 'font', 'font-size', 'font-weight', 'text-align', 'text-transform', 198 ] 199 200 expected = ( 201 '<p style="' 202 'margin-top: 0px; ' 203 'margin-right: 0px; ' 204 'margin-bottom: 1.286em; ' 205 'margin-left: 0px; ' 206 'padding-top: 15px; ' 207 'padding-right: 15px; ' 208 'padding-bottom: 15px; ' 209 'padding-left: 15px; ' 210 'background-color: rgb(246, 246, 242); ' 211 'font: italic small-caps bolder condensed 16px/3 cursive;' 212 '">Hello world</p>' 213 ) 214 215 assert clean(html, styles=styles) == expected 216 217 218@pytest.mark.parametrize('data, styles, expected', [ 219 ( 220 '<p style="font-family: Droid Sans, serif; white-space: pre-wrap;">text</p>', 221 ['font-family', 'white-space'], 222 '<p style="font-family: Droid Sans, serif; white-space: pre-wrap;">text</p>' 223 ), 224 ( 225 '<p style="font-family: "Droid Sans", serif; white-space: pre-wrap;">text</p>', 226 ['font-family', 'white-space'], 227 '<p style=\'font-family: "Droid Sans", serif; white-space: pre-wrap;\'>text</p>' 228 ), 229]) 230def test_css_parsing_with_entities(data, styles, expected): 231 """The sanitizer should be ok with character entities""" 232 assert clean(data, tags=['p'], attributes={'p': ['style']}, styles=styles) == expected 233 234 235@pytest.mark.parametrize('overlap_test_char', ["\"", "'", "-"]) 236def test_css_parsing_gauntlet_regex_backtracking(overlap_test_char): 237 """The sanitizer gauntlet regex should not catastrophically backtrack""" 238 # refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1623633 239 240 def time_clean(test_char, size): 241 style_attr_value = (test_char + 'a' + test_char) * size + '^' 242 stmt = """clean('''<a style='%s'></a>''', attributes={'a': ['style']})""" % style_attr_value 243 return timeit(stmt=stmt, setup='from bleach import clean', number=1) 244 245 # should complete in less than one second 246 assert time_clean(overlap_test_char, 22) < 1.0 247