1from __future__ import unicode_literals
2
3from functools import partial
4from timeit import timeit
5
6import pytest
7
8from bleach import clean
9
10
# Shadow bleach's ``clean`` with a version preset for this module: only <p>
# tags and the ``style`` attribute are allowed unless a call passes its own
# ``tags`` / ``attributes`` keywords.
clean = partial(
    clean,
    tags=['p'],
    attributes=['style'],
)
12
13
@pytest.mark.parametrize('data, styles, expected', [
    # Declarations for properties outside ``styles`` are removed
    pytest.param(
        '<p style="font-family: Arial; color: red; float: left; background-color: red;">bar</p>',
        ['color'],
        '<p style="color: red;">bar</p>',
    ),
    pytest.param(
        '<p style="border: 1px solid blue; color: red; float: left;">bar</p>',
        ['color'],
        '<p style="color: red;">bar</p>',
    ),
    pytest.param(
        '<p style="border: 1px solid blue; color: red; float: left;">bar</p>',
        ['color', 'float'],
        '<p style="color: red; float: left;">bar</p>',
    ),
    pytest.param(
        '<p style="color: red; float: left; padding: 1em;">bar</p>',
        ['color', 'float'],
        '<p style="color: red; float: left;">bar</p>',
    ),
    pytest.param(
        '<p style="color: red; float: left; padding: 1em;">bar</p>',
        ['color'],
        '<p style="color: red;">bar</p>',
    ),
    # Value starting with "-"
    # (marked xfail: regressed with the fix for bug 1623633)
    pytest.param(
        '<p style="cursor: -moz-grab;">bar</p>',
        ['cursor'],
        '<p style="cursor: -moz-grab;">bar</p>',
        marks=pytest.mark.xfail,
    ),
    # Values containing ()
    pytest.param(
        '<p style="color: hsl(30,100%,50%);">bar</p>',
        ['color'],
        '<p style="color: hsl(30,100%,50%);">bar</p>',
    ),
    pytest.param(
        '<p style="color: rgba(255,0,0,0.4);">bar</p>',
        ['color'],
        '<p style="color: rgba(255,0,0,0.4);">bar</p>',
    ),
    # Value containing '
    # (marked xfail: regressed with the fix for bug 1623633)
    pytest.param(
        '<p style="text-overflow: \',\' ellipsis;">bar</p>',
        ['text-overflow'],
        '<p style="text-overflow: \',\' ellipsis;">bar</p>',
        marks=pytest.mark.xfail,
    ),
    # Value containing "
    # (marked xfail: regressed with the fix for bug 1623633)
    pytest.param(
        '<p style=\'text-overflow: "," ellipsis;\'>bar</p>',
        ['text-overflow'],
        '<p style=\'text-overflow: "," ellipsis;\'>bar</p>',
        marks=pytest.mark.xfail,
    ),
    pytest.param(
        '<p style=\'font-family: "Arial";\'>bar</p>',
        ['font-family'],
        '<p style=\'font-family: "Arial";\'>bar</p>',
    ),
    # Non-ascii characters in a value
    pytest.param(
        '<p style="font-family: \u30e1\u30a4\u30ea\u30aa; color: blue;">bar</p>',
        ['color'],
        '<p style="color: blue;">bar</p>',
    ),
])
def test_allowed_css(data, styles, expected):
    """Cleaning ``data`` with the given allowed ``styles`` yields ``expected``."""
    cleaned = clean(data, styles=styles)
    assert cleaned == expected
89
90
def test_valid_css():
    """Declarations with a missing or malformed value are dropped."""
    allowed = ['color', 'float']

    # "color" has no value, so only the complete "float" declaration remains.
    missing_value = clean('<p style="float: left; color: ">foo</p>', styles=allowed)
    assert missing_value == '<p style="float: left;">foo</p>'

    # "color: float: left;" is not a usable declaration; the style ends up empty.
    malformed = clean('<p style="color: float: left;">foo</p>', styles=allowed)
    assert malformed == '<p style="">foo</p>'
102
103
@pytest.mark.parametrize('data, expected', [
    # Every input below must sanitize to the same url-free markup.
    (markup, '<p style="background: #00D;">foo</p>')
    for markup in (
        # No url--unchanged
        '<p style="background: #00D;">foo</p>',

        # Urls with no quotes, single quotes, and double quotes are all dropped
        '<p style="background: url(topbanner.png) #00D;">foo</p>',
        '<p style="background: url(\'topbanner.png\') #00D;">foo</p>',
        '<p style=\'background: url("topbanner.png") #00D;\'>foo</p>',

        # Urls with spacing
        '<p style="background: url(  \'topbanner.png\') #00D;">foo</p>',
        '<p style="background: url(\'topbanner.png\'  ) #00D;">foo</p>',
        '<p style="background: url(  \'topbanner.png\'  ) #00D;">foo</p>',
        '<p style="background: url (  \'topbanner.png\'  ) #00D;">foo</p>',

        # Urls with character entities
        '<p style="background: url&#x09;(\'topbanner.png\') #00D;">foo</p>',
    )
])
def test_urls(data, expected):
    """url() values are stripped from an allowed ``background`` declaration."""
    sanitized = clean(data, styles=['background'])
    assert sanitized == expected
152
153
def test_style_hang():
    """Cleaning a long inline style completes and keeps only allowed properties."""
    declarations = (
        'margin-top: 0px;',
        'margin-right: 0px;',
        'margin-bottom: 1.286em;',
        'margin-left: 0px;',
        'padding-top: 15px;',
        'padding-right: 15px;',
        'padding-bottom: 15px;',
        'padding-left: 15px;',
        'border-top-width: 1px;',
        'border-right-width: 1px;',
        'border-bottom-width: 1px;',
        'border-left-width: 1px;',
        'border-top-style: dotted;',
        'border-right-style: dotted;',
        'border-bottom-style: dotted;',
        'border-left-style: dotted;',
        'border-top-color: rgb(203, 200, 185);',
        'border-right-color: rgb(203, 200, 185);',
        'border-bottom-color: rgb(203, 200, 185);',
        'border-left-color: rgb(203, 200, 185);',
        'background-image: initial;',
        'background-attachment: initial;',
        'background-origin: initial;',
        'background-clip: initial;',
        'background-color: rgb(246, 246, 242);',
        'overflow-x: auto;',
        'overflow-y: auto;',
        'font: italic small-caps bolder condensed 16px/3 cursive;',
        'background-position: initial initial;',
        'background-repeat: initial initial;',
    )
    markup = '<p style="%s">Hello world</p>' % ' '.join(declarations)

    allowed = [
        'border',
        'float',
        'overflow',
        'min-height',
        'vertical-align',
        'white-space',
        'margin',
        'margin-left',
        'margin-top',
        'margin-bottom',
        'margin-right',
        'padding',
        'padding-left',
        'padding-top',
        'padding-bottom',
        'padding-right',
        'background',
        'background-color',
        'font',
        'font-size',
        'font-weight',
        'text-align',
        'text-transform',
    ]

    # Only declarations whose exact property name is in ``allowed`` should
    # remain, in their original order and separated by single spaces.
    surviving = [
        'margin-top: 0px;',
        'margin-right: 0px;',
        'margin-bottom: 1.286em;',
        'margin-left: 0px;',
        'padding-top: 15px;',
        'padding-right: 15px;',
        'padding-bottom: 15px;',
        'padding-left: 15px;',
        'background-color: rgb(246, 246, 242);',
        'font: italic small-caps bolder condensed 16px/3 cursive;',
    ]
    wanted = '<p style="%s">Hello world</p>' % ' '.join(surviving)

    result = clean(markup, styles=allowed)
    assert result == wanted
216
217
@pytest.mark.parametrize('data, styles, expected', [
    # No entities in the style value: passes through unchanged
    pytest.param(
        '<p style="font-family: Droid Sans, serif; white-space: pre-wrap;">text</p>',
        ['font-family', 'white-space'],
        '<p style="font-family: Droid Sans, serif; white-space: pre-wrap;">text</p>',
    ),
    # &quot; entities come back as literal double quotes, with the attribute
    # itself delimited by single quotes
    pytest.param(
        '<p style="font-family: &quot;Droid Sans&quot;, serif; white-space: pre-wrap;">text</p>',
        ['font-family', 'white-space'],
        '<p style=\'font-family: "Droid Sans", serif; white-space: pre-wrap;\'>text</p>',
    ),
])
def test_css_parsing_with_entities(data, styles, expected):
    """Character entities inside a style value must not break sanitizing."""
    sanitized = clean(
        data,
        tags=['p'],
        attributes={'p': ['style']},
        styles=styles,
    )
    assert sanitized == expected
233
234
@pytest.mark.parametrize('overlap_test_char', ["\"", "'", "-"])
def test_css_parsing_gauntlet_regex_backtracking(overlap_test_char):
    """The sanitizer gauntlet regex should not catastrophically backtrack"""
    # refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1623633

    # Use the unwrapped bleach function: the module-level ``clean`` is a
    # partial preset with tags=['p'], which would not allow the <a> element
    # this test relies on.
    from bleach import clean as bleach_clean

    def time_clean(test_char, size):
        """Return the seconds taken by one clean() of the crafted style value.

        The value repeats ``test_char + 'a' + test_char`` ``size`` times and
        ends with '^'.
        """
        style_attr_value = (test_char + 'a' + test_char) * size + '^'
        html = "<a style='%s'></a>" % style_attr_value
        # Time a callable rather than generated source text, so quote or
        # backslash characters in the payload can never alter the statement
        # being timed.
        return timeit(
            lambda: bleach_clean(html, attributes={'a': ['style']}),
            number=1,
        )

    # should complete in less than one second
    assert time_clean(overlap_test_char, 22) < 1.0
247