# Tests for the CSS / inline-style handling of ``bleach.clean``.
#
# Module-level setup
#   ``clean`` is rebound to ``functools.partial(clean, tags=['p'],
#   attributes=['style'])``, so every test below calls it with those two
#   keyword arguments preset unless the call overrides them.
#
# Tests defined here
#   test_allowed_css
#       Parametrized over (data, styles, expected); asserts
#       ``clean(data, styles=styles) == expected``.  Three cases are wrapped
#       in ``pytest.param(..., marks=pytest.mark.xfail)`` and are commented
#       as having regressed with the fix for bug 1623633.
#   test_valid_css
#       Two assertions using styles ['color', 'float']; per its docstring the
#       sanitizer should fix missing CSS values.
#   test_urls
#       Parametrized over (data, expected); calls ``clean`` with
#       ``styles=['background']``.  The case comments cover: no url, urls with
#       no/single/double quotes, urls with spacing, urls with character
#       entities.
#   test_style_hang
#       Joins a long list of declarations with spaces, interpolates it into
#       ``html`` with ``%``, and compares ``clean(html, styles=styles)`` with
#       ``expected``.
#   test_css_parsing_with_entities
#       Parametrized over (data, styles, expected); passes ``tags=['p']`` and
#       ``attributes={'p': ['style']}`` explicitly, which override the
#       partial's preset keywords.
#   test_css_parsing_gauntlet_regex_backtracking
#       Parametrized over the characters '"', "'" and '-'.  Builds
#       ``(test_char + 'a' + test_char) * size + '^'``, runs the statement
#       once via ``timeit`` (``number=1``; the setup string imports ``clean``
#       directly from bleach, not the partial above) and asserts that size 22
#       takes under 1.0 second.
#
# NOTE(review): this copy of the module is not runnable as it stands.
#   * The whole module is collapsed onto a few physical lines, so the inline
#     ``#`` comments run into the code that follows them on the same line.
#   * The single-quoted literals open on one physical line and close several
#     lines later, which is a syntax error.
#   * The two ``%`` formatting expressions (``html = ... % ' '.join(style)``
#     and ``stmt = ... % style_attr_value``) have no conversion specifier in
#     their left operand and would raise ``TypeError`` if reached.
#   Presumably markup inside the string literals was stripped when this text
#   was extracted -- TODO confirm and restore the literals from the canonical
#   copy of the file.  The inputs and expected values cannot be recovered from
#   what is visible here, so they are deliberately left untouched.
from __future__ import unicode_literals from functools import partial from timeit import timeit import pytest from bleach import clean clean = partial(clean, tags=['p'], attributes=['style']) @pytest.mark.parametrize('data, styles, expected', [ ( '

bar

', ['color'], '

bar

' ), ( '

bar

', ['color'], '

bar

' ), ( '

bar

', ['color', 'float'], '

bar

' ), ( '

bar

', ['color', 'float'], '

bar

' ), ( '

bar

', ['color'], '

bar

' ), # Handle leading - in attributes # regressed with the fix for bug 1623633 pytest.param( '

bar

', ['cursor'], '

bar

', marks=pytest.mark.xfail, ), # Handle () in attributes ( '

bar

', ['color'], '

bar

', ), ( '

bar

', ['color'], '

bar

', ), # Handle ' in attributes # regressed with the fix for bug 1623633 pytest.param( '

bar

', ['text-overflow'], '

bar

', marks=pytest.mark.xfail, ), # Handle " in attributes # regressed with the fix for bug 1623633 pytest.param( '

bar

', ['text-overflow'], '

bar

', marks=pytest.mark.xfail, ), ( '

bar

', ['font-family'], '

bar

' ), # Handle non-ascii characters in attributes ( '

bar

', ['color'], '

bar

' ), ]) def test_allowed_css(data, styles, expected): assert clean(data, styles=styles) == expected def test_valid_css(): """The sanitizer should fix missing CSS values.""" styles = ['color', 'float'] assert ( clean('

foo

', styles=styles) == '

foo

' ) assert ( clean('

foo

', styles=styles) == '

foo

' ) @pytest.mark.parametrize('data, expected', [ # No url--unchanged ( '

foo

', '

foo

' ), # Verify urls with no quotes, single quotes, and double quotes are all dropped ( '

foo

', '

foo

' ), ( '

foo

', '

foo

' ), ( '

foo

', '

foo

' ), # Verify urls with spacing ( '

foo

', '

foo

' ), ( '

foo

', '

foo

' ), ( '

foo

', '

foo

' ), ( '

foo

', '

foo

' ), # Verify urls with character entities ( '

foo

', '

foo

' ), ]) def test_urls(data, expected): assert clean(data, styles=['background']) == expected def test_style_hang(): """The sanitizer should not hang on any inline styles""" style = [ 'margin-top: 0px;', 'margin-right: 0px;', 'margin-bottom: 1.286em;', 'margin-left: 0px;', 'padding-top: 15px;', 'padding-right: 15px;', 'padding-bottom: 15px;', 'padding-left: 15px;', 'border-top-width: 1px;', 'border-right-width: 1px;', 'border-bottom-width: 1px;', 'border-left-width: 1px;', 'border-top-style: dotted;', 'border-right-style: dotted;', 'border-bottom-style: dotted;', 'border-left-style: dotted;', 'border-top-color: rgb(203, 200, 185);', 'border-right-color: rgb(203, 200, 185);', 'border-bottom-color: rgb(203, 200, 185);', 'border-left-color: rgb(203, 200, 185);', 'background-image: initial;', 'background-attachment: initial;', 'background-origin: initial;', 'background-clip: initial;', 'background-color: rgb(246, 246, 242);', 'overflow-x: auto;', 'overflow-y: auto;', 'font: italic small-caps bolder condensed 16px/3 cursive;', 'background-position: initial initial;', 'background-repeat: initial initial;' ] html = '

Hello world

' % ' '.join(style) styles = [ 'border', 'float', 'overflow', 'min-height', 'vertical-align', 'white-space', 'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right', 'padding', 'padding-left', 'padding-top', 'padding-bottom', 'padding-right', 'background', 'background-color', 'font', 'font-size', 'font-weight', 'text-align', 'text-transform', ] expected = ( '

Hello world

' ) assert clean(html, styles=styles) == expected @pytest.mark.parametrize('data, styles, expected', [ ( '

text

', ['font-family', 'white-space'], '

text

' ), ( '

text

', ['font-family', 'white-space'], '

text

' ), ]) def test_css_parsing_with_entities(data, styles, expected): """The sanitizer should be ok with character entities""" assert clean(data, tags=['p'], attributes={'p': ['style']}, styles=styles) == expected @pytest.mark.parametrize('overlap_test_char', ["\"", "'", "-"]) def test_css_parsing_gauntlet_regex_backtracking(overlap_test_char): """The sanitizer gauntlet regex should not catastrophically backtrack""" # refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1623633 def time_clean(test_char, size): style_attr_value = (test_char + 'a' + test_char) * size + '^' stmt = """clean('''''', attributes={'a': ['style']})""" % style_attr_value return timeit(stmt=stmt, setup='from bleach import clean', number=1) # should complete in less than one second assert time_clean(overlap_test_char, 22) < 1.0