1#!/usr/bin/env python 2"""A validating CSSParser""" 3 4from __future__ import unicode_literals, division, absolute_import, print_function 5 6__all__ = ['CSSParser'] 7__docformat__ = 'restructuredtext' 8__version__ = '$Id$' 9 10from .helper import path2url 11import codecs 12import css_parser 13import sys 14from . import tokenize2 15 16from css_parser import css 17 18if sys.version_info < (2, 6): 19 bytes = str 20 21 22class CSSParser(object): 23 """Parse a CSS StyleSheet from URL, string or file and return a DOM Level 2 24 CSS StyleSheet object. 25 26 Usage:: 27 28 parser = CSSParser() 29 # optionally 30 parser.setFetcher(fetcher) 31 sheet = parser.parseFile('test1.css', 'ascii') 32 print sheet.cssText 33 """ 34 35 def __init__(self, log=None, loglevel=None, raiseExceptions=None, 36 fetcher=None, parseComments=True, 37 validate=True): 38 """ 39 :param log: 40 logging object 41 :param loglevel: 42 logging loglevel 43 :param raiseExceptions: 44 if log should simply log (default) or raise errors during 45 parsing. Later while working with the resulting sheets 46 the setting used in css_parser.log.raiseExeptions is used 47 :param fetcher: 48 see ``setFetcher(fetcher)`` 49 :param parseComments: 50 if comments should be added to CSS DOM or simply omitted 51 :param validate: 52 if parsing should validate, may be overwritten in parse methods 53 """ 54 if log is not None: 55 css_parser.log.setLog(log) 56 if loglevel is not None: 57 css_parser.log.setLevel(loglevel) 58 59 # remember global setting 60 self.__globalRaising = css_parser.log.raiseExceptions 61 if raiseExceptions: 62 self.__parseRaising = raiseExceptions 63 else: 64 # DEFAULT during parse 65 self.__parseRaising = False 66 67 self.__tokenizer = tokenize2.Tokenizer(doComments=parseComments) 68 self.setFetcher(fetcher) 69 70 self._validate = validate 71 72 def __parseSetting(self, parse): 73 """during parse exceptions may be handled differently depending on 74 init parameter ``raiseExceptions`` 75 """ 76 if parse: 77 css_parser.log.raiseExceptions = self.__parseRaising 78 else: 79 css_parser.log.raiseExceptions = self.__globalRaising 80 81 def parseStyle(self, cssText, encoding='utf-8', validate=None): 82 """Parse given `cssText` which is assumed to be the content of 83 a HTML style attribute. 84 85 :param cssText: 86 CSS string to parse 87 :param encoding: 88 It will be used to decode `cssText` if given as a (byte) 89 string. 90 :param validate: 91 If given defines if validation is used. Uses CSSParser settings as 92 fallback 93 :returns: 94 :class:`~css_parser.css.CSSStyleDeclaration` 95 """ 96 self.__parseSetting(True) 97 if isinstance(cssText, bytes): 98 # TODO: use codecs.getdecoder('css') here? 99 cssText = cssText.decode(encoding) 100 if validate is None: 101 validate = self._validate 102 style = css.CSSStyleDeclaration(cssText, validating=validate) 103 self.__parseSetting(False) 104 return style 105 106 def parseString(self, cssText, encoding=None, href=None, media=None, 107 title=None, 108 validate=None): 109 """Parse `cssText` as :class:`~css_parser.css.CSSStyleSheet`. 110 Errors may be raised (e.g. UnicodeDecodeError). 111 112 :param cssText: 113 CSS string to parse 114 :param encoding: 115 If ``None`` the encoding will be read from BOM or an @charset 116 rule or defaults to UTF-8. 117 If given overrides any found encoding including the ones for 118 imported sheets. 119 It also will be used to decode `cssText` if given as a (byte) 120 string. 121 :param href: 122 The ``href`` attribute to assign to the parsed style sheet. 123 Used to resolve other urls in the parsed sheet like @import hrefs. 124 :param media: 125 The ``media`` attribute to assign to the parsed style sheet 126 (may be a MediaList, list or a string). 127 :param title: 128 The ``title`` attribute to assign to the parsed style sheet. 129 :param validate: 130 If given defines if validation is used. Uses CSSParser settings as 131 fallback 132 :returns: 133 :class:`~css_parser.css.CSSStyleSheet`. 134 """ 135 self.__parseSetting(True) 136 # TODO: py3 needs bytes here! 137 if isinstance(cssText, bytes): 138 cssText = codecs.getdecoder('css')(cssText, encoding=encoding)[0] 139 140 if validate is None: 141 validate = self._validate 142 143 sheet = css_parser.css.CSSStyleSheet( 144 href=href, 145 media=css_parser.stylesheets.MediaList(media), 146 title=title, 147 validating=validate) 148 sheet._setFetcher(self.__fetcher) 149 # tokenizing this ways closes open constructs and adds EOF 150 sheet._setCssTextWithEncodingOverride(self.__tokenizer.tokenize(cssText, 151 fullsheet=True), 152 encodingOverride=encoding) 153 self.__parseSetting(False) 154 return sheet 155 156 def parseFile(self, filename, encoding=None, 157 href=None, media=None, title=None, 158 validate=None): 159 """Retrieve content from `filename` and parse it. Errors may be raised 160 (e.g. IOError). 161 162 :param filename: 163 of the CSS file to parse, if no `href` is given filename is 164 converted to a (file:) URL and set as ``href`` of resulting 165 stylesheet. 166 If `href` is given it is set as ``sheet.href``. Either way 167 ``sheet.href`` is used to resolve e.g. stylesheet imports via 168 @import rules. 169 :param encoding: 170 Value ``None`` defaults to encoding detection via BOM or an 171 @charset rule. 172 Other values override detected encoding for the sheet at 173 `filename` including any imported sheets. 174 :returns: 175 :class:`~css_parser.css.CSSStyleSheet`. 176 """ 177 if not href: 178 href = path2url(filename) 179 180 f = open(filename, 'rb') 181 css = f.read() 182 f.close() 183 184 return self.parseString(css, 185 encoding=encoding, # read returns a str 186 href=href, media=media, title=title, 187 validate=validate) 188 189 def parseUrl(self, href, encoding=None, media=None, title=None, 190 validate=None): 191 """Retrieve content from URL `href` and parse it. Errors may be raised 192 (e.g. URLError). 193 194 :param href: 195 URL of the CSS file to parse, will also be set as ``href`` of 196 resulting stylesheet 197 :param encoding: 198 Value ``None`` defaults to encoding detection via HTTP, BOM or an 199 @charset rule. 200 A value overrides detected encoding for the sheet at ``href`` 201 including any imported sheets. 202 :returns: 203 :class:`~css_parser.css.CSSStyleSheet`. 204 """ 205 encoding, enctype, text = css_parser.util._readUrl( 206 href, 207 fetcher=self.__fetcher, 208 overrideEncoding=encoding) 209 if enctype == 5: 210 # do not use if defaulting to UTF-8 211 encoding = None 212 213 if text is not None: 214 return self.parseString(text, encoding=encoding, 215 href=href, media=media, title=title, 216 validate=validate) 217 218 def setFetcher(self, fetcher=None): 219 """Replace the default URL fetch function with a custom one. 220 221 :param fetcher: 222 A function which gets a single parameter 223 224 ``url`` 225 the URL to read 226 227 and must return ``(encoding, content)`` where ``encoding`` is the 228 HTTP charset normally given via the Content-Type header (which may 229 simply omit the charset in which case ``encoding`` would be 230 ``None``) and ``content`` being the string (or unicode) content. 231 232 The Mimetype should be 'text/css' but this has to be checked by the 233 fetcher itself (the default fetcher emits a warning if encountering 234 a different mimetype). 235 236 Calling ``setFetcher`` with ``fetcher=None`` resets css_parser 237 to use its default function. 238 """ 239 self.__fetcher = fetcher 240