1#!/usr/bin/env python
2"""A validating CSSParser"""
3
4from __future__ import unicode_literals, division, absolute_import, print_function
5
6__all__ = ['CSSParser']
7__docformat__ = 'restructuredtext'
8__version__ = '$Id$'
9
10from .helper import path2url
11import codecs
12import css_parser
13import sys
14from . import tokenize2
15
16from css_parser import css
17
# Interpreters older than Python 2.6 have no ``bytes`` builtin; alias it to
# ``str`` so the ``isinstance(..., bytes)`` checks below keep working there.
if (2, 6) > sys.version_info[:2]:
    bytes = str
20
21
class CSSParser(object):
    """Parse a CSS StyleSheet from URL, string or file and return a DOM Level 2
    CSS StyleSheet object.

    Usage::

        parser = CSSParser()
        # optionally
        parser.setFetcher(fetcher)
        sheet = parser.parseFile('test1.css', 'ascii')
        print(sheet.cssText)
    """

    def __init__(self, log=None, loglevel=None, raiseExceptions=None,
                 fetcher=None, parseComments=True,
                 validate=True):
        """
        :param log:
            logging object
        :param loglevel:
            logging loglevel
        :param raiseExceptions:
            if log should simply log (default) or raise errors during
            parsing. Later while working with the resulting sheets
            the setting used in css_parser.log.raiseExceptions is used
        :param fetcher:
            see ``setFetcher(fetcher)``
        :param parseComments:
            if comments should be added to CSS DOM or simply omitted
        :param validate:
            if parsing should validate, may be overwritten in parse methods
        """
        if log is not None:
            css_parser.log.setLog(log)
        if loglevel is not None:
            css_parser.log.setLevel(loglevel)

        # Remember the global setting as it is at construction time; it is
        # put back after every parse (see ``__parseSetting``).
        self.__globalRaising = css_parser.log.raiseExceptions
        if raiseExceptions:
            self.__parseRaising = raiseExceptions
        else:
            # DEFAULT during parse: only log, do not raise
            self.__parseRaising = False

        self.__tokenizer = tokenize2.Tokenizer(doComments=parseComments)
        self.setFetcher(fetcher)

        self._validate = validate

    def __parseSetting(self, parse):
        """Switch ``css_parser.log.raiseExceptions`` for the duration of a
        parse.

        During parse exceptions may be handled differently depending on
        init parameter ``raiseExceptions``.

        :param parse:
            if true the parse-time setting is activated, otherwise the
            global setting remembered in ``__init__`` is restored
        """
        if parse:
            css_parser.log.raiseExceptions = self.__parseRaising
        else:
            css_parser.log.raiseExceptions = self.__globalRaising

    def parseStyle(self, cssText, encoding='utf-8', validate=None):
        """Parse given `cssText` which is assumed to be the content of
        a HTML style attribute.

        :param cssText:
            CSS string to parse
        :param encoding:
            It will be used to decode `cssText` if given as a (byte)
            string.
        :param validate:
            If given defines if validation is used. Uses CSSParser settings as
            fallback
        :returns:
            :class:`~css_parser.css.CSSStyleDeclaration`
        """
        self.__parseSetting(True)
        try:
            if isinstance(cssText, bytes):
                # TODO: use codecs.getdecoder('css') here?
                cssText = cssText.decode(encoding)
            if validate is None:
                validate = self._validate
            return css.CSSStyleDeclaration(cssText, validating=validate)
        finally:
            # Restore the global setting even if decoding or parsing raised,
            # otherwise the parse-time setting would leak into later code.
            self.__parseSetting(False)

    def parseString(self, cssText, encoding=None, href=None, media=None,
                    title=None,
                    validate=None):
        """Parse `cssText` as :class:`~css_parser.css.CSSStyleSheet`.
        Errors may be raised (e.g. UnicodeDecodeError).

        :param cssText:
            CSS string to parse
        :param encoding:
            If ``None`` the encoding will be read from BOM or an @charset
            rule or defaults to UTF-8.
            If given overrides any found encoding including the ones for
            imported sheets.
            It also will be used to decode `cssText` if given as a (byte)
            string.
        :param href:
            The ``href`` attribute to assign to the parsed style sheet.
            Used to resolve other urls in the parsed sheet like @import hrefs.
        :param media:
            The ``media`` attribute to assign to the parsed style sheet
            (may be a MediaList, list or a string).
        :param title:
            The ``title`` attribute to assign to the parsed style sheet.
        :param validate:
            If given defines if validation is used. Uses CSSParser settings as
            fallback
        :returns:
            :class:`~css_parser.css.CSSStyleSheet`.
        """
        self.__parseSetting(True)
        try:
            # TODO: py3 needs bytes here!
            if isinstance(cssText, bytes):
                cssText = codecs.getdecoder('css')(cssText,
                                                   encoding=encoding)[0]

            if validate is None:
                validate = self._validate

            sheet = css_parser.css.CSSStyleSheet(
                href=href,
                media=css_parser.stylesheets.MediaList(media),
                title=title,
                validating=validate)
            sheet._setFetcher(self.__fetcher)
            # tokenizing this way closes open constructs and adds EOF
            tokens = self.__tokenizer.tokenize(cssText, fullsheet=True)
            sheet._setCssTextWithEncodingOverride(tokens,
                                                  encodingOverride=encoding)
            return sheet
        finally:
            # Restore the global setting even if decoding or parsing raised,
            # otherwise the parse-time setting would leak into later code.
            self.__parseSetting(False)

    def parseFile(self, filename, encoding=None,
                  href=None, media=None, title=None,
                  validate=None):
        """Retrieve content from `filename` and parse it. Errors may be raised
        (e.g. IOError).

        :param filename:
            of the CSS file to parse, if no `href` is given filename is
            converted to a (file:) URL and set as ``href`` of resulting
            stylesheet.
            If `href` is given it is set as ``sheet.href``. Either way
            ``sheet.href`` is used to resolve e.g. stylesheet imports via
            @import rules.
        :param encoding:
            Value ``None`` defaults to encoding detection via BOM or an
            @charset rule.
            Other values override detected encoding for the sheet at
            `filename` including any imported sheets.
        :param href:
            passed on to :meth:`parseString`; derived from `filename` if
            empty
        :param media:
            passed on to :meth:`parseString`
        :param title:
            passed on to :meth:`parseString`
        :param validate:
            passed on to :meth:`parseString`
        :returns:
            :class:`~css_parser.css.CSSStyleSheet`.
        """
        if not href:
            href = path2url(filename)

        # Read raw bytes (decoding is left to ``parseString``); the context
        # manager closes the file even if reading fails.
        with open(filename, 'rb') as cssFile:
            content = cssFile.read()

        return self.parseString(content,
                                encoding=encoding,
                                href=href, media=media, title=title,
                                validate=validate)

    def parseUrl(self, href, encoding=None, media=None, title=None,
                 validate=None):
        """Retrieve content from URL `href` and parse it. Errors may be raised
        (e.g. URLError).

        :param href:
            URL of the CSS file to parse, will also be set as ``href`` of
            resulting stylesheet
        :param encoding:
            Value ``None`` defaults to encoding detection via HTTP, BOM or an
            @charset rule.
            A value overrides detected encoding for the sheet at ``href``
            including any imported sheets.
        :param media:
            passed on to :meth:`parseString`
        :param title:
            passed on to :meth:`parseString`
        :param validate:
            passed on to :meth:`parseString`
        :returns:
            :class:`~css_parser.css.CSSStyleSheet` or ``None`` if no text
            could be retrieved from `href`.
        """
        encoding, enctype, text = css_parser.util._readUrl(
            href,
            fetcher=self.__fetcher,
            overrideEncoding=encoding)
        if enctype == 5:
            # do not use if defaulting to UTF-8
            encoding = None

        if text is None:
            return None

        return self.parseString(text, encoding=encoding,
                                href=href, media=media, title=title,
                                validate=validate)

    def setFetcher(self, fetcher=None):
        """Replace the default URL fetch function with a custom one.

        :param fetcher:
            A function which gets a single parameter

            ``url``
                the URL to read

            and must return ``(encoding, content)`` where ``encoding`` is the
            HTTP charset normally given via the Content-Type header (which may
            simply omit the charset in which case ``encoding`` would be
            ``None``) and ``content`` being the string (or unicode) content.

            The Mimetype should be 'text/css' but this has to be checked by the
            fetcher itself (the default fetcher emits a warning if encountering
            a different mimetype).

            Calling ``setFetcher`` with ``fetcher=None`` resets css_parser
            to use its default function.
        """
        self.__fetcher = fetcher