1"""
2XPath selectors based on lxml
3"""
4
5from parsel import Selector as _ParselSelector
6from scrapy.utils.trackref import object_ref
7from scrapy.utils.python import to_bytes
8from scrapy.http import HtmlResponse, XmlResponse
9
10
# Names exported by ``from <this module> import *``: the two public classes
# defined below. The underscore-prefixed helpers are deliberately left out.
__all__ = ['Selector', 'SelectorList']
12
13
def _st(response, st):
    """Resolve the selector type to use.

    An explicitly given ``st`` (anything other than ``None``) is returned
    unchanged. Otherwise the type is derived from ``response``: ``'xml'``
    when it is an ``XmlResponse`` instance, ``'html'`` for everything else
    (including ``None``).
    """
    # An explicit type always wins; no detection is attempted.
    if st is not None:
        return st

    if isinstance(response, XmlResponse):
        return 'xml'
    return 'html'
18
19
def _response_from_text(text, st):
    """Wrap ``text`` in a response object built in memory.

    The response class is ``XmlResponse`` when ``st`` equals ``'xml'`` and
    ``HtmlResponse`` for any other value. The response is created with the
    placeholder URL ``about:blank``, a declared encoding of ``utf-8`` and a
    body produced by ``to_bytes(text, 'utf-8')``.
    """
    response_cls = HtmlResponse
    if st == 'xml':
        response_cls = XmlResponse

    body = to_bytes(text, 'utf-8')
    return response_cls(url='about:blank', encoding='utf-8', body=body)
24
25
class SelectorList(_ParselSelector.selectorlist_cls, object_ref):
    """
    The :class:`SelectorList` class is a subclass of the builtin ``list``
    class, which provides a few additional methods.

    It derives from the selector-list class exposed by the parsel
    ``Selector`` (its ``selectorlist_cls`` attribute) and additionally
    inherits from ``object_ref`` (imported from ``scrapy.utils.trackref``).
    The class defines no members of its own; everything it offers comes
    from those two bases.
    """
31
32
class Selector(_ParselSelector, object_ref):
    """
    A :class:`Selector` wraps a response so that parts of its content can be
    selected and extracted.

    ``response`` is an :class:`~scrapy.http.HtmlResponse` or an
    :class:`~scrapy.http.XmlResponse` object whose text is used for
    selecting and extracting data. Unless a ``base_url`` keyword argument is
    given explicitly, the URL of the response is used as the base URL.

    ``text`` is a unicode string or utf-8 encoded text, for cases where a
    ``response`` isn't available. It is wrapped in a response created for
    the occasion (with the URL ``about:blank``), which is then stored in the
    ``response`` attribute. Passing both ``text`` and ``response`` raises
    :exc:`ValueError`.

    ``type`` defines the selector type; it can be ``"html"``, ``"xml"``
    or ``None`` (default).

    When ``type`` is ``None``, the selector type is picked automatically:

    * ``"xml"`` if ``response`` is an :class:`~scrapy.http.XmlResponse`
    * ``"html"`` if ``response`` is an :class:`~scrapy.http.HtmlResponse`
    * ``"html"`` for anything else, including when only ``text`` is given

    When ``type`` is set, that selector type is forced and no detection
    takes place.

    ``root`` and any additional keyword arguments are handed over unchanged
    to the initializer of the parsel selector this class extends.
    """

    __slots__ = ['response']
    selectorlist_cls = SelectorList

    def __init__(self, response=None, text=None, type=None, root=None, **kwargs):
        # The two content sources are mutually exclusive.
        if not (response is None or text is None):
            message = (f'{self.__class__.__name__}.__init__() received '
                       'both response and text')
            raise ValueError(message)

        # Resolve the type before ``response`` may be replaced below, so that
        # detection only ever looks at a response supplied by the caller.
        selector_type = _st(response, type)

        if text is not None:
            # No response was supplied (checked above): build one from text.
            response = _response_from_text(text, selector_type)

        if response is not None:
            # The text given to the parent class always comes from the
            # response; an explicitly passed ``base_url`` is left untouched.
            text = response.text
            kwargs.setdefault('base_url', response.url)

        self.response = response
        super().__init__(text=text, type=selector_type, root=root, **kwargs)
83