1"""
2Base class for Scrapy spiders
3
4See documentation in docs/topics/spiders.rst
5"""
6import logging
7import warnings
8from typing import Optional
9
10from scrapy import signals
11from scrapy.http import Request
12from scrapy.utils.trackref import object_ref
13from scrapy.utils.url import url_is_from_spider
14from scrapy.utils.deprecate import method_is_overridden
15
16
class Spider(object_ref):
    """Base class for scrapy spiders. All spiders must inherit from this
    class.
    """

    # Unique identifier for the spider; a concrete value is required
    # (enforced in __init__ if not set on the subclass).
    name: Optional[str] = None
    # Per-spider settings overrides, merged by update_settings().
    custom_settings: Optional[dict] = None

    def __init__(self, name=None, **kwargs):
        """Initialize the spider.

        ``name`` overrides the class-level ``name`` attribute; any extra
        keyword arguments are copied onto the instance as attributes.

        Raises :exc:`ValueError` if no name is available from either the
        argument or the class attribute.
        """
        if name is not None:
            self.name = name
        elif not getattr(self, 'name', None):
            raise ValueError(f"{type(self).__name__} must have a name")
        self.__dict__.update(kwargs)
        # Guarantee the attribute exists so start_requests() can iterate it,
        # even when neither the subclass nor kwargs provided one.
        if not hasattr(self, 'start_urls'):
            self.start_urls = []

    @property
    def logger(self):
        """A :class:`logging.LoggerAdapter` named after the spider, carrying
        the spider itself as ``extra`` context for log processors."""
        logger = logging.getLogger(self.name)
        return logging.LoggerAdapter(logger, {'spider': self})

    def log(self, message, level=logging.DEBUG, **kw):
        """Log the given message at the given log level

        This helper wraps a log call to the logger within the spider, but you
        can use it directly (e.g. Spider.logger.info('msg')) or use any other
        Python logger too.
        """
        self.logger.log(level, message, **kw)

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Default factory used by Scrapy: build the spider with the given
        arguments and bind it to ``crawler``."""
        spider = cls(*args, **kwargs)
        spider._set_crawler(crawler)
        return spider

    def _set_crawler(self, crawler):
        # Bind the crawler and its settings, and ensure close() is invoked
        # when the spider_closed signal fires.
        self.crawler = crawler
        self.settings = crawler.settings
        crawler.signals.connect(self.close, signals.spider_closed)

    def start_requests(self):
        """Yield the initial :class:`Request` objects for the crawl, one per
        URL in ``start_urls``.

        Raises :exc:`AttributeError` with a helpful hint when the common
        ``start_url`` (missing 's') typo is detected.
        """
        cls = self.__class__
        if not self.start_urls and hasattr(self, 'start_url'):
            raise AttributeError(
                "Crawling could not start: 'start_urls' not found "
                "or empty (but found 'start_url' attribute instead, "
                "did you miss an 's'?)")
        if method_is_overridden(cls, Spider, 'make_requests_from_url'):
            # Legacy path: honor an overridden make_requests_from_url, but
            # warn with the proper deprecation category and point the
            # warning at the caller's code (stacklevel=2).
            warnings.warn(
                "Spider.make_requests_from_url method is deprecated; it "
                "won't be called in future Scrapy releases. Please "
                "override Spider.start_requests method instead "
                f"(see {cls.__module__}.{cls.__name__}).",
                DeprecationWarning,
                stacklevel=2,
            )
            for url in self.start_urls:
                yield self.make_requests_from_url(url)
        else:
            for url in self.start_urls:
                yield Request(url, dont_filter=True)

    def make_requests_from_url(self, url):
        """ This method is deprecated. """
        warnings.warn(
            "Spider.make_requests_from_url method is deprecated: "
            "it will be removed and not be called by the default "
            "Spider.start_requests method in future Scrapy releases. "
            "Please override Spider.start_requests method instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return Request(url, dont_filter=True)

    def _parse(self, response, **kwargs):
        # Internal entry point used by the engine; dispatches to parse().
        return self.parse(response, **kwargs)

    def parse(self, response, **kwargs):
        """Default callback for downloaded responses; subclasses must
        override this (or point their requests at another callback)."""
        raise NotImplementedError(f'{self.__class__.__name__}.parse callback is not defined')

    @classmethod
    def update_settings(cls, settings):
        """Merge ``custom_settings`` into ``settings`` at 'spider' priority."""
        settings.setdict(cls.custom_settings or {}, priority='spider')

    @classmethod
    def handles_request(cls, request):
        """Return True if this spider is meant to handle ``request``'s URL."""
        return url_is_from_spider(request.url, cls)

    @staticmethod
    def close(spider, reason):
        """Signal handler for spider_closed: call ``spider.closed(reason)``
        if the spider defines such a callable."""
        closed = getattr(spider, 'closed', None)
        if callable(closed):
            return closed(reason)

    def __str__(self):
        return f"<{type(self).__name__} {self.name!r} at 0x{id(self):0x}>"

    __repr__ = __str__
113
114
115# Top-level imports
116from scrapy.spiders.crawl import CrawlSpider, Rule
117from scrapy.spiders.feed import XMLFeedSpider, CSVFeedSpider
118from scrapy.spiders.sitemap import SitemapSpider
119