from scrapy.spiders import Spider
from scrapy.utils.spider import iterate_spider_output


class InitSpider(Spider):
    """Spider base class that runs an initialization step before crawling.

    ``start_requests()`` sets aside the requests produced by the parent
    class and emits whatever ``init_request()`` returns instead. The
    set-aside requests are handed back by ``initialized()``.
    """

    def start_requests(self):
        """Stash the parent's start requests and emit the init output.

        Returns the result of passing ``self.init_request()`` through
        ``iterate_spider_output``.
        """
        # Stash first: the default init_request() calls initialized()
        # immediately, and initialized() pops this attribute.
        self._postinit_reqs = super().start_requests()
        init_output = self.init_request()
        return iterate_spider_output(init_output)

    def initialized(self, response=None):
        """Return the requests stashed by ``start_requests()``.

        Use this method as the callback of the final initialization
        request (see ``init_request()``). ``response`` is accepted so the
        method can serve as a callback; it is not used here.

        The stashed value is removed from the instance as it is returned,
        so a ``KeyError`` is raised if ``_postinit_reqs`` is not present in
        the instance ``__dict__`` (for example on a second call).
        """
        deferred_requests = self.__dict__.pop('_postinit_reqs')
        return deferred_requests

    def init_request(self):
        """Return the request that initializes the spider.

        Override this to return one initialization request whose callback
        is ``self.initialized``; once ``self.initialized`` is called the
        spider is considered initialized. If initialization takes several
        requests, chain them through other callbacks. The only requirement
        is that the callback of the last one is ``self.initialized``.

        The default implementation calls ``self.initialized()`` directly,
        which means no initialization is needed.
        """
        return self.initialized()