"""
Extension for collecting core stats like items scraped and start/finish times
"""
from datetime import datetime

from scrapy import signals


class CoreStats:
    """Extension that records basic crawl statistics in a stats collector.

    It writes the following stat keys through ``self.stats``:
    ``start_time``, ``finish_time``, ``elapsed_time_seconds``,
    ``finish_reason``, ``item_scraped_count``, ``response_received_count``,
    ``item_dropped_count`` and ``item_dropped_reasons_count/<ExceptionName>``.
    """

    def __init__(self, stats):
        """Store the stats collector; the start time is unknown until
        ``spider_opened`` runs."""
        self.stats = stats
        self.start_time = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from ``crawler.stats`` and connect its
        handlers to the matching crawler signals.

        Returns the newly created instance.
        """
        o = cls(crawler.stats)
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(o.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(o.item_dropped, signal=signals.item_dropped)
        crawler.signals.connect(o.response_received, signal=signals.response_received)
        return o

    def spider_opened(self, spider):
        """Remember the current UTC time and record it as ``start_time``."""
        # NOTE(review): utcnow() yields a naive datetime and is deprecated
        # since Python 3.12; kept so the stored stat values do not change.
        self.start_time = datetime.utcnow()
        self.stats.set_value('start_time', self.start_time, spider=spider)

    def spider_closed(self, spider, reason):
        """Record ``finish_time``, ``finish_reason`` and, when the start time
        is known, ``elapsed_time_seconds``."""
        finish_time = datetime.utcnow()
        # start_time is still None if spider_opened never ran on this
        # instance. Subtracting None would raise TypeError and the finish
        # stats below would be lost, so only the elapsed-time stat is skipped.
        if self.start_time is not None:
            elapsed_time = finish_time - self.start_time
            elapsed_time_seconds = elapsed_time.total_seconds()
            self.stats.set_value('elapsed_time_seconds', elapsed_time_seconds, spider=spider)
        self.stats.set_value('finish_time', finish_time, spider=spider)
        self.stats.set_value('finish_reason', reason, spider=spider)

    def item_scraped(self, item, spider):
        """Increment ``item_scraped_count``; ``item`` itself is not inspected."""
        self.stats.inc_value('item_scraped_count', spider=spider)

    def response_received(self, spider):
        """Increment ``response_received_count``."""
        self.stats.inc_value('response_received_count', spider=spider)

    def item_dropped(self, item, spider, exception):
        """Increment ``item_dropped_count`` and a per-reason counter keyed by
        the class name of ``exception``."""
        reason = exception.__class__.__name__
        self.stats.inc_value('item_dropped_count', spider=spider)
        self.stats.inc_value(f'item_dropped_reasons_count/{reason}', spider=spider)