# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring

from __future__ import absolute_import, division, print_function

from ..scraper import _ParserScraper
from ..helpers import indirectStarter, xpath_class

# Common base classes for comics with the same structure (same hosting
# software, for example) go here. Since those are shared by many modules,
# please don't use lists of expression, as that makes it hard to track which
# expression is for which comics.


class _WordPressScraper(_ParserScraper):
    """Base XPath expressions shared by the WordPress-style scrapers below.

    Subclasses override individual expressions where their markup differs.
    """

    # Comic image(s) inside the element with id "comic".
    imageSearch = '//div[@id="comic"]//img'
    # Navigation links, matched by CSS class.
    prevSearch = '//a[%s]' % xpath_class('comic-nav-previous')
    nextSearch = '//a[%s]' % xpath_class('comic-nav-next')
    latestSearch = '//a[%s]' % xpath_class('comic-nav-last')


class _WPNavi(_WordPressScraper):
    """_WordPressScraper variant whose previous link has class "navi-prev"."""

    prevSearch = '//a[%s]' % xpath_class('navi-prev')


class _WPNaviIn(_WordPressScraper):
    """_WordPressScraper variant whose previous link has class "navi-prev-in"."""

    prevSearch = '//a[%s]' % xpath_class('navi-prev-in')


class _WPWebcomic(_WordPressScraper):
    """_WordPressScraper variant using the "*-webcomic-*" CSS class names."""

    imageSearch = '//div[{}]//img'.format(xpath_class('webcomic-image'))
    prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
    # Must start with '//' like its siblings: '///' is not a valid XPath
    # location path and cannot be evaluated.
    nextSearch = '//a[{}]'.format(xpath_class('next-webcomic-link'))
    latestSearch = '//a[{}]'.format(xpath_class('last-webcomic-link'))


class _ComicControlScraper(_ParserScraper):
    """Base XPath expressions for pages exposing an img with id "cc-comic".

    Navigation links are located through their ``rel`` attribute.
    """

    imageSearch = '//img[@id="cc-comic"]'
    prevSearch = '//a[@rel="prev"]'
    nextSearch = '//a[@rel="next"]'
    latestSearch = '//a[@rel="last"]'


class _TumblrScraper(_ParserScraper):
    """Base class for Tumblr-style pages (URLs of the form /post/num/name)."""

    starter = indirectStarter

    def namer(self, image_url, page_url):
        """Build an image name from the components of the page URL.

        ``image_url`` is not used. Returns ``"<num>_<name>"`` when the page
        URL has a name component after the post number, otherwise just the
        post number.
        """
        # tumblr URLs: http://host/post/num/name
        # split('/'):  0   1  2    3    4   5
        parts = page_url.split('/')
        if len(parts) > 5:
            return '%s_%s' % (parts[4], parts[5])
        else:
            return parts[4]

    def shouldSkipUrl(self, url, data):
        """Reblogged stuff is iframed.

        Returns the result of querying ``data`` for an iframe inside the
        element with id "post"; callers are presumably expected to treat a
        non-empty result as "skip this page".
        """
        return data.xpath('//div[@id="post"]//iframe')