1# -*- coding: utf-8 -*-
2# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
3# Copyright (C) 2012-2014 Bastian Kleineidam
4# Copyright (C) 2015-2020 Tobias Gruetzmacher
5# Copyright (C) 2019-2020 Daniel Ring
6
7from __future__ import absolute_import, division, print_function
8
9from re import compile, escape
10
11from ..scraper import _BasicScraper, _ParserScraper
12from ..helpers import indirectStarter, bounceStarter, xpath_class
13from ..util import tagre
14from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn, _WPWebcomic
15
16
class Damonk(_BasicScraper):
    # Strip pages live below <url>d/ and are addressed by a date in
    # yyyymmdd form.
    help = 'Index format: yyyymmdd'
    url = 'http://www.damonk.com/'
    stripUrl = '{}d/%s.html'.format(url)
    firstStripUrl = stripUrl % '20060522'
    # Captures the site-relative image path below /comics/.
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    # Pattern for the link to a /d/<digits>.html page, immediately followed
    # by the pattern for the "previous_day.gif" button image.
    prevSearch = compile(''.join((
        tagre("a", "href", r'(/d/\d+\.html)'),
        tagre("img", "src", r'/images/previous_day\.gif'),
    )))
25
26
class DangerouslyChloe(_ComicControlScraper):
    # Only the site root and the first strip are set here; no search
    # expressions are overridden in this class.
    url = 'http://www.dangerouslychloe.com/'
    firstStripUrl = '{}strips-dc/Chapter_1_-_That_damned_girl'.format(url)
30
31
class DarkWhite(_WordPressScraper):
    # Strip pages are addressed by slug below <url>comic/.
    url = 'https://www.darkwhitecomic.com/'
    stripUrl = '{}comic/%s/'.format(url)
    firstStripUrl = stripUrl % 'chapter-1-sleep'
36
37
class DarthsAndDroids(_BasicScraper):
    # Episode pages are addressed by a zero-padded four-digit number.
    url = 'http://www.darthsanddroids.net/'
    stripUrl = url + 'episodes/%s.html'
    firstStripUrl = stripUrl % '0001'
    # Link to a four-digit episode page, immediately followed by the literal
    # text "<PREVIOUS". The dot before "html" is escaped so it only matches
    # a real "." instead of any character.
    prevSearch = compile(tagre("a", "href", r'(/episodes/\d\d\d\d\.html)') +
                         '<PREVIOUS')
    # Captures the site-relative path of the episode image.
    imageSearch = compile(tagre("img", "src", r'(/comics/darths\d\d\d\d\.jpg)'))
45
46
class DasLebenIstKeinPonyhof(_WPNaviIn):
    # Comic language is German; only the site root and first strip are
    # configured in this class.
    lang = 'de'
    url = 'http://sarahburrini.com/wordpress/'
    firstStripUrl = '{}comic/mein-erster-webcomic/'.format(url)
51
52
class DeadWinter(_BasicScraper):
    # Pages are addressed by number below <url>page/.
    help = 'Index format: number'
    url = 'http://deadwinter.cc/'
    stripUrl = '{}page/%s'.format(url)
    firstStripUrl = stripUrl % '1'
    # Image path below /static/page/strip/; tagre is told to use a single
    # quote here and the capture stops at the next single quote.
    imageSearch = compile(
        tagre("img", "src", r"(/static/page/strip/\d+[^']+)", quote="'"))
    # Link to /page/<number>, immediately followed by the text "Previous".
    prevSearch = compile(''.join((
        tagre("a", "href", r'(/page/\d+)'),
        "Previous",
    )))
60
61
class Deathbulge(_BasicScraper):
    # The start URL points at the site's /api/comics endpoint rather than
    # an HTML page.
    url = 'http://www.deathbulge.com/api/comics'
    firstStripUrl = '{}/1'.format(url)
    # Captures the numeric value that follows "previous": in the response.
    prevSearch = compile(r'"previous":(\d+),')
    # Captures a .jpg path below /images/comics/.
    imageSearch = compile(r"(/images/comics/[^\.]+\.jpg)")

    def getPrevUrl(self, url, data):
        # data[1] is compared against the start URL; when they are equal a
        # trailing slash is appended before delegating to the base class.
        # NOTE(review): presumably so the captured id resolves to
        # <url>/<id> -- confirm against _BasicScraper.getPrevUrl.
        pageUrl = data[1]
        if pageUrl == self.url:
            data = (data[0], pageUrl + '/')
        return _BasicScraper.getPrevUrl(self, url, data)
72
73
class DeepFried(_BasicScraper):
    help = 'Index format: none'
    url = 'http://www.whatisdeepfried.com/'
    # Regex-escaped copy of the base URL, embedded in the patterns below.
    rurl = escape(url)
    stripUrl = '{}%s/'.format(url)
    firstStripUrl = stripUrl % '2001/09/16/new-world-out-of-order'
    # Absolute image URL below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", '(' + rurl + r'comics/[^"]+)'))
    # Absolute link on this site; "prev" is passed as tagre's ``after``
    # argument.
    prevSearch = compile(
        tagre("a", "href", '(' + rurl + r'[^"]+)', after="prev"))
82
83
class DeerMe(_ParserScraper):
    url = 'http://deerme.net/'
    stripUrl = '{}comics/%s'.format(url)
    firstStripUrl = stripUrl % '1'
    starter = bounceStarter
    # Navigation links are identified by their rel attribute.
    nextSearch = '//a[@rel="next"]'
    prevSearch = '//a[@rel="prev"]'
    # Two alternative image expressions, selected by element id.
    imageSearch = ('//img[@id="comicimage"]', '//img[@id="latestcomicimage"]')

    def namer(self, imageUrl, pageUrl):
        # Name the file after the last path segment of the page URL and
        # keep the extension of the image URL.
        pageName = pageUrl.rpartition('/')[2]
        extension = imageUrl.rpartition('.')[2]
        return '.'.join((pageName, extension))
95
96
class Delve(_WordPressScraper):
    adult = True
    url = 'https://thisis.delvecomic.com/NewWP/'
    stripUrl = '{}comic/%s/'.format(url)
    firstStripUrl = stripUrl % 'in-too-deep'
    # Position at which namer() expects a hyphen in "episode..." filenames;
    # starts at the length of an "episode" prefix with a three-digit number.
    maxLen = len('episode999')

    def namer(self, imageUrl, pageUrl):
        # Fix inconsistent filenames: drop the query string and make sure
        # "episode" names carry a hyphen at index maxLen.
        basename = imageUrl.rpartition('/')[2]
        filename = basename.rsplit('?', 1)[0]
        # Each call made for one of these two pages shortens the expected
        # prefix by one character and stores it on the instance.
        # NOTE(review): presumably the points where the episode number
        # loses a digit while walking backwards -- confirm.
        shrinkPages = (self.stripUrl % 'engagement',
                       self.stripUrl % 'losing-it')
        if pageUrl in shrinkPages:
            self.maxLen -= 1
        cut = self.maxLen
        if 'episode' not in filename:
            return filename
        if len(filename) - len('.jpg') <= cut:
            return filename
        if filename[cut] == '-':
            return filename
        return '-'.join((filename[:cut], filename[cut:]))
115
116
class DemolitionSquad(_ParserScraper):
    # German-language comic; strips are selected through the
    # "comicbeitrag" query parameter.
    lang = 'de'
    help = 'Index format: number'
    url = 'http://www.demolitionsquad.de/'
    stripUrl = '{}?comicbeitrag=%s'.format(url)
    firstStripUrl = stripUrl % '181'
    # The previous link is the parent element of the image named "zuruck".
    prevSearch = '//img[@name="zuruck"]/..'
    imageSearch = '//img[contains(@src,"uploads/pics/")]'
125
126
class DerTodUndDasMaedchen(_ParserScraper):
    # German-language comic, read from a web.archive.org snapshot of the
    # original site.
    lang = 'de'
    help = 'Index format: nn_nn_nn'
    url = ('https://web.archive.org/web/20180106180134/' +
           'http://www.cartoontomb.de/deutsch/tod2.php')
    stripUrl = '{}?bild=%s.jpg'.format(url)
    firstStripUrl = stripUrl % '00_01_01'
    # The previous link is the anchor whose text is exactly "zurück".
    prevSearch = u'//a[text()="zur\u00FCck"]'
    imageSearch = '//img[contains(@src, "images/tod/teil2")]'
136
137
class DesertFox(_WPWebcomic):
    url = 'https://www.desertfoxcomics.net/'
    stripUrl = '{}desertfox/comic/%s/'.format(url)
    firstStripUrl = stripUrl % 'origins-1'

    def namer(self, imageUrl, pageUrl):
        # Fix inconsistent filenames: expand "Pg" to "Page" and drop the
        # "Desert-Fox" marker from the image's file name.
        name = imageUrl.rpartition('/')[2]
        for old, new in (('Pg', 'Page'), ('Desert-Fox', '')):
            name = name.replace(old, new)
        if 'origins' not in pageUrl:
            return name
        # Pages whose URL contains "origins" get an extra "0-" after
        # "Page-".
        return name.replace('Page-', 'Page-0-')
150
151
class DieFruehreifen(_BasicScraper):
    # German-language comic; strips are selected through the "id" query
    # parameter of index.php.
    url = 'http://www.die-fruehreifen.de/index.php'
    stripUrl = url + '?id=%s&order=DESC'
    firstStripUrl = stripUrl % '1'
    # Image below .../strips/ with either capitalisation of "Frueh"/"Strip"
    # and an optional underscore between them. The dot before "jpg" is
    # escaped so it only matches a real "." instead of any character.
    imageSearch = compile(tagre("img", "src", r'([^"]*/strips/[Ff]rueh_?[Ss]trip_\d+\.jpg)'))
    # Link to another index.php?id=... page, immediately followed by the
    # image whose id is "naechster".
    prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+&order=DESC)") +
                         tagre("img", "id", r"naechster"))
    help = 'Index format: n (unpadded)'
    lang = 'de'
161
162
class DieselSweeties(_ParserScraper):
    help = 'Index format: n (unpadded)'
    url = 'http://dieselsweeties.com/'
    stripUrl = '{}ics/%s'.format(url)
    firstStripUrl = stripUrl % '1'
    starter = indirectStarter
    imageSearch = '//img[@class="xomic"]'
    # The same expression serves as both the previous-strip search and
    # the latest-strip search.
    prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]'
    latestSearch = prevSearch
172
173
class DieselSweetiesOld(_ParserScraper):
    # Archive section of the site; flagged as end-of-life.
    endOfLife = True
    help = 'Index format: n (unpadded)'
    url = 'http://dieselsweeties.com/archive/'
    stripUrl = '{}%s'.format(url)
    firstStripUrl = stripUrl % '1'
    prevSearch = '//a[contains(@title, "previous")]'
    imageSearch = '//img[contains(@src, "/hstrips/")]'

    def starter(self):
        # Begin at archive page 4000.
        startIndex = '4000'
        return self.stripUrl % startIndex

    def namer(self, image_url, page_url):
        # The image's file name up to its first dot is parsed as an integer
        # and written back as "sw" plus that number, zero-padded to at
        # least two digits.
        basename = image_url.rpartition('/')[2]
        number = int(basename.partition('.')[0])
        return 'sw{:02d}'.format(number)
189
190
class Dilbert(_ParserScraper):
    # Strip pages are addressed by date below <url>strip/.
    help = 'Index format: yyyy-mm-dd'
    url = 'https://dilbert.com/'
    stripUrl = '{}strip/%s'.format(url)
    firstStripUrl = stripUrl % '1989-04-16'
    starter = indirectStarter
    latestSearch = '//a[@class="img-comic-link"]'
    imageSearch = '//img[%s]' % xpath_class('img-comic')
    prevSearch = '//div[%s]/a' % xpath_class('nav-left')

    def namer(self, image_url, page_url):
        # Use the last path segment of the page URL as the file name; the
        # image URL is not consulted.
        segments = page_url.rsplit("/", 1)
        return segments[1]
204
205
class DocRat(_WPWebcomic):
    url = 'https://www.docrat.com.au/'
    stripUrl = '{}comic/%s/'.format(url)
    firstStripUrl = stripUrl % 'begin-with-eye-contact'

    def namer(self, imageUrl, pageUrl):
        # Fix inconsistent filenames: strip the query string, then swap the
        # two date-based names for DR-numbered ones.
        name = imageUrl.rpartition('/')[2].rsplit('?', 1)[0]
        renames = (
            ('2006-08-01', 'DR0027'),
            ('2006-07-31', 'DR0026'),
        )
        for dated, numbered in renames:
            name = name.replace(dated, numbered)
        return name
217
218
class DoemainOfOurOwn(_ParserScraper):
    endOfLife = True
    help = 'Index format: yyyy-mm-dd'
    url = 'http://www.doemain.com/'
    stripUrl = '{}html/%s.html'.format(url)
    firstStripUrl = stripUrl % '1999/1999-04-24'
    # The previous link is the anchor wrapping the "Previous Strip" image.
    prevSearch = '//a[img[@alt="Previous Strip"]]'
    imageSearch = '//img[contains(@src, "strips/")]'

    def namer(self, imageUrl, pageUrl):
        # Fix date formatting: a file name starting with six digits is read
        # as mmddyy and rewritten to yyyy-mm-dd; anything else is returned
        # unchanged.
        name = imageUrl.rpartition('/')[2]
        stamp, rest = name[:6], name[6:]
        if not rest or not stamp.isdigit():
            return name
        # Two-digit years starting with "9" belong to the 1900s, all others
        # to the 2000s.
        century = '19' if stamp[4] == '9' else '20'
        year = century + stamp[4:]
        return '-'.join((year, stamp[:2], stamp[2:4])) + rest
237
238
class DoghouseDiaries(_ParserScraper):
    help = 'Index format: number'
    url = 'http://thedoghousediaries.com/'
    stripUrl = '{}%s'.format(url)
    firstStripUrl = stripUrl % '34'
    starter = bounceStarter
    nextSearch = '//a[@id="nextlink"]'
    prevSearch = '//a[@id="previouslink"]'
    imageSearch = '//img[@class="imgcomic"]'
    # The strip text is the title attribute of the comic image.
    textSearch = '/'.join((imageSearch, '@title'))

    def namer(self, imageUrl, pageUrl):
        # Name the file after the last path segment of the page URL and
        # keep the extension of the image URL.
        pageName = pageUrl.rpartition('/')[2]
        extension = imageUrl.rpartition('.')[2]
        return '.'.join((pageName, extension))
252
253
class DominicDeegan(_ParserScraper):
    # Strip pages are addressed as <number>-<date> below <url>comic/.
    help = 'Index format: ####-yyyymmdd'
    url = 'https://www.dominic-deegan.com/'
    stripUrl = '{}comic/%s/'.format(url)
    firstStripUrl = stripUrl % '0001-20020521'
    prevSearch = '//a[@title="Prev"]'
    imageSearch = '//img[contains(@class, "wp-post-image")]'
261
262
class DorkTower(_ParserScraper):
    url = 'http://www.dorktower.com/'
    firstStripUrl = '{}1997/01/01/shadis-magazine-strip-1/'.format(url)
    # Previous link: an anchor matched via xpath_class('btn') whose text is
    # exactly "Previous".
    prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')
    # Comic image: an <img> directly inside a link, below the div matched
    # via xpath_class('entry-content').
    imageSearch = '//div[%s]//a/img' % xpath_class('entry-content')
268
269
class DoomsdayMyDear(_ParserScraper):
    # Only the start URL and the two search expressions are configured.
    url = 'http://doomsdaymydear.com/'
    prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
    imageSearch = '//img[%s]' % xpath_class('attachment-full')
274
275
class Dracula(_BasicScraper):
    # Strips are selected through the "comicID" query parameter.
    help = 'Index format: nnn'
    url = 'http://draculacomic.net/'
    stripUrl = '{}comic.php?comicID=%s'.format(url)
    firstStripUrl = stripUrl % '0'
    # Captures the href of the "archivelink" anchor labelled "&laquo; Prev".
    prevSearch = compile(
        r'&nbsp;<a class="archivelink" href="(.+?)">&laquo; Prev</a>')
    # Captures the relative image path starting with comics/.
    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
283
284
class DreamKeepers(_ParserScraper):
    # Pages are selected through the "pg" query parameter of GNSaga.php.
    help = 'Index format: n'
    url = 'http://www.dreamkeeperscomic.com/GNSaga.php'
    stripUrl = '{}?pg=%s'.format(url)
    firstStripUrl = stripUrl % '1'
    prevSearch = '//a[@id="prev"]'
    imageSearch = '//img[contains(@src, "GNSagapages")]'
292
293
class DreamKeepersPrelude(_ParserScraper):
    # Pages are selected through the "pg" query parameter of Prelude.php.
    # NOTE(review): help says "n" but the first strip uses the padded index
    # "0001" -- confirm which form the site expects.
    help = 'Index format: n'
    url = 'http://www.dreamkeeperscomic.com/Prelude.php'
    stripUrl = '{}?pg=%s'.format(url)
    firstStripUrl = stripUrl % '0001'
    prevSearch = '//a[@id="prev"]'
    imageSearch = '//div[@class="Preludecomic"]/table//a/img'
301
302
class DresdenCodak(_ParserScraper):
    url = 'http://dresdencodak.com/'
    startUrl = '{}cat/comic/'.format(url)
    firstStripUrl = '{}2007/02/08/pom/'.format(url)
    starter = indirectStarter
    latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
    # The previous link is the anchor wrapping an image whose src contains
    # "prev".
    prevSearch = '//a[img[contains(@src, "prev")]]'
    imageSearch = '//section[%s]//img[%s]' % (
        xpath_class('entry-content'),
        xpath_class('aligncenter'),
    )

    def shouldSkipUrl(self, url, data):
        # Blog and comic are mixed: skip every page on which the image
        # search finds nothing.
        comicImages = data.xpath(self.imageSearch)
        return not comicImages
316
317
class DrFun(_ParserScraper):
    # Finished comic read from a web.archive.org snapshot. Pages are
    # navigated through their "Previous Week" link and may yield several
    # images each.
    endOfLife = True
    multipleImagesPerStrip = True
    help = 'Index format: nnnnn'
    baseUrl = ('https://web.archive.org/web/20180726145737/' +
               'http://www.ibiblio.org/Dave/')
    stripUrl = '{}ar%s.htm'.format(baseUrl)
    # The start URL is the archive page with index 00502.
    url = stripUrl % '00502'
    firstStripUrl = stripUrl % '00001'
    prevSearch = '//a[contains(text(), "Previous Week")]'
    # Note that this selects links (<a>), not <img> elements.
    imageSearch = '//a[contains(@href, "Dr-Fun/df")]'
329
330
class Drive(_BasicScraper):
    help = 'Index format: yymmdd'
    url = 'http://www.drivecomic.com/'
    # Regex-escaped copy of the base URL, embedded in prevSearch below.
    rurl = escape(url)
    stripUrl = '{}archive/%s.html'.format(url)
    firstStripUrl = stripUrl % '090815'
    # Absolute link to an archive page, immediately followed by the text
    # "Previous".
    prevSearch = compile(''.join((
        tagre("a", "href", '(' + rurl + r'archive/\d+\.html)'),
        "Previous",
    )))
    # Images are served from the cdn.drivecomic.com host.
    imageSearch = compile(
        tagre("img", "src", r'(http://cdn\.drivecomic\.com/strips/main/[^"]+)'))
339
340
class DrMcNinja(_ParserScraper):
    help = 'Index format: {episode}p{page}'
    url = 'http://drmcninja.com/'
    stripUrl = '{}archives/comic/%s/'.format(url)
    firstStripUrl = stripUrl % '0p1'
    # The two search expressions below are written as CSS selectors.
    # NOTE(review): presumably ``css = True`` is what makes the base class
    # treat them as CSS instead of XPath -- confirm in _ParserScraper.
    css = True
    prevSearch = 'a.prev'
    imageSearch = 'div#comic img'
349
350
class Drowtales(_BasicScraper):
    help = 'Index format: number'
    baseUrl = 'http://www.drowtales.com/'
    # Regex-escaped copy of the base URL, embedded in imageSearch below.
    rurl = escape(baseUrl)
    url = '{}mainarchive.php'.format(baseUrl)
    stripUrl = '{}?sid=%s'.format(url)
    firstStripUrl = stripUrl % '4192'
    # Relative "?sid=<number>" link; "link_prev_top" is passed as tagre's
    # ``before`` argument.
    prevSearch = compile(
        tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
    # Two alternatives: an <img> below mainarchive/ (with or without the
    # base URL in front), or a CSS background-image ending in center.jpg.
    imageSearch = (
        compile(tagre("img", "src", '((' + rurl + r')?mainarchive/[^"]+)')),
        compile(r'background-image:url\((mainarchive/[^\)]+center\.jpg)'),
    )
363
364
class DumbingOfAge(_BasicScraper):
    help = 'Index format: yyyy/comic/book-num/seriesname/stripname'
    url = 'http://www.dumbingofage.com/'
    # Regex-escaped copy of the base URL, embedded in both patterns below.
    rurl = escape(url)
    stripUrl = '{}%s/'.format(url)
    # Absolute image URL below <url>comics/ whose file name starts with
    # three dash-separated numbers.
    imageSearch = compile(
        tagre("img", "src", '(' + rurl + r'comics/\d+-\d+-\d+[^"]+)'))
    # Absolute link whose path starts with a number; "prev" is passed as
    # tagre's ``after`` argument.
    prevSearch = compile(
        tagre("a", "href", '(' + rurl + r'\d+/[^"]+)', after="prev"))
372