# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring

from __future__ import absolute_import, division, print_function

from re import compile, escape

from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter, xpath_class
from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn, _WPWebcomic

# Each class below configures one comic scraper purely through class
# attributes (URLs plus regex or XPath/CSS search expressions) and, where a
# site needs special handling, a few small method overrides.


class Damonk(_BasicScraper):
    url = 'http://www.damonk.com/'
    stripUrl = url + 'd/%s.html'
    firstStripUrl = stripUrl % '20060522'
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
                         tagre("img", "src", r'/images/previous_day\.gif'))
    help = 'Index format: yyyymmdd'


class DangerouslyChloe(_ComicControlScraper):
    url = 'http://www.dangerouslychloe.com/'
    firstStripUrl = url + 'strips-dc/Chapter_1_-_That_damned_girl'


class DarkWhite(_WordPressScraper):
    url = 'https://www.darkwhitecomic.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'chapter-1-sleep'


class DarthsAndDroids(_BasicScraper):
    url = 'http://www.darthsanddroids.net/'
    stripUrl = url + 'episodes/%s.html'
    firstStripUrl = stripUrl % '0001'
    # The dot before "html" is escaped so it only matches a literal '.'.
    prevSearch = compile(tagre("a", "href", r'(/episodes/\d\d\d\d\.html)') +
                         '<PREVIOUS')
    imageSearch = compile(tagre("img", "src", r'(/comics/darths\d\d\d\d\.jpg)'))


class DasLebenIstKeinPonyhof(_WPNaviIn):
    url = 'http://sarahburrini.com/wordpress/'
    firstStripUrl = url + 'comic/mein-erster-webcomic/'
    lang = 'de'


class DeadWinter(_BasicScraper):
    url = 'http://deadwinter.cc/'
    stripUrl = url + 'page/%s'
    firstStripUrl = stripUrl % '1'
    # This pattern is built with single-quoted attribute values (quote="'").
    imageSearch = compile(tagre("img", "src", r"(/static/page/strip/\d+[^']+)", quote="'"))
    prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "Previous")
    help = 'Index format: number'


class Deathbulge(_BasicScraper):
    # The scraper reads the comics API endpoint rather than an HTML page.
    url = 'http://www.deathbulge.com/api/comics'
    imageSearch = compile(r"(/images/comics/[^\.]+\.jpg)")
    prevSearch = compile(r'"previous":(\d+),')
    firstStripUrl = url + '/1'

    def getPrevUrl(self, url, data):
        """Return the previous strip URL via _BasicScraper.getPrevUrl.

        When the second element of ``data`` equals ``self.url``, a trailing
        '/' is appended to it before delegating.
        """
        # NOTE(review): presumably this makes the bare numeric "previous" id
        # resolve below .../api/comics/ instead of replacing the last path
        # segment - confirm against _BasicScraper.getPrevUrl.
        if data[1] == self.url:
            data = (data[0], data[1] + '/')
        return _BasicScraper.getPrevUrl(self, url, data)


class DeepFried(_BasicScraper):
    url = 'http://www.whatisdeepfried.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2001/09/16/new-world-out-of-order'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: none'


class DeerMe(_ParserScraper):
    url = 'http://deerme.net/'
    stripUrl = url + 'comics/%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = ('//img[@id="comicimage"]', '//img[@id="latestcomicimage"]')
    prevSearch = '//a[@rel="prev"]'
    nextSearch = '//a[@rel="next"]'
    starter = bounceStarter

    def namer(self, imageUrl, pageUrl):
        """Name the image after the page.

        Returns the last path segment of ``pageUrl`` followed by '.' and the
        text after the last '.' in ``imageUrl``.
        """
        return pageUrl.rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1]


class Delve(_WordPressScraper):
    url = 'https://thisis.delvecomic.com/NewWP/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'in-too-deep'
    adult = True
    # Length of the "episodeNNN" filename prefix; namer() may shrink it.
    maxLen = len('episode999')

    def namer(self, imageUrl, pageUrl):
        """Return a normalised file name for the image at ``imageUrl``.

        The name is the last path segment of ``imageUrl`` without anything
        following its last '?'. For names containing 'episode' that are
        longer than ``self.maxLen`` (ignoring a four-character extension)
        and have no '-' at index ``self.maxLen``, a '-' is inserted there.
        """
        # Fix inconsistent filenames
        filename = imageUrl.rsplit('/', 1)[-1].rsplit('?', 1)[0]
        # Reaching either of these two pages shortens the prefix by one; the
        # assignment stores the new value on the instance, so it persists for
        # every later call.
        # NOTE(review): presumably the episode number loses a digit at these
        # strips while walking backwards through the archive - confirm.
        if (pageUrl == self.stripUrl % 'engagement' or
                pageUrl == self.stripUrl % 'losing-it'):
            self.maxLen = self.maxLen - 1
        if ('episode' in filename and
                len(filename) - len('.jpg') > self.maxLen and
                filename[self.maxLen] != '-'):
            filename = filename[:self.maxLen] + '-' + filename[self.maxLen:]
        return filename


class DemolitionSquad(_ParserScraper):
    url = 'http://www.demolitionsquad.de/'
    stripUrl = url + '?comicbeitrag=%s'
    firstStripUrl = stripUrl % '181'
    imageSearch = '//img[contains(@src,"uploads/pics/")]'
    # Selects the parent element of the image named "zuruck".
    prevSearch = '//img[@name="zuruck"]/..'
    help = 'Index format: number'
    lang = 'de'


class DerTodUndDasMaedchen(_ParserScraper):
    # The comic is read from a fixed web.archive.org snapshot.
    url = ('https://web.archive.org/web/20180106180134/'
           'http://www.cartoontomb.de/deutsch/tod2.php')
    stripUrl = url + '?bild=%s.jpg'
    firstStripUrl = stripUrl % '00_01_01'
    imageSearch = '//img[contains(@src, "images/tod/teil2")]'
    prevSearch = u'//a[text()="zur\u00FCck"]'
    help = 'Index format: nn_nn_nn'
    lang = 'de'


class DesertFox(_WPWebcomic):
    url = 'https://www.desertfoxcomics.net/'
    stripUrl = url + 'desertfox/comic/%s/'
    firstStripUrl = stripUrl % 'origins-1'

    def namer(self, imageUrl, pageUrl):
        """Return a normalised file name for the image at ``imageUrl``.

        Starting from the last path segment of ``imageUrl``, 'Pg' becomes
        'Page' and 'Desert-Fox' is removed. When ``pageUrl`` contains
        'origins', 'Page-' is additionally rewritten to 'Page-0-'.
        """
        # Fix inconsistent filenames
        filename = imageUrl.rsplit('/', 1)[-1]
        filename = filename.replace('Pg', 'Page').replace('Desert-Fox', '')
        if 'origins' in pageUrl:
            filename = filename.replace('Page-', 'Page-0-')
        return filename


class DieFruehreifen(_BasicScraper):
    url = 'http://www.die-fruehreifen.de/index.php'
    stripUrl = url + '?id=%s&order=DESC'
    firstStripUrl = stripUrl % '1'
    # The dot before "jpg" is escaped so it only matches a literal '.'.
    imageSearch = compile(tagre("img", "src", r'([^"]*/strips/[Ff]rueh_?[Ss]trip_\d+\.jpg)'))
    prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+&order=DESC)") +
                         tagre("img", "id", r"naechster"))
    help = 'Index format: n (unpadded)'
    lang = 'de'


class DieselSweeties(_ParserScraper):
    url = 'http://dieselsweeties.com/'
    stripUrl = url + 'ics/%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//img[@class="xomic"]'
    prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]'
    # The same expression is used to find the latest strip and the previous one.
    latestSearch = prevSearch
    starter = indirectStarter
    help = 'Index format: n (unpadded)'


class DieselSweetiesOld(_ParserScraper):
    url = 'http://dieselsweeties.com/archive/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//img[contains(@src, "/hstrips/")]'
    prevSearch = '//a[contains(@title, "previous")]'
    help = 'Index format: n (unpadded)'
    endOfLife = True

    def starter(self):
        """Return the fixed start URL: the strip page with index '4000'."""
        return self.stripUrl % '4000'

    def namer(self, image_url, page_url):
        """Return 'sw' plus the image number, zero-padded to two digits.

        The number is the part of the last path segment of ``image_url``
        that precedes its first '.', parsed as an integer.
        """
        index = int(image_url.split('/')[-1].split('.')[0])
        return 'sw%02d' % index


class Dilbert(_ParserScraper):
    url = 'https://dilbert.com/'
    stripUrl = url + 'strip/%s'
    firstStripUrl = stripUrl % '1989-04-16'
    starter = indirectStarter
    prevSearch = '//div[%s]/a' % xpath_class('nav-left')
    imageSearch = '//img[%s]' % xpath_class('img-comic')
    latestSearch = '//a[@class="img-comic-link"]'
    help = 'Index format: yyyy-mm-dd'

    def namer(self, image_url, page_url):
        """Return the text after the last '/' in ``page_url``."""
        return page_url.rsplit("/", 1)[1]


class DocRat(_WPWebcomic):
    url = 'https://www.docrat.com.au/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'begin-with-eye-contact'

    def namer(self, imageUrl, pageUrl):
        """Return a normalised file name for the image at ``imageUrl``.

        The name is the last path segment of ``imageUrl`` without anything
        following its last '?'. Two date-based names are rewritten to
        'DR0027' and 'DR0026'.
        """
        # Fix inconsistent filenames
        filename = imageUrl.rsplit('/', 1)[-1].rsplit('?', 1)[0]
        filename = filename.replace('2006-08-01', 'DR0027')
        filename = filename.replace('2006-07-31', 'DR0026')
        return filename


class DoemainOfOurOwn(_ParserScraper):
    url = 'http://www.doemain.com/'
    stripUrl = url + 'html/%s.html'
    firstStripUrl = stripUrl % '1999/1999-04-24'
    imageSearch = '//img[contains(@src, "strips/")]'
    prevSearch = '//a[img[@alt="Previous Strip"]]'
    endOfLife = True
    help = 'Index format: yyyy-mm-dd'

    def namer(self, imageUrl, pageUrl):
        """Return the image file name with its date prefix reordered.

        When the last path segment of ``imageUrl`` is longer than six
        characters and starts with six digits, those digits are read as
        MMDDYY and the name becomes 'YYYY-MM-DD' plus the remainder.
        Otherwise the segment is returned unchanged.
        """
        # Fix date formatting
        filename = imageUrl.rsplit('/', 1)[-1]
        if len(filename) > 6 and filename[0:6].isdigit():
            month = filename[0:2]
            day = filename[2:4]
            # Two-digit years starting with '9' belong to the 1900s, all
            # others to the 2000s.
            year = ('19' if filename[4] == '9' else '20') + filename[4:6]
            filename = '%s-%s-%s%s' % (year, month, day, filename[6:])
        return filename


class DoghouseDiaries(_ParserScraper):
    url = 'http://thedoghousediaries.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '34'
    imageSearch = '//img[@class="imgcomic"]'
    # The strip text is read from the title attribute of the comic image.
    textSearch = imageSearch + '/@title'
    prevSearch = '//a[@id="previouslink"]'
    nextSearch = '//a[@id="nextlink"]'
    starter = bounceStarter
    help = 'Index format: number'

    def namer(self, imageUrl, pageUrl):
        """Name the image after the page.

        Returns the last path segment of ``pageUrl`` followed by '.' and the
        text after the last '.' in ``imageUrl``.
        """
        return pageUrl.rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1]


class DominicDeegan(_ParserScraper):
    url = 'https://www.dominic-deegan.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '0001-20020521'
    imageSearch = '//img[contains(@class, "wp-post-image")]'
    prevSearch = '//a[@title="Prev"]'
    help = 'Index format: ####-yyyymmdd'


class DorkTower(_ParserScraper):
    url = 'http://www.dorktower.com/'
    firstStripUrl = url + '1997/01/01/shadis-magazine-strip-1/'
    imageSearch = '//div[%s]//a/img' % xpath_class('entry-content')
    prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')


class DoomsdayMyDear(_ParserScraper):
    url = 'http://doomsdaymydear.com/'
    imageSearch = '//img[{}]'.format(xpath_class('attachment-full'))
    prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))


class Dracula(_BasicScraper):
    url = 'http://draculacomic.net/'
    stripUrl = url + 'comic.php?comicID=%s'
    firstStripUrl = stripUrl % '0'
    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
    # NOTE(review): confirm that the whitespace and the literal '«' in this
    # pattern match what the site actually serves (they may be HTML entities
    # in the page source).
    prevSearch = compile(r' <a class="archivelink" href="(.+?)">« Prev</a>')
    help = 'Index format: nnn'


class DreamKeepers(_ParserScraper):
    url = 'http://www.dreamkeeperscomic.com/GNSaga.php'
    stripUrl = url + '?pg=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//img[contains(@src, "GNSagapages")]'
    prevSearch = '//a[@id="prev"]'
    help = 'Index format: n'


class DreamKeepersPrelude(_ParserScraper):
    url = 'http://www.dreamkeeperscomic.com/Prelude.php'
    stripUrl = url + '?pg=%s'
    firstStripUrl = stripUrl % '0001'
    imageSearch = '//div[@class="Preludecomic"]/table//a/img'
    prevSearch = '//a[@id="prev"]'
    help = 'Index format: n'


class DresdenCodak(_ParserScraper):
    url = 'http://dresdencodak.com/'
    startUrl = url + 'cat/comic/'
    firstStripUrl = url + '2007/02/08/pom/'
    imageSearch = '//section[%s]//img[%s]' % (
        xpath_class('entry-content'), xpath_class('aligncenter'))
    prevSearch = '//a[img[contains(@src, "prev")]]'
    latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
    starter = indirectStarter

    # Blog and comic are mixed...
    def shouldSkipUrl(self, url, data):
        """Return True when ``imageSearch`` finds nothing in ``data``."""
        return not data.xpath(self.imageSearch)


class DrFun(_ParserScraper):
    # All URLs point into a fixed web.archive.org snapshot.
    baseUrl = ('https://web.archive.org/web/20180726145737/'
               'http://www.ibiblio.org/Dave/')
    stripUrl = baseUrl + 'ar%s.htm'
    url = stripUrl % '00502'
    firstStripUrl = stripUrl % '00001'
    # Matches links (not <img> tags) whose href contains "Dr-Fun/df".
    imageSearch = '//a[contains(@href, "Dr-Fun/df")]'
    multipleImagesPerStrip = True
    prevSearch = '//a[contains(text(), "Previous Week")]'
    endOfLife = True
    help = 'Index format: nnnnn'


class Drive(_BasicScraper):
    url = 'http://www.drivecomic.com/'
    # Regex-escaped copy of the base URL for embedding in prevSearch.
    rurl = escape(url)
    stripUrl = url + 'archive/%s.html'
    firstStripUrl = stripUrl % '090815'
    imageSearch = compile(tagre("img", "src", r'(http://cdn\.drivecomic\.com/strips/main/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl) + "Previous")
    help = 'Index format: yymmdd'


class DrMcNinja(_ParserScraper):
    url = 'http://drmcninja.com/'
    stripUrl = url + 'archives/comic/%s/'
    firstStripUrl = stripUrl % '0p1'
    # With css = True the search expressions below are CSS selectors.
    css = True
    imageSearch = 'div#comic img'
    prevSearch = 'a.prev'
    help = 'Index format: {episode}p{page}'


class Drowtales(_BasicScraper):
    baseUrl = 'http://www.drowtales.com/'
    # Regex-escaped copy of the base URL for embedding in imageSearch.
    rurl = escape(baseUrl)
    url = baseUrl + 'mainarchive.php'
    stripUrl = url + '?sid=%s'
    firstStripUrl = stripUrl % '4192'
    # Two alternatives: a regular <img> tag (with or without the base URL
    # prefix) or a CSS background-image ending in "center.jpg".
    imageSearch = (
        compile(tagre("img", "src", r'((%s)?mainarchive/[^"]+)' % rurl)),
        compile(r'background-image:url\((mainarchive/[^\)]+center\.jpg)'),
    )
    prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
    help = 'Index format: number'


class DumbingOfAge(_BasicScraper):
    url = 'http://www.dumbingofage.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    prevSearch = compile(tagre("a", "href", r'(%s\d+/[^"]+)' % rurl, after="prev"))
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    help = 'Index format: yyyy/comic/book-num/seriesname/stripname'