1# -*- coding: utf-8 -*- 2# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs 3# Copyright (C) 2012-2014 Bastian Kleineidam 4# Copyright (C) 2015-2020 Tobias Gruetzmacher 5# Copyright (C) 2019-2020 Daniel Ring 6 7from __future__ import absolute_import, division, print_function 8 9from re import compile, escape, MULTILINE 10 11from ..util import tagre 12from ..scraper import _BasicScraper, _ParserScraper 13from ..helpers import regexNamer, bounceStarter, indirectStarter 14from .common import _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic 15 16 17class AbbysAgency(_WordPressScraper): 18 url = 'https://abbysagency.us/' 19 stripUrl = url + 'blog/comic/%s/' 20 firstStripUrl = stripUrl % 'a' 21 22 23class AbstruseGoose(_ParserScraper): 24 url = 'https://abstrusegoose.com/' 25 starter = bounceStarter 26 stripUrl = url + '%s' 27 firstStripUrl = stripUrl % '1' 28 imageSearch = '//img[contains(@src, "/strips/")]' 29 textSearch = imageSearch + '/@title' 30 textOptional = True 31 prevSearch = '//a[contains(text(), "Previous")]' 32 nextSearch = '//a[contains(text(), "Next")]' 33 help = 'Index format: n (unpadded)' 34 35 def namer(self, imageurl, pageurl): 36 index = int(pageurl.rsplit('/', 1)[1]) 37 name = imageurl.rsplit('/', 1)[1] 38 return 'c%03d-%s' % (index, name) 39 40 41class AbsurdNotions(_BasicScraper): 42 baseUrl = 'http://www.absurdnotions.org/' 43 url = baseUrl + 'page129.html' 44 stripUrl = baseUrl + 'page%s.html' 45 firstStripUrl = stripUrl % '1' 46 imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) 47 multipleImagesPerStrip = True 48 prevSearch = compile(tagre('a', 'href', r'([^"]+)') + 49 tagre('img', 'src', r'nprev\.gif')) 50 help = 'Index format: n (unpadded)' 51 52 53class AcademyVale(_BasicScraper): 54 url = 'http://www.imagerie.com/vale/' 55 stripUrl = url + 'avarch.cgi?%s' 56 firstStripUrl = stripUrl % '001' 57 imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) 58 prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + 59 tagre('img', 'src', r'AVNavBack\.gif')) 60 help = 'Index format: nnn' 61 62 63class Achewood(_BasicScraper): 64 url = 'http://www.achewood.com/' 65 stripUrl = url + 'index.php?date=%s' 66 firstStripUrl = stripUrl % '00000000' 67 imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) 68 prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', 69 after="Previous")) 70 help = 'Index format: mmddyyyy' 71 namer = regexNamer(compile(r'date=(\d+)')) 72 73 74class AdventuresOfFifne(_ParserScraper): 75 stripUrl = 'http://fifine.purrsia.com/%s.html' 76 url = stripUrl % 'COMICS' 77 firstStripUrl = stripUrl % 'Fifine01' 78 imageSearch = '//img[contains(@src, "jpg")]' 79 prevSearch = '//a[text()="PREVIOUS"]' 80 multipleImagesPerStrip = True 81 endOfLife = True 82 83 def namer(self, imageUrl, pageUrl): 84 # Prepend chapter number to image filename 85 filename = imageUrl.rsplit('/', 1)[-1] 86 if filename[0] == 'p': 87 filename = filename.replace('p', '1_p') 88 filename = filename.replace('TIL', '2_TIL') 89 filename = filename.replace('NS', '3_NS') 90 filename = filename.replace('LG', '4_LG') 91 filename = filename.replace('WM', '5_WM') 92 return filename 93 94 def getPrevUrl(self, url, data): 95 # Fix broken navigation links 96 if url == self.stripUrl % 'lg06': 97 return self.stripUrl % 'lg05' 98 return super(AdventuresOfFifne, self).getPrevUrl(url, data) 99 100 101class AfterStrife(_WPNavi): 102 baseUrl = 'http://afterstrife.com/' 103 stripUrl = baseUrl + '?p=%s' 104 url = stripUrl % '262' 105 firstStripUrl = stripUrl % '1' 106 help = 'Index format: nnn' 107 endOfLife = True 108 109 110class AGirlAndHerFed(_ParserScraper): 111 url = 'https://agirlandherfed.com/' 112 stripUrl = url + '1.%s.html' 113 firstStripUrl = stripUrl % '1' 114 imageSearch = '//div[@id="comic-image"]/img' 115 prevSearch = '//div[@id="comic-nav"]/a[.//img[contains(@src, "back")]]' 116 help = 'Index format: nnn' 117 118 119class AHClub(_WPNaviIn): 120 baseUrl = 'http://rickgriffinstudios.com/' 121 url = baseUrl + 'ah-club/' 122 stripUrl = baseUrl + 'comic-post/%s/' 123 firstStripUrl = stripUrl % 'cover' 124 latestSearch = '//a[contains(@title, "Permanent Link")]' 125 starter = indirectStarter 126 nav = { 127 'ah-club-2-cover': 'ah-club-1-page-24', 128 'ah-club-3-cover': 'ah-club-2-page-28', 129 'ah-club-4-cover': 'ah-club-3-page-22' 130 } 131 132 def getPrevUrl(self, url, data): 133 # Links between chapters 134 url = url.rstrip('/').rsplit('/', 1)[-1] 135 if self.nav and url in self.nav: 136 return self.stripUrl % self.nav[url] 137 return super(AHClub, self).getPrevUrl(url, data) 138 139 140class AhoiPolloi(_ParserScraper): 141 url = 'https://ahoipolloi.blogger.de/' 142 stripUrl = url + '?day=%s' 143 firstStripUrl = stripUrl % '20060306' 144 multipleImagesPerStrip = True 145 lang = 'de' 146 imageSearch = '//img[contains(@src, "/static/antville/ahoipolloi/")]' 147 prevSearch = '//a[contains(@href, "/?day=")]' 148 help = 'Index format: yyyymmdd' 149 150 151class AhoyEarth(_WPNavi): 152 url = 'http://www.ahoyearth.com/' 153 154 155class AirForceBlues(_WordPressScraper): 156 url = 'http://farvatoons.com/' 157 firstStripUrl = url + 'comic/in-texas-there-are-texans/' 158 159 160class ALessonIsLearned(_BasicScraper): 161 url = 'http://www.alessonislearned.com/' 162 prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", 163 quote="'") + r"[^>]+previous") 164 stripUrl = url + 'index.php?comic=%s' 165 firstStripUrl = stripUrl % '1' 166 imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)")) 167 help = 'Index format: nnn' 168 169 170class Alice(_WordPressScraper): 171 url = 'http://www.alicecomics.com/' 172 latestSearch = '//a[text()="Latest Alice!"]' 173 starter = indirectStarter 174 175 176class AlienDice(_WordPressScraper): 177 url = 'https://aliendice.com/' 178 stripUrl = url + 'comic/%s/' 179 firstStripUrl = stripUrl % '05162001' 180 181 def getPrevUrl(self, url, data): 182 # Fix broken navigation 183 if url == self.stripUrl % 'day-29-part-2-page-3-4': 184 return self.stripUrl % 'day-29-part-2-page-3-2' 185 return super(AlienDice, self).getPrevUrl(url, data) 186 187 def namer(self, imageUrl, pageUrl): 188 # Fix inconsistent filename 189 return imageUrl.rsplit('/', 1)[-1].replace('20010831', '2001-08-31') 190 191 192class AlienDiceLegacy(_WordPressScraper): 193 name = 'AlienDice/Legacy' 194 stripUrl = 'https://aliendice.com/comic/%s/' 195 url = stripUrl % 'legacy-2-15' 196 firstStripUrl = stripUrl % 'legacy-1' 197 198 199class AlienLovesPredator(_BasicScraper): 200 url = 'http://alienlovespredator.com/' 201 stripUrl = url + '%s/' 202 firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay' 203 imageSearch = compile(tagre("img", "src", r'([^"]+)', 204 after='border="1" alt="" width="750"')) 205 prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev")) 206 help = 'Index format: yyyy/mm/dd/name' 207 208 209class AlienShores(_WordPressScraper): 210 url = 'http://alienshores.com/alienshores_band/' 211 firstStripUrl = url + 'AScomic/updated-cover/' 212 213 214class AllTheGrowingThings(_WordPressScraper): 215 url = ('https://web.archive.org/web/20160611212229/' 216 'http://growingthings.typodmary.com/') 217 stripUrl = url + '%s/' 218 firstStripUrl = stripUrl % 'all-the-growing-things' 219 endOfLife = True 220 221 222class AlphaLuna(_ParserScraper): 223 url = 'https://alphaluna.net/' 224 stripUrl = url + 'comic/%s/' 225 firstStripUrl = stripUrl % 'issue-1-cover' 226 imageSearch = '//main[@id="comic"]//img' 227 prevSearch = '//a[@rel="prev"]' 228 229 230class AlphaLunaSpanish(_ParserScraper): 231 name = 'AlphaLuna/Spanish' 232 lang = 'es' 233 url = 'https://alphaluna.net/spanish/' 234 stripUrl = url + 'comic/%s/' 235 firstStripUrl = stripUrl % 'issue-1-cover' 236 imageSearch = '//main[@id="comic"]//img' 237 prevSearch = '//a[@rel="prev"]' 238 239 240class Altermeta(_ParserScraper): 241 url = 'http://altermeta.net/' 242 stripUrl = url + 'archive.php?comic=%s' 243 firstStripUrl = stripUrl % '0' 244 imageSearch = '//img[contains(@src, "comics/")]' 245 prevSearch = '//a[./img[contains(@src, "back")]]' 246 nextSearch = '//a[./img[contains(@src, "forward")]]' 247 starter = bounceStarter 248 help = 'Index format: n (unpadded)' 249 250 def namer(self, imageUrl, pageUrl): 251 return pageUrl.rsplit('=', 1)[-1] + '_' + imageUrl.rsplit('/', 1)[-1] 252 253 254class AltermetaOld(_ParserScraper): 255 url = Altermeta.url + 'oldarchive/index.php' 256 stripUrl = Altermeta.url + 'oldarchive/archive.php?comic=%s' 257 firstStripUrl = stripUrl % '0' 258 imageSearch = '//img[contains(@src, "comics/")]' 259 prevSearch = '//a[text()="Back"]' 260 help = 'Index format: n (unpadded)' 261 262 263class AmazingSuperPowers(_BasicScraper): 264 url = 'http://www.amazingsuperpowers.com/' 265 rurl = escape(url) 266 stripUrl = url + '%s/' 267 firstStripUrl = stripUrl % '2007/09/heredity' 268 imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) 269 prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) 270 help = 'Index format: yyyy/mm/name' 271 272 def shouldSkipUrl(self, url, data): 273 """Skip pages without images.""" 274 return url in ( 275 # video 276 self.stripUrl % '2013/05/orbital-deathray-kickstarter', 277 ) 278 279 280class AmbersNoBrainers(_ParserScraper): 281 baseUrl = 'https://foxyverse.com/' 282 url = baseUrl + 'comics/' 283 stripUrl = baseUrl + 'ambers-no-brainers-%s/' 284 firstStripUrl = stripUrl % '1' 285 imageSearch = '//img[contains(@src, "Page")]' 286 latestSearch = '//a[contains(@href, "ambers-no-brainers")]' 287 starter = indirectStarter 288 289 def getPrevUrl(self, url, data): 290 # Replace missing navigation links 291 pageNum = int(url.rstrip('/').rsplit('-', 1)[-1]) 292 return self.stripUrl % str(pageNum - 1) 293 294 295class Amya(_WordPressScraper): 296 url = 'http://www.amyachronicles.com/' 297 298 299class Anaria(_ParserScraper): 300 url = 'https://www.leahbriere.com/anaria-the-witchs-dream/' 301 firstStripUrl = url 302 imageSearch = '//div[contains(@class, "gallery")]//a' 303 multipleImagesPerStrip = True 304 endOfLife = True 305 306 def namer(self, imageUrl, pageUrl): 307 filename = imageUrl.rsplit('/', 1)[-1] 308 return filename.replace('00.jpg', 'new00.jpg').replace('new', '1') 309 310 311class Angband(_BasicScraper): 312 url = 'http://angband.calamarain.net/' 313 stripUrl = url + 'view.php?date=%s' 314 firstStripUrl = stripUrl % '2005-12-30' 315 imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) 316 prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)') + 317 "Previous") 318 help = 'Index format: yyyy-mm-dd' 319 320 321class Angels2200(_BasicScraper): 322 url = 'http://www.janahoffmann.com/angels/' 323 stripUrl = url + '%s' 324 imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'")) 325 prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous") 326 help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>' 327 328 329class Annyseed(_ParserScraper): 330 baseUrl = ('https://web.archive.org/web/20190511031451/' 331 'http://www.mirrorwoodcomics.com/') 332 stripUrl = baseUrl + 'Annyseed%s.htm' 333 url = stripUrl % 'Latest' 334 firstStripUrl = stripUrl % '000' 335 imageSearch = '//div/img[contains(@src, "Annyseed")]' 336 prevSearch = '//a[img[@name="Previousbtn"]]' 337 endOfLife = True 338 help = 'Index format: nnn' 339 FIX_RE = compile(r'Annyseed/Finished%20For%20Print/') 340 341 def imageUrlModifier(self, image_url, data): 342 return self.FIX_RE.sub('', image_url) 343 344 def link_modifier(self, fromurl, tourl): 345 """Fix circular link.""" 346 if 'Annyseed150' in fromurl and 'Annyseed150' in tourl: 347 return self.stripUrl % '149' 348 return tourl 349 350 351class AntiheroForHire(_ParserScraper): 352 stripUrl = 'https://www.giantrobot.club/antihero-for-hire/%s' 353 firstStripUrl = stripUrl % '2016/6/8/entrance-vigil' 354 url = firstStripUrl 355 imageSearch = '//div[@class="image-wrapper"]//img[not(@class="thumb-image")]' 356 multipleImagesPerStrip = True 357 endOfLife = True 358 archive = [] 359 360 def starter(self): 361 # Build list of chapters for navigation 362 page = self.getPage(self.url) 363 archiveLinks = page.xpath('//ul[@class="archive-group-list"]//a[contains(@class, "archive-item-link")]') 364 for link in archiveLinks: 365 self.archive.append(link.get('href')) 366 return self.archive[0] 367 368 def getPrevUrl(self, url, data): 369 # Retrieve previous chapter from list 370 index = self.archive.index(url) + 1 371 return self.archive[index] if index < len(self.archive) else None 372 373 374class AppleGeeks(_BasicScraper): 375 url = 'http://www.applegeeks.com/' 376 stripUrl = url + 'comics/viewcomic.php?issue=%s' 377 firstStripUrl = stripUrl % '1' 378 imageSearch = compile(tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)')) 379 prevSearch = compile(r'<div class="caption">Previous Comic</div>\s*<p><a href="([^"]+)">', MULTILINE) 380 allow_errors = (404,) 381 help = 'Index format: n (unpadded)' 382 383 384class ARedTailsDream(_BasicScraper): 385 baseUrl = 'http://www.minnasundberg.fi/' 386 stripUrl = baseUrl + 'comic/page%s.php' 387 firstStripUrl = stripUrl % '00' 388 url = baseUrl + 'comic/recent.php' 389 imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)')) 390 prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)') + 391 tagre("img", "src", r'.*?aprev.*?')) 392 help = 'Index format: nn' 393 394 395class ArtificialIncident(_WPWebcomic): 396 url = 'https://www.artificialincident.com/' 397 stripUrl = url + 'comic/%s/' 398 firstStripUrl = stripUrl % 'issue-one-life-changing' 399 400 401class Ashes(_WordPressScraper): 402 url = 'http://www.flowerlarkstudios.com/comicpage/prologue/10232009/' 403 firstStripUrl = url 404 starter = indirectStarter 405 406 407class AstronomyPOTD(_ParserScraper): 408 baseUrl = 'http://apod.nasa.gov/apod/' 409 url = baseUrl + 'astropix.html' 410 starter = bounceStarter 411 stripUrl = baseUrl + 'ap%s.html' 412 firstStripUrl = stripUrl % '061012' 413 imageSearch = '//a/img' 414 multipleImagesPerStrip = True 415 prevSearch = '//a[text()="<"]' 416 nextSearch = '//a[text()=">"]' 417 help = 'Index format: yymmdd' 418 419 def shouldSkipUrl(self, url, data): 420 """Skip pages without images.""" 421 return data.xpath('//iframe') # videos 422 423 def namer(self, image_url, page_url): 424 return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:], 425 image_url.split('/')[-1].split('.')[0]) 426 427 428class ATaleOfTails(_WordPressScraper): 429 url = 'http://www.feretta.net/' 430 stripUrl = url + 'comic/%s/' 431 firstStripUrl = stripUrl % 'a-tale-of-tails-1-0' 432 adult = True 433 434 435class AxeCop(_WordPressScraper): 436 url = 'http://axecop.com/comic/season-two/' 437