1# -*- coding: utf-8 -*- 2# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs 3# Copyright (C) 2012-2014 Bastian Kleineidam 4# Copyright (C) 2015-2020 Tobias Gruetzmacher 5# Copyright (C) 2019-2020 Daniel Ring 6 7from __future__ import absolute_import, division, print_function 8 9from re import compile, escape, IGNORECASE, sub 10from os.path import splitext 11 12from ..scraper import _BasicScraper, _ParserScraper 13from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class 14from ..util import tagre 15from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic 16 17 18class SabrinaOnline(_BasicScraper): 19 url = 'http://sabrina-online.com/' 20 stripUrl = url + '%s.html' 21 firstStripUrl = stripUrl % '1996-01' 22 imageSearch = (compile(tagre("a", "href", r'(strips/[^"]*)')), 23 compile(tagre("img", "src", r'(pages/[^"]*)'))) 24 prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") + 25 tagre("img", "src", "b_back.gif")) 26 help = 'Index format: yyyy-qq' 27 adult = True 28 multipleImagesPerStrip = True 29 30 def starter(self): 31 """Pick last one in a list of archive pages.""" 32 archive = self.url + 'archive.html' 33 data = self.getPage(archive) 34 search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)")) 35 archivepages = self.fetchUrls(archive, data, search) 36 return archivepages[-1] 37 38 39class SafelyEndangered(_WPNavi): 40 url = 'http://www.safelyendangered.com/' 41 firstStripUrl = url + 'comic/ignored/' 42 43 44class SailorsunOrg(_WordPressScraper): 45 url = 'http://sailorsun.org/' 46 47 48class SamAndFuzzy(_ParserScraper): 49 url = 'http://www.samandfuzzy.com/' 50 stripUrl = url + '%s' 51 firstStripUrl = stripUrl % '1' 52 imageSearch = '//img[@class="comic-image"]' 53 prevSearch = '//li[@class="prev-page"]/a' 54 help = 'Index format: n (unpadded)' 55 56 57class SandraOnTheRocks(_BasicScraper): 58 url = 'http://www.sandraontherocks.com/' 59 stripUrl = url + 'strips-sotr/%s' 60 firstStripUrl = stripUrl % 'start_by_running' 61 imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) 62 prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sotr/[^"]+)', before="cn[id]prev")) 63 help = 'Index format: name' 64 65 66class Savestate(_WPNavi): 67 url = 'http://www.savestatecomic.com/' 68 stripUrl = url + '%s' 69 firstStripUrl = stripUrl % '2014/02/pokemon-bank' 70 71 72class ScandinaviaAndTheWorld(_ParserScraper): 73 url = 'https://satwcomic.com/' 74 stripUrl = url + '%s' 75 firstStripUrl = stripUrl % 'sweden-denmark-and-norway' 76 starter = indirectStarter 77 imageSearch = '//img[@itemprop="image"]' 78 prevSearch = '//a[@accesskey="p"]' 79 latestSearch = '//a[text()="View latest comic"]' 80 textSearch = '//span[@itemprop="articleBody"]' 81 help = 'Index format: stripname' 82 83 84class ScaryGoRound(_ParserScraper): 85 url = 'http://www.scarygoround.com/sgr/ar.php' 86 stripUrl = url + '?date=%s' 87 firstStripUrl = stripUrl % '20020604' 88 imageSearch = '//img[contains(@src, "/strips/")]' 89 prevSearch = '//a[contains(text(), "Previous")]' 90 endOfLife = True 91 help = 'Index format: yyyymmdd' 92 93 94class ScenesFromAMultiverse(_BasicScraper): 95 url = 'http://amultiverse.com/' 96 rurl = escape(url) 97 stripUrl = url + '%s/' 98 firstStripUrl = stripUrl % '2010/06/14/parenthood' 99 imageSearch = ( 100 compile(tagre("div", "id", "comic") + r"\s*" + 101 tagre("img", "src", 102 r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')), 103 compile(tagre("div", "id", "comic") + r"\s*" + 104 tagre("a", "href", r'[^"]*') + 105 tagre("img", "src", 106 r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')), 107 ) 108 prevSearch = compile(tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) 109 help = 'Index format: yyyy/mm/dd/stripname' 110 111 112class SchlockMercenary(_ParserScraper): 113 url = 'http://www.schlockmercenary.com/' 114 stripUrl = url + '%s' 115 firstStripUrl = stripUrl % '2000-06-12' 116 imageSearch = '//div[@class="strip-image-wrapper"]/img' 117 multipleImagesPerStrip = True 118 prevSearch = '//a[@class="previous-strip"]' 119 help = 'Index format: yyyy-mm-dd' 120 121 122class SchoolBites(_ParserScraper): 123 url = ('https://web.archive.org/web/20170215065523/' 124 'http://schoolbites.net/') 125 stripUrl = url + 'd/%s.html' 126 imageSearch = '//img[{}]'.format(xpath_class('ksc')) 127 prevSearch = '//a[@rel="prev"]' 128 endOfLife = True 129 help = 'Index format: yyyymmdd' 130 131 132class Schuelert(_ParserScraper): 133 url = ('https://web.archive.org/web/20190103022830/' 134 'http://www.schuelert.de/') 135 stripUrl = url + 'index.php?paged=%s' 136 firstStripUrl = stripUrl % '3' 137 imageSearch = '//img[contains(@src, "wp-content")]' 138 prevSearch = '//span[{}]/a'.format(xpath_class('prevlink')) 139 multipleImagesPerStrip = True 140 endOfLife = True 141 lang = 'de' 142 143 144class Science(_ParserScraper): 145 stripUrl = ('https://web.archive.org/web/20180616152753/' 146 'http://sci-ence.org/%s/') 147 url = stripUrl % 'new-york-comic-con-2013' 148 firstStripUrl = stripUrl % 'periodic-table-element-ass' 149 prevSearch = '//a[{}]'.format(xpath_class('navi-prev')) 150 imageSearch = '//div[@class="comicpane"]//img' 151 endOfLife = True 152 153 154class SeelPeel(_WPNaviIn): 155 url = 'https://seelpeel.com/' 156 stripUrl = url + 'comic/%s/' 157 firstStripUrl = stripUrl % 'seelpeel-goes-live' 158 multipleImagesPerStrip = True 159 160 161class SequentialArt(_BasicScraper): 162 url = 'http://www.collectedcurios.com/sequentialart.php' 163 stripUrl = url + '?s=%s' 164 firstStripUrl = stripUrl % '1' 165 imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip")) 166 prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') + 167 tagre("img", "src", r'Nav_BackOne\.gif')) 168 help = 'Index format: name' 169 170 171class SexyLosers(_ParserScraper): 172 adult = True 173 url = 'https://www.sexylosers.com/' 174 stripUrl = url + 'comic/%s/' 175 firstStripUrl = stripUrl % '003' 176 imageSearch = '//div[@class="entry-content"]//img' 177 prevSearch = '//a[@rel="prev"]' 178 latestSearch = '//a[@rel="bookmark"]' 179 help = 'Index format: nnn' 180 starter = indirectStarter 181 namer = joinPathPartsNamer((-2,), (-1,), '-') 182 183 184class ShadesOfGray(_ParserScraper): 185 url = 'https://www.theduckwebcomics.com/Shades_of_Gray/' 186 stripUrl = url + '%s/' 187 firstStripUrl = stripUrl % '4820502' 188 imageSearch = '//div[@id="comic"]/img' 189 prevSearch = '//a[img[@class="arrow_prev"]]' 190 nextSearch = '//a[img[@class="arrow_next"]]' 191 starter = bounceStarter 192 endOfLife = True 193 194 def namer(self, imageUrl, pageUrl): 195 return pageUrl.rstrip('/').rsplit('/', 1)[-1] 196 197 198class Sharksplode(_WordPressScraper): 199 url = 'http://sharksplode.com/' 200 textSearch = '//div[@id="comic"]//img/@alt' 201 allow_errors = (403,) 202 203 204class Sheldon(_BasicScraper): 205 url = 'http://www.sheldoncomics.com/' 206 rurl = escape(url) 207 stripUrl = url + 'archive/%s.html' 208 firstStripUrl = stripUrl % '011130' 209 imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)')) 210 prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl, 211 after="sidenav-prev")) 212 help = 'Index format: yymmdd' 213 214 215class ShipInABottle(_WPNavi): 216 url = 'http://shipinbottle.pepsaga.com/' 217 stripUrl = url + '?p=%s' 218 firstStripUrl = stripUrl % '281' 219 adult = True 220 help = 'Index format: number' 221 222 223class Shortpacked(_ParserScraper): 224 url = 'http://www.shortpacked.com/index.php' 225 stripUrl = url + '?id=%s' 226 css = True 227 imageSearch = 'img#comic' 228 prevSearch = 'a.prev' 229 help = 'Index format: nnn' 230 231 232class ShotgunShuffle(_WordPressScraper): 233 url = 'http://shotgunshuffle.com/' 234 firstStripUrl = url + 'comic/pilot/' 235 236 237class SinFest(_BasicScraper): 238 url = 'http://www.sinfest.net/' 239 stripUrl = url + 'view.php?date=%s' 240 imageSearch = compile(tagre("img", "src", r'(btphp/comics/.+)', 241 after="alt")) 242 prevSearch = compile(tagre("a", "href", r'(view\.php\?date=.+)') + '\\s*' + 243 tagre("img", "src", r'\.\./images/prev\.gif')) 244 help = 'Index format: yyyy-mm-dd' 245 246 247class SixPackOfOtters(_WPWebcomic): 248 url = 'http://sixpackofotters.com/' 249 stripUrl = url + 'pages/%s/' 250 firstStripUrl = stripUrl % 'chapter-01-tandem' 251 252 253class SkinDeep(_WPWebcomic): 254 url = 'http://www.skindeepcomic.com/' 255 stripUrl = url + 'archive/%s/' 256 firstStripUrl = stripUrl % 'issue-1-cover' 257 258 259class SleeplessDomain(_ComicControlScraper): 260 url = 'http://www.sleeplessdomain.com/' 261 stripUrl = url + 'comic/%s' 262 firstStripUrl = stripUrl % 'chapter-1-cover' 263 starter = bounceStarter 264 265 def namer(self, imageUrl, pageUrl): 266 return pageUrl.rsplit('/', 1)[-1] + '.' + imageUrl.rsplit('.', 1)[-1] 267 268 269class SlightlyDamned(_ComicControlScraper): 270 url = 'http://www.sdamned.com/' 271 firstStripUrl = url + 'comic/prologue' 272 273 def namer(self, imageurl, pageurl): 274 """Clean up mixed filename formats.""" 275 filename = pageurl.rsplit('/', 1)[-1] 276 if filename == '': 277 filename = imageurl.rsplit('-', 1)[-1] 278 else: 279 filename = 'SD' + filename + '.' + imageurl.rsplit('.', 1)[-1] 280 return filename 281 282 283class SluggyFreelance(_ParserScraper): 284 url = 'http://sluggy.com/' 285 stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s' 286 imageSearch = '//div[%s]/img/@data-src' % xpath_class('comic_content') 287 prevSearch = '//div[%s]/a' % xpath_class('previous') 288 latestSearch = '//a[%s]' % xpath_class('archives_link') 289 starter = indirectStarter 290 multipleImagesPerStrip = True 291 help = 'Index format: chapter' 292 293 def namer(self, imageurl, pageurl): 294 """Remove random noise from name.""" 295 fn = imageurl.rsplit('/', 1)[-1] 296 return sub(r'\.(png|gif|jpg).*\.\1', '', fn) 297 298 299class SMBC(_ComicControlScraper): 300 url = 'http://www.smbc-comics.com/' 301 firstStripUrl = url + 'comic/2002-09-05' 302 multipleImagesPerStrip = True 303 imageSearch = ['//img[@id="cc-comic"]', '//div[@id="aftercomic"]/img'] 304 textSearch = '//img[@id="cc-comic"]/@title' 305 306 def namer(self, image_url, page_url): 307 """Remove random noise from name.""" 308 return image_url.rsplit('-', 1)[-1] 309 310 311class SnowFlame(_WordPressScraper): 312 url = ('https://web.archive.org/web/20160905071051/' 313 'http://www.snowflamecomic.com/') 314 stripUrl = url + '?comic=snowflame-%s-%s' 315 firstStripUrl = stripUrl % ('01', '01') 316 starter = bounceStarter 317 endOfLife = True 318 help = 'Index format: chapter-page' 319 320 def getIndexStripUrl(self, index): 321 return self.stripUrl % tuple(index.split('-')) 322 323 def namer(self, image_url, page_url): 324 prefix, filename = image_url.rsplit('/', 1) 325 ro = compile(r'snowflame-([^-]+)-([^-]+)') 326 mo = ro.search(page_url) 327 chapter = mo.group(1) 328 page = mo.group(2) 329 return "%s-%s-%s" % (chapter, page, filename) 330 331 332class SodiumEyes(_WordPressScraper): 333 url = 'http://sodiumeyes.com/' 334 335 336class SomethingPositive(_ParserScraper): 337 url = 'https://www.somethingpositive.net/' 338 stripUrl = url + 'sp%s.shtml' 339 imageSearch = r'//img[re:test(@src, "/sp\d+")]' 340 prevSearch = ('//a[contains(text(), "Previous")]', 341 '//a[img[contains(@src, "previous")]]') 342 multipleImagesPerStrip = True 343 help = 'Index format: mmddyyyy' 344 345 346class Sorcery101(_WPWebcomic): 347 baseUrl = 'https://kelmcdonald.com/sorcery-101/' 348 stripUrl = baseUrl + '%s/' 349 url = stripUrl % 'sorcery101-ch-01' 350 firstStripUrl = url 351 starter = indirectStarter 352 help = 'Index format: stripname' 353 354 355class SpaceFurries(_ParserScraper): 356 url = 'http://www.spacefurrs.org/' 357 firstStripUrl = url 358 multipleImagesPerStrip = True 359 adult = True 360 endOfLife = True 361 362 def fetchUrls(self, url, data, urlSearch): 363 # Website requires JS, so build the list of image URLs manually 364 imageUrls = [] 365 currentPage = int(data.xpath('//input[@name="pagnum"]')[0].get('value')) 366 for page in reversed(range(1, currentPage + 1)): 367 imageUrls.append(self.url + 'comics/' + str(page) + '.jpg') 368 return imageUrls 369 370 371class SpaceJunkArlia(_ParserScraper): 372 url = 'http://spacejunkarlia.com/' 373 stripUrl = url + '?strip_id=%s' 374 firstStripUrl = stripUrl % '0' 375 imageSearch = '//div[%s]/img' % xpath_class('content') 376 prevSearch = '//a[text()="<"]' 377 help = 'Index format: number' 378 379 380class SpaceTrawler(_ParserScraper): 381 url = 'https://www.baldwinpage.com/spacetrawler/' 382 firstStripUrl = url + '2010/01/01/spacetrawler-4/' 383 imageSearch = '//img[%s]' % xpath_class('size-full') 384 prevSearch = '//a[@rel="prev"]' 385 386 387class Spamusement(_BasicScraper): 388 url = 'http://spamusement.com/' 389 rurl = escape(url) 390 stripUrl = url + 'index.php/comics/view/%s' 391 imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE) 392 prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl, 393 IGNORECASE) 394 latestSearch = prevSearch 395 help = 'Index format: n (unpadded)' 396 starter = indirectStarter 397 398 399class SpareParts(_BasicScraper): 400 baseUrl = 'http://www.sparepartscomics.com/' 401 url = baseUrl + 'comics/?date=20080328' 402 stripUrl = baseUrl + 'comics/index.php?date=%s' 403 firstStripUrl = stripUrl % '20031022' 404 imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)')) 405 prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', 406 quote="'") + "Previous Comic") 407 help = 'Index format: yyyymmdd' 408 409 410class Spinnerette(_ComicControlScraper): 411 url = 'http://www.spinnyverse.com' 412 413 414class SPQRBlues(_WordPressScraper): 415 url = 'http://spqrblues.com/IV/' 416 417 418class SSDD(_ParserScraper): 419 url = 'http://www.poisonedminds.com/' 420 stripUrl = url + 'd/%s.html' 421 firstStripUrl = stripUrl % '19980927' 422 imageSearch = ('//img[contains(@src, "/comics/")]', 423 '//source[contains(@src, "/video/")]') 424 prevSearch = '//a[@rel="prev"]' 425 multipleImagesPerStrip = True 426 adult = True 427 help = 'Index format: yyyymmdd' 428 429 def shouldSkipUrl(self, url, data): 430 # Skip news, flash animation, and non-comic pages. 431 return url in ( 432 # News post 433 self.stripUrl % '20060712', 434 self.stripUrl % '20060719', 435 self.stripUrl % '20071225', 436 self.stripUrl % '20110321', 437 self.stripUrl % '20110830', 438 self.stripUrl % '20110929', 439 self.stripUrl % '20180927', 440 441 # Flash animation 442 self.stripUrl % '20180401', 443 self.stripUrl % '20170429', 444 self.stripUrl % '20041203', 445 446 # Comic missing 447 self.stripUrl % '20070402', 448 self.stripUrl % '20060413', 449 self.stripUrl % '20060412', 450 self.stripUrl % '20060202', 451 self.stripUrl % '20051026', 452 self.stripUrl % '20050805', 453 self.stripUrl % '20050530', 454 self.stripUrl % '20050526', 455 self.stripUrl % '20050525', 456 self.stripUrl % '20050524', 457 self.stripUrl % '20050523', 458 self.stripUrl % '20050504', 459 self.stripUrl % '20040705', 460 self.stripUrl % '20030418', 461 self.stripUrl % '20030214' 462 ) 463 464 465class StandStillStaySilent(_ParserScraper): 466 baseUrl = 'http://sssscomic.com/' 467 url = baseUrl + 'comic2.php' 468 stripUrl = baseUrl + 'comic%s.php?page=%s' 469 firstStripUrl = stripUrl % ('', '1') 470 imageSearch = '//img[@class="comicnormal"]' 471 prevSearch = '//a[./img[contains(@src, "nav_prev")]]' 472 473 def namer(self, imageUrl, pageUrl): 474 chapter = '2' if ('adv2_comicpages' in imageUrl) else '1' 475 return '%s-%s' % (chapter, imageUrl.rsplit('/', 1)[-1].replace('page_', '')) 476 477 478class StarCrossdDestiny(_ParserScraper): 479 baseUrl = ('https://web.archive.org/web/20190918132321/' 480 'http://starcrossd.net/') 481 url = baseUrl + 'comic.html' 482 stripUrl = baseUrl + 'archives/%s.html' 483 firstStripUrl = stripUrl % '00000001' 484 imageSearch = '//div[@id="comic"]//img' 485 prevSearch = '//a[text()="prev"]' 486 endOfLife = True 487 help = 'Index format: nnnnnnnn' 488 489 def namer(self, image_url, page_url): 490 if image_url.find('ch1') == -1: 491 # At first all images were stored in a strips/ directory but 492 # that was changed with the introduction of book2 493 image_url = sub('(?:strips)|(?:images)', 'book1', image_url) 494 elif not image_url.find('strips') == -1: 495 image_url = image_url.replace('strips/', '') 496 directory, filename = image_url.split('/')[-2:] 497 filename, extension = splitext(filename) 498 return directory + '-' + filename 499 500 501class StarfireAgency(_WordPressScraper): 502 url = 'http://starfire.poecatcomix.com/' 503 stripUrl = url + 'comic/%s/' 504 firstStripUrl = stripUrl % 'sfa-issue-1' 505 506 def namer(self, imageUrl, pageUrl): 507 # Prepend chapter title to page filenames 508 page = self.getPage(pageUrl) 509 chapter = page.xpath('//div[@class="comic-chapter"]/a') 510 if len(chapter) > 0: 511 chapter = chapter[0].text.replace(' ', '-').lower() 512 else: 513 chapter = 'chapter-1' 514 515 # Fix inconsistent filenames 516 filename = imageUrl.rsplit('/', 1)[-1] 517 if 'cover' not in filename.lower(): 518 filename = filename.replace('SFA', 'Page') 519 return chapter + '_' + filename 520 521 522class StarTrip(_ComicControlScraper): 523 url = 'https://www.startripcomic.com/' 524 525 526class StationV3(_ParserScraper): 527 url = 'http://www.stationv3.com/' 528 stripUrl = url + 'd3/%s.html' 529 firstStripUrl = stripUrl % '20170101' 530 imageSearch = '//img[contains(@src,"/comics3/")]' 531 prevSearch = '//a[img[contains(@src,"/previous2")]]' 532 help = 'Index format: yyyymmdd' 533 534 535class StickyDillyBuns(_BasicScraper): 536 url = 'http://www.stickydillybuns.com/' 537 stripUrl = url + 'strips-sdb/%s' 538 firstStripUrl = stripUrl % 'awesome_leading_man' 539 imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) 540 prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)', 541 before="cn[id]prev")) 542 help = 'Index format: name' 543 544 545class StreetFighter(_ComicControlScraper): 546 url = 'http://www.streetfightercomics.com' 547 548 549class StringTheory(_WPNavi): 550 url = 'http://www.stringtheorycomic.com/' 551 firstStripUrl = url + 'comics/chapterone/chapterone/' 552 553 554class StrongFemaleProtagonist(_ParserScraper): 555 url = 'http://strongfemaleprotagonist.com/' 556 stripUrl = url + '%s/' 557 css = True 558 imageSearch = 'article p img' 559 prevSearch = 'a.page-nav__item--left' 560 help = 'Index format: issue-?/page-??' 561 562 def shouldSkipUrl(self, url, data): 563 """Skip hiatus & non-comic pages.""" 564 return url in ( 565 self.stripUrl % 'guest-art/tuesday', 566 self.stripUrl % 'guest-art/friday', 567 self.stripUrl % 'guest-art/wednesday', 568 self.stripUrl % 'issue-5/newspaper', 569 self.stripUrl % 'issue-5/hiatus-1', 570 self.stripUrl % 'issue-5/hiatus-2', 571 self.stripUrl % 'issue-1/no-page', 572 ) 573 574 575class StuffNoOneToldMe(_BasicScraper): 576 url = 'http://www.snotm.com/' 577 stripUrl = url + '%s.html' 578 firstStripUrl = stripUrl % '2010/05/01' 579 olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)" 580 starter = indirectStarter 581 imageSearch = ( 582 compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + 583 r"(?:</a>|<br />)"), 584 compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') + 585 r"(?:(?: )?</a>|<span |<br />)"), 586 compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r"</a>"), 587 ) 588 prevSearch = compile(tagre("a", "href", olderHref, quote="'", 589 before="older-link")) 590 latestSearch = compile(tagre("a", "href", olderHref, quote="'")) 591 multipleImagesPerStrip = True 592 help = 'Index format: yyyy/mm/stripname' 593 594 def namer(self, image_url, page_url): 595 """Use page URL to construct meaningful image name.""" 596 parts, year, month, stripname = page_url.rsplit('/', 3) 597 stripname = stripname.rsplit('.', 1)[0] 598 parts, imagename = image_url.rsplit('/', 1) 599 return '%s-%s-%s-%s' % (year, month, stripname, imagename) 600 601 def shouldSkipUrl(self, url, data): 602 """Skip pages without images.""" 603 return url in ( 604 self.stripUrl % '2016/05/so-you-would-like-to-share-my-comics', # no comic 605 self.stripUrl % '2012/08/self-rant', # no comic 606 self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video 607 self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video 608 self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo', # no comic 609 self.stripUrl % '2010/11/ear-infection', # no comic 610 ) 611 612 613class SuburbanJungle(_ParserScraper): 614 url = 'http://suburbanjungleclassic.com/' 615 stripUrl = url + '?p=%s' 616 firstStripUrl = stripUrl % '10' 617 imageSearch = '//div[@id="comic"]/img' 618 prevSearch = '//div[@class="nav-previous"]/a' 619 620 621class SuburbanJungleRoughHousing(_WordPressScraper): 622 url = 'http://roughhouse.suburbanjungle.com/' 623 stripUrl = url + 'comic/%s/' 624 firstStripUrl = stripUrl % 'rough-housing-issue-1-cover' 625 626 627class Supercell(_ParserScraper): 628 url = 'https://www.supercellcomic.com/' 629 stripUrl = url + 'pages/%s.html' 630 firstStripUrl = stripUrl % '0001' 631 imageSearch = '//div[@class="comicpage"]//img' 632 prevSearch = '//div[@class="comicnav"]/a[./img[contains(@src, "comnav_02")]]' 633 634 635class SupernormalStep(_ComicControlScraper): 636 url = 'http://supernormalstep.com/' 637 638 639class SurvivingTheWorld(_ParserScraper): 640 url = 'http://survivingtheworld.net/' 641 stripUrl = url + '%s.html' 642 firstStripUrl = stripUrl % 'Lesson1' 643 imageSearch = ( 644 '//div[@class="img"]/img', # When there's one image per strip 645 '//div[@class="img"]/p/img', # When there's multiple images per strip 646 '//td/img' # Special case for Lesson1296.html 647 ) 648 prevSearch = ( 649 '//li[@class="previous"]/a', 650 '//td/a' # Special case for Lesson1296.html 651 ) 652 multipleImagesPerStrip = True 653 help = 'Index format: name' 654 655 656class SwordsAndSausages(_ParserScraper): 657 url = 'https://www.tigerknight.com/ss' 658 stripUrl = url + '/%s' 659 firstStripUrl = stripUrl % '1-1' 660 imageSearch = '//img[@class="comic-image"]' 661 prevSearch = '//a[@class="prev"]' 662 multipleImagesPerStrip = True 663