1# -*- coding: utf-8 -*-
2# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
3# Copyright (C) 2012-2014 Bastian Kleineidam
4# Copyright (C) 2015-2020 Tobias Gruetzmacher
5# Copyright (C) 2019-2020 Daniel Ring
6
7from __future__ import absolute_import, division, print_function
8
9from re import compile, escape, IGNORECASE, sub
10from os.path import splitext
11
12from ..scraper import _BasicScraper, _ParserScraper
13from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
14from ..util import tagre
15from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic
16
17
class SabrinaOnline(_BasicScraper):
    # Strip pages are named <index>.html below the site root; both the
    # navigation regex and the archive regex look for dddd-dd.html links.
    url = 'http://sabrina-online.com/'
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '1996-01'
    imageSearch = (compile(tagre("a", "href", r'(strips/[^"]*)')),
                   compile(tagre("img", "src", r'(pages/[^"]*)')))
    # Dots are escaped so that they only match a literal '.', in line with
    # the other regex based scrapers in this module.
    prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d\.html)") +
                         tagre("img", "src", r"b_back\.gif"))
    help = 'Index format: yyyy-qq'
    adult = True
    multipleImagesPerStrip = True

    def starter(self):
        """Pick last one in a list of archive pages.

        Fetches ``archive.html`` below the site URL, collects every link
        that looks like ``dddd-dd.html`` and returns the last one found.
        """
        archive = self.url + 'archive.html'
        data = self.getPage(archive)
        search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d\.html)"))
        archivepages = self.fetchUrls(archive, data, search)
        return archivepages[-1]
37
38
class SafelyEndangered(_WPNavi):
    # Site root of the comic.
    url = 'http://www.safelyendangered.com/'
    # Page recorded as the first strip, built relative to the site root.
    firstStripUrl = url + 'comic/ignored/'
42
43
class SailorsunOrg(_WordPressScraper):
    # Only the site URL is set here; nothing else is overridden in this class.
    url = 'http://sailorsun.org/'
46
47
class SamAndFuzzy(_ParserScraper):
    help = 'Index format: n (unpadded)'

    # Strip pages hang directly off the site root.
    url = 'http://www.samandfuzzy.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '1'

    # XPath expressions for the "previous" link and the strip image.
    prevSearch = '//li[@class="prev-page"]/a'
    imageSearch = '//img[@class="comic-image"]'
55
56
class SandraOnTheRocks(_BasicScraper):
    help = 'Index format: name'

    url = 'http://www.sandraontherocks.com/'
    stripUrl = url + 'strips-sotr/%s'
    firstStripUrl = stripUrl % 'start_by_running'

    # Regexes built with tagre(): the image source under /comics/ and the
    # link into /strips-sotr/ that is matched with before="cn[id]prev".
    imageSearch = compile(
        tagre("img", "src", r'([^"]*/comics/[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'([^"]*/strips-sotr/[^"]+)',
              before="cn[id]prev"))
64
65
class Savestate(_WPNavi):
    # Site root of the comic.
    url = 'http://www.savestatecomic.com/'
    # Strip pages are addressed by a path appended directly to the root.
    stripUrl = url + '%s'
    # Page recorded as the first strip.
    firstStripUrl = stripUrl % '2014/02/pokemon-bank'
70
71
class ScandinaviaAndTheWorld(_ParserScraper):
    help = 'Index format: stripname'
    starter = indirectStarter

    url = 'https://satwcomic.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % 'sweden-denmark-and-norway'

    # XPath expressions keyed on attributes and link text of the page markup.
    latestSearch = '//a[text()="View latest comic"]'
    prevSearch = '//a[@accesskey="p"]'
    imageSearch = '//img[@itemprop="image"]'
    textSearch = '//span[@itemprop="articleBody"]'
82
83
class ScaryGoRound(_ParserScraper):
    endOfLife = True
    help = 'Index format: yyyymmdd'

    # Strips are selected through the ``date`` query parameter of ar.php.
    url = 'http://www.scarygoround.com/sgr/ar.php'
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '20020604'

    prevSearch = '//a[contains(text(), "Previous")]'
    imageSearch = '//img[contains(@src, "/strips/")]'
92
93
class ScenesFromAMultiverse(_BasicScraper):
    help = 'Index format: yyyy/mm/dd/stripname'

    url = 'http://amultiverse.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2010/06/14/parenthood'

    # Two patterns are tried: the comic <div> followed by the <img>, and
    # the comic <div> followed by an <a> and then the <img>.
    imageSearch = (
        compile(
            tagre("div", "id", "comic") +
            r"\s*" +
            tagre("img", "src",
                  r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')
        ),
        compile(
            tagre("div", "id", "comic") +
            r"\s*" +
            tagre("a", "href", r'[^"]*') +
            tagre("img", "src",
                  r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')
        ),
    )
    # Link below <site>/comic/ matched with after="prev".
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl,
              after="prev"))
110
111
class SchlockMercenary(_ParserScraper):
    help = 'Index format: yyyy-mm-dd'
    multipleImagesPerStrip = True

    url = 'http://www.schlockmercenary.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '2000-06-12'

    # XPath expressions for the "previous" link and the strip images.
    prevSearch = '//a[@class="previous-strip"]'
    imageSearch = '//div[@class="strip-image-wrapper"]/img'
120
121
class SchoolBites(_ParserScraper):
    endOfLife = True
    help = 'Index format: yyyymmdd'

    # The site is read through a web.archive.org snapshot.
    url = 'https://web.archive.org/web/20170215065523/http://schoolbites.net/'
    stripUrl = url + 'd/%s.html'

    prevSearch = '//a[@rel="prev"]'
    imageSearch = '//img[%s]' % xpath_class('ksc')
130
131
class Schuelert(_ParserScraper):
    lang = 'de'
    endOfLife = True
    multipleImagesPerStrip = True

    # The site is read through a web.archive.org snapshot.
    url = 'https://web.archive.org/web/20190103022830/http://www.schuelert.de/'
    stripUrl = url + 'index.php?paged=%s'
    firstStripUrl = stripUrl % '3'

    prevSearch = '//span[%s]/a' % xpath_class('prevlink')
    imageSearch = '//img[contains(@src, "wp-content")]'
142
143
class Science(_ParserScraper):
    endOfLife = True

    # The site is read through a web.archive.org snapshot; the start URL
    # and the first strip are both derived from the strip URL template.
    stripUrl = 'https://web.archive.org/web/20180616152753/http://sci-ence.org/%s/'
    url = stripUrl % 'new-york-comic-con-2013'
    firstStripUrl = stripUrl % 'periodic-table-element-ass'

    imageSearch = '//div[@class="comicpane"]//img'
    prevSearch = '//a[%s]' % xpath_class('navi-prev')
152
153
class SeelPeel(_WPNaviIn):
    # Site root of the comic.
    url = 'https://seelpeel.com/'
    # Strip pages live below comic/<name>/.
    stripUrl = url + 'comic/%s/'
    # Page recorded as the first strip.
    firstStripUrl = stripUrl % 'seelpeel-goes-live'
    multipleImagesPerStrip = True
159
160
class SequentialArt(_BasicScraper):
    # Strips are addressed through the numeric ``s`` query parameter.
    url = 'http://www.collectedcurios.com/sequentialart.php'
    stripUrl = url + '?s=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip"))
    prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') +
                         tagre("img", "src", r'Nav_BackOne\.gif'))
    # The index is a number, not a name: the first strip is '1' and the
    # navigation regex only accepts digits after "?s=".
    help = 'Index format: n (unpadded)'
169
170
class SexyLosers(_ParserScraper):
    adult = True
    help = 'Index format: nnn'
    starter = indirectStarter
    # File names are assembled with joinPathPartsNamer from the page URL
    # part (-2,) and the image URL part (-1,), joined with '-'.
    namer = joinPathPartsNamer((-2,), (-1,), '-')

    url = 'https://www.sexylosers.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '003'

    latestSearch = '//a[@rel="bookmark"]'
    prevSearch = '//a[@rel="prev"]'
    imageSearch = '//div[@class="entry-content"]//img'
182
183
class ShadesOfGray(_ParserScraper):
    endOfLife = True
    starter = bounceStarter

    url = 'https://www.theduckwebcomics.com/Shades_of_Gray/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '4820502'

    # Navigation links are the anchors wrapping the arrow images.
    nextSearch = '//a[img[@class="arrow_next"]]'
    prevSearch = '//a[img[@class="arrow_prev"]]'
    imageSearch = '//div[@id="comic"]/img'

    def namer(self, imageUrl, pageUrl):
        """Name the image after the last path segment of the page URL."""
        trimmed = pageUrl.rstrip('/')
        _, _, lastSegment = trimmed.rpartition('/')
        return lastSegment
196
197
class Sharksplode(_WordPressScraper):
    # Site root of the comic.
    url = 'http://sharksplode.com/'
    # The strip text is read from the alt attribute of the comic image.
    textSearch = '//div[@id="comic"]//img/@alt'
    # NOTE(review): presumably HTTP status codes the base scraper should
    # tolerate; confirm against the scraper base class.
    allow_errors = (403,)
202
203
class Sheldon(_BasicScraper):
    help = 'Index format: yymmdd'

    url = 'http://www.sheldoncomics.com/'
    rurl = escape(url)
    stripUrl = url + 'archive/%s.html'
    firstStripUrl = stripUrl % '011130'

    # Images come from the cdn host; the previous link is an archive page
    # on the (regex-escaped) site URL matched with after="sidenav-prev".
    imageSearch = compile(
        tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl,
              after="sidenav-prev"))
213
214
class ShipInABottle(_WPNavi):
    adult = True
    help = 'Index format: number'

    # Strips are addressed through the ``p`` query parameter.
    url = 'http://shipinbottle.pepsaga.com/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '281'
221
222
class Shortpacked(_ParserScraper):
    help = 'Index format: nnn'

    # Strips are addressed through the ``id`` query parameter of index.php.
    url = 'http://www.shortpacked.com/index.php'
    stripUrl = url + '?id=%s'

    # With ``css`` enabled the search expressions below are written as
    # CSS selectors instead of XPath.
    css = True
    prevSearch = 'a.prev'
    imageSearch = 'img#comic'
230
231
class ShotgunShuffle(_WordPressScraper):
    # Site root of the comic.
    url = 'http://shotgunshuffle.com/'
    # Page recorded as the first strip, built relative to the site root.
    firstStripUrl = url + 'comic/pilot/'
235
236
class SinFest(_BasicScraper):
    help = 'Index format: yyyy-mm-dd'

    # Strips are addressed through the ``date`` query parameter of view.php.
    url = 'http://www.sinfest.net/'
    stripUrl = url + 'view.php?date=%s'

    imageSearch = compile(
        tagre("img", "src", r'(btphp/comics/.+)', after="alt"))
    # A view.php link followed (after optional whitespace) by the
    # prev.gif image.
    prevSearch = compile(
        tagre("a", "href", r'(view\.php\?date=.+)') +
        r'\s*' +
        tagre("img", "src", r'\.\./images/prev\.gif'))
245
246
class SixPackOfOtters(_WPWebcomic):
    # Site root of the comic.
    url = 'http://sixpackofotters.com/'
    # Strip pages live below pages/<name>/.
    stripUrl = url + 'pages/%s/'
    # Page recorded as the first strip.
    firstStripUrl = stripUrl % 'chapter-01-tandem'
251
252
class SkinDeep(_WPWebcomic):
    # Site root of the comic.
    url = 'http://www.skindeepcomic.com/'
    # Strip pages live below archive/<name>/.
    stripUrl = url + 'archive/%s/'
    # Page recorded as the first strip.
    firstStripUrl = stripUrl % 'issue-1-cover'
257
258
class SleeplessDomain(_ComicControlScraper):
    starter = bounceStarter

    url = 'http://www.sleeplessdomain.com/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'chapter-1-cover'

    def namer(self, imageUrl, pageUrl):
        """Combine the last page URL segment with the image's extension."""
        pageName = pageUrl.rpartition('/')[2]
        extension = imageUrl.rpartition('.')[2]
        return '%s.%s' % (pageName, extension)
267
268
class SlightlyDamned(_ComicControlScraper):
    url = 'http://www.sdamned.com/'
    firstStripUrl = url + 'comic/prologue'

    def namer(self, imageurl, pageurl):
        """Clean up mixed filename formats.

        When the page URL ends in a name, the result is 'SD' + that name +
        the image's extension; otherwise it is the part of the image URL
        after its last '-'.
        """
        pageName = pageurl.rsplit('/', 1)[-1]
        if pageName:
            extension = imageurl.rsplit('.', 1)[-1]
            return 'SD%s.%s' % (pageName, extension)
        return imageurl.rsplit('-', 1)[-1]
281
282
class SluggyFreelance(_ParserScraper):
    help = 'Index format: chapter'
    starter = indirectStarter
    multipleImagesPerStrip = True

    # The start page is on sluggy.com, strip pages on archives.sluggy.com.
    url = 'http://sluggy.com/'
    stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s'

    # Image URLs are taken from the data-src attribute.
    imageSearch = '//div[{}]/img/@data-src'.format(xpath_class('comic_content'))
    prevSearch = '//div[{}]/a'.format(xpath_class('previous'))
    latestSearch = '//a[{}]'.format(xpath_class('archives_link'))

    def namer(self, imageurl, pageurl):
        """Remove random noise from name."""
        basename = imageurl.rsplit('/', 1)[-1]
        # Drops everything from the first image extension up to and
        # including a later repetition of that same extension.
        noise = compile(r'\.(png|gif|jpg).*\.\1')
        return noise.sub('', basename)
297
298
class SMBC(_ComicControlScraper):
    multipleImagesPerStrip = True

    url = 'http://www.smbc-comics.com/'
    firstStripUrl = url + 'comic/2002-09-05'

    # Both the main comic image and the image in the "aftercomic" div
    # are collected; the text comes from the comic image's title.
    imageSearch = [
        '//img[@id="cc-comic"]',
        '//div[@id="aftercomic"]/img',
    ]
    textSearch = '//img[@id="cc-comic"]/@title'

    def namer(self, image_url, page_url):
        """Remove random noise from name."""
        # Keep only what follows the last '-' of the image URL.
        return image_url.rpartition('-')[2]
309
310
class SnowFlame(_WordPressScraper):
    # The site is read through a web.archive.org snapshot.
    url = ('https://web.archive.org/web/20160905071051/'
        'http://www.snowflamecomic.com/')
    stripUrl = url + '?comic=snowflame-%s-%s'
    firstStripUrl = stripUrl % ('01', '01')
    starter = bounceStarter
    endOfLife = True
    help = 'Index format: chapter-page'

    # Extracts chapter and page from a "snowflame-<chapter>-<page>" page URL;
    # compiled once instead of on every namer() call.
    _chapterPageSearch = compile(r'snowflame-([^-]+)-([^-]+)')

    def getIndexStripUrl(self, index):
        """Build the strip URL from an index of the form 'chapter-page'."""
        return self.stripUrl % tuple(index.split('-'))

    def namer(self, image_url, page_url):
        """Prefix the image file name with chapter and page of the strip.

        Returns '<chapter>-<page>-<filename>'. If the page URL does not
        contain a 'snowflame-<chapter>-<page>' part, the bare image file
        name is returned instead of failing on the missing match.
        """
        filename = image_url.rsplit('/', 1)[-1]
        mo = self._chapterPageSearch.search(page_url)
        if mo is None:
            return filename
        return "%s-%s-%s" % (mo.group(1), mo.group(2), filename)
330
331
class SodiumEyes(_WordPressScraper):
    # Only the site URL is set here; nothing else is overridden in this class.
    url = 'http://sodiumeyes.com/'
334
335
class SomethingPositive(_ParserScraper):
    help = 'Index format: mmddyyyy'
    multipleImagesPerStrip = True

    url = 'https://www.somethingpositive.net/'
    stripUrl = url + 'sp%s.shtml'

    # Images whose src contains "/sp" followed by digits.
    imageSearch = r'//img[re:test(@src, "/sp\d+")]'
    # Two alternatives: a text link, or a link wrapping a "previous" image.
    prevSearch = (
        '//a[contains(text(), "Previous")]',
        '//a[img[contains(@src, "previous")]]',
    )
344
345
class Sorcery101(_WPWebcomic):
    help = 'Index format: stripname'
    starter = indirectStarter

    baseUrl = 'https://kelmcdonald.com/sorcery-101/'
    stripUrl = baseUrl + '%s/'
    # The start URL and the first strip URL are the same page.
    url = stripUrl % 'sorcery101-ch-01'
    firstStripUrl = url
353
354
class SpaceFurries(_ParserScraper):
    adult = True
    endOfLife = True
    multipleImagesPerStrip = True

    url = 'http://www.spacefurrs.org/'
    firstStripUrl = url

    def fetchUrls(self, url, data, urlSearch):
        """Return the image URLs for every page, newest page first.

        The page count is read from the ``pagnum`` input field of the
        parsed page; ``url`` and ``urlSearch`` are not used.
        """
        # Website requires JS, so build the list of image URLs manually
        pageField = data.xpath('//input[@name="pagnum"]')[0]
        newestPage = int(pageField.get('value'))
        return ['%scomics/%s.jpg' % (self.url, number)
                for number in range(newestPage, 0, -1)]
369
370
class SpaceJunkArlia(_ParserScraper):
    help = 'Index format: number'

    # Strips are addressed through the ``strip_id`` query parameter.
    url = 'http://spacejunkarlia.com/'
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '0'

    # The previous link is the anchor whose text is "<".
    prevSearch = '//a[text()="<"]'
    imageSearch = '//div[{}]/img'.format(xpath_class('content'))
378
379
class SpaceTrawler(_ParserScraper):
    url = 'https://www.baldwinpage.com/spacetrawler/'
    firstStripUrl = url + '2010/01/01/spacetrawler-4/'

    # XPath expressions for the "previous" link and the strip image.
    prevSearch = '//a[@rel="prev"]'
    imageSearch = '//img[{}]'.format(xpath_class('size-full'))
385
386
class Spamusement(_BasicScraper):
    help = 'Index format: n (unpadded)'
    starter = indirectStarter

    url = 'http://spamusement.com/'
    rurl = escape(url)
    stripUrl = url + 'index.php/comics/view/%s'

    # Hand-written, case-insensitive regexes anchored on the escaped site URL.
    imageSearch = compile(
        r'<img src="(%sgfx/\d+\..+?)"' % rurl,
        IGNORECASE)
    prevSearch = compile(
        r'<a href="(%sindex.php/comics/view/.+?)">' % rurl,
        IGNORECASE)
    # The same compiled pattern doubles as the search for the latest strip.
    latestSearch = prevSearch
397
398
class SpareParts(_BasicScraper):
    help = 'Index format: yyyymmdd'

    # The start URL is pinned to a fixed date rather than the site root.
    baseUrl = 'http://www.sparepartscomics.com/'
    url = baseUrl + 'comics/?date=20080328'
    stripUrl = baseUrl + 'comics/index.php?date=%s'
    firstStripUrl = stripUrl % '20031022'

    imageSearch = compile(tagre(
        "img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
    # A single-quoted link tag immediately followed by "Previous Comic".
    prevSearch = compile(
        tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") +
        "Previous Comic")
408
409
class Spinnerette(_ComicControlScraper):
    # Only the site URL is set here (note: no trailing slash).
    url = 'http://www.spinnyverse.com'
412
413
class SPQRBlues(_WordPressScraper):
    # Only the site URL is set here; it points at the IV/ subdirectory.
    url = 'http://spqrblues.com/IV/'
416
417
class SSDD(_ParserScraper):
    help = 'Index format: yyyymmdd'
    adult = True
    multipleImagesPerStrip = True

    url = 'http://www.poisonedminds.com/'
    stripUrl = url + 'd/%s.html'
    firstStripUrl = stripUrl % '19980927'

    # Either images below /comics/ or <source> elements below /video/.
    imageSearch = (
        '//img[contains(@src, "/comics/")]',
        '//source[contains(@src, "/video/")]',
    )
    prevSearch = '//a[@rel="prev"]'

    def shouldSkipUrl(self, url, data):
        """Tell whether ``url`` is a known news, flash or comic-less page."""
        # Skip news, flash animation, and non-comic pages.
        newsPosts = (
            '20060712', '20060719', '20071225', '20110321',
            '20110830', '20110929', '20180927',
        )
        flashAnimations = (
            '20180401', '20170429', '20041203',
        )
        missingComics = (
            '20070402', '20060413', '20060412', '20060202',
            '20051026', '20050805', '20050530', '20050526',
            '20050525', '20050524', '20050523', '20050504',
            '20040705', '20030418', '20030214',
        )
        skipped = newsPosts + flashAnimations + missingComics
        return any(url == self.stripUrl % index for index in skipped)
463
464
class StandStillStaySilent(_ParserScraper):
    # Strip URLs take two values: a suffix for the script name and the
    # page number; the first strip uses an empty suffix.
    baseUrl = 'http://sssscomic.com/'
    url = baseUrl + 'comic2.php'
    stripUrl = baseUrl + 'comic%s.php?page=%s'
    firstStripUrl = stripUrl % ('', '1')

    prevSearch = '//a[./img[contains(@src, "nav_prev")]]'
    imageSearch = '//img[@class="comicnormal"]'

    def namer(self, imageUrl, pageUrl):
        """Prefix the page file name with '1-' or '2-'.

        '2' is used when the image URL contains 'adv2_comicpages', '1'
        otherwise; the 'page_' part is removed from the file name.
        """
        if 'adv2_comicpages' in imageUrl:
            chapter = '2'
        else:
            chapter = '1'
        pageFile = imageUrl.rsplit('/', 1)[-1].replace('page_', '')
        return chapter + '-' + pageFile
476
477
class StarCrossdDestiny(_ParserScraper):
    endOfLife = True
    help = 'Index format: nnnnnnnn'

    # The site is read through a web.archive.org snapshot.
    baseUrl = 'https://web.archive.org/web/20190918132321/http://starcrossd.net/'
    url = baseUrl + 'comic.html'
    stripUrl = baseUrl + 'archives/%s.html'
    firstStripUrl = stripUrl % '00000001'

    prevSearch = '//a[text()="prev"]'
    imageSearch = '//div[@id="comic"]//img'

    def namer(self, image_url, page_url):
        """Return '<directory>-<file name without extension>'.

        The image URL is normalised first so that the directory part is
        consistent across the different storage layouts.
        """
        if 'ch1' not in image_url:
            # At first all images were stored in a strips/ directory but
            # that was changed with the introduction of book2
            image_url = sub('(?:strips)|(?:images)', 'book1', image_url)
        elif 'strips' in image_url:
            image_url = image_url.replace('strips/', '')
        directory, filename = image_url.split('/')[-2:]
        stem = splitext(filename)[0]
        return '%s-%s' % (directory, stem)
499
500
class StarfireAgency(_WordPressScraper):
    url = 'http://starfire.poecatcomix.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'sfa-issue-1'

    def namer(self, imageUrl, pageUrl):
        """Return '<chapter>_<file name>' for an image.

        The chapter is read from the strip page (fetched via getPage);
        'chapter-1' is used when the page has no chapter link.
        """
        # Prepend chapter title to page filenames
        chapterLinks = self.getPage(pageUrl).xpath(
            '//div[@class="comic-chapter"]/a')
        if chapterLinks:
            chapter = chapterLinks[0].text.replace(' ', '-').lower()
        else:
            chapter = 'chapter-1'

        # Fix inconsistent filenames
        filename = imageUrl.rsplit('/', 1)[-1]
        isCover = 'cover' in filename.lower()
        if not isCover:
            filename = filename.replace('SFA', 'Page')
        return '%s_%s' % (chapter, filename)
520
521
class StarTrip(_ComicControlScraper):
    # Only the site URL is set here; nothing else is overridden in this class.
    url = 'https://www.startripcomic.com/'
524
525
class StationV3(_ParserScraper):
    help = 'Index format: yyyymmdd'

    url = 'http://www.stationv3.com/'
    stripUrl = url + 'd3/%s.html'
    firstStripUrl = stripUrl % '20170101'

    # The previous link is the anchor wrapping the "/previous2" image.
    prevSearch = '//a[img[contains(@src,"/previous2")]]'
    imageSearch = '//img[contains(@src,"/comics3/")]'
533
534
class StickyDillyBuns(_BasicScraper):
    help = 'Index format: name'

    url = 'http://www.stickydillybuns.com/'
    stripUrl = url + 'strips-sdb/%s'
    firstStripUrl = stripUrl % 'awesome_leading_man'

    # Regexes built with tagre(): the image source under /comics/ and the
    # link into /strips-sdb/ that is matched with before="cn[id]prev".
    imageSearch = compile(
        tagre("img", "src", r'([^"]*/comics/[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)',
              before="cn[id]prev"))
543
544
class StreetFighter(_ComicControlScraper):
    # Only the site URL is set here (note: no trailing slash).
    url = 'http://www.streetfightercomics.com'
547
548
class StringTheory(_WPNavi):
    # Site root of the comic.
    url = 'http://www.stringtheorycomic.com/'
    # Page recorded as the first strip, built relative to the site root.
    firstStripUrl = url + 'comics/chapterone/chapterone/'
552
553
class StrongFemaleProtagonist(_ParserScraper):
    help = 'Index format: issue-?/page-??'

    url = 'http://strongfemaleprotagonist.com/'
    stripUrl = url + '%s/'

    # With ``css`` enabled the search expressions below are written as
    # CSS selectors instead of XPath.
    css = True
    prevSearch = 'a.page-nav__item--left'
    imageSearch = 'article p img'

    def shouldSkipUrl(self, url, data):
        """Skip hiatus & non-comic pages."""
        skippedPages = (
            'guest-art/tuesday',
            'guest-art/friday',
            'guest-art/wednesday',
            'issue-5/newspaper',
            'issue-5/hiatus-1',
            'issue-5/hiatus-2',
            'issue-1/no-page',
        )
        return any(url == self.stripUrl % page for page in skippedPages)
573
574
class StuffNoOneToldMe(_BasicScraper):
    help = 'Index format: yyyy/mm/stripname'
    starter = indirectStarter
    multipleImagesPerStrip = True

    url = 'http://www.snotm.com/'
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '2010/05/01'

    # Shared pattern for links to post pages; used for both the latest
    # and the previous (older) link.
    olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
    latestSearch = compile(tagre("a", "href", olderHref, quote="'"))
    prevSearch = compile(
        tagre("a", "href", olderHref, quote="'", before="older-link"))

    # Images hosted on imgur, blogspot or googleusercontent, each followed
    # by one of the listed closing fragments.
    imageSearch = (
        compile(
            tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
            r"(?:</a>|<br />)"),
        compile(
            tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') +
            r"(?:(?:&nbsp;)?</a>|<span |<br />)"),
        compile(
            tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') +
            r"</a>"),
    )

    def namer(self, image_url, page_url):
        """Use page URL to construct meaningful image name."""
        # The last three path segments of the page URL are year, month
        # and the page file name.
        _, year, month, pageFile = page_url.rsplit('/', 3)
        stripname = pageFile.rsplit('.', 1)[0]
        _, imagename = image_url.rsplit('/', 1)
        return '-'.join((year, month, stripname, imagename))

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        skippedPages = (
            '2016/05/so-you-would-like-to-share-my-comics',  # no comic
            '2012/08/self-rant',  # no comic
            '2012/06/if-you-wonder-where-ive-been',  # video
            '2011/10/i-didnt-make-this-nor-have-anything-to',  # video
            '2010/12/first-snotm-fans-in-sao-paulo',  # no comic
            '2010/11/ear-infection',  # no comic
        )
        return any(url == self.stripUrl % page for page in skippedPages)
611
612
class SuburbanJungle(_ParserScraper):
    # Strips are addressed through the ``p`` query parameter.
    url = 'http://suburbanjungleclassic.com/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '10'

    prevSearch = '//div[@class="nav-previous"]/a'
    imageSearch = '//div[@id="comic"]/img'
619
620
class SuburbanJungleRoughHousing(_WordPressScraper):
    # Site root of the comic.
    url = 'http://roughhouse.suburbanjungle.com/'
    # Strip pages live below comic/<name>/.
    stripUrl = url + 'comic/%s/'
    # Page recorded as the first strip.
    firstStripUrl = stripUrl % 'rough-housing-issue-1-cover'
625
626
class Supercell(_ParserScraper):
    url = 'https://www.supercellcomic.com/'
    stripUrl = url + 'pages/%s.html'
    firstStripUrl = stripUrl % '0001'

    # The previous link is the navigation anchor wrapping the
    # "comnav_02" image.
    prevSearch = (
        '//div[@class="comicnav"]'
        '/a[./img[contains(@src, "comnav_02")]]'
    )
    imageSearch = '//div[@class="comicpage"]//img'
633
634
class SupernormalStep(_ComicControlScraper):
    # Only the site URL is set here; nothing else is overridden in this class.
    url = 'http://supernormalstep.com/'
637
638
class SurvivingTheWorld(_ParserScraper):
    help = 'Index format: name'
    multipleImagesPerStrip = True

    url = 'http://survivingtheworld.net/'
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % 'Lesson1'

    prevSearch = (
        '//li[@class="previous"]/a',
        # Special case for Lesson1296.html
        '//td/a',
    )
    imageSearch = (
        # When there's one image per strip
        '//div[@class="img"]/img',
        # When there's multiple images per strip
        '//div[@class="img"]/p/img',
        # Special case for Lesson1296.html
        '//td/img',
    )
654
655
class SwordsAndSausages(_ParserScraper):
    multipleImagesPerStrip = True

    # The start URL has no trailing slash; strip pages add "/<index>".
    url = 'https://www.tigerknight.com/ss'
    stripUrl = url + '/%s'
    firstStripUrl = stripUrl % '1-1'

    prevSearch = '//a[@class="prev"]'
    imageSearch = '//img[@class="comic-image"]'
663