1# -*- coding: utf-8 -*- 2# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs 3# Copyright (C) 2012-2014 Bastian Kleineidam 4# Copyright (C) 2015-2020 Tobias Gruetzmacher 5# Copyright (C) 2019-2020 Daniel Ring 6 7from __future__ import absolute_import, division, print_function 8 9from re import compile, escape 10 11from ..helpers import bounceStarter, indirectStarter 12from ..scraper import _BasicScraper, _ParserScraper 13from ..util import tagre 14from .common import _WordPressScraper, _WPNavi 15 16 17class OctopusPie(_ParserScraper): 18 url = 'http://www.octopuspie.com/' 19 rurl = escape(url) 20 stripUrl = url + '%s/' 21 firstStripUrl = stripUrl % '2007-05-14/001-pea-wiggle' 22 imageSearch = '//img[@title]' 23 prevSearch = '//a[@rel="prev"]' 24 help = 'Index format: yyyy-mm-dd/nnn-strip-name' 25 26 27class OffWhite(_ParserScraper): 28 stripUrl = 'http://off-white.eu/comic/%s/' 29 firstStripUrl = stripUrl % 'prologue-page-1-2' 30 url = firstStripUrl 31 imageSearch = '//img[@class="comic-page"]' 32 prevSearch = '//a[@rel="prev"]' 33 latestSearch = '//a[text()="A"]' 34 starter = indirectStarter 35 endOfLife = True 36 37 def fetchUrls(self, url, data, urlSearch): 38 # Fix missing page 39 if url == self.stripUrl % 'page-37': 40 return ['http://off-white.eu/ow_v2/wp-content/uploads/2011/01/new-037.jpg'] 41 return super(OffWhite, self).fetchUrls(url, data, urlSearch) 42 43 def getPrevUrl(self, url, data): 44 # Fix missing page 45 if url == self.stripUrl % 'page-37': 46 return self.stripUrl % 'page-36' 47 return super(OffWhite, self).getPrevUrl(url, data) 48 49 50class Oglaf(_ParserScraper): 51 url = 'http://oglaf.com/' 52 stripUrl = url + '%s/' 53 imageSearch = '//img[@id="strip"]' 54 # search for "previous story" only 55 prevSearch = '//link[@rel="prev"]' 56 # search for "next page" 57 nextSearch = '//link[@rel="next"]' 58 multipleImagesPerStrip = True 59 adult = True 60 61 def fetchUrls(self, url, data, search): 62 urls = [] 63 urls.extend(super(Oglaf, self).fetchUrls(url, data, search)) 64 if search == self.imageSearch: 65 try: 66 nexturls = self.fetchUrls(url, data, self.nextSearch) 67 except ValueError: 68 pass 69 else: 70 while nexturls and nexturls[0].startswith(url): 71 data = self.getPage(nexturls[0]) 72 urls.extend(super(Oglaf, self).fetchUrls(nexturls, data, search)) 73 nexturls = self.fetchUrls(url, data, self.nextSearch) 74 return urls 75 76 77class OhJoySexToy(_WPNavi): 78 url = 'http://www.ohjoysextoy.com/' 79 firstStripUrl = url + 'introduction/' 80 textSearch = '//div[@id="comic"]//img/@alt' 81 adult = True 82 83 84class OkCancel(_BasicScraper): 85 url = 'http://okcancel.com/' 86 rurl = escape(url) 87 stripUrl = url + 'comic/%s.html' 88 firstStripUrl = stripUrl % '1' 89 imageSearch = compile(tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl)) 90 prevSearch = compile(tagre("div", "class", "previous") + 91 tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl)) 92 help = 'Index format: yyyymmdd' 93 94 95class OmakeTheater(_ParserScraper): 96 url = 'http://omaketheater.com/comic/' 97 stripUrl = url + '%s' 98 firstStripUrl = stripUrl % '1' 99 css = True 100 imageSearch = ".comicImage img" 101 prevSearch = ".previous a" 102 help = 'Index format: number (unpadded)' 103 104 105class OnTheEdge(_WordPressScraper): 106 url = 'http://ontheedgecomics.com/' 107 firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/' 108 109 110class OopsComicAdventure(_WordPressScraper): 111 url = ('https://web.archive.org/web/20190102215141/' 112 'http://oopscomicadventure.com/') 113 endOfLife = True 114 115 116class Optipess(_WPNavi): 117 url = 'http://www.optipess.com/' 118 firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/' 119 textSearch = '//div[@id="comic"]//img/@alt' 120 textOptional = True 121 122 123class OrderOfTheBlackDog(_WordPressScraper): 124 url = 'http://orderoftheblackdog.com/' 125 stripUrl = url + 'comic/%s/' 126 firstStripUrl = stripUrl % 'issue-1-cover' 127 starter = bounceStarter 128 129 def namer(self, imageUrl, pageUrl): 130 # Fix inconsistent filenames 131 return '%s.%s' % (pageUrl.rsplit('/', 2)[-2], imageUrl.rsplit('.', 1)[-1]) 132 133 134class OriginalLife(_ParserScraper): 135 url = 'http://jaynaylor.com/originallife/' 136 stripUrl = url + 'archives/%s.html' 137 firstStripUrl = stripUrl % '2009/06/001' 138 imageSearch = '//img[contains(@src, "/originallife/comic/")]' 139 prevSearch = '//a[contains(text(), "Previous")]' 140 help = 'Index format: yyyy/mm/<your guess>' 141 142 143class OurHomePlanet(_ParserScraper): 144 url = 'http://www.ourhomeplanet.net/' 145 stripUrl = url + 'comic/%s' 146 firstStripUrl = stripUrl % '01' 147 imageSearch = '//a[@rel="next"]/img' 148 prevSearch = '//a[@rel="prev"]' 149 help = 'Index format: n (unpadded)' 150 151 152class OutOfPlacers(_WordPressScraper): 153 url = 'http://www.valsalia.com/' 154 stripUrl = url + 'comic/%s/' 155 firstStripUrl = stripUrl % 'prologue/01' 156 157 158class OverCompensating(_BasicScraper): 159 url = 'http://www.overcompensating.com/' 160 stripUrl = url + 'oc/index.php?comic=%s' 161 firstStripUrl = stripUrl % '0' 162 imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)')) 163 prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)', 164 after="go back")) 165 help = 'Index format: number' 166 167 168class OzyAndMillie(_WordPressScraper): 169 stripUrl = 'https://ozyandmillie.org/comic/%s/' 170 url = stripUrl % 'ozy-and-millie-2131' 171 firstStripUrl = stripUrl % 'ozy-and-millie-2' 172 endOfLife = True 173