1# -*- coding: utf-8 -*- 2# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs 3# Copyright (C) 2012-2014 Bastian Kleineidam 4# Copyright (C) 2015-2020 Tobias Gruetzmacher 5# Copyright (C) 2019-2020 Daniel Ring 6 7from __future__ import absolute_import, division, print_function 8 9from re import compile, escape 10 11from ..scraper import _BasicScraper, _ParserScraper 12from ..helpers import bounceStarter, indirectStarter 13from ..util import tagre 14from .common import _WordPressScraper, _WPNavi, _WPWebcomic 15 16 17class CampComic(_BasicScraper): 18 url = 'http://campcomic.com/comic/' 19 rurl = escape(url) 20 stripUrl = url + '%s' 21 firstStripUrl = stripUrl % '6' 22 imageSearch = compile(tagre("img", "src", r'(http://hw1\.pa-cdn\.com/camp/assets/img/katie/comics/[^"]+)')) 23 prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev")) 24 help = 'Index Format: number' 25 26 27class CaptainSNES(_BasicScraper): 28 url = 'http://www.captainsnes.com/' 29 rurl = escape(url) 30 stripUrl = url + '%s/' 31 firstStripUrl = stripUrl % '2001/07/10/the-mistake' 32 imageSearch = compile(tagre("img", "src", r"(%scomics/[^']+)" % rurl, 33 quote="'")) 34 prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + 35 tagre("span", "class", "prev")) 36 multipleImagesPerStrip = True 37 help = 'Index format: yyyy/mm/dd/nnn-stripname' 38 39 40class CarryOn(_ParserScraper): 41 url = 'http://www.hirezfox.com/km/co/' 42 stripUrl = url + 'd/%s.html' 43 firstStripUrl = stripUrl % '20040701' 44 imageSearch = '//div[@class="strip"]/img' 45 prevSearch = '//a[text()="Previous Day"]' 46 multipleImagesPerStrip = True 47 48 def namer(self, imageUrl, pageUrl): 49 # Fix filenames of early comics 50 filename = imageUrl.rsplit('/', 1)[-1] 51 if filename[0].isdigit(): 52 filename = 'co' + filename 53 return filename 54 55 56class CarryOnAliceBlueAndTheGardensOfQ(CarryOn): 57 name = 'CarryOn/AliceBlueAndTheGardensOfQ' 58 url = 
'http://www.hirezfox.com/km/abgq/abgq1024/' 59 stripUrl = url + 'd/%s.html' 60 firstStripUrl = stripUrl % '20050401' 61 62 def namer(self, imageUrl, pageUrl): 63 # Fix filenames 64 return 'abgq' + imageUrl.rsplit('/', 1)[-1] 65 66 67class CarryOnLegendOfAnneBunny(CarryOn): 68 name = 'CarryOn/LegendOfAnneBunny' 69 url = 'http://www.hirezfox.com/km/loab/loab1024/' 70 stripUrl = url + 'd/%s.html' 71 firstStripUrl = stripUrl % '20040701' 72 73 def namer(self, imageUrl, pageUrl): 74 # Fix filenames of early comics 75 filename = imageUrl.rsplit('/', 1)[-1] 76 if filename[0].isdigit(): 77 filename = 'ab' + filename 78 return filename 79 80 81class CaseyAndAndy(_BasicScraper): 82 url = 'http://www.galactanet.com/comic/' 83 stripUrl = url + 'view.php?strip=%s' 84 firstStripUrl = stripUrl % '1' 85 imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)')) 86 prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)') + 87 tagre("img", "src", r'previous\.gif')) 88 help = 'Index format: number' 89 90 91class CasuallyKayla(_BasicScraper): 92 url = 'http://casuallykayla.com/' 93 stripUrl = url + '?p=%s' 94 firstStripUrl = stripUrl % '89' 95 imageSearch = compile(tagre("img", "src", 96 r'(http://casuallykayla\.com/comics/[^"]+)')) 97 prevSearch = compile(tagre("div", "class", r'nav-previous') + 98 tagre("a", "href", r'([^"]+)')) 99 help = 'Index format: nnn' 100 101 102class Catalyst(_BasicScraper): 103 baseUrl = "http://catalyst.spiderforest.com/" 104 rurl = escape(baseUrl) 105 url = baseUrl + "comic.php?comic_id=415" 106 stripUrl = baseUrl + "comic.php?comic_id=%s" 107 firstStripUrl = stripUrl % '1' 108 imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl)) 109 prevSearch = compile("<center>" + 110 tagre("a", "href", 111 r'(%scomic\.php\?comic_id=\d+)' % rurl)) 112 help = 'Index format: number' 113 114 115class CatAndGirl(_ParserScraper): 116 url = 'http://catandgirl.com/' 117 imageSearch = '//div[@id="comic"]//img' 118 prevSearch = 
'//a[@rel="prev"]' 119 120 121class CatenaCafe(_WordPressScraper): 122 name = 'CatenaManor/CatenaCafe' 123 url = 'https://catenamanor.com/' 124 stripUrl = url + 'comic/%s/' 125 firstStripUrl = stripUrl % 'reboot-book1cover-small' 126 127 128class CatenaManor(_ParserScraper): 129 baseUrl = ('https://web.archive.org/web/20141027141116/' 130 'http://catenamanor.com/') 131 url = baseUrl + 'archives' 132 stripUrl = baseUrl + '%s/' 133 firstStripUrl = stripUrl % '2003/07' 134 imageSearch = '//img[@class="comicthumbnail"]' 135 multipleImagesPerStrip = True 136 endOfLife = True 137 strips = [] 138 139 def starter(self): 140 # Retrieve archive links and select valid range 141 archivePage = self.getPage(self.url) 142 archiveStrips = archivePage.xpath('//div[@id="archivepage"]//a') 143 valid = False 144 for link in archiveStrips: 145 if self.stripUrl % '2012/01' in link.get('href'): 146 valid = True 147 elif self.stripUrl % '2003/06' in link.get('href'): 148 valid = False 149 if valid: 150 self.strips.append(link.get('href')) 151 return self.strips.pop(0) 152 153 def getPrevUrl(self, url, data): 154 return self.strips.pop(0) 155 156 157class CatNine(_WordPressScraper): 158 url = 'http://classic.cat-nine.net/' 159 stripUrl = url + 'comic/%s' 160 firstStripUrl = stripUrl % 'day-first' 161 endOfLife = True 162 163 164class CatNineTakeTwo(CatNine): 165 name = 'CatNine/TakeTwo' 166 url = 'http://cat-nine.net/' 167 stripUrl = url + 'comic/%s/' 168 firstStripUrl = stripUrl % 'episode-1/1-first-day-for-everything' 169 170 171class CatsAndCameras(_WordPressScraper): 172 url = 'https://catsncameras.com/cnc/' 173 stripUrl = url + 'comic/%s' 174 firstStripUrl = stripUrl % 'cnc-begins' 175 adult = True 176 177 178class CatVersusHuman(_ParserScraper): 179 url = 'http://www.catversushuman.com' 180 imageSearch = '//div[@class="post-body entry-content"]//img' 181 prevSearch = '//a[@id="Blog1_blog-pager-older-link"]' 182 latestSearch = '//a[@rel="bookmark"]' 183 starter = indirectStarter 184 


class CavesAndCritters(_WPWebcomic):
    url = 'https://cavesandcritters.com/?ao_confirm'
    stripUrl = 'https://cavesandcritters.com/cnc_webcomic/%s/'
    firstStripUrl = stripUrl % '01_000'
    adult = True


class Centralia2050(_WordPressScraper):
    url = 'http://centralia2050.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'ch1cover'
    starter = bounceStarter

    def namer(self, imageUrl, pageUrl):
        """Name the image after the page it appears on.

        Uses the last path segment of the page URL (with 'chapter'
        shortened to 'ch'), prefixes 'ch-1-' when the segment contains
        'page-' but no 'ch-', and appends the image URL's extension.
        """
        page = pageUrl.rstrip('/').rsplit('/', 1)[-1].replace('chapter', 'ch')
        if 'page-' in page and 'ch-' not in page:
            page = 'ch-1-' + page
        ext = imageUrl.rsplit('.', 1)[-1]
        return page + '.' + ext


class ChainsawSuit(_WordPressScraper):
    url = 'http://chainsawsuit.com/comic/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2008/03/12/strip-338'
    prevSearch = '//img[@alt="previous"]/..'
    help = 'Index format: yyyy/mm/dd/stripname'


class ChannelAte(_WPNavi):
    url = 'http://www.channelate.com/'


class ChasingTheSunset(_BasicScraper):
    url = 'http://www.fantasycomic.com/'
    stripUrl = url + 'index.php?p=%s'
    firstStripUrl = stripUrl % 'c1'
    imageSearch = compile(r'(/cmsimg/.+?)".+?comic-img')
    prevSearch = compile(r'<a href="(.+?)" title="" ><img src="(images/eye-prev.png|images/cn-prev.png)"')
    help = 'Index format: n'


class Chester5000XYV(_WordPressScraper):
    url = 'http://jessfink.com/Chester5000XYV/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '34'
    prevSearch = '//a[@rel="prev"]'
    adult = True
    help = 'Index format: n (unpadded)'

    def link_modifier(self, fromurl, tourl):
        """Bugfix for link to blog"""
        # Redirect the one known bad target (strip 714) to strip 710
        if tourl == self.stripUrl % '714':
            return self.stripUrl % '710'
        return tourl


class Chisuji(_WordPressScraper):
    url = 'http://www.chisuji.com/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '266'
    prevSearch = '//div[@class="nav-previous"]/a'
    help = 'Index format: nnn'


class CigarroAndCerveja(_ParserScraper):
    url = 'http://www.cigarro.ca/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'reacquaintance'
    # Plain XPath strings, no trailing comma: a trailing comma would turn
    # each of these into a one-element tuple, unlike every other
    # _ParserScraper search expression in this module.
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[contains(text()," Prev")]'


class ClanOfTheCats(_WordPressScraper):
    url = 'http://www.cotclassic.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'coming-home-2'

    def link_modifier(self, fromurl, tourl):
        """Rewrite the '/2954/' path segment of a target URL to
        '/2002-06-22/'."""
        # Fix broken navigation link
        return tourl.replace('/2954/', '/2002-06-22/')


class ClanOfTheCatsReunion(_WordPressScraper):
    name = 'ClanOfTheCats/Reunion'
    url = 'http://www.clanofthecats.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'cotc-reunion'


class Cloudscratcher(_ParserScraper):
    url = 'http://www.cloudscratcher.com/'
    stripUrl = url + 'comic.php?page=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//div[@id="main_content"]//img[contains(@src, "comic")]'
    prevSearch = '//a[./img[contains(@src, "previous-page")]]'
    latestSearch = '//a[@alt="Newest_Page"]'
    starter = indirectStarter


class CollegeCatastrophe(_ParserScraper):
    url = 'https://www.tigerknight.com/cc'
    stripUrl = url + '/%s'
    firstStripUrl = stripUrl % '2000-11-10'
    imageSearch = '//img[@class="comic-image"]'
    prevSearch = '//a[@class="prev"]'
    endOfLife = True
    multipleImagesPerStrip = True


class Comedity(_BasicScraper):
    url = 'http://www.comedity.com/'
    stripUrl = url + 'index.php?strip_id=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(r'<img src="(Comedity_files/.+?)"')
    prevSearch = compile(r'<a href="(/?index.php\?strip_id=\d+?)"> *<img alt=\"Prior Strip')
    help = 'Index format: n (no padding)'


class CommanderKitty(_WPNavi):
    url = 'http://www.commanderkitty.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/01/03/good-to-be-back'
    endOfLife = True


class CommitStrip(_ParserScraper):
    baseUrl = 'https://www.commitstrip.com/en/'
    url = baseUrl + '?setLocale=1'  # ensure the language cookie is set
    stripUrl = baseUrl + '%s/'
    firstStripUrl = stripUrl % '2012/02/22/interview'

    latestSearch = '//section//a'
    starter = indirectStarter
    imageSearch = '//article/div//img'
    prevSearch = '//span[@class="nav-previous"]/a'
    help = 'Index format: yyyy/mm/dd/strip-name'

    def namer(self, image_url, page_url):
        """Join the last four '/'-separated parts of the page URL with '-'.

        The image URL is not used.
        """
        parts = page_url.rstrip('/').rsplit('/')[-4:]
        return '-'.join(parts)

    def link_modifier(self, fromurl, tourl):
        """Return the target URL with every 'http:' replaced by 'https:'."""
        return tourl.replace('http:', 'https:')


class CommitStripFr(CommitStrip):
    baseUrl = 'https://www.commitstrip.com/fr/'
    url = baseUrl + '?setLocale=1'  # ensure the language cookie is set
    stripUrl = baseUrl + '%s/'
    firstStripUrl = stripUrl % '2012/02/22/interview'
    lang = 'fr'


class CompanyY(_BasicScraper):
    url = 'http://company-y.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/08/14/coming-soon'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("div", "class", r"nav-previous") +
                         tagre("a", "href", r'(%s[^"]+)' % rurl))
    help = 'Index format: yyyy/mm/dd/strip-name'


class Concession(_ParserScraper):
    url = 'http://concessioncomic.com/'
    stripUrl = url + 'index.php?pid=%s'
    firstStripUrl = stripUrl % '20060701'
    imageSearch = '//div[@id="comic"]/img[not(@class="preload")]'
    prevSearch = '//a[@class="nav-prev"]'
    adult = True
    endOfLife = True


class CorydonCafe(_ParserScraper):
    url = 'http://corydoncafe.com/'
    imageSearch = "//center[2]//img"
    prevSearch = '//a[@title="prev"]'
    multipleImagesPerStrip = True


class CourtingDisaster(_WordPressScraper):
    url = 'http://www.courting-disaster.com/'
    firstStripUrl = 'http://www.courting-disaster.com/comic/courting-disaster-17/'


class CraftedFables(_WordPressScraper):
    url = 'http://www.caf-fiends.net/comicpress/'
    prevSearch = '//a[@rel="prev"]'


class CrapIDrewOnMyLunchBreak(_BasicScraper):
    url = 'http://crap.jinwicked.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2003/07/30/jin-and-josh-decide-to-move'
    imageSearch = compile(tagre("img", "src", r'(http://crap\.jinwicked\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
    help = 'Index format: yyyy/mm/dd/name'


class CrimsonDark(_BasicScraper):
    url = 'http://www.davidcsimon.com/crimsondark/'
    stripUrl = url + 'index.php?view=comic&strip_id=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(r'src="(.+?strips/.+?)"')
    prevSearch = compile(r'<a href=[\'"](/crimsondark/index\.php\?view=comic&strip_id=\d+)[\'"]><img src=[\'"]themes/cdtheme/images/active_prev.png[\'"]')
    help = 'Index format: n (unpadded)'


class CrimsonFlag(_ParserScraper):
    url = 'http://crimsonflagcomic.com/'
    stripUrl = url + 'comic.php?comicID=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//img[@class="comicimage"]'
    prevSearch = '//a[contains(@class, "prev")]'


class CritterCoven(_WordPressScraper):
    url = 'http://crittercoven.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'critter-coven'


class CrossTimeCafe(_ParserScraper):
    stripUrl = 'http://www.whiteponyproductions.com/ctc/%s.htm'
    url = stripUrl % 'present'
    firstStripUrl = stripUrl % 'ctc0001'
    imageSearch = '//img[not(contains(@src, "graphics/"))]'
    prevSearch = '//a[.//text()="Back"]'
    multipleImagesPerStrip = True
    endOfLife = True


class CucumberQuest(_BasicScraper):
    url = 'http://cucumber.gigidigi.com/'
    rurl = escape(url)
    stripUrl = url + 'cq/%s/'
    firstStripUrl = stripUrl % 'page-1'
    startUrl = url + 'recent.html'
    starter = indirectStarter
    # Three alternative patterns for the upload filename: leading digits,
    # a 'ch<digits>' prefix, or a 'bonus' prefix.
    imageSearch = (
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/bonus[^"]+)' % rurl)),
    )
    prevSearch = compile(tagre("a", "href", r'(%scq/[^"]+/)' % rurl, after="previous"))
    latestSearch = compile(r'window\.location="(/cq/[^"]+/)"')
    help = 'Index format: stripname'


class Curtailed(_WordPressScraper):
    url = 'https://www.curtailedcomic.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '001-sneeze'

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        return 'comic/sitrep-1' in url or 'comic/be-right-back' in url


class Curvy(_ParserScraper):
    url = 'http://www.c.urvy.org/'
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '20080329'
    imageSearch = '//div[@id="theActualComic"]//img'
    prevSearch = '//div[@class="aNavbar"]//p[2]/a'
    help = 'Index format: yyyymmdd'


class CutLoose(_ParserScraper):
    url = 'https://www.cutloosecomic.com/'
    stripUrl = url + 'archive/comic/%s'
    firstStripUrl = stripUrl % '2016/02/02'
    imageSearch = '//img[@id="comic-container"]'
    prevSearch = '//a[@title="Previous Comic"]'
    nextSearch = '//a[@title="Next Comic"]'
    starter = bounceStarter
    adult = True

    def namer(self, imageUrl, pageUrl):
        """Prefix the image's basename with the last three path parts of
        the page URL, formatted as 'a-b-c_<basename>'."""
        # Index 0 is everything before the last three '/'-separated parts
        postDate = pageUrl.rsplit('/', 3)
        filename = imageUrl.rsplit('/', 1)[-1]
        return '%s-%s-%s_%s' % (postDate[1], postDate[2], postDate[3], filename)


class CyanideAndHappiness(_BasicScraper):
    url = 'http://www.explosm.net/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '15'
    imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))
    prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', after="nav-previous"))
    nextSearch = compile(tagre("a", "href", r"(/comics/\d+/)", after="nav-next"))
    help = 'Index format: n (unpadded)'

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        return "/comics/play-button.png" in data[0]

    def namer(self, image_url, page_url):
        """Return '<page number>_<image name>'.

        The page number is the second-to-last '/'-separated part of the
        page URL; the image name is the image URL's basename cut to 100
        characters.
        """
        imgname = image_url.split('/')[-1]
        # only get the first 100 chars for the image name
        imgname = imgname[:100]
        imgnum = page_url.split('/')[-2]
        return '%s_%s' % (imgnum, imgname)


class CynWolf(_ParserScraper):
    url = 'https://cynwolf.net/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2008/because'
    imageSearch = '//section[contains(@class, "comic")]//img'
    prevSearch = '//a[text()="\u2190"]'
    multipleImagesPerStrip = True
    endOfLife = True