1# -*- coding: utf-8 -*- 2# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs 3# Copyright (C) 2012-2014 Bastian Kleineidam 4# Copyright (C) 2015-2020 Tobias Gruetzmacher 5# Copyright (C) 2019-2020 Daniel Ring 6 7from __future__ import absolute_import, division, print_function 8 9import json 10from re import compile, escape, IGNORECASE 11 12from ..helpers import indirectStarter, xpath_class 13from ..scraper import _BasicScraper, _ParserScraper 14from ..util import tagre 15from .common import _ComicControlScraper, _WordPressScraper, _WPWebcomic 16 17 18class MacHall(_BasicScraper): 19 url = 'http://www.machall.com/' 20 stripUrl = url + 'view.php?date=%s' 21 firstStripUrl = stripUrl % '2000-11-07' 22 imageSearch = compile(r'<img src="(comics/.+?)"') 23 prevSearch = compile(r'<a href="(.+?)"><img[^>]+?src=\'drop_shadow/previous.gif\'>') 24 help = 'Index format: yyyy-mm-dd' 25 26 27class MadamAndEve(_BasicScraper): 28 url = 'http://www.madamandeve.co.za/' 29 stripUrl = None 30 imageSearch = compile(tagre('img', 'src', r'(/cartoons/me\d{6}\.(gif|jpg))')) 31 multipleImagesPerStrip = True 32 33 34class Magellan(_ParserScraper): 35 url = 'http://magellanverse.com/' 36 css = True 37 imageSearch = '#comic-1 > a:first-child img' 38 prevSearch = '.nav-previous > a' 39 40 41class MagickChicks(_BasicScraper): 42 url = 'http://www.magickchicks.com/' 43 stripUrl = url + 'strips-mc/%s' 44 firstStripUrl = stripUrl % 'tis_but_a_trifle' 45 imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) 46 prevSearch = compile(tagre("a", "href", r'([^"]*/strips-mc/[^"]+)', 47 before="cn[id]prevt")) 48 help = 'Index format: name' 49 50 51class ManlyGuysDoingManlyThings(_ParserScraper): 52 url = 'http://thepunchlineismachismo.com/' 53 stripUrl = url + 'archives/comic/%s' 54 firstStripUrl = stripUrl % '02222010' 55 css = True 56 imageSearch = "#comic img" 57 prevSearch = ".comic-nav-previous" 58 help = 'Index format: ddmmyyyy' 59 60 61class MareInternum(_WordPressScraper): 62 url = 'https://www.marecomic.com/' 63 stripUrl = url + 'comic/%s/' 64 firstStripUrl = stripUrl % 'intro-page-1' 65 66 67class Marilith(_BasicScraper): 68 url = 'http://www.marilith.com/' 69 stripUrl = url + 'archive.php?date=%s' 70 firstStripUrl = stripUrl % '20041215' 71 imageSearch = compile(r'<img src="(comics/.+?)" border') 72 prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day') 73 help = 'Index format: yyyymmdd' 74 75 76class MarriedToTheSea(_ParserScraper): 77 url = 'http://marriedtothesea.com/' 78 stripUrl = url + '%s' 79 firstStripUrl = stripUrl % '022806' 80 imageSearch = '//div[%s]//p/img' % xpath_class('jumbotron') 81 prevSearch = '//a[contains(text(), "Yesterday")]' 82 help = 'Index format: mmddyy' 83 84 def namer(self, image_url, page_url): 85 unused, date, filename = image_url.rsplit('/', 2) 86 return '%s-%s' % (date, filename) 87 88 89class MaxOveracts(_ParserScraper): 90 url = 'http://occasionalcomics.com/' 91 stripUrl = url + '%s/' 92 css = True 93 imageSearch = '#comic img' 94 prevSearch = '.nav-previous > a' 95 help = 'Index format: nnn' 96 97 98class Meek(_WordPressScraper): 99 url = 'https://www.meekcomic.com/' 100 stripUrl = url + 'comic/%s/' 101 firstStripUrl = stripUrl % 'chapter-1-cover' 102 103 104class MegaTokyo(_BasicScraper): 105 url = 'https://megatokyo.com/' 106 stripUrl = url + 'strip/%s' 107 firstStripUrl = stripUrl % '1' 108 imageSearch = compile(r'"(strips/.+?)"', IGNORECASE) 109 prevSearch = compile(r'"(./strip/\d+?)">Prev') 110 help = 'Index format: nnnn' 111 112 113class Meiosis(_WordPressScraper): 114 url = 'http://meiosiswebcomic.com/' 115 116 117class Melonpool(_WordPressScraper): 118 url = 'http://www.melonpool.com/' 119 allow_errors = (500,) 120 121 122class MenageA3(_ComicControlScraper): 123 adult = True 124 url = 'http://www.ma3comic.com/' 125 126 @classmethod 127 def namer(cls, imageUrl, pageUrl): 128 """Remove random junk from image names.""" 129 imgname = imageUrl.split('/')[-1] 130 imgbase = imgname.rsplit('-', 1)[1] 131 return '%s' % (imgbase) 132 133 help = 'Index format: name' 134 135 136class Metacarpolis(_ComicControlScraper): 137 url = 'http://www.metacarpolis.com' 138 139 140class Misfile(_ComicControlScraper): 141 url = 'http://www.misfile.com/misfile/' 142 stripUrl = url + '%s' 143 firstStripUrl = stripUrl % '2004-02-22' 144 endOfLife = True 145 help = 'Index format: yyyy-mm-dd' 146 147 148class MisfileHellHigh(Misfile): 149 name = 'Misfile/HellHigh' 150 url = 'http://www.misfile.com/hell-high/' 151 stripUrl = url + '%s' 152 firstStripUrl = stripUrl % '2019-08-29' 153 help = 'Index format: yyyy-mm-dd' 154 155 156class MistyTheMouse(_WordPressScraper): 157 url = 'http://www.mistythemouse.com/' 158 prevSearch = '//a[@rel="prev"]' 159 firstStripUrl = 'http://www.mistythemouse.com/?p=12' 160 161 162class MonkeyUser(_ParserScraper): 163 url = 'https://www.monkeyuser.com/' 164 prevSearch = '//div[@title="previous"]/a' 165 imageSearch = '//div[@class="content"]/p/img' 166 167 168class MonsieurLeChien(_BasicScraper): 169 url = 'http://www.monsieur-le-chien.fr/' 170 stripUrl = url + 'index.php?planche=%s' 171 firstStripUrl = stripUrl % '2' 172 lang = 'fr' 173 imageSearch = compile(tagre("img", "src", r'(i/planches/[^"]+)')) 174 prevSearch = compile(tagre("a", "href", r'([^"]+)') + 175 tagre("img", "src", "i/precedent.gif")) 176 help = 'Index format: n' 177 178 179class Moonlace(_WPWebcomic): 180 stripUrl = 'http://dbcomics.darkblueworkshop.com/moonlace/%s/' 181 firstStripUrl = stripUrl % 'prologue/page-1' 182 url = firstStripUrl 183 adult = True 184 185 def starter(self): 186 # Set age-gate cookie 187 self.session.get(self.firstStripUrl + '?webcomic_birthday=1') 188 return indirectStarter(self) 189 190 def namer(self, imageUrl, pageUrl): 191 # Prepend chapter title to page filenames 192 chapter = pageUrl.rstrip('/').rsplit('/', 3)[-2] 193 chapter = chapter.replace('prologue', 'chapter-0-prologue') 194 chapter = chapter.replace('chapter-1', 'chapter-1-heritage') 195 chapter = chapter.replace('chapter2', 'chapter-2') 196 page = imageUrl.rsplit('/', 1)[-1] 197 return chapter + '_' + page 198 199 200class Moonsticks(_ParserScraper): 201 url = "http://moonsticks.org/" 202 imageSearch = "//div[@class='entry']//img" 203 prevSearch = u"//a[text()='\u00AB Prev']" 204 205 206class MrLovenstein(_BasicScraper): 207 url = 'http://www.mrlovenstein.com/' 208 stripUrl = url + 'comic/%s' 209 firstStripUrl = stripUrl % '1' 210 imageSearch = ( 211 # captures rollover comic 212 compile(tagre("div", "class", r'comic_image') + r'\s*.*\s*' + 213 tagre("div", "style", r'display: none;') + r'\s*.*\s' + 214 tagre("img", "src", r'(/images/comics/[^"]+)')), 215 # captures standard comic 216 compile(tagre("img", "src", r'(/images/comics/[^"]+)', 217 before="comic_main_image")), 218 ) 219 prevSearch = compile(tagre("a", "href", r'([^"]+)') + 220 tagre("img", "src", "/images/nav_left.png")) 221 textSearch = compile(r'<meta name="description" content="(.+?)" />') 222 help = 'Index Format: n' 223 224 225class MyCartoons(_BasicScraper): 226 url = 'http://mycartoons.de/' 227 rurl = escape(url) 228 stripUrl = url + 'page/%s' 229 imageSearch = ( 230 compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)), 231 compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)), 232 ) 233 prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) + 234 "«") 235 help = 'Index format: number' 236 lang = 'de' 237 238 239class MyLifeWithFel(_ParserScraper): 240 baseUrl = 'https://www.mylifewithfel.com/' 241 stripUrl = baseUrl + 'api/posts/%s' 242 firstStripUrl = stripUrl % '1' 243 url = firstStripUrl 244 adult = True 245 246 def starter(self): 247 # Retrieve comic metadata from API 248 data = self.session.get(self.url) 249 data.raise_for_status() 250 return self.stripUrl % data.json()['last']['id'] 251 252 def getPrevUrl(self, url, data): 253 return self.stripUrl % json.loads(data.text_content())['previous']['id'] 254 255 def fetchUrls(self, url, data, urlSearch): 256 return [self.baseUrl + json.loads(data.text_content())['post']['image']] 257 258 def namer(self, imageUrl, pageUrl): 259 return pageUrl.rsplit('/', 1)[-1] 260 261 262class MynarskiForest(_ParserScraper): 263 stripUrl = 'http://mynarskiforest.purrsia.com/xsl%s.htm' 264 url = stripUrl % '09_36' 265 firstStripUrl = stripUrl % '97_01' 266 imageSearch = '//img[not(contains(@src, "arrow"))]' 267 prevSearch = '//a[./img[contains(@src, "arrowbk")]]' 268 multipleImagesPerStrip = True 269 endOfLife = True 270 271 272class MysteriesOfTheArcana(_ParserScraper): 273 url = 'http://mysteriesofthearcana.com/' 274 imageSearch = '//div[@id="comic"]//img' 275 prevSearch = '//a[@class="navprevious"]' 276 277 278class MonsterUnderTheBed(_WordPressScraper): 279 url = 'http://themonsterunderthebed.net/' 280 stripUrl = url + '?comic=%s' 281 firstStripUrl = stripUrl % 'test-post' 282 adult = True 283