# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://skeb.jp/"""

from .common import Extractor, Message
from .. import text


class SkebExtractor(Extractor):
    """Base class for skeb extractors"""
    category = "skeb"
    directory_fmt = ("{category}", "{creator[screen_name]}")
    filename_fmt = "{post_num}_{file_id}.{extension}"
    archive_fmt = "{post_num}_{file_id}_{content_category}"
    root = "https://skeb.jp"

    def __init__(self, match):
        """Store the user name from the URL match and read config options"""
        Extractor.__init__(self, match)
        self.user_name = match.group(1)
        # Both options are opt-in and default to False.
        self.thumbnails = self.config("thumbnails", False)
        self.sent_requests = self.config("sent-requests", False)

    def items(self):
        """Yield a Directory message per post and a Url message per file"""
        for user_name, post_num in self.posts():
            response, post = self._get_post_data(user_name, post_num)
            yield Message.Directory, post
            for data in self._get_urls_from_post(response, post):
                url = data["file_url"]
                yield Message.Url, url, text.nameext_from_url(url, data)

    def posts(self):
        """Return an iterable of (user_name, post_num) tuples

        Subclasses override this; the base implementation yields nothing
        so that items() does not fail when iterating over it.
        """
        return ()

    def _api_headers(self):
        """Return a fresh header dict used for every API request"""
        return {"Referer": self.root, "Authorization": "Bearer null"}

    def _pagination(self):
        """Yield (user_name, post_num) for all non-private works of a user

        Works created by the user (role "creator") are listed first.
        When the "sent-requests" option is enabled, the listing is then
        repeated from offset 0 with role "client".
        """
        url = "{}/api/users/{}/works".format(self.root, self.user_name)
        params = {"role": "creator", "sort": "date", "offset": 0}
        headers = self._api_headers()
        do_requests = self.sent_requests

        while True:
            posts = self.request(url, params=params, headers=headers).json()

            for post in posts:
                path = post["path"]
                # The last path segment is the post number; the first
                # segment is the user name with its leading character
                # (presumably "@", as in "/@name/works/N") stripped.
                post_num = path.rpartition("/")[2]
                user_name = path.split("/")[1][1:]
                if post["private"]:
                    self.log.debug("Skipping @%s/%s (private)",
                                   user_name, post_num)
                    continue
                yield user_name, post_num

            # A page with 30 or more entries is treated as "more to come".
            if len(posts) >= 30:
                params["offset"] += 30
                continue

            # Short page: the current role is exhausted.
            if not do_requests:
                return

            # Restart once from the beginning for works the user requested.
            params["offset"] = 0
            params["role"] = "client"
            do_requests = False

    def _get_post_data(self, user_name, post_num):
        """Fetch a single work and build its metadata

        Returns a (resp, post) tuple, where 'resp' is the decoded JSON
        API response and 'post' is the metadata dict derived from it.
        Client information is only included for non-anonymous works
        whose response contains a "client" entry.
        """
        url = "{}/api/users/{}/works/{}".format(
            self.root, user_name, post_num)
        resp = self.request(url, headers=self._api_headers()).json()
        creator = resp["creator"]
        post = {
            "post_num"         : post_num,
            "post_url"         : self.root + resp["path"],
            "body"             : resp["body"],
            "source_body"      : resp["source_body"],
            "translated_body"  : resp["translated"],
            "completed_at"     : resp["completed_at"],
            "date"             : text.parse_datetime(
                resp["completed_at"], "%Y-%m-%dT%H:%M:%S.%fZ"),
            "nsfw"             : resp["nsfw"],
            "anonymous"        : resp["anonymous"],
            "tags"             : resp["tag_list"],
            "genre"            : resp["genre"],
            "thanks"           : resp["thanks"],
            "source_thanks"    : resp["source_thanks"],
            "translated_thanks": resp["translated_thanks"],
            "creator": {
                "id"         : creator["id"],
                "name"       : creator["name"],
                "screen_name": creator["screen_name"],
                "avatar_url" : creator["avatar_url"],
                "header_url" : creator["header_url"],
            }
        }
        if not resp["anonymous"] and "client" in resp:
            client = resp["client"]
            post["client"] = {
                "id"         : client["id"],
                "name"       : client["name"],
                "screen_name": client["screen_name"],
                "avatar_url" : client["avatar_url"],
                "header_url" : client["header_url"],
            }
        return resp, post

    def _get_urls_from_post(self, resp, post):
        """Yield one metadata dict per downloadable file of a work

        Each yielded dict is a shallow copy of 'post' extended with
        "content_category", "file_id" and "file_url" (plus "original"
        for previews), so entries do not alias each other or the dict
        that was emitted with the Directory message.
        """
        if self.thumbnails and "og_image_url" in resp:
            thumb = post.copy()
            thumb["content_category"] = "thumb"
            thumb["file_id"] = "thumb"
            thumb["file_url"] = resp["og_image_url"]
            yield thumb

        for preview in resp["previews"]:
            info = preview["information"]
            data = post.copy()
            data["content_category"] = "preview"
            data["file_id"] = preview["id"]
            data["file_url"] = preview["url"]
            data["original"] = {
                "width"     : info["width"],
                "height"    : info["height"],
                "byte_size" : info["byte_size"],
                "duration"  : info["duration"],
                "frame_rate": info["frame_rate"],
                "software"  : info["software"],
                "extension" : info["extension"],
                "is_movie"  : info["is_movie"],
                "transcoder": info["transcoder"],
            }
            yield data


class SkebPostExtractor(SkebExtractor):
    """Extractor for a single skeb post"""
    subcategory = "post"
    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"

    def __init__(self, match):
        """Additionally store the post number (second pattern group)"""
        SkebExtractor.__init__(self, match)
        self.post_num = match.group(2)

    def posts(self):
        """Return the single (user_name, post_num) pair from the URL"""
        return ((self.user_name, self.post_num),)


class SkebUserExtractor(SkebExtractor):
    """Extractor for all posts from a skeb user"""
    subcategory = "user"
    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)"

    def posts(self):
        """Return every (user_name, post_num) pair found via pagination"""
        return self._pagination()