1# -*- coding: utf-8 -*-
2
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License version 2 as
5# published by the Free Software Foundation.
6
7"""Extractors for https://skeb.jp/"""
8
9from .common import Extractor, Message
10from .. import text
11
12
class SkebExtractor(Extractor):
    """Common logic shared by the skeb.jp extractors"""
    category = "skeb"
    directory_fmt = ("{category}", "{creator[screen_name]}")
    filename_fmt = "{post_num}_{file_id}.{extension}"
    archive_fmt = "{post_num}_{file_id}_{content_category}"
    root = "https://skeb.jp"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first pattern group: the user name taken from the URL
        self.user_name = match.group(1)
        # both options are off unless set in the configuration
        self.thumbnails = self.config("thumbnails", False)
        self.sent_requests = self.config("sent-requests", False)

    def items(self):
        """Yield a Directory message per post and a Url message per file"""
        for creator_name, number in self.posts():
            api_data, metadata = self._get_post_data(creator_name, number)
            yield Message.Directory, metadata
            for file_data in self._get_urls_from_post(api_data, metadata):
                file_url = file_data["file_url"]
                yield (Message.Url, file_url,
                       text.nameext_from_url(file_url, file_data))

    def posts(self):
        """Return an iterable of (user_name, post_num) pairs

        items() unpacks every element into exactly these two values;
        this base implementation returns nothing and is overridden
        by the subclasses.
        """

    def _pagination(self):
        """Generate (user_name, post_num) for all non-private works

        The works listing is requested with role "creator" and an offset
        that grows by 30 per request; a response with fewer than 30
        entries is treated as the last page. When the "sent-requests"
        option is enabled, the listing is then walked a second time
        with role "client", starting again at offset 0.
        """
        endpoint = "{}/api/users/{}/works".format(self.root, self.user_name)
        query = {"role": "creator", "sort": "date", "offset": 0}
        headers = {"Referer": self.root, "Authorization": "Bearer null"}
        client_pass_pending = self.sent_requests

        while True:
            page = self.request(
                endpoint, params=query, headers=headers).json()

            for work in page:
                # user name and work number are both taken from the
                # work's own "path" value, not from self.user_name
                path = work["path"]
                post_num = path.rpartition("/")[2]
                user_name = path.split("/")[1][1:]
                if not work["private"]:
                    yield user_name, post_num
                else:
                    self.log.debug("Skipping @%s/%s (private)",
                                   user_name, post_num)

            if len(page) >= 30:
                # full page: there may be more, move on to the next one
                query["offset"] += 30
            elif client_pass_pending:
                # first listing exhausted: restart once with the other role
                query["offset"] = 0
                query['role'] = "client"
                client_pass_pending = False
            else:
                return

    def _get_post_data(self, user_name, post_num):
        """Request a single work and build its metadata

        Returns a (response, post) tuple: the decoded JSON response as
        received, and a new dict holding the selected metadata fields.
        A "client" entry is only added for non-anonymous works whose
        response contains one.
        """
        url = "{}/api/users/{}/works/{}".format(
            self.root, user_name, post_num)
        headers = {"Referer": self.root, "Authorization": "Bearer null"}
        resp = self.request(url, headers=headers).json()

        # fields copied for both the creator and the client profile
        profile_keys = ("id", "name", "screen_name",
                        "avatar_url", "header_url")

        creator = resp["creator"]
        post = {
            "post_num": post_num,
            "post_url": self.root + resp["path"],
            "body": resp["body"],
            "source_body": resp["source_body"],
            "translated_body": resp["translated"],
            "completed_at": resp["completed_at"],
            "date": text.parse_datetime(
                resp["completed_at"], "%Y-%m-%dT%H:%M:%S.%fZ"),
            "nsfw": resp["nsfw"],
            "anonymous": resp["anonymous"],
            "tags": resp["tag_list"],
            "genre": resp["genre"],
            "thanks": resp["thanks"],
            "source_thanks": resp["source_thanks"],
            "translated_thanks": resp["translated_thanks"],
        }
        post["creator"] = {key: creator[key] for key in profile_keys}

        if resp["anonymous"] or "client" not in resp:
            return resp, post

        client = resp["client"]
        post["client"] = {key: client[key] for key in profile_keys}
        return resp, post

    def _get_urls_from_post(self, resp, post):
        """Yield 'post' once for every file belonging to the work

        The very same dict object is updated in place and yielded again
        for each file. With the "thumbnails" option enabled and an
        "og_image_url" present in the response, that image comes first
        as content category "thumb"; afterwards every entry of
        resp["previews"] follows as content category "preview".
        """
        if self.thumbnails and "og_image_url" in resp:
            post["content_category"] = post["file_id"] = "thumb"
            post["file_url"] = resp["og_image_url"]
            yield post

        # fields copied from a preview's "information" object
        info_keys = (
            "width", "height", "byte_size", "duration", "frame_rate",
            "software", "extension", "is_movie", "transcoder",
        )

        for preview in resp["previews"]:
            post["content_category"] = "preview"
            post["file_id"] = preview["id"]
            post["file_url"] = preview["url"]
            info = preview["information"]
            post["original"] = {key: info[key] for key in info_keys}
            yield post
131
132
class SkebPostExtractor(SkebExtractor):
    """Extractor for one individual work on skeb.jp"""
    subcategory = "post"
    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"

    def __init__(self, match):
        SkebExtractor.__init__(self, match)
        # second pattern group: the digits following "/works/" in the URL
        self.post_num = match.group(2)

    def posts(self):
        """Return the single (user_name, post_num) pair given by the URL"""
        target = (self.user_name, self.post_num)
        return (target,)
144
145
class SkebUserExtractor(SkebExtractor):
    """Extractor for the works listed for a skeb.jp user"""
    subcategory = "user"
    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)"

    def posts(self):
        """Return the (user_name, post_num) pairs from the works listing"""
        listing = self._pagination()
        return listing
153